xref: /aosp_15_r20/external/XNNPACK/test/f16-pavgpool-minmax.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2020 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 //   Specification: test/f16-pavgpool-minmax.yaml
11 //   Generator: tools/generate-avgpool-test.py
12 
13 
14 #include <gtest/gtest.h>
15 
16 #include <xnnpack/common.h>
17 #include <xnnpack/isa-checks.h>
18 
19 #include <xnnpack/avgpool.h>
20 #include <xnnpack/pavgpool.h>
21 #include "avgpool-microkernel-tester.h"
22 
23 
24 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64
// 8 channels with 17 pooling elements (9 + 8, the two pooling_tile arguments);
// no other tester options are set.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  const size_t kChannels = 8;
  AvgPoolMicrokernelTester()
    .pooling_elements(kPoolingElements)
    .pooling_tile(9, 8)
    .channels(kChannels)
    .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
}
33 
// Same 17-element / 8-channel configuration, additionally setting input_offset to 11.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  const size_t kChannels = 8;
  AvgPoolMicrokernelTester()
    .pooling_elements(kPoolingElements)
    .pooling_tile(9, 8)
    .channels(kChannels)
    .input_offset(11)
    .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
}
43 
// 17 pooling elements, 8 channels, input_offset 11; runs once for every
// zero_index value in [0, 17).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_fulltile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  const size_t kChannels = 8;
  for (size_t zero_pos = 0; zero_pos < 17; ++zero_pos) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kPoolingElements)
      .pooling_tile(9, 8)
      .channels(kChannels)
      .input_offset(11)
      .zero_index(zero_pos)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
56 
// 17 pooling elements and 8 channels, with qmin set to 128.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  const size_t kChannels = 8;
  AvgPoolMicrokernelTester()
    .pooling_elements(kPoolingElements)
    .pooling_tile(9, 8)
    .channels(kChannels)
    .qmin(128)
    .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
}
66 
// 17 pooling elements and 8 channels, with qmax set to 128.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  const size_t kChannels = 8;
  AvgPoolMicrokernelTester()
    .pooling_elements(kPoolingElements)
    .pooling_tile(9, 8)
    .channels(kChannels)
    .qmax(128)
    .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
}
76 
// 8 channels; sweeps pooling_elements over 10..16, i.e. every size below the
// 17-element full tile that still exceeds the first pooling_tile argument (9).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kChannels = 8;
  for (size_t window = 10; window < 17; ++window) {
    AvgPoolMicrokernelTester()
      .pooling_elements(window)
      .pooling_tile(9, 8)
      .channels(kChannels)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
87 
// 8 channels, input_offset 11; sweeps pooling_elements over 10..16.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kChannels = 8;
  for (size_t window = 10; window < 17; ++window) {
    AvgPoolMicrokernelTester()
      .pooling_elements(window)
      .pooling_tile(9, 8)
      .channels(kChannels)
      .input_offset(11)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
99 
// 8 channels, input_offset 11; for each pooling_elements in 10..16, runs once
// per zero_index value in [0, pooling_elements).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_subtile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kChannels = 8;
  for (size_t window = 10; window < 17; ++window) {
    for (size_t zero_pos = 0; zero_pos < window; ++zero_pos) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(kChannels)
        .input_offset(11)
        .zero_index(zero_pos)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
114 
// 17 pooling elements; sweeps channel counts 16, 24, ..., 56 (multiples of 8).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  for (size_t num_channels = 16; num_channels < 64; num_channels += 8) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kPoolingElements)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
125 
// 17 pooling elements, input_offset 41; sweeps channel counts 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  for (size_t num_channels = 16; num_channels < 64; num_channels += 8) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kPoolingElements)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .input_offset(41)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
137 
// 17 pooling elements, input_offset 41; for each channel count in 16, 24, ..., 56,
// runs once per zero_index value in [0, 17).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_fulltile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  for (size_t num_channels = 16; num_channels < 64; num_channels += 8) {
    for (size_t zero_pos = 0; zero_pos < 17; ++zero_pos) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kPoolingElements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .input_offset(41)
        .zero_index(zero_pos)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
152 
// 17 pooling elements with qmin set to 128; sweeps channel counts 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  for (size_t num_channels = 16; num_channels < 64; num_channels += 8) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kPoolingElements)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
164 
// 17 pooling elements with qmax set to 128; sweeps channel counts 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  for (size_t num_channels = 16; num_channels < 64; num_channels += 8) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kPoolingElements)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
176 
// Cross product of pooling_elements 10..16 (outer loop) and channel counts
// 16, 24, ..., 56 (inner loop).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 10; window < 17; ++window) {
    for (size_t num_channels = 16; num_channels < 64; num_channels += 8) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
189 
// input_offset 67; cross product of pooling_elements 10..16 (outer loop) and
// channel counts 16, 24, ..., 56 (inner loop).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 10; window < 17; ++window) {
    for (size_t num_channels = 16; num_channels < 64; num_channels += 8) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .input_offset(67)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
203 
// input_offset 67; for pooling_elements 10..16 and channel counts 16, 24, ..., 56,
// runs once per zero_index value in [0, pooling_elements).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_subtile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 10; window < 17; ++window) {
    for (size_t num_channels = 16; num_channels < 64; num_channels += 8) {
      for (size_t zero_pos = 0; zero_pos < window; ++zero_pos) {
        AvgPoolMicrokernelTester()
          .pooling_elements(window)
          .pooling_tile(9, 8)
          .channels(num_channels)
          .input_offset(67)
          .zero_index(zero_pos)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
220 
// 17 pooling elements; sweeps channel counts 1..7.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kPoolingElements)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
231 
// 17 pooling elements, input_offset 11; sweeps channel counts 1..7.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kPoolingElements)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .input_offset(11)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
243 
// 17 pooling elements, input_offset 11; for each channel count in 1..7, runs
// once per zero_index value in [0, 17).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_fulltile_with_zero_index) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
    for (size_t zero_pos = 0; zero_pos < 17; ++zero_pos) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kPoolingElements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .input_offset(11)
        .zero_index(zero_pos)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
258 
// 17 pooling elements with qmin set to 128; sweeps channel counts 1..7.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kPoolingElements)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
270 
// 17 pooling elements with qmax set to 128; sweeps channel counts 1..7.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kPoolingElements)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
282 
// Cross product of pooling_elements 10..16 (outer loop) and channel counts
// 1..7 (inner loop).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 10; window < 17; ++window) {
    for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
295 
// input_offset 11; cross product of pooling_elements 10..16 (outer loop) and
// channel counts 1..7 (inner loop).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 10; window < 17; ++window) {
    for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .input_offset(11)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
309 
// input_offset 11; for pooling_elements 10..16 and channel counts 1..7, runs
// once per zero_index value in [0, pooling_elements).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_subtile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 10; window < 17; ++window) {
    for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
      for (size_t zero_pos = 0; zero_pos < window; ++zero_pos) {
        AvgPoolMicrokernelTester()
          .pooling_elements(window)
          .pooling_tile(9, 8)
          .channels(num_channels)
          .input_offset(11)
          .zero_index(zero_pos)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
326 
// 17 pooling elements; sweeps channel counts 9..15.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kPoolingElements)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
337 
// 17 pooling elements, input_offset 17; sweeps channel counts 9..15.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kPoolingElements)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .input_offset(17)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
349 
// 17 pooling elements, input_offset 17; for each channel count in 9..15, runs
// once per zero_index value in [0, 17).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_fulltile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
    for (size_t zero_pos = 0; zero_pos < 17; ++zero_pos) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kPoolingElements)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .input_offset(17)
        .zero_index(zero_pos)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
364 
// 17 pooling elements with qmin set to 128; sweeps channel counts 9..15.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kPoolingElements)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
376 
// 17 pooling elements with qmax set to 128; sweeps channel counts 9..15.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kPoolingElements = 17;
  for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kPoolingElements)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
388 
// Cross product of pooling_elements 10..16 (outer loop) and channel counts
// 9..15 (inner loop).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 10; window < 17; ++window) {
    for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
401 
// input_offset 17; cross product of pooling_elements 10..16 (outer loop) and
// channel counts 9..15 (inner loop).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 10; window < 17; ++window) {
    for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .input_offset(17)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
415 
// input_offset 17; for pooling_elements 10..16 and channel counts 9..15, runs
// once per zero_index value in [0, pooling_elements).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_subtile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 10; window < 17; ++window) {
    for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
      for (size_t zero_pos = 0; zero_pos < window; ++zero_pos) {
        AvgPoolMicrokernelTester()
          .pooling_elements(window)
          .pooling_tile(9, 8)
          .channels(num_channels)
          .input_offset(17)
          .zero_index(zero_pos)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
432 
// 8 channels; sweeps pooling_elements over 18, 21, ..., 33, all larger than
// the 17 elements (9 + 8) used by the fulltile tests.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_multipass) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kChannels = 8;
  for (size_t window = 18; window <= 33; window += 3) {
    AvgPoolMicrokernelTester()
      .pooling_elements(window)
      .pooling_tile(9, 8)
      .channels(kChannels)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
443 
// 8 channels, input_offset 11; sweeps pooling_elements over 18, 21, ..., 33.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_multipass_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kChannels = 8;
  for (size_t window = 18; window <= 33; window += 3) {
    AvgPoolMicrokernelTester()
      .pooling_elements(window)
      .pooling_tile(9, 8)
      .channels(kChannels)
      .input_offset(11)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
455 
// 8 channels, input_offset 11; for each pooling_elements in 18, 21, ..., 33,
// runs once per zero_index value in [0, pooling_elements).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_multipass_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kChannels = 8;
  for (size_t window = 18; window <= 33; window += 3) {
    for (size_t zero_pos = 0; zero_pos < window; ++zero_pos) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(kChannels)
        .input_offset(11)
        .zero_index(zero_pos)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
470 
// 8 channels with qmin set to 128; sweeps pooling_elements over 18, 21, ..., 33.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_multipass_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kChannels = 8;
  for (size_t window = 18; window <= 33; window += 3) {
    AvgPoolMicrokernelTester()
      .pooling_elements(window)
      .pooling_tile(9, 8)
      .channels(kChannels)
      .qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
482 
// 8 channels with qmax set to 128; sweeps pooling_elements over 18, 21, ..., 33.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_multipass_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t kChannels = 8;
  for (size_t window = 18; window <= 33; window += 3) {
    AvgPoolMicrokernelTester()
      .pooling_elements(window)
      .pooling_tile(9, 8)
      .channels(kChannels)
      .qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
494 
// Cross product of pooling_elements 18, 21, ..., 33 (outer loop) and channel
// counts 16, 24, ..., 56 (inner loop).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_multipass) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 18; window <= 33; window += 3) {
    for (size_t num_channels = 16; num_channels < 64; num_channels += 8) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
507 
// input_offset 67; cross product of pooling_elements 18, 21, ..., 33 (outer
// loop) and channel counts 16, 24, ..., 56 (inner loop).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_multipass_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 18; window <= 33; window += 3) {
    for (size_t num_channels = 16; num_channels < 64; num_channels += 8) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .input_offset(67)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
521 
// input_offset 67; for pooling_elements 18, 21, ..., 33 and channel counts
// 16, 24, ..., 56, runs once per zero_index value in [0, pooling_elements).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_multipass_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 18; window <= 33; window += 3) {
    for (size_t num_channels = 16; num_channels < 64; num_channels += 8) {
      for (size_t zero_pos = 0; zero_pos < window; ++zero_pos) {
        AvgPoolMicrokernelTester()
          .pooling_elements(window)
          .pooling_tile(9, 8)
          .channels(num_channels)
          .input_offset(67)
          .zero_index(zero_pos)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
538 
// qmin set to 128; cross product of pooling_elements 18, 21, ..., 33 (outer
// loop) and channel counts 16, 24, ..., 56 (inner loop).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_multipass_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 18; window <= 33; window += 3) {
    for (size_t num_channels = 16; num_channels < 64; num_channels += 8) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .qmin(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
552 
// qmax set to 128; cross product of pooling_elements 18, 21, ..., 33 (outer
// loop) and channel counts 16, 24, ..., 56 (inner loop).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_multipass_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 18; window <= 33; window += 3) {
    for (size_t num_channels = 16; num_channels < 64; num_channels += 8) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .qmax(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
566 
// Cross product of pooling_elements 18, 21, ..., 33 (outer loop) and channel
// counts 1..7 (inner loop).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_multipass) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 18; window <= 33; window += 3) {
    for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
579 
// input_offset 8; cross product of pooling_elements 18, 21, ..., 33 (outer
// loop) and channel counts 1..7 (inner loop).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_multipass_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 18; window <= 33; window += 3) {
    for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .input_offset(8)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
593 
// input_offset 8; for pooling_elements 18, 21, ..., 33 and channel counts 1..7,
// runs once per zero_index value in [0, pooling_elements).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_multipass_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 18; window <= 33; window += 3) {
    for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
      for (size_t zero_pos = 0; zero_pos < window; ++zero_pos) {
        AvgPoolMicrokernelTester()
          .pooling_elements(window)
          .pooling_tile(9, 8)
          .channels(num_channels)
          .input_offset(8)
          .zero_index(zero_pos)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
610 
// qmin set to 128; cross product of pooling_elements 18, 21, ..., 33 (outer
// loop) and channel counts 1..7 (inner loop).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_multipass_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 18; window <= 33; window += 3) {
    for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .qmin(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
624 
// qmax set to 128; cross product of pooling_elements 18, 21, ..., 33 (outer
// loop) and channel counts 1..7 (inner loop).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_multipass_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 18; window <= 33; window += 3) {
    for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .qmax(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
638 
// Cross product of pooling_elements 18, 21, ..., 33 (outer loop) and channel
// counts 9..15 (inner loop).
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_multipass) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 18; window <= 33; window += 3) {
    for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
651 
// 9p8x kernel, 9..15 channels, input_offset(17): pooling windows 18, 21, ..., 33 with a (9, 8) tile.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_multipass_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 18; window <= 33; window += 3) {
    for (size_t ch = 9; ch < 16; ++ch) {
      // A fresh tester is configured for every (window, ch) combination.
      AvgPoolMicrokernelTester().pooling_elements(window).pooling_tile(9, 8).channels(ch).input_offset(17)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
665 
// 9p8x kernel, 9..15 channels, input_offset(17): every zero_index in [0, window) is tried
// for pooling windows 18, 21, ..., 33 with a (9, 8) tile.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_multipass_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 18; window <= 33; window += 3) {
    for (size_t ch = 9; ch < 16; ++ch) {
      for (size_t zi = 0; zi < window; ++zi) {
        AvgPoolMicrokernelTester()
          .pooling_elements(window).pooling_tile(9, 8).channels(ch).input_offset(17).zero_index(zi)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
682 
// 9p8x kernel, 9..15 channels, qmin(128): pooling windows 18, 21, ..., 33 with a (9, 8) tile.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_multipass_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 18; window <= 33; window += 3) {
    for (size_t ch = 9; ch < 16; ++ch) {
      // A fresh tester is configured for every (window, ch) combination.
      AvgPoolMicrokernelTester().pooling_elements(window).pooling_tile(9, 8).channels(ch).qmin(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
696 
// 9p8x kernel, 9..15 channels, qmax(128): pooling windows 18, 21, ..., 33 with a (9, 8) tile.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_multipass_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 18; window <= 33; window += 3) {
    for (size_t ch = 9; ch < 16; ++ch) {
      // A fresh tester is configured for every (window, ch) combination.
      AvgPoolMicrokernelTester().pooling_elements(window).pooling_tile(9, 8).channels(ch).qmax(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
710 
// 9p8x kernel, 2..5 output pixels: windows {10, 16, 18} crossed with channels 1, 8, ..., 36.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Window sizes are fixed for the whole test, so the list is built once.
  const std::vector<size_t> windows{10, 16, 18};
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t window : windows) {
      for (size_t ch = 1; ch <= 40; ch += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(pixels).pooling_elements(window).pooling_tile(9, 8).channels(ch)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
726 
// 9p8x kernel, 2..5 output pixels, input_offset(43): windows {10, 16, 18} crossed with
// channels 1, 8, ..., 36.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Window sizes are fixed for the whole test, so the list is built once.
  const std::vector<size_t> windows{10, 16, 18};
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t window : windows) {
      for (size_t ch = 1; ch <= 40; ch += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(pixels).pooling_elements(window).pooling_tile(9, 8).channels(ch).input_offset(43)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
743 
// 9p8x kernel, 2..5 output pixels, input_offset(43): every zero_index in [0, window) is tried
// for windows {10, 16, 18} crossed with channels 1, 8, ..., 36.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Window sizes are fixed for the whole test, so the list is built once.
  const std::vector<size_t> windows{10, 16, 18};
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t window : windows) {
      for (size_t ch = 1; ch <= 40; ch += 7) {
        for (size_t zi = 0; zi < window; ++zi) {
          AvgPoolMicrokernelTester()
            .output_pixels(pixels).pooling_elements(window).pooling_tile(9, 8).channels(ch)
            .input_offset(43).zero_index(zi)
            .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
        }
      }
    }
  }
}
763 
// 9p8x kernel, 2..5 output pixels, qmin(128): windows {10, 16, 18} crossed with
// channels 1, 8, ..., 36.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Window sizes are fixed for the whole test, so the list is built once.
  const std::vector<size_t> windows{10, 16, 18};
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t window : windows) {
      for (size_t ch = 1; ch <= 40; ch += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(pixels).pooling_elements(window).pooling_tile(9, 8).channels(ch).qmin(128)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
780 
// 9p8x kernel, 2..5 output pixels, qmax(128): windows {10, 16, 18} crossed with
// channels 1, 8, ..., 36.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Window sizes are fixed for the whole test, so the list is built once.
  const std::vector<size_t> windows{10, 16, 18};
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t window : windows) {
      for (size_t ch = 1; ch <= 40; ch += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(pixels).pooling_elements(window).pooling_tile(9, 8).channels(ch).qmax(128)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
797 
// 9p8x kernel, 2..5 output pixels, output_stride(43): windows {10, 16, 18} crossed with
// channels 1, 8, ..., 36.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Window sizes are fixed for the whole test, so the list is built once.
  const std::vector<size_t> windows{10, 16, 18};
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t window : windows) {
      for (size_t ch = 1; ch <= 40; ch += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(pixels).pooling_elements(window).pooling_tile(9, 8).channels(ch).output_stride(43)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
814 
// 9p8x kernel, 2..5 output pixels, output_stride(43): every step in [2, window] is tried
// for windows {10, 16, 18} crossed with channels 1, 8, ..., 36.
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Window sizes are fixed for the whole test, so the list is built once.
  const std::vector<size_t> windows{10, 16, 18};
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t window : windows) {
      for (size_t ch = 1; ch <= 40; ch += 7) {
        for (size_t stp = 2; stp <= window; ++stp) {
          AvgPoolMicrokernelTester()
            .output_pixels(pixels).pooling_elements(window).pooling_tile(9, 8)
            .step(stp).channels(ch).output_stride(43)
            .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
        }
      }
    }
  }
}
834 #endif  // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64
835 
836 
837 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64
// 9x kernel, single configuration: 9 pooling elements, tile 9, 8 channels.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_eq_8_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(8)
    .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
}
846 
// 9x kernel, single configuration: 9 pooling elements, tile 9, 8 channels, input_offset(11).
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_eq_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(8).input_offset(11)
    .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
}
856 
// 9x kernel, 9 pooling elements, tile 9, 8 channels, input_offset(11): zero_index runs over 0..8.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_eq_8_unipass_fulltile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t zi = 0; zi < 9; ++zi) {
    AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(8).input_offset(11).zero_index(zi)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
869 
// 9x kernel, single configuration: 9 pooling elements, tile 9, 8 channels, qmin(128).
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_eq_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(8).qmin(128)
    .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
}
879 
// 9x kernel, single configuration: 9 pooling elements, tile 9, 8 channels, qmax(128).
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_eq_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(8).qmax(128)
    .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
}
889 
// 9x kernel, 8 channels, tile 9: pooling windows 2..8 (all smaller than the tile).
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_eq_8_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 2; window < 9; ++window) {
    AvgPoolMicrokernelTester().pooling_elements(window).pooling_tile(9).channels(8)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
900 
// 9x kernel, 8 channels, tile 9, input_offset(11): pooling windows 2..8.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_eq_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 2; window < 9; ++window) {
    AvgPoolMicrokernelTester().pooling_elements(window).pooling_tile(9).channels(8).input_offset(11)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
912 
// 9x kernel, 8 channels, tile 9, input_offset(11): for pooling windows 2..8,
// zero_index runs over [0, window).
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_eq_8_unipass_subtile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 2; window < 9; ++window) {
    for (size_t zi = 0; zi < window; ++zi) {
      AvgPoolMicrokernelTester()
        .pooling_elements(window).pooling_tile(9).channels(8).input_offset(11).zero_index(zi)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
927 
// 9x kernel, 9 pooling elements, tile 9: channel counts 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_div_8_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t ch = 16; ch < 64; ch += 8) {
    AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(ch)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
938 
// 9x kernel, 9 pooling elements, tile 9, input_offset(67): channel counts 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_div_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t ch = 16; ch < 64; ch += 8) {
    AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(ch).input_offset(67)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
950 
// 9x kernel, 9 pooling elements, tile 9, input_offset(67): channel counts 16, 24, ..., 56,
// each with zero_index running over 0..8.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_div_8_unipass_fulltile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t ch = 16; ch < 64; ch += 8) {
    for (size_t zi = 0; zi < 9; ++zi) {
      AvgPoolMicrokernelTester()
        .pooling_elements(9).pooling_tile(9).channels(ch).input_offset(67).zero_index(zi)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
965 
// 9x kernel, 9 pooling elements, tile 9, qmin(128): channel counts 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_div_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t ch = 16; ch < 64; ch += 8) {
    AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(ch).qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
977 
// 9x kernel, 9 pooling elements, tile 9, qmax(128): channel counts 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_div_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t ch = 16; ch < 64; ch += 8) {
    AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(ch).qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
989 
// 9x kernel, tile 9: pooling windows 2..8 crossed with channel counts 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_div_8_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 2; window < 9; ++window) {
    for (size_t ch = 16; ch < 64; ch += 8) {
      AvgPoolMicrokernelTester().pooling_elements(window).pooling_tile(9).channels(ch)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
1002 
// 9x kernel, tile 9, input_offset(67): pooling windows 2..8 crossed with
// channel counts 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_div_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 2; window < 9; ++window) {
    for (size_t ch = 16; ch < 64; ch += 8) {
      AvgPoolMicrokernelTester().pooling_elements(window).pooling_tile(9).channels(ch).input_offset(67)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
1016 
// 9x kernel, tile 9, input_offset(67): pooling windows 2..8 crossed with
// channel counts 16, 24, ..., 56, each with zero_index running over [0, window).
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_div_8_unipass_subtile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 2; window < 9; ++window) {
    for (size_t ch = 16; ch < 64; ch += 8) {
      for (size_t zi = 0; zi < window; ++zi) {
        AvgPoolMicrokernelTester()
          .pooling_elements(window).pooling_tile(9).channels(ch).input_offset(67).zero_index(zi)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
1033 
// 9x kernel, 9 pooling elements, tile 9: channel counts 1..7.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_lt_8_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t ch = 1; ch < 8; ++ch) {
    AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(ch)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
1044 
// 9x kernel, 9 pooling elements, tile 9, input_offset(11): channel counts 1..7.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_lt_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t ch = 1; ch < 8; ++ch) {
    AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(ch).input_offset(11)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
1056 
// 9x kernel, 9 pooling elements, tile 9, input_offset(11): channel counts 1..7,
// each with zero_index running over 0..8.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_lt_8_unipass_fulltile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t ch = 1; ch < 8; ++ch) {
    for (size_t zi = 0; zi < 9; ++zi) {
      AvgPoolMicrokernelTester()
        .pooling_elements(9).pooling_tile(9).channels(ch).input_offset(11).zero_index(zi)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
1071 
// 9x kernel, 9 pooling elements, tile 9, qmin(128): channel counts 1..7.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_lt_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t ch = 1; ch < 8; ++ch) {
    AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(ch).qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
1083 
// 9x kernel, 9 pooling elements, tile 9, qmax(128): channel counts 1..7.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_lt_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t ch = 1; ch < 8; ++ch) {
    AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(ch).qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
1095 
// 9x kernel, tile 9: pooling windows 2..8 crossed with channel counts 1..7.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_lt_8_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 2; window < 9; ++window) {
    for (size_t ch = 1; ch < 8; ++ch) {
      AvgPoolMicrokernelTester().pooling_elements(window).pooling_tile(9).channels(ch)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
1108 
// 9x kernel, tile 9, input_offset(11): pooling windows 2..8 crossed with channel counts 1..7.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_lt_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 2; window < 9; ++window) {
    for (size_t ch = 1; ch < 8; ++ch) {
      AvgPoolMicrokernelTester().pooling_elements(window).pooling_tile(9).channels(ch).input_offset(11)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
1122 
// 9x kernel, tile 9, input_offset(11): pooling windows 2..8 crossed with channel counts 1..7,
// each with zero_index running over [0, window).
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_lt_8_unipass_subtile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 2; window < 9; ++window) {
    for (size_t ch = 1; ch < 8; ++ch) {
      for (size_t zi = 0; zi < window; ++zi) {
        AvgPoolMicrokernelTester()
          .pooling_elements(window).pooling_tile(9).channels(ch).input_offset(11).zero_index(zi)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
1139 
// 9x kernel, 9 pooling elements, tile 9: channel counts 9..15.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_gt_8_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t ch = 9; ch < 16; ++ch) {
    AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(ch)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
1150 
// 9x kernel, 9 pooling elements, tile 9, input_offset(17): channel counts 9..15.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_gt_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t ch = 9; ch < 16; ++ch) {
    AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(ch).input_offset(17)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
1162 
// 9x kernel, 9 pooling elements, tile 9, input_offset(17): channel counts 9..15,
// each with zero_index running over 0..8.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_gt_8_unipass_fulltile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t ch = 9; ch < 16; ++ch) {
    for (size_t zi = 0; zi < 9; ++zi) {
      AvgPoolMicrokernelTester()
        .pooling_elements(9).pooling_tile(9).channels(ch).input_offset(17).zero_index(zi)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
1177 
// 9x kernel, 9 pooling elements, tile 9, qmin(128): channel counts 9..15.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_gt_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t ch = 9; ch < 16; ++ch) {
    AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(ch).qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
1189 
// 9x kernel, 9 pooling elements, tile 9, qmax(128): channel counts 9..15.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_gt_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t ch = 9; ch < 16; ++ch) {
    AvgPoolMicrokernelTester().pooling_elements(9).pooling_tile(9).channels(ch).qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
1201 
// 9x kernel, tile 9: pooling windows 2..8 crossed with channel counts 9..15.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_gt_8_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 2; window < 9; ++window) {
    for (size_t ch = 9; ch < 16; ++ch) {
      AvgPoolMicrokernelTester().pooling_elements(window).pooling_tile(9).channels(ch)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
1214 
// 9x kernel, tile 9, input_offset(17): pooling windows 2..8 crossed with channel counts 9..15.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_gt_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 2; window < 9; ++window) {
    for (size_t ch = 9; ch < 16; ++ch) {
      AvgPoolMicrokernelTester().pooling_elements(window).pooling_tile(9).channels(ch).input_offset(17)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
1228 
// 9x kernel, tile 9, input_offset(17): pooling windows 2..8 crossed with channel counts 9..15,
// each with zero_index running over [0, window).
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_gt_8_unipass_subtile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t window = 2; window < 9; ++window) {
    for (size_t ch = 9; ch < 16; ++ch) {
      for (size_t zi = 0; zi < window; ++zi) {
        AvgPoolMicrokernelTester()
          .pooling_elements(window).pooling_tile(9).channels(ch).input_offset(17).zero_index(zi)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
1245 
// 9x kernel, 2..5 output pixels, pooling_tile(9, 0): windows {2, 8, 9} crossed with
// channels 1, 8, ..., 36.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, few_output_pixels) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Window sizes are fixed for the whole test, so the list is built once.
  const std::vector<size_t> windows{2, 8, 9};
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t window : windows) {
      for (size_t ch = 1; ch <= 40; ch += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(pixels).pooling_elements(window).pooling_tile(9, 0).channels(ch)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
1261 
// 9x kernel, 2..5 output pixels, pooling_tile(9, 0), input_offset(43): windows {2, 8, 9}
// crossed with channels 1, 8, ..., 36.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Window sizes are fixed for the whole test, so the list is built once.
  const std::vector<size_t> windows{2, 8, 9};
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t window : windows) {
      for (size_t ch = 1; ch <= 40; ch += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(pixels).pooling_elements(window).pooling_tile(9, 0).channels(ch).input_offset(43)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
1278 
// 9x kernel, 2..5 output pixels, pooling_tile(9, 0), input_offset(43): every zero_index in
// [0, window) is tried for windows {2, 8, 9} crossed with channels 1, 8, ..., 36.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, few_output_pixels_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Window sizes are fixed for the whole test, so the list is built once.
  const std::vector<size_t> windows{2, 8, 9};
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (size_t window : windows) {
      for (size_t ch = 1; ch <= 40; ch += 7) {
        for (size_t zi = 0; zi < window; ++zi) {
          AvgPoolMicrokernelTester()
            .output_pixels(pixels).pooling_elements(window).pooling_tile(9, 0).channels(ch)
            .input_offset(43).zero_index(zi)
            .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
        }
      }
    }
  }
}
1298 
// Sweeps 2..5 output pixels, pooling sizes {2, 8, 9} and channel counts
// 1..36 (stride 7) with pooling_tile(9, 0) and qmin(128).
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, few_output_pixels_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const std::vector<size_t> pooling_sizes{2, 8, 9};
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (const size_t kernel_size : pooling_sizes) {
      for (size_t c = 1; c <= 40; c += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(kernel_size)
          .pooling_tile(9, 0)
          .channels(c)
          .qmin(128)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
1315 
// Sweeps 2..5 output pixels, pooling sizes {2, 8, 9} and channel counts
// 1..36 (stride 7) with pooling_tile(9, 0) and qmax(128).
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, few_output_pixels_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const std::vector<size_t> pooling_sizes{2, 8, 9};
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (const size_t kernel_size : pooling_sizes) {
      for (size_t c = 1; c <= 40; c += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(kernel_size)
          .pooling_tile(9, 0)
          .channels(c)
          .qmax(128)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
1332 
// Sweeps 2..5 output pixels, pooling sizes {2, 8, 9} and channel counts
// 1..36 (stride 7) with pooling_tile(9, 0) and output_stride(43).
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, few_output_pixels_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const std::vector<size_t> pooling_sizes{2, 8, 9};
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (const size_t kernel_size : pooling_sizes) {
      for (size_t c = 1; c <= 40; c += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(pixels)
          .pooling_elements(kernel_size)
          .pooling_tile(9, 0)
          .channels(c)
          .output_stride(43)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
1349 
// Sweeps 2..5 output pixels, pooling sizes {2, 8, 9} and channel counts
// 1..36 (stride 7); for each combination every step in [2, pooling size] is
// tried, with pooling_tile(9, 0) and output_stride(43).
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, few_output_pixels_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const std::vector<size_t> pooling_sizes{2, 8, 9};
  for (size_t pixels = 2; pixels < 6; ++pixels) {
    for (const size_t kernel_size : pooling_sizes) {
      for (size_t c = 1; c <= 40; c += 7) {
        for (size_t stride = 2; stride <= kernel_size; ++stride) {
          AvgPoolMicrokernelTester()
            .output_pixels(pixels)
            .pooling_elements(kernel_size)
            .pooling_tile(9, 0)
            .step(stride)
            .channels(c)
            .output_stride(43)
            .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
        }
      }
    }
  }
}
1369 #endif  // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64
1370 
1371 
1372 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run: 17 pooling elements, pooling_tile(9, 8), exactly 8 channels.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_twopass_fulltile) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  const size_t num_channels = 8;
  AvgPoolMicrokernelTester()
    .pooling_elements(kernel_size)
    .pooling_tile(9, 8)
    .channels(num_channels)
    .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
}
1381 
// Single run: 17 pooling elements, pooling_tile(9, 8), 8 channels,
// input_offset(11).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  const size_t num_channels = 8;
  AvgPoolMicrokernelTester()
    .pooling_elements(kernel_size)
    .pooling_tile(9, 8)
    .channels(num_channels)
    .input_offset(11)
    .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
}
1391 
// 17 pooling elements, pooling_tile(9, 8), 8 channels, input_offset(11);
// repeated for every zero_index in [0, 17).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_twopass_fulltile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  const size_t num_channels = 8;
  for (size_t zero_pos = 0; zero_pos != kernel_size; ++zero_pos) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .input_offset(11)
      .zero_index(zero_pos)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1404 
// Single run: 17 pooling elements, pooling_tile(9, 8), 8 channels, qmin(128).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  const size_t num_channels = 8;
  AvgPoolMicrokernelTester()
    .pooling_elements(kernel_size)
    .pooling_tile(9, 8)
    .channels(num_channels)
    .qmin(128)
    .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
}
1414 
// Single run: 17 pooling elements, pooling_tile(9, 8), 8 channels, qmax(128).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  const size_t num_channels = 8;
  AvgPoolMicrokernelTester()
    .pooling_elements(kernel_size)
    .pooling_tile(9, 8)
    .channels(num_channels)
    .qmax(128)
    .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
}
1424 
// Pooling sizes 10..16 with pooling_tile(9, 8) and exactly 8 channels.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_twopass_subtile) {
  TEST_REQUIRES_X86_AVX2;
  const size_t num_channels = 8;
  for (size_t kernel_size = 10; kernel_size <= 16; ++kernel_size) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1435 
// Pooling sizes 10..16 with pooling_tile(9, 8), 8 channels, input_offset(11).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  const size_t num_channels = 8;
  for (size_t kernel_size = 10; kernel_size <= 16; ++kernel_size) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .input_offset(11)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1447 
// Pooling sizes 10..16 with pooling_tile(9, 8), 8 channels, input_offset(11);
// each size is run with every zero_index in [0, pooling size).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_twopass_subtile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  const size_t num_channels = 8;
  for (size_t kernel_size = 10; kernel_size <= 16; ++kernel_size) {
    for (size_t zero_pos = 0; zero_pos != kernel_size; ++zero_pos) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .input_offset(11)
        .zero_index(zero_pos)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1462 
// 17 pooling elements, pooling_tile(9, 8), channel counts 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_twopass_fulltile) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  for (size_t c = 16; c <= 56; c += 8) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(c)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1473 
// 17 pooling elements, pooling_tile(9, 8), channel counts 16, 24, ..., 56,
// input_offset(41).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  for (size_t c = 16; c <= 56; c += 8) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(c)
      .input_offset(41)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1485 
// 17 pooling elements, pooling_tile(9, 8), channel counts 16, 24, ..., 56,
// input_offset(41); each channel count is run with every zero_index in [0, 17).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_twopass_fulltile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  for (size_t c = 16; c <= 56; c += 8) {
    for (size_t zero_pos = 0; zero_pos != kernel_size; ++zero_pos) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(c)
        .input_offset(41)
        .zero_index(zero_pos)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1500 
// 17 pooling elements, pooling_tile(9, 8), channel counts 16, 24, ..., 56,
// qmin(128).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  for (size_t c = 16; c <= 56; c += 8) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(c)
      .qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1512 
// 17 pooling elements, pooling_tile(9, 8), channel counts 16, 24, ..., 56,
// qmax(128).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  for (size_t c = 16; c <= 56; c += 8) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(c)
      .qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1524 
// Pooling sizes 10..16 crossed with channel counts 16, 24, ..., 56,
// pooling_tile(9, 8).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_twopass_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 10; kernel_size <= 16; ++kernel_size) {
    for (size_t c = 16; c <= 56; c += 8) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(c)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1537 
// Pooling sizes 10..16 crossed with channel counts 16, 24, ..., 56,
// pooling_tile(9, 8), input_offset(67).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 10; kernel_size <= 16; ++kernel_size) {
    for (size_t c = 16; c <= 56; c += 8) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(c)
        .input_offset(67)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1551 
// Pooling sizes 10..16 crossed with channel counts 16, 24, ..., 56,
// pooling_tile(9, 8), input_offset(67); every zero_index in
// [0, pooling size) is exercised for each pair.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_twopass_subtile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 10; kernel_size <= 16; ++kernel_size) {
    for (size_t c = 16; c <= 56; c += 8) {
      for (size_t zero_pos = 0; zero_pos != kernel_size; ++zero_pos) {
        AvgPoolMicrokernelTester()
          .pooling_elements(kernel_size)
          .pooling_tile(9, 8)
          .channels(c)
          .input_offset(67)
          .zero_index(zero_pos)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
1568 
// 17 pooling elements, pooling_tile(9, 8), channel counts 1..7.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_twopass_fulltile) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  for (size_t c = 1; c <= 7; ++c) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(c)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1579 
// 17 pooling elements, pooling_tile(9, 8), channel counts 1..7,
// input_offset(11).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  for (size_t c = 1; c <= 7; ++c) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(c)
      .input_offset(11)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1591 
// 17 pooling elements, pooling_tile(9, 8), channel counts 1..7,
// input_offset(11); each channel count is run with every zero_index in [0, 17).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_twopass_fulltile_with_zero_index) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  for (size_t c = 1; c <= 7; ++c) {
    for (size_t zero_pos = 0; zero_pos != kernel_size; ++zero_pos) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(c)
        .input_offset(11)
        .zero_index(zero_pos)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1606 
// 17 pooling elements, pooling_tile(9, 8), channel counts 1..7, qmin(128).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  for (size_t c = 1; c <= 7; ++c) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(c)
      .qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1618 
// 17 pooling elements, pooling_tile(9, 8), channel counts 1..7, qmax(128).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  for (size_t c = 1; c <= 7; ++c) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(c)
      .qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1630 
// Pooling sizes 10..16 crossed with channel counts 1..7, pooling_tile(9, 8).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_twopass_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 10; kernel_size <= 16; ++kernel_size) {
    for (size_t c = 1; c <= 7; ++c) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(c)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1643 
// Pooling sizes 10..16 crossed with channel counts 1..7, pooling_tile(9, 8),
// input_offset(11).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 10; kernel_size <= 16; ++kernel_size) {
    for (size_t c = 1; c <= 7; ++c) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(c)
        .input_offset(11)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1657 
// Pooling sizes 10..16 crossed with channel counts 1..7, pooling_tile(9, 8),
// input_offset(11); every zero_index in [0, pooling size) is exercised for
// each pair.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_twopass_subtile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 10; kernel_size <= 16; ++kernel_size) {
    for (size_t c = 1; c <= 7; ++c) {
      for (size_t zero_pos = 0; zero_pos != kernel_size; ++zero_pos) {
        AvgPoolMicrokernelTester()
          .pooling_elements(kernel_size)
          .pooling_tile(9, 8)
          .channels(c)
          .input_offset(11)
          .zero_index(zero_pos)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
1674 
// 17 pooling elements, pooling_tile(9, 8), channel counts 9..15.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_twopass_fulltile) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  for (size_t c = 9; c <= 15; ++c) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(c)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1685 
// 17 pooling elements, pooling_tile(9, 8), channel counts 9..15,
// input_offset(17).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  for (size_t c = 9; c <= 15; ++c) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(c)
      .input_offset(17)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1697 
// 17 pooling elements, pooling_tile(9, 8), channel counts 9..15,
// input_offset(17); each channel count is run with every zero_index in [0, 17).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_twopass_fulltile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  for (size_t c = 9; c <= 15; ++c) {
    for (size_t zero_pos = 0; zero_pos != kernel_size; ++zero_pos) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(c)
        .input_offset(17)
        .zero_index(zero_pos)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1712 
// 17 pooling elements, pooling_tile(9, 8), channel counts 9..15, qmin(128).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  for (size_t c = 9; c <= 15; ++c) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(c)
      .qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1724 
// 17 pooling elements, pooling_tile(9, 8), channel counts 9..15, qmax(128).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  const size_t kernel_size = 17;
  for (size_t c = 9; c <= 15; ++c) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(c)
      .qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1736 
// Pooling sizes 10..16 crossed with channel counts 9..15, pooling_tile(9, 8).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_twopass_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 10; kernel_size <= 16; ++kernel_size) {
    for (size_t c = 9; c <= 15; ++c) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(c)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1749 
// Pooling sizes 10..16 crossed with channel counts 9..15, pooling_tile(9, 8),
// input_offset(17).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 10; kernel_size <= 16; ++kernel_size) {
    for (size_t c = 9; c <= 15; ++c) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(c)
        .input_offset(17)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1763 
// Pooling sizes 10..16 crossed with channel counts 9..15, pooling_tile(9, 8),
// input_offset(17); every zero_index in [0, pooling size) is exercised for
// each pair.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_twopass_subtile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 10; kernel_size <= 16; ++kernel_size) {
    for (size_t c = 9; c <= 15; ++c) {
      for (size_t zero_pos = 0; zero_pos != kernel_size; ++zero_pos) {
        AvgPoolMicrokernelTester()
          .pooling_elements(kernel_size)
          .pooling_tile(9, 8)
          .channels(c)
          .input_offset(17)
          .zero_index(zero_pos)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
1780 
// Pooling sizes 18, 21, ..., 33 with pooling_tile(9, 8) and exactly 8 channels.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_multipass) {
  TEST_REQUIRES_X86_AVX2;
  const size_t num_channels = 8;
  for (size_t kernel_size = 18; kernel_size <= 33; kernel_size += 3) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1791 
// Pooling sizes 18, 21, ..., 33 with pooling_tile(9, 8), 8 channels,
// input_offset(11).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_multipass_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  const size_t num_channels = 8;
  for (size_t kernel_size = 18; kernel_size <= 33; kernel_size += 3) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .input_offset(11)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1803 
// Pooling sizes 18, 21, ..., 33 with pooling_tile(9, 8), 8 channels,
// input_offset(11); each size is run with every zero_index in
// [0, pooling size).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_multipass_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  const size_t num_channels = 8;
  for (size_t kernel_size = 18; kernel_size <= 33; kernel_size += 3) {
    for (size_t zero_pos = 0; zero_pos != kernel_size; ++zero_pos) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(num_channels)
        .input_offset(11)
        .zero_index(zero_pos)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1818 
// Pooling sizes 18, 21, ..., 33 with pooling_tile(9, 8), 8 channels, qmin(128).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_multipass_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  const size_t num_channels = 8;
  for (size_t kernel_size = 18; kernel_size <= 33; kernel_size += 3) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1830 
// Pooling sizes 18, 21, ..., 33 with pooling_tile(9, 8), 8 channels, qmax(128).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_multipass_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  const size_t num_channels = 8;
  for (size_t kernel_size = 18; kernel_size <= 33; kernel_size += 3) {
    AvgPoolMicrokernelTester()
      .pooling_elements(kernel_size)
      .pooling_tile(9, 8)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
1842 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 16, 24, ..., 56,
// pooling_tile(9, 8).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_multipass) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 18; kernel_size <= 33; kernel_size += 3) {
    for (size_t c = 16; c <= 56; c += 8) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(c)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1855 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 16, 24, ..., 56,
// pooling_tile(9, 8), input_offset(67).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_multipass_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 18; kernel_size <= 33; kernel_size += 3) {
    for (size_t c = 16; c <= 56; c += 8) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(c)
        .input_offset(67)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1869 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 16, 24, ..., 56,
// pooling_tile(9, 8), input_offset(67); every zero_index in
// [0, pooling size) is exercised for each pair.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_multipass_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 18; kernel_size <= 33; kernel_size += 3) {
    for (size_t c = 16; c <= 56; c += 8) {
      for (size_t zero_pos = 0; zero_pos != kernel_size; ++zero_pos) {
        AvgPoolMicrokernelTester()
          .pooling_elements(kernel_size)
          .pooling_tile(9, 8)
          .channels(c)
          .input_offset(67)
          .zero_index(zero_pos)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
1886 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 16, 24, ..., 56,
// pooling_tile(9, 8), qmin(128).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_multipass_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 18; kernel_size <= 33; kernel_size += 3) {
    for (size_t c = 16; c <= 56; c += 8) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(c)
        .qmin(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1900 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 16, 24, ..., 56,
// pooling_tile(9, 8), qmax(128).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_multipass_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 18; kernel_size <= 33; kernel_size += 3) {
    for (size_t c = 16; c <= 56; c += 8) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(c)
        .qmax(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1914 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 1..7,
// pooling_tile(9, 8).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_multipass) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 18; kernel_size <= 33; kernel_size += 3) {
    for (size_t c = 1; c <= 7; ++c) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(c)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1927 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 1..7,
// pooling_tile(9, 8), input_offset(8).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_multipass_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 18; kernel_size <= 33; kernel_size += 3) {
    for (size_t c = 1; c <= 7; ++c) {
      AvgPoolMicrokernelTester()
        .pooling_elements(kernel_size)
        .pooling_tile(9, 8)
        .channels(c)
        .input_offset(8)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1941 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 1..7,
// pooling_tile(9, 8), input_offset(8); every zero_index in
// [0, pooling size) is exercised for each pair.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_multipass_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t kernel_size = 18; kernel_size <= 33; kernel_size += 3) {
    for (size_t c = 1; c <= 7; ++c) {
      for (size_t zero_pos = 0; zero_pos != kernel_size; ++zero_pos) {
        AvgPoolMicrokernelTester()
          .pooling_elements(kernel_size)
          .pooling_tile(9, 8)
          .channels(c)
          .input_offset(8)
          .zero_index(zero_pos)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
1958 
// Channels 1..7, pooling_elements 18, 21, ..., 33, pooling_tile(9, 8), with
// qmin(128) set on the tester.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_multipass_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 8; channels++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmin(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1972 
// Channels 1..7, pooling_elements 18, 21, ..., 33, pooling_tile(9, 8), with
// qmax(128) set on the tester.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_multipass_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 8; channels++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmax(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1986 
// Runs the 9p8x AVX2 kernel with pooling_tile(9, 8) for every channel count
// in 9..15 and pooling_elements in 18, 21, ..., 33.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_multipass) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 9; channels < 16; channels++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1999 
// Channels 9..15, pooling_elements 18, 21, ..., 33, pooling_tile(9, 8), with
// input_offset(17) set on the tester.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_multipass_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 9; channels < 16; channels++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(17)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
2013 
// For channels 9..15 and pooling_elements 18, 21, ..., 33 (pooling_tile(9, 8),
// input_offset(17)), runs the kernel once per zero_index in
// [0, pooling_elements).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_multipass_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 9; channels < 16; channels++) {
      for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) {
        AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .input_offset(17)
          .zero_index(zero_index)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
2030 
// Channels 9..15, pooling_elements 18, 21, ..., 33, pooling_tile(9, 8), with
// qmin(128) set on the tester.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_multipass_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 9; channels < 16; channels++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmin(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
2044 
// Channels 9..15, pooling_elements 18, 21, ..., 33, pooling_tile(9, 8), with
// qmax(128) set on the tester.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_multipass_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 9; channels < 16; channels++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmax(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
2058 
// Runs the 9p8x AVX2 kernel for output_pixels 2..5, pooling_elements in
// {10, 16, 18} and channels 1, 8, ..., 36, with pooling_tile(9, 8).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, few_output_pixels) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
2074 
// output_pixels 2..5, pooling_elements in {10, 16, 18}, channels 1, 8, ..., 36,
// pooling_tile(9, 8), with input_offset(43) set on the tester.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .input_offset(43)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
2091 
// output_pixels 2..5, pooling_elements in {10, 16, 18}, channels 1, 8, ..., 36,
// pooling_tile(9, 8), input_offset(43); runs once per zero_index in
// [0, pooling_elements).
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, few_output_pixels_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) {
          AvgPoolMicrokernelTester()
            .output_pixels(output_pixels)
            .pooling_elements(pooling_elements)
            .pooling_tile(9, 8)
            .channels(channels)
            .input_offset(43)
            .zero_index(zero_index)
            .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
        }
      }
    }
  }
}
2111 
// output_pixels 2..5, pooling_elements in {10, 16, 18}, channels 1, 8, ..., 36,
// pooling_tile(9, 8), with qmin(128) set on the tester.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, few_output_pixels_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .qmin(128)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
2128 
// output_pixels 2..5, pooling_elements in {10, 16, 18}, channels 1, 8, ..., 36,
// pooling_tile(9, 8), with qmax(128) set on the tester.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, few_output_pixels_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .qmax(128)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
2145 
// output_pixels 2..5, pooling_elements in {10, 16, 18}, channels 1, 8, ..., 36,
// pooling_tile(9, 8), with output_stride(43) set on the tester.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, few_output_pixels_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .output_stride(43)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
2162 
// output_pixels 2..5, pooling_elements in {10, 16, 18}, channels 1, 8, ..., 36,
// pooling_tile(9, 8), output_stride(43); runs once per step in
// [2, pooling_elements].
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, few_output_pixels_with_step) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        for (size_t step = 2; step <= pooling_elements; step++) {
          AvgPoolMicrokernelTester()
            .output_pixels(output_pixels)
            .pooling_elements(pooling_elements)
            .pooling_tile(9, 8)
            .step(step)
            .channels(channels)
            .output_stride(43)
            .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
        }
      }
    }
  }
}
2182 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2183 
2184 
2185 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the 9x AVX2 kernel once with 8 channels, pooling_elements(9) and
// pooling_tile(9).
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_eq_8_unipass_fulltile) {
  TEST_REQUIRES_X86_AVX2;
  AvgPoolMicrokernelTester()
    .pooling_elements(9)
    .pooling_tile(9)
    .channels(8)
    .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
}
2194 
// 8 channels, pooling_elements(9), pooling_tile(9), with input_offset(11)
// set on the tester.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_eq_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  AvgPoolMicrokernelTester()
    .pooling_elements(9)
    .pooling_tile(9)
    .channels(8)
    .input_offset(11)
    .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
}
2204 
// 8 channels, pooling_elements(9), pooling_tile(9), input_offset(11); runs
// once per zero_index in [0, 9).
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_eq_8_unipass_fulltile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t zero_index = 0; zero_index < 9; zero_index++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(8)
      .input_offset(11)
      .zero_index(zero_index)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
2217 
// 8 channels, pooling_elements(9), pooling_tile(9), with qmin(128) set on
// the tester.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_eq_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  AvgPoolMicrokernelTester()
    .pooling_elements(9)
    .pooling_tile(9)
    .channels(8)
    .qmin(128)
    .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
}
2227 
// 8 channels, pooling_elements(9), pooling_tile(9), with qmax(128) set on
// the tester.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_eq_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  AvgPoolMicrokernelTester()
    .pooling_elements(9)
    .pooling_tile(9)
    .channels(8)
    .qmax(128)
    .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
}
2237 
// 8 channels with pooling_tile(9); pooling_elements swept over 2..8.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_eq_8_unipass_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9)
      .channels(8)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
2248 
// 8 channels with pooling_tile(9) and input_offset(11); pooling_elements
// swept over 2..8.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_eq_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9)
      .channels(8)
      .input_offset(11)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
2260 
// 8 channels with pooling_tile(9) and input_offset(11); for each
// pooling_elements in 2..8, runs once per zero_index in [0, pooling_elements).
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_eq_8_unipass_subtile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9)
        .channels(8)
        .input_offset(11)
        .zero_index(zero_index)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
2275 
// pooling_elements(9) and pooling_tile(9); channels swept over 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_div_8_unipass_fulltile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 16; channels < 64; channels += 8) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
2286 
// pooling_elements(9), pooling_tile(9), input_offset(67); channels swept over
// 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_div_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 16; channels < 64; channels += 8) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .input_offset(67)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
2298 
// pooling_elements(9), pooling_tile(9), input_offset(67); for channels
// 16, 24, ..., 56, runs once per zero_index in [0, 9).
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_div_8_unipass_fulltile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 16; channels < 64; channels += 8) {
    for (size_t zero_index = 0; zero_index < 9; zero_index++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(9)
        .pooling_tile(9)
        .channels(channels)
        .input_offset(67)
        .zero_index(zero_index)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
2313 
// pooling_elements(9), pooling_tile(9), qmin(128); channels swept over
// 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_div_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 16; channels < 64; channels += 8) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
2325 
// pooling_elements(9), pooling_tile(9), qmax(128); channels swept over
// 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_div_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 16; channels < 64; channels += 8) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
2337 
// pooling_tile(9); pooling_elements swept over 2..8 and channels over
// 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_div_8_unipass_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9)
        .channels(channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
2350 
// pooling_tile(9) and input_offset(67); pooling_elements swept over 2..8 and
// channels over 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_div_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9)
        .channels(channels)
        .input_offset(67)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
2364 
// pooling_tile(9) and input_offset(67); for pooling_elements 2..8 and channels
// 16, 24, ..., 56, runs once per zero_index in [0, pooling_elements).
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_div_8_unipass_subtile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) {
        AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9)
          .channels(channels)
          .input_offset(67)
          .zero_index(zero_index)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
2381 
// pooling_elements(9) and pooling_tile(9); channels swept over 1..7.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_lt_8_unipass_fulltile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels < 8; channels++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
2392 
// pooling_elements(9), pooling_tile(9), input_offset(11); channels swept over
// 1..7.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_lt_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels < 8; channels++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .input_offset(11)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
2404 
// pooling_elements(9), pooling_tile(9), input_offset(11); for channels 1..7,
// runs once per zero_index in [0, 9).
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_lt_8_unipass_fulltile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels < 8; channels++) {
    for (size_t zero_index = 0; zero_index < 9; zero_index++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(9)
        .pooling_tile(9)
        .channels(channels)
        .input_offset(11)
        .zero_index(zero_index)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
2419 
// pooling_elements(9), pooling_tile(9), qmin(128); channels swept over 1..7.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_lt_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels < 8; channels++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
2431 
// pooling_elements(9), pooling_tile(9), qmax(128); channels swept over 1..7.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_lt_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels < 8; channels++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
2443 
// pooling_tile(9); pooling_elements swept over 2..8 and channels over 1..7.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_lt_8_unipass_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 1; channels < 8; channels++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9)
        .channels(channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
2456 
// pooling_tile(9) and input_offset(11); pooling_elements swept over 2..8 and
// channels over 1..7.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_lt_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 1; channels < 8; channels++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9)
        .channels(channels)
        .input_offset(11)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
2470 
// pooling_tile(9) and input_offset(11); for pooling_elements 2..8 and channels
// 1..7, runs once per zero_index in [0, pooling_elements).
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_lt_8_unipass_subtile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 1; channels < 8; channels++) {
      for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) {
        AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9)
          .channels(channels)
          .input_offset(11)
          .zero_index(zero_index)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
2487 
// pooling_elements(9) and pooling_tile(9); channels swept over 9..15.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_gt_8_unipass_fulltile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 9; channels < 16; channels++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
2498 
// pooling_elements(9), pooling_tile(9), input_offset(17); channels swept over
// 9..15.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_gt_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 9; channels < 16; channels++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .input_offset(17)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
2510 
// pooling_elements(9), pooling_tile(9), input_offset(17); for channels 9..15,
// runs once per zero_index in [0, 9).
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_gt_8_unipass_fulltile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 9; channels < 16; channels++) {
    for (size_t zero_index = 0; zero_index < 9; zero_index++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(9)
        .pooling_tile(9)
        .channels(channels)
        .input_offset(17)
        .zero_index(zero_index)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
2525 
// pooling_elements(9), pooling_tile(9), qmin(128); channels swept over 9..15.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_gt_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 9; channels < 16; channels++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
2537 
// pooling_elements(9), pooling_tile(9), qmax(128); channels swept over 9..15.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_gt_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 9; channels < 16; channels++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
2549 
// pooling_tile(9); pooling_elements swept over 2..8 and channels over 9..15.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_gt_8_unipass_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 9; channels < 16; channels++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9)
        .channels(channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
2562 
// pooling_tile(9) and input_offset(17); pooling_elements swept over 2..8 and
// channels over 9..15.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_gt_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 9; channels < 16; channels++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9)
        .channels(channels)
        .input_offset(17)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
2576 
// pooling_tile(9) and input_offset(17); for pooling_elements 2..8 and channels
// 9..15, runs once per zero_index in [0, pooling_elements).
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_gt_8_unipass_subtile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 9; channels < 16; channels++) {
      for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) {
        AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9)
          .channels(channels)
          .input_offset(17)
          .zero_index(zero_index)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
2593 
// Runs the 9x AVX2 kernel for output_pixels 2..5, pooling_elements in
// {2, 8, 9} and channels 1, 8, ..., 36, with pooling_tile(9, 0).
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, few_output_pixels) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements : std::vector<size_t>{{2, 8, 9}}) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 0)
          .channels(channels)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
2609 
// output_pixels 2..5, pooling_elements in {2, 8, 9}, channels 1, 8, ..., 36,
// pooling_tile(9, 0), with input_offset(43) set on the tester.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements : std::vector<size_t>{{2, 8, 9}}) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 0)
          .channels(channels)
          .input_offset(43)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
2626 
// Sweeps output_pixels 2..5, pooling_elements in {2, 8, 9}, and channels
// 1, 8, ..., 36 with input_offset 43, trying every zero_index below the
// current pooling_elements value.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, few_output_pixels_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<size_t> kernel_sizes{2, 8, 9};
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (const size_t kernel_size : kernel_sizes) {
      for (size_t c = 1; c <= 40; c += 7) {
        for (size_t zero_pos = 0; zero_pos < kernel_size; ++zero_pos) {
          AvgPoolMicrokernelTester tester;
          tester.output_pixels(pixels)
              .pooling_elements(kernel_size)
              .pooling_tile(9, 0)
              .channels(c)
              .input_offset(43)
              .zero_index(zero_pos);
          tester.Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
        }
      }
    }
  }
}
2646 
// Sweeps output_pixels 2..5, pooling_elements in {2, 8, 9}, and channels
// 1, 8, ..., 36 with qmin set to 128.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, few_output_pixels_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<size_t> kernel_sizes{2, 8, 9};
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (const size_t kernel_size : kernel_sizes) {
      for (size_t c = 1; c <= 40; c += 7) {
        AvgPoolMicrokernelTester tester;
        tester.output_pixels(pixels)
            .pooling_elements(kernel_size)
            .pooling_tile(9, 0)
            .channels(c)
            .qmin(128);
        tester.Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
2663 
// Sweeps output_pixels 2..5, pooling_elements in {2, 8, 9}, and channels
// 1, 8, ..., 36 with qmax set to 128.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, few_output_pixels_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<size_t> kernel_sizes{2, 8, 9};
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (const size_t kernel_size : kernel_sizes) {
      for (size_t c = 1; c <= 40; c += 7) {
        AvgPoolMicrokernelTester tester;
        tester.output_pixels(pixels)
            .pooling_elements(kernel_size)
            .pooling_tile(9, 0)
            .channels(c)
            .qmax(128);
        tester.Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
2680 
// Sweeps output_pixels 2..5, pooling_elements in {2, 8, 9}, and channels
// 1, 8, ..., 36 with output_stride set to 43.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, few_output_pixels_with_output_stride) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<size_t> kernel_sizes{2, 8, 9};
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (const size_t kernel_size : kernel_sizes) {
      for (size_t c = 1; c <= 40; c += 7) {
        AvgPoolMicrokernelTester tester;
        tester.output_pixels(pixels)
            .pooling_elements(kernel_size)
            .pooling_tile(9, 0)
            .channels(c)
            .output_stride(43);
        tester.Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
2697 
// Sweeps output_pixels 2..5, pooling_elements in {2, 8, 9}, and channels
// 1, 8, ..., 36 with output_stride 43, trying every step from 2 up to and
// including the current pooling_elements value.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, few_output_pixels_with_step) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<size_t> kernel_sizes{2, 8, 9};
  for (size_t pixels = 2; pixels <= 5; ++pixels) {
    for (const size_t kernel_size : kernel_sizes) {
      for (size_t c = 1; c <= 40; c += 7) {
        for (size_t stride = 2; stride <= kernel_size; ++stride) {
          AvgPoolMicrokernelTester tester;
          tester.output_pixels(pixels)
              .pooling_elements(kernel_size)
              .pooling_tile(9, 0)
              .step(stride)
              .channels(c)
              .output_stride(43);
          tester.Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
        }
      }
    }
  }
}
2717 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2718