// xref: /aosp_15_r20/external/XNNPACK/test/f16-maxpool-minmax.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f16-maxpool-minmax.yaml
//   Generator: tools/generate-maxpool-test.py
12 
13 
14 #include <gtest/gtest.h>
15 
16 #include <xnnpack/common.h>
17 #include <xnnpack/isa-checks.h>
18 
19 #include <xnnpack/maxpool.h>
20 #include "maxpool-microkernel-tester.h"
21 
22 
23 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// Runs the kernel on 8 channels with 9 pooling elements and a (9, 8) pooling tile.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  MaxPoolMicrokernelTester()
    .pooling_elements(9)
    .pooling_tile(9, 8)
    .channels(8)
    .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
}
32 
// Runs the kernel on 8 channels with 9 pooling elements and input_offset(11).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  MaxPoolMicrokernelTester()
    .pooling_elements(9)
    .pooling_tile(9, 8)
    .channels(8)
    .input_offset(11)
    .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
}
42 
// Runs the kernel on 8 channels with 9 pooling elements and qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  MaxPoolMicrokernelTester()
    .pooling_elements(9)
    .pooling_tile(9, 8)
    .channels(8)
    .qmin(-16384)
    .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
}
52 
// Runs the kernel on 8 channels with 9 pooling elements and qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  MaxPoolMicrokernelTester()
    .pooling_elements(9)
    .pooling_tile(9, 8)
    .channels(8)
    .qmax(16384)
    .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
}
62 
// Sweeps pooling_elements over 2..8 (all below 9) on 8 channels.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(8)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
73 
// Sweeps pooling_elements over 2..8 on 8 channels with input_offset(11).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(8)
      .input_offset(11)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
85 
// Sweeps channel counts 16, 24, ..., 56 (multiples of 8) with 9 pooling elements.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 16; channels < 64; channels += 8) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9, 8)
      .channels(channels)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
96 
// Sweeps channel counts 16, 24, ..., 56 with 9 pooling elements and input_offset(67).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 16; channels < 64; channels += 8) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9, 8)
      .channels(channels)
      .input_offset(67)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
108 
// Sweeps channel counts 16, 24, ..., 56 with 9 pooling elements and qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 16; channels < 64; channels += 8) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9, 8)
      .channels(channels)
      .qmin(-16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
120 
// Sweeps channel counts 16, 24, ..., 56 with 9 pooling elements and qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 16; channels < 64; channels += 8) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9, 8)
      .channels(channels)
      .qmax(16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
132 
// Sweeps pooling_elements over 2..8 against channel counts 16, 24, ..., 56.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
145 
// Sweeps pooling_elements over 2..8 against channel counts 16, 24, ..., 56 with input_offset(67).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(67)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
159 
// Sweeps channel counts 1..7 (all below 8) with 9 pooling elements.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels < 8; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9, 8)
      .channels(channels)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
170 
// Sweeps channel counts 1..7 with 9 pooling elements and input_offset(11).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels < 8; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9, 8)
      .channels(channels)
      .input_offset(11)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
182 
// Sweeps channel counts 1..7 with 9 pooling elements and qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels < 8; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9, 8)
      .channels(channels)
      .qmin(-16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
194 
// Sweeps channel counts 1..7 with 9 pooling elements and qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels < 8; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9, 8)
      .channels(channels)
      .qmax(16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
206 
// Sweeps pooling_elements over 2..8 against channel counts 1..7.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 1; channels < 8; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
219 
// Sweeps pooling_elements over 2..8 against channel counts 1..7 with input_offset(11).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 1; channels < 8; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(11)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
233 
// Sweeps channel counts 9..15 (above 8, below 16) with 9 pooling elements.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 9; channels < 16; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9, 8)
      .channels(channels)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
244 
// Sweeps channel counts 9..15 with 9 pooling elements and input_offset(17).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 9; channels < 16; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9, 8)
      .channels(channels)
      .input_offset(17)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
256 
// Sweeps channel counts 9..15 with 9 pooling elements and qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 9; channels < 16; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9, 8)
      .channels(channels)
      .qmin(-16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
268 
// Sweeps channel counts 9..15 with 9 pooling elements and qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 9; channels < 16; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9, 8)
      .channels(channels)
      .qmax(16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
280 
// Sweeps pooling_elements over 2..8 against channel counts 9..15.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 9; channels < 16; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
293 
// Sweeps pooling_elements over 2..8 against channel counts 9..15 with input_offset(17).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 9; channels < 16; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(17)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
307 
// Runs the kernel on 8 channels with 17 pooling elements and a (9, 8) pooling tile.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  MaxPoolMicrokernelTester()
    .pooling_elements(17)
    .pooling_tile(9, 8)
    .channels(8)
    .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
}
316 
// Runs the kernel on 8 channels with 17 pooling elements and input_offset(11).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  MaxPoolMicrokernelTester()
    .pooling_elements(17)
    .pooling_tile(9, 8)
    .channels(8)
    .input_offset(11)
    .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
}
326 
// Runs the kernel on 8 channels with 17 pooling elements and qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  MaxPoolMicrokernelTester()
    .pooling_elements(17)
    .pooling_tile(9, 8)
    .channels(8)
    .qmin(-16384)
    .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
}
336 
// Runs the kernel on 8 channels with 17 pooling elements and qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  MaxPoolMicrokernelTester()
    .pooling_elements(17)
    .pooling_tile(9, 8)
    .channels(8)
    .qmax(16384)
    .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
}
346 
// Sweeps pooling_elements over 10..16 (above 9, below 17) on 8 channels.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(8)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
357 
// Sweeps pooling_elements over 10..16 on 8 channels with input_offset(11).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(8)
      .input_offset(11)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
369 
// Sweeps channel counts 16, 24, ..., 56 (multiples of 8) with 17 pooling elements.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 16; channels < 64; channels += 8) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
380 
// Sweeps channel counts 16, 24, ..., 56 with 17 pooling elements and input_offset(41).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 16; channels < 64; channels += 8) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .input_offset(41)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
392 
// Sweeps channel counts 16, 24, ..., 56 with 17 pooling elements and qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 16; channels < 64; channels += 8) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .qmin(-16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
404 
// Sweeps channel counts 16, 24, ..., 56 with 17 pooling elements and qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 16; channels < 64; channels += 8) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .qmax(16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
416 
// Sweeps pooling_elements over 10..16 against channel counts 16, 24, ..., 56.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
429 
// Sweeps pooling_elements over 10..16 against channel counts 16, 24, ..., 56 with input_offset(67).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(67)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
443 
// Sweeps channel counts 1..7 (all below 8) with 17 pooling elements.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels < 8; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
454 
// Sweeps channel counts 1..7 with 17 pooling elements and input_offset(11).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels < 8; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .input_offset(11)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
466 
// Sweeps channel counts 1..7 with 17 pooling elements and qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels < 8; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .qmin(-16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
478 
// Sweeps channel counts 1..7 with 17 pooling elements and qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 1; channels < 8; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .qmax(16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
490 
// Sweeps pooling_elements over 10..16 against channel counts 1..7.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 1; channels < 8; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
503 
// Sweeps pooling_elements over 10..16 against channel counts 1..7 with input_offset(11).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 1; channels < 8; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(11)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
517 
// Sweeps channel counts 9..15 (above 8, below 16) with 17 pooling elements.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 9; channels < 16; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
528 
// Sweeps channel counts 9..15 with 17 pooling elements and input_offset(17).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 9; channels < 16; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .input_offset(17)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
540 
// Sweeps channel counts 9..15 with 17 pooling elements and qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 9; channels < 16; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .qmin(-16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
552 
// Sweeps channel counts 9..15 with 17 pooling elements and qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t channels = 9; channels < 16; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .qmax(16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
564 
// Sweeps pooling_elements over 10..16 against channel counts 9..15.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 9; channels < 16; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
577 
// Sweeps pooling_elements over 10..16 against channel counts 9..15 with input_offset(17).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 9; channels < 16; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(17)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
591 
// Sweeps pooling_elements over 18, 21, ..., 33 (step 3) on 8 channels.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_multipass) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    MaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(8)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
602 
// Sweeps pooling_elements over 18, 21, ..., 33 on 8 channels with input_offset(11).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_multipass_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    MaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(8)
      .input_offset(11)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
614 
// Sweeps pooling_elements over 18, 21, ..., 33 on 8 channels with qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_multipass_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    MaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(8)
      .qmin(-16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
626 
// Sweeps pooling_elements over 18, 21, ..., 33 on 8 channels with qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_multipass_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    MaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(8)
      .qmax(16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}
638 
// Sweeps pooling_elements over 18, 21, ..., 33 against channel counts 16, 24, ..., 56.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_multipass) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}
651 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 16, 24, ..., 56, using input_offset(67).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_multipass_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 18; pool_size < 34; pool_size += 3) {
    for (size_t num_channels = 16; num_channels <= 56; num_channels += 8) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels).input_offset(67)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
665 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 16, 24, ..., 56, using qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_multipass_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 18; pool_size < 34; pool_size += 3) {
    for (size_t num_channels = 16; num_channels <= 56; num_channels += 8) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels).qmin(-16384)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
679 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 16, 24, ..., 56, using qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_multipass_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 18; pool_size < 34; pool_size += 3) {
    for (size_t num_channels = 16; num_channels <= 56; num_channels += 8) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels).qmax(16384)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
693 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 1 through 7.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_multipass) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 18; pool_size < 34; pool_size += 3) {
    for (size_t num_channels = 1; num_channels <= 7; ++num_channels) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
706 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 1 through 7, using input_offset(8).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_multipass_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 18; pool_size < 34; pool_size += 3) {
    for (size_t num_channels = 1; num_channels <= 7; ++num_channels) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels).input_offset(8)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
720 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 1 through 7, using qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_multipass_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 18; pool_size < 34; pool_size += 3) {
    for (size_t num_channels = 1; num_channels <= 7; ++num_channels) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels).qmin(-16384)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
734 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 1 through 7, using qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_multipass_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 18; pool_size < 34; pool_size += 3) {
    for (size_t num_channels = 1; num_channels <= 7; ++num_channels) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels).qmax(16384)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
748 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 9 through 15.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_multipass) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 18; pool_size < 34; pool_size += 3) {
    for (size_t num_channels = 9; num_channels <= 15; ++num_channels) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
761 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 9 through 15, using input_offset(17).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_multipass_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 18; pool_size < 34; pool_size += 3) {
    for (size_t num_channels = 9; num_channels <= 15; ++num_channels) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels).input_offset(17)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
775 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 9 through 15, using qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_multipass_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 18; pool_size < 34; pool_size += 3) {
    for (size_t num_channels = 9; num_channels <= 15; ++num_channels) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels).qmin(-16384)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
789 
// Pooling sizes 18, 21, ..., 33 crossed with channel counts 9 through 15, using qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_multipass_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 18; pool_size < 34; pool_size += 3) {
    for (size_t num_channels = 9; num_channels <= 15; ++num_channels) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels).qmax(16384)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
803 
// 2 to 5 output pixels, pooling sizes {2, 9, 16}, channel counts 1, 8, ..., 36.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_pixels = 2; num_pixels < 6; ++num_pixels) {
    for (size_t pool_size : std::vector<size_t>{2, 9, 16}) {
      for (size_t num_channels = 1; num_channels < 41; num_channels += 7) {
        MaxPoolMicrokernelTester()
          .output_pixels(num_pixels)
          .pooling_elements(pool_size).pooling_tile(9, 8)
          .channels(num_channels)
          .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
                xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
819 
// 2 to 5 output pixels, pooling sizes {2, 9, 16}, channel counts 1, 8, ..., 36, using input_offset(43).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_pixels = 2; num_pixels < 6; ++num_pixels) {
    for (size_t pool_size : std::vector<size_t>{2, 9, 16}) {
      for (size_t num_channels = 1; num_channels < 41; num_channels += 7) {
        MaxPoolMicrokernelTester()
          .output_pixels(num_pixels)
          .pooling_elements(pool_size).pooling_tile(9, 8)
          .channels(num_channels).input_offset(43)
          .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
                xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
836 
// 2 to 5 output pixels, pooling sizes {2, 9, 16}, channel counts 1, 8, ..., 36, using qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_pixels = 2; num_pixels < 6; ++num_pixels) {
    for (size_t pool_size : std::vector<size_t>{2, 9, 16}) {
      for (size_t num_channels = 1; num_channels < 41; num_channels += 7) {
        MaxPoolMicrokernelTester()
          .output_pixels(num_pixels)
          .pooling_elements(pool_size).pooling_tile(9, 8)
          .channels(num_channels).qmin(-16384)
          .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
                xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
853 
// 2 to 5 output pixels, pooling sizes {2, 9, 16}, channel counts 1, 8, ..., 36, using qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_pixels = 2; num_pixels < 6; ++num_pixels) {
    for (size_t pool_size : std::vector<size_t>{2, 9, 16}) {
      for (size_t num_channels = 1; num_channels < 41; num_channels += 7) {
        MaxPoolMicrokernelTester()
          .output_pixels(num_pixels)
          .pooling_elements(pool_size).pooling_tile(9, 8)
          .channels(num_channels).qmax(16384)
          .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
                xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
870 
// 2 to 5 output pixels, pooling sizes {2, 9, 16}, channel counts 1, 8, ..., 36, using output_stride(43).
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_pixels = 2; num_pixels < 6; ++num_pixels) {
    for (size_t pool_size : std::vector<size_t>{2, 9, 16}) {
      for (size_t num_channels = 1; num_channels < 41; num_channels += 7) {
        MaxPoolMicrokernelTester()
          .output_pixels(num_pixels)
          .pooling_elements(pool_size).pooling_tile(9, 8)
          .channels(num_channels).output_stride(43)
          .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
                xnn_init_f16_minmax_neon_params);
      }
    }
  }
}
887 
// 2 to 5 output pixels, pooling sizes {2, 9, 16}, channel counts 1, 8, ..., 36, and every
// step value from 2 up to the current pooling size; output_stride(43) is applied throughout.
TEST(F16_MAXPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_pixels = 2; num_pixels < 6; ++num_pixels) {
    for (size_t pool_size : std::vector<size_t>{2, 9, 16}) {
      for (size_t num_channels = 1; num_channels < 41; num_channels += 7) {
        for (size_t pixel_step = 2; pixel_step <= pool_size; ++pixel_step) {
          MaxPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(pool_size).pooling_tile(9, 8)
            .step(pixel_step)
            .channels(num_channels).output_stride(43)
            .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__neonfp16arith_c8,
                  xnn_init_f16_minmax_neon_params);
        }
      }
    }
  }
}
907 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
908 
909 
910 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: 9 pooling elements, 8 channels.
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_eq_8_unipass_fulltile) {
  TEST_REQUIRES_X86_F16C;
  MaxPoolMicrokernelTester()
    .pooling_elements(9).pooling_tile(9, 8)
    .channels(8)
    .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
          xnn_init_f16_minmax_avx_params);
}
919 
// Single configuration: 9 pooling elements, 8 channels, input_offset(11).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_eq_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  MaxPoolMicrokernelTester()
    .pooling_elements(9).pooling_tile(9, 8)
    .channels(8).input_offset(11)
    .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
          xnn_init_f16_minmax_avx_params);
}
929 
// Single configuration: 9 pooling elements, 8 channels, qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_eq_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_F16C;
  MaxPoolMicrokernelTester()
    .pooling_elements(9).pooling_tile(9, 8)
    .channels(8).qmin(-16384)
    .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
          xnn_init_f16_minmax_avx_params);
}
939 
// Single configuration: 9 pooling elements, 8 channels, qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_eq_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_F16C;
  MaxPoolMicrokernelTester()
    .pooling_elements(9).pooling_tile(9, 8)
    .channels(8).qmax(16384)
    .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
          xnn_init_f16_minmax_avx_params);
}
949 
// Pooling sizes 2 through 8 with exactly 8 channels.
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_eq_8_unipass_subtile) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pool_size = 2; pool_size <= 8; ++pool_size) {
    MaxPoolMicrokernelTester()
      .pooling_elements(pool_size).pooling_tile(9, 8)
      .channels(8)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
960 
// Pooling sizes 2 through 8 with exactly 8 channels, using input_offset(11).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_eq_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pool_size = 2; pool_size <= 8; ++pool_size) {
    MaxPoolMicrokernelTester()
      .pooling_elements(pool_size).pooling_tile(9, 8)
      .channels(8).input_offset(11)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
972 
// 9 pooling elements with channel counts 16, 24, ..., 56 (multiples of 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_div_8_unipass_fulltile) {
  TEST_REQUIRES_X86_F16C;
  for (size_t num_channels = 16; num_channels <= 56; num_channels += 8) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9).pooling_tile(9, 8)
      .channels(num_channels)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
983 
// 9 pooling elements with channel counts 16, 24, ..., 56, using input_offset(67).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_div_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t num_channels = 16; num_channels <= 56; num_channels += 8) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9).pooling_tile(9, 8)
      .channels(num_channels).input_offset(67)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
995 
// 9 pooling elements with channel counts 16, 24, ..., 56, using qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_div_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_F16C;
  for (size_t num_channels = 16; num_channels <= 56; num_channels += 8) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9).pooling_tile(9, 8)
      .channels(num_channels).qmin(-16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
1007 
// 9 pooling elements with channel counts 16, 24, ..., 56, using qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_div_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_F16C;
  for (size_t num_channels = 16; num_channels <= 56; num_channels += 8) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9).pooling_tile(9, 8)
      .channels(num_channels).qmax(16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
1019 
// Pooling sizes 2 through 8 crossed with channel counts 16, 24, ..., 56.
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_div_8_unipass_subtile) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pool_size = 2; pool_size <= 8; ++pool_size) {
    for (size_t num_channels = 16; num_channels <= 56; num_channels += 8) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
1032 
// Pooling sizes 2 through 8 crossed with channel counts 16, 24, ..., 56, using input_offset(67).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_div_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pool_size = 2; pool_size <= 8; ++pool_size) {
    for (size_t num_channels = 16; num_channels <= 56; num_channels += 8) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels).input_offset(67)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
1046 
// 9 pooling elements with channel counts 1 through 7.
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_lt_8_unipass_fulltile) {
  TEST_REQUIRES_X86_F16C;
  for (size_t num_channels = 1; num_channels <= 7; ++num_channels) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9).pooling_tile(9, 8)
      .channels(num_channels)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
1057 
// 9 pooling elements with channel counts 1 through 7, using input_offset(11).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_lt_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t num_channels = 1; num_channels <= 7; ++num_channels) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9).pooling_tile(9, 8)
      .channels(num_channels).input_offset(11)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
1069 
// 9 pooling elements with channel counts 1 through 7, using qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_lt_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_F16C;
  for (size_t num_channels = 1; num_channels <= 7; ++num_channels) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9).pooling_tile(9, 8)
      .channels(num_channels).qmin(-16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
1081 
// 9 pooling elements with channel counts 1 through 7, using qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_lt_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_F16C;
  for (size_t num_channels = 1; num_channels <= 7; ++num_channels) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9).pooling_tile(9, 8)
      .channels(num_channels).qmax(16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
1093 
// Pooling sizes 2 through 8 crossed with channel counts 1 through 7.
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_lt_8_unipass_subtile) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pool_size = 2; pool_size <= 8; ++pool_size) {
    for (size_t num_channels = 1; num_channels <= 7; ++num_channels) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
1106 
// Pooling sizes 2 through 8 crossed with channel counts 1 through 7, using input_offset(11).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_lt_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pool_size = 2; pool_size <= 8; ++pool_size) {
    for (size_t num_channels = 1; num_channels <= 7; ++num_channels) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels).input_offset(11)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
1120 
// 9 pooling elements with channel counts 9 through 15.
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_gt_8_unipass_fulltile) {
  TEST_REQUIRES_X86_F16C;
  for (size_t num_channels = 9; num_channels <= 15; ++num_channels) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9).pooling_tile(9, 8)
      .channels(num_channels)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
1131 
// 9 pooling elements with channel counts 9 through 15, using input_offset(17).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_gt_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t num_channels = 9; num_channels <= 15; ++num_channels) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9).pooling_tile(9, 8)
      .channels(num_channels).input_offset(17)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
1143 
// 9 pooling elements with channel counts 9 through 15, using qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_gt_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_F16C;
  for (size_t num_channels = 9; num_channels <= 15; ++num_channels) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9).pooling_tile(9, 8)
      .channels(num_channels).qmin(-16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
1155 
// 9 pooling elements with channel counts 9 through 15, using qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_gt_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_F16C;
  for (size_t num_channels = 9; num_channels <= 15; ++num_channels) {
    MaxPoolMicrokernelTester()
      .pooling_elements(9).pooling_tile(9, 8)
      .channels(num_channels).qmax(16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
1167 
// Pooling sizes 2 through 8 crossed with channel counts 9 through 15.
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_gt_8_unipass_subtile) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pool_size = 2; pool_size <= 8; ++pool_size) {
    for (size_t num_channels = 9; num_channels <= 15; ++num_channels) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
1180 
// Pooling sizes 2 through 8 crossed with channel counts 9 through 15, using input_offset(17).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_gt_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pool_size = 2; pool_size <= 8; ++pool_size) {
    for (size_t num_channels = 9; num_channels <= 15; ++num_channels) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels).input_offset(17)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
1194 
// Single configuration: 17 pooling elements, 8 channels.
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_eq_8_twopass_fulltile) {
  TEST_REQUIRES_X86_F16C;
  MaxPoolMicrokernelTester()
    .pooling_elements(17).pooling_tile(9, 8)
    .channels(8)
    .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
          xnn_init_f16_minmax_avx_params);
}
1203 
// Single configuration: 17 pooling elements, 8 channels, input_offset(11).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_eq_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  MaxPoolMicrokernelTester()
    .pooling_elements(17).pooling_tile(9, 8)
    .channels(8).input_offset(11)
    .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
          xnn_init_f16_minmax_avx_params);
}
1213 
// Single configuration: 17 pooling elements, 8 channels, qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_eq_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_F16C;
  MaxPoolMicrokernelTester()
    .pooling_elements(17).pooling_tile(9, 8)
    .channels(8).qmin(-16384)
    .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
          xnn_init_f16_minmax_avx_params);
}
1223 
// Single configuration: 17 pooling elements, 8 channels, qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_eq_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_F16C;
  MaxPoolMicrokernelTester()
    .pooling_elements(17).pooling_tile(9, 8)
    .channels(8).qmax(16384)
    .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
          xnn_init_f16_minmax_avx_params);
}
1233 
// Pooling sizes 10 through 16 with exactly 8 channels.
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_eq_8_twopass_subtile) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pool_size = 10; pool_size <= 16; ++pool_size) {
    MaxPoolMicrokernelTester()
      .pooling_elements(pool_size).pooling_tile(9, 8)
      .channels(8)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
1244 
// Pooling sizes 10 through 16 with exactly 8 channels, using input_offset(11).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_eq_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pool_size = 10; pool_size <= 16; ++pool_size) {
    MaxPoolMicrokernelTester()
      .pooling_elements(pool_size).pooling_tile(9, 8)
      .channels(8).input_offset(11)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
1256 
// 17 pooling elements with channel counts 16, 24, ..., 56 (multiples of 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_div_8_twopass_fulltile) {
  TEST_REQUIRES_X86_F16C;
  for (size_t num_channels = 16; num_channels <= 56; num_channels += 8) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17).pooling_tile(9, 8)
      .channels(num_channels)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
1267 
// 17 pooling elements with channel counts 16, 24, ..., 56, using input_offset(41).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_div_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t num_channels = 16; num_channels <= 56; num_channels += 8) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17).pooling_tile(9, 8)
      .channels(num_channels).input_offset(41)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
1279 
// 17 pooling elements with channel counts 16, 24, ..., 56, using qmin(-16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_div_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_F16C;
  for (size_t num_channels = 16; num_channels <= 56; num_channels += 8) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17).pooling_tile(9, 8)
      .channels(num_channels).qmin(-16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
1291 
// 17 pooling elements with channel counts 16, 24, ..., 56, using qmax(16384).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_div_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_F16C;
  for (size_t num_channels = 16; num_channels <= 56; num_channels += 8) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17).pooling_tile(9, 8)
      .channels(num_channels).qmax(16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
            xnn_init_f16_minmax_avx_params);
  }
}
1303 
// Pooling sizes 10 through 16 crossed with channel counts 16, 24, ..., 56.
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_div_8_twopass_subtile) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pool_size = 10; pool_size <= 16; ++pool_size) {
    for (size_t num_channels = 16; num_channels <= 56; num_channels += 8) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pool_size).pooling_tile(9, 8)
        .channels(num_channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
1316 
// Sweeps pooling_elements over 10..16 (more than 9, fewer than 9 + 8) with
// input_offset(67), for channel counts 16, 24, ..., 56 (multiples of 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_div_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(67)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1330 
// 17 pooling elements (9 + 8, the two pooling_tile arguments) for channel
// counts 1..7 (fewer than 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_lt_8_twopass_fulltile) {
  TEST_REQUIRES_X86_F16C;
  for (size_t channels = 1; channels < 8; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
  }
}
1341 
// 17 pooling elements (9 + 8, the two pooling_tile arguments) with
// input_offset(11), for channel counts 1..7 (fewer than 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_lt_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t channels = 1; channels < 8; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .input_offset(11)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
  }
}
1353 
// 17 pooling elements (9 + 8, the two pooling_tile arguments) with qmin set
// to -16384, for channel counts 1..7 (fewer than 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_lt_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_F16C;
  for (size_t channels = 1; channels < 8; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .qmin(-16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
  }
}
1365 
// 17 pooling elements (9 + 8, the two pooling_tile arguments) with qmax set
// to 16384, for channel counts 1..7 (fewer than 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_lt_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_F16C;
  for (size_t channels = 1; channels < 8; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .qmax(16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
  }
}
1377 
// Sweeps pooling_elements over 10..16 (more than 9, fewer than 9 + 8) for
// channel counts 1..7 (fewer than 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_lt_8_twopass_subtile) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 1; channels < 8; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1390 
// Sweeps pooling_elements over 10..16 (more than 9, fewer than 9 + 8) with
// input_offset(11), for channel counts 1..7 (fewer than 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_lt_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 1; channels < 8; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(11)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1404 
// 17 pooling elements (9 + 8, the two pooling_tile arguments) for channel
// counts 9..15 (more than 8, fewer than 16).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_gt_8_twopass_fulltile) {
  TEST_REQUIRES_X86_F16C;
  for (size_t channels = 9; channels < 16; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
  }
}
1415 
// 17 pooling elements (9 + 8, the two pooling_tile arguments) with
// input_offset(17), for channel counts 9..15 (more than 8, fewer than 16).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_gt_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t channels = 9; channels < 16; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .input_offset(17)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
  }
}
1427 
// 17 pooling elements (9 + 8, the two pooling_tile arguments) with qmin set
// to -16384, for channel counts 9..15 (more than 8, fewer than 16).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_gt_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_F16C;
  for (size_t channels = 9; channels < 16; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .qmin(-16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
  }
}
1439 
// 17 pooling elements (9 + 8, the two pooling_tile arguments) with qmax set
// to 16384, for channel counts 9..15 (more than 8, fewer than 16).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_gt_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_F16C;
  for (size_t channels = 9; channels < 16; channels++) {
    MaxPoolMicrokernelTester()
      .pooling_elements(17)
      .pooling_tile(9, 8)
      .channels(channels)
      .qmax(16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
  }
}
1451 
// Sweeps pooling_elements over 10..16 (more than 9, fewer than 9 + 8) for
// channel counts 9..15 (more than 8, fewer than 16).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_gt_8_twopass_subtile) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 9; channels < 16; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1464 
// Sweeps pooling_elements over 10..16 (more than 9, fewer than 9 + 8) with
// input_offset(17), for channel counts 9..15 (more than 8, fewer than 16).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_gt_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) {
    for (size_t channels = 9; channels < 16; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(17)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1478 
// Sweeps pooling_elements over 18, 21, ..., 33 (all above 9 + 8) with exactly
// 8 channels.
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_eq_8_multipass) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    MaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(8)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
  }
}
1489 
// Sweeps pooling_elements over 18, 21, ..., 33 (all above 9 + 8) with exactly
// 8 channels and input_offset(11).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_eq_8_multipass_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    MaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(8)
      .input_offset(11)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
  }
}
1501 
// Sweeps pooling_elements over 18, 21, ..., 33 (all above 9 + 8) with exactly
// 8 channels and qmin set to -16384.
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_eq_8_multipass_with_qmin) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    MaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(8)
      .qmin(-16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
  }
}
1513 
// Sweeps pooling_elements over 18, 21, ..., 33 (all above 9 + 8) with exactly
// 8 channels and qmax set to 16384.
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_eq_8_multipass_with_qmax) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    MaxPoolMicrokernelTester()
      .pooling_elements(pooling_elements)
      .pooling_tile(9, 8)
      .channels(8)
      .qmax(16384)
      .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
  }
}
1525 
// Sweeps pooling_elements over 18, 21, ..., 33 (all above 9 + 8) for channel
// counts 16, 24, ..., 56 (multiples of 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_div_8_multipass) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1538 
// Sweeps pooling_elements over 18, 21, ..., 33 (all above 9 + 8) with
// input_offset(67), for channel counts 16, 24, ..., 56 (multiples of 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_div_8_multipass_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(67)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1552 
// Sweeps pooling_elements over 18, 21, ..., 33 (all above 9 + 8) with qmin
// set to -16384, for channel counts 16, 24, ..., 56 (multiples of 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_div_8_multipass_with_qmin) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmin(-16384)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1566 
// Sweeps pooling_elements over 18, 21, ..., 33 (all above 9 + 8) with qmax
// set to 16384, for channel counts 16, 24, ..., 56 (multiples of 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_div_8_multipass_with_qmax) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmax(16384)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1580 
// Sweeps pooling_elements over 18, 21, ..., 33 (all above 9 + 8) for channel
// counts 1..7 (fewer than 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_lt_8_multipass) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 8; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1593 
// Sweeps pooling_elements over 18, 21, ..., 33 (all above 9 + 8) with
// input_offset(8), for channel counts 1..7 (fewer than 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_lt_8_multipass_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 8; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(8)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1607 
// Sweeps pooling_elements over 18, 21, ..., 33 (all above 9 + 8) with qmin
// set to -16384, for channel counts 1..7 (fewer than 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_lt_8_multipass_with_qmin) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 8; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmin(-16384)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1621 
// Sweeps pooling_elements over 18, 21, ..., 33 (all above 9 + 8) with qmax
// set to 16384, for channel counts 1..7 (fewer than 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_lt_8_multipass_with_qmax) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 8; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmax(16384)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1635 
// Sweeps pooling_elements over 18, 21, ..., 33 (all above 9 + 8) for channel
// counts 9..15 (more than 8, fewer than 16).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_gt_8_multipass) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 9; channels < 16; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1648 
// Sweeps pooling_elements over 18, 21, ..., 33 (all above 9 + 8) with
// input_offset(17), for channel counts 9..15 (more than 8, fewer than 16).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_gt_8_multipass_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 9; channels < 16; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(17)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1662 
// Sweeps pooling_elements over 18, 21, ..., 33 (all above 9 + 8) with qmin
// set to -16384, for channel counts 9..15 (more than 8, fewer than 16).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_gt_8_multipass_with_qmin) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 9; channels < 16; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmin(-16384)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1676 
// Sweeps pooling_elements over 18, 21, ..., 33 (all above 9 + 8) with qmax
// set to 16384, for channel counts 9..15 (more than 8, fewer than 16).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, channels_gt_8_multipass_with_qmax) {
  TEST_REQUIRES_X86_F16C;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 9; channels < 16; channels++) {
      MaxPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmax(16384)
        .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
1690 
// Sweeps 2..5 output pixels, pooling_elements in {2, 9, 16}, and channel
// counts 1, 8, ..., 36 (step 7, so both multiples and non-multiples of 8).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, few_output_pixels) {
  TEST_REQUIRES_X86_F16C;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements : std::vector<size_t>{{2, 9, 16}}) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        MaxPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
1706 
// Sweeps 2..5 output pixels, pooling_elements in {2, 9, 16}, and channel
// counts 1, 8, ..., 36 (step 7), with input_offset(43).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_X86_F16C;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements : std::vector<size_t>{{2, 9, 16}}) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        MaxPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .input_offset(43)
          .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
1723 
// Sweeps 2..5 output pixels, pooling_elements in {2, 9, 16}, and channel
// counts 1, 8, ..., 36 (step 7), with qmin set to -16384.
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, few_output_pixels_with_qmin) {
  TEST_REQUIRES_X86_F16C;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements : std::vector<size_t>{{2, 9, 16}}) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        MaxPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .qmin(-16384)
          .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
1740 
// Sweeps 2..5 output pixels, pooling_elements in {2, 9, 16}, and channel
// counts 1, 8, ..., 36 (step 7), with qmax set to 16384.
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, few_output_pixels_with_qmax) {
  TEST_REQUIRES_X86_F16C;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements : std::vector<size_t>{{2, 9, 16}}) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        MaxPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .qmax(16384)
          .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
1757 
// Sweeps 2..5 output pixels, pooling_elements in {2, 9, 16}, and channel
// counts 1, 8, ..., 36 (step 7), with output_stride(43), which is larger than
// every channel count tested.
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, few_output_pixels_with_output_stride) {
  TEST_REQUIRES_X86_F16C;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements : std::vector<size_t>{{2, 9, 16}}) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        MaxPoolMicrokernelTester()
          .output_pixels(output_pixels)
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .output_stride(43)
          .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
1774 
// Sweeps 2..5 output pixels, pooling_elements in {2, 9, 16}, channel counts
// 1, 8, ..., 36 (step 7), and every step value from 2 up to pooling_elements,
// with output_stride(43).
TEST(F16_MAXPOOL_MINMAX_9P8X__F16C_C8, few_output_pixels_with_step) {
  TEST_REQUIRES_X86_F16C;
  for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) {
    for (size_t pooling_elements : std::vector<size_t>{{2, 9, 16}}) {
      for (size_t channels = 1; channels <= 40; channels += 7) {
        for (size_t step = 2; step <= pooling_elements; step++) {
          MaxPoolMicrokernelTester()
            .output_pixels(output_pixels)
            .pooling_elements(pooling_elements)
            .pooling_tile(9, 8)
            .step(step)
            .channels(channels)
            .output_stride(43)
            .Test(xnn_f16_maxpool_minmax_ukernel_9p8x__f16c_c8, xnn_init_f16_minmax_avx_params);
        }
      }
    }
  }
}
1794 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1795