1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2020 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 // 9 // Auto-generated file. Do not edit! 10 // Specification: test/f16-pavgpool-minmax.yaml 11 // Generator: tools/generate-avgpool-test.py 12 13 14 #include <gtest/gtest.h> 15 16 #include <xnnpack/common.h> 17 #include <xnnpack/isa-checks.h> 18 19 #include <xnnpack/avgpool.h> 20 #include <xnnpack/pavgpool.h> 21 #include "avgpool-microkernel-tester.h" 22 23 24 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_eq_8_twopass_fulltile)25 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_fulltile) { 26 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 27 AvgPoolMicrokernelTester() 28 .pooling_elements(17) 29 .pooling_tile(9, 8) 30 .channels(8) 31 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 32 } 33 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_eq_8_twopass_fulltile_with_input_offset)34 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_fulltile_with_input_offset) { 35 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 36 AvgPoolMicrokernelTester() 37 .pooling_elements(17) 38 .pooling_tile(9, 8) 39 .channels(8) 40 .input_offset(11) 41 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 42 } 43 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_eq_8_twopass_fulltile_with_zero)44 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_fulltile_with_zero) { 45 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 46 for (size_t zero_index = 0; zero_index < 17; zero_index++) { 47 AvgPoolMicrokernelTester() 48 .pooling_elements(17) 49 .pooling_tile(9, 8) 50 .channels(8) 51 .input_offset(11) 52 .zero_index(zero_index) 53 
.Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 54 } 55 } 56 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_eq_8_twopass_fulltile_with_qmin)57 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_fulltile_with_qmin) { 58 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 59 AvgPoolMicrokernelTester() 60 .pooling_elements(17) 61 .pooling_tile(9, 8) 62 .channels(8) 63 .qmin(128) 64 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 65 } 66 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_eq_8_twopass_fulltile_with_qmax)67 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_fulltile_with_qmax) { 68 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 69 AvgPoolMicrokernelTester() 70 .pooling_elements(17) 71 .pooling_tile(9, 8) 72 .channels(8) 73 .qmax(128) 74 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 75 } 76 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_eq_8_twopass_subtile)77 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_subtile) { 78 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 79 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 80 AvgPoolMicrokernelTester() 81 .pooling_elements(pooling_elements) 82 .pooling_tile(9, 8) 83 .channels(8) 84 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 85 } 86 } 87 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_eq_8_twopass_subtile_with_input_offset)88 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_subtile_with_input_offset) { 89 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 90 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 91 AvgPoolMicrokernelTester() 92 .pooling_elements(pooling_elements) 93 .pooling_tile(9, 8) 94 .channels(8) 95 .input_offset(11) 96 
.Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 97 } 98 } 99 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_eq_8_twopass_subtile_with_zero)100 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_twopass_subtile_with_zero) { 101 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 102 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 103 for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) { 104 AvgPoolMicrokernelTester() 105 .pooling_elements(pooling_elements) 106 .pooling_tile(9, 8) 107 .channels(8) 108 .input_offset(11) 109 .zero_index(zero_index) 110 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 111 } 112 } 113 } 114 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_div_8_twopass_fulltile)115 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_fulltile) { 116 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 117 for (size_t channels = 16; channels < 64; channels += 8) { 118 AvgPoolMicrokernelTester() 119 .pooling_elements(17) 120 .pooling_tile(9, 8) 121 .channels(channels) 122 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 123 } 124 } 125 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_div_8_twopass_fulltile_with_input_offset)126 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_fulltile_with_input_offset) { 127 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 128 for (size_t channels = 16; channels < 64; channels += 8) { 129 AvgPoolMicrokernelTester() 130 .pooling_elements(17) 131 .pooling_tile(9, 8) 132 .channels(channels) 133 .input_offset(41) 134 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 135 } 136 } 137 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_div_8_twopass_fulltile_with_zero)138 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, 
channels_div_8_twopass_fulltile_with_zero) { 139 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 140 for (size_t channels = 16; channels < 64; channels += 8) { 141 for (size_t zero_index = 0; zero_index < 17; zero_index++) { 142 AvgPoolMicrokernelTester() 143 .pooling_elements(17) 144 .pooling_tile(9, 8) 145 .channels(channels) 146 .input_offset(41) 147 .zero_index(zero_index) 148 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 149 } 150 } 151 } 152 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_div_8_twopass_fulltile_with_qmin)153 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_fulltile_with_qmin) { 154 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 155 for (size_t channels = 16; channels < 64; channels += 8) { 156 AvgPoolMicrokernelTester() 157 .pooling_elements(17) 158 .pooling_tile(9, 8) 159 .channels(channels) 160 .qmin(128) 161 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 162 } 163 } 164 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_div_8_twopass_fulltile_with_qmax)165 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_fulltile_with_qmax) { 166 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 167 for (size_t channels = 16; channels < 64; channels += 8) { 168 AvgPoolMicrokernelTester() 169 .pooling_elements(17) 170 .pooling_tile(9, 8) 171 .channels(channels) 172 .qmax(128) 173 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 174 } 175 } 176 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_div_8_twopass_subtile)177 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_subtile) { 178 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 179 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 180 for (size_t channels = 16; channels < 64; channels += 8) { 181 AvgPoolMicrokernelTester() 182 .pooling_elements(pooling_elements) 183 .pooling_tile(9, 8) 184 
.channels(channels) 185 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 186 } 187 } 188 } 189 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_div_8_twopass_subtile_with_input_offset)190 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_subtile_with_input_offset) { 191 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 192 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 193 for (size_t channels = 16; channels < 64; channels += 8) { 194 AvgPoolMicrokernelTester() 195 .pooling_elements(pooling_elements) 196 .pooling_tile(9, 8) 197 .channels(channels) 198 .input_offset(67) 199 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 200 } 201 } 202 } 203 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_div_8_twopass_subtile_with_zero)204 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_twopass_subtile_with_zero) { 205 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 206 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 207 for (size_t channels = 16; channels < 64; channels += 8) { 208 for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) { 209 AvgPoolMicrokernelTester() 210 .pooling_elements(pooling_elements) 211 .pooling_tile(9, 8) 212 .channels(channels) 213 .input_offset(67) 214 .zero_index(zero_index) 215 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 216 } 217 } 218 } 219 } 220 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_lt_8_twopass_fulltile)221 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_fulltile) { 222 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 223 for (size_t channels = 1; channels < 8; channels++) { 224 AvgPoolMicrokernelTester() 225 .pooling_elements(17) 226 .pooling_tile(9, 8) 227 .channels(channels) 228 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, 
xnn_init_f16_minmax_neon_params); 229 } 230 } 231 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_lt_8_twopass_fulltile_with_input_offset)232 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_fulltile_with_input_offset) { 233 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 234 for (size_t channels = 1; channels < 8; channels++) { 235 AvgPoolMicrokernelTester() 236 .pooling_elements(17) 237 .pooling_tile(9, 8) 238 .channels(channels) 239 .input_offset(11) 240 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 241 } 242 } 243 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_lt_8_twopass_fulltile_with_zero_index)244 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_fulltile_with_zero_index) { 245 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 246 for (size_t channels = 1; channels < 8; channels++) { 247 for (size_t zero_index = 0; zero_index < 17; zero_index++) { 248 AvgPoolMicrokernelTester() 249 .pooling_elements(17) 250 .pooling_tile(9, 8) 251 .channels(channels) 252 .input_offset(11) 253 .zero_index(zero_index) 254 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 255 } 256 } 257 } 258 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_lt_8_twopass_fulltile_with_qmin)259 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_fulltile_with_qmin) { 260 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 261 for (size_t channels = 1; channels < 8; channels++) { 262 AvgPoolMicrokernelTester() 263 .pooling_elements(17) 264 .pooling_tile(9, 8) 265 .channels(channels) 266 .qmin(128) 267 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 268 } 269 } 270 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_lt_8_twopass_fulltile_with_qmax)271 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_fulltile_with_qmax) { 272 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 273 for (size_t channels = 
1; channels < 8; channels++) { 274 AvgPoolMicrokernelTester() 275 .pooling_elements(17) 276 .pooling_tile(9, 8) 277 .channels(channels) 278 .qmax(128) 279 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 280 } 281 } 282 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_lt_8_twopass_subtile)283 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_subtile) { 284 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 285 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 286 for (size_t channels = 1; channels < 8; channels++) { 287 AvgPoolMicrokernelTester() 288 .pooling_elements(pooling_elements) 289 .pooling_tile(9, 8) 290 .channels(channels) 291 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 292 } 293 } 294 } 295 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_lt_8_twopass_subtile_with_input_offset)296 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_subtile_with_input_offset) { 297 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 298 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 299 for (size_t channels = 1; channels < 8; channels++) { 300 AvgPoolMicrokernelTester() 301 .pooling_elements(pooling_elements) 302 .pooling_tile(9, 8) 303 .channels(channels) 304 .input_offset(11) 305 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 306 } 307 } 308 } 309 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_lt_8_twopass_subtile_with_zero)310 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_twopass_subtile_with_zero) { 311 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 312 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 313 for (size_t channels = 1; channels < 8; channels++) { 314 for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) { 315 AvgPoolMicrokernelTester() 316 
.pooling_elements(pooling_elements) 317 .pooling_tile(9, 8) 318 .channels(channels) 319 .input_offset(11) 320 .zero_index(zero_index) 321 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 322 } 323 } 324 } 325 } 326 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_gt_8_twopass_fulltile)327 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_fulltile) { 328 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 329 for (size_t channels = 9; channels < 16; channels++) { 330 AvgPoolMicrokernelTester() 331 .pooling_elements(17) 332 .pooling_tile(9, 8) 333 .channels(channels) 334 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 335 } 336 } 337 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_gt_8_twopass_fulltile_with_input_offset)338 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_fulltile_with_input_offset) { 339 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 340 for (size_t channels = 9; channels < 16; channels++) { 341 AvgPoolMicrokernelTester() 342 .pooling_elements(17) 343 .pooling_tile(9, 8) 344 .channels(channels) 345 .input_offset(17) 346 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 347 } 348 } 349 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_gt_8_twopass_fulltile_with_zero)350 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_fulltile_with_zero) { 351 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 352 for (size_t channels = 9; channels < 16; channels++) { 353 for (size_t zero_index = 0; zero_index < 17; zero_index++) { 354 AvgPoolMicrokernelTester() 355 .pooling_elements(17) 356 .pooling_tile(9, 8) 357 .channels(channels) 358 .input_offset(17) 359 .zero_index(zero_index) 360 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 361 } 362 } 363 } 364 
TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_gt_8_twopass_fulltile_with_qmin)365 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_fulltile_with_qmin) { 366 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 367 for (size_t channels = 9; channels < 16; channels++) { 368 AvgPoolMicrokernelTester() 369 .pooling_elements(17) 370 .pooling_tile(9, 8) 371 .channels(channels) 372 .qmin(128) 373 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 374 } 375 } 376 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_gt_8_twopass_fulltile_with_qmax)377 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_fulltile_with_qmax) { 378 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 379 for (size_t channels = 9; channels < 16; channels++) { 380 AvgPoolMicrokernelTester() 381 .pooling_elements(17) 382 .pooling_tile(9, 8) 383 .channels(channels) 384 .qmax(128) 385 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 386 } 387 } 388 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_gt_8_twopass_subtile)389 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_subtile) { 390 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 391 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 392 for (size_t channels = 9; channels < 16; channels++) { 393 AvgPoolMicrokernelTester() 394 .pooling_elements(pooling_elements) 395 .pooling_tile(9, 8) 396 .channels(channels) 397 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 398 } 399 } 400 } 401 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_gt_8_twopass_subtile_with_input_offset)402 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_subtile_with_input_offset) { 403 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 404 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 405 for (size_t channels = 9; channels < 16; 
channels++) { 406 AvgPoolMicrokernelTester() 407 .pooling_elements(pooling_elements) 408 .pooling_tile(9, 8) 409 .channels(channels) 410 .input_offset(17) 411 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 412 } 413 } 414 } 415 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_gt_8_twopass_subtile_with_zero)416 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_twopass_subtile_with_zero) { 417 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 418 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 419 for (size_t channels = 9; channels < 16; channels++) { 420 for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) { 421 AvgPoolMicrokernelTester() 422 .pooling_elements(pooling_elements) 423 .pooling_tile(9, 8) 424 .channels(channels) 425 .input_offset(17) 426 .zero_index(zero_index) 427 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 428 } 429 } 430 } 431 } 432 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_eq_8_multipass)433 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_multipass) { 434 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 435 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 436 AvgPoolMicrokernelTester() 437 .pooling_elements(pooling_elements) 438 .pooling_tile(9, 8) 439 .channels(8) 440 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 441 } 442 } 443 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_eq_8_multipass_with_input_offset)444 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_multipass_with_input_offset) { 445 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 446 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 447 AvgPoolMicrokernelTester() 448 .pooling_elements(pooling_elements) 449 .pooling_tile(9, 8) 450 .channels(8) 451 .input_offset(11) 452 
.Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 453 } 454 } 455 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_eq_8_multipass_with_zero)456 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_multipass_with_zero) { 457 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 458 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 459 for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) { 460 AvgPoolMicrokernelTester() 461 .pooling_elements(pooling_elements) 462 .pooling_tile(9, 8) 463 .channels(8) 464 .input_offset(11) 465 .zero_index(zero_index) 466 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 467 } 468 } 469 } 470 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_eq_8_multipass_with_qmin)471 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_multipass_with_qmin) { 472 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 473 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 474 AvgPoolMicrokernelTester() 475 .pooling_elements(pooling_elements) 476 .pooling_tile(9, 8) 477 .channels(8) 478 .qmin(128) 479 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 480 } 481 } 482 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_eq_8_multipass_with_qmax)483 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_eq_8_multipass_with_qmax) { 484 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 485 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 486 AvgPoolMicrokernelTester() 487 .pooling_elements(pooling_elements) 488 .pooling_tile(9, 8) 489 .channels(8) 490 .qmax(128) 491 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 492 } 493 } 494 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_div_8_multipass)495 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, 
channels_div_8_multipass) { 496 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 497 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 498 for (size_t channels = 16; channels < 64; channels += 8) { 499 AvgPoolMicrokernelTester() 500 .pooling_elements(pooling_elements) 501 .pooling_tile(9, 8) 502 .channels(channels) 503 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 504 } 505 } 506 } 507 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_div_8_multipass_with_input_offset)508 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_multipass_with_input_offset) { 509 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 510 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 511 for (size_t channels = 16; channels < 64; channels += 8) { 512 AvgPoolMicrokernelTester() 513 .pooling_elements(pooling_elements) 514 .pooling_tile(9, 8) 515 .channels(channels) 516 .input_offset(67) 517 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 518 } 519 } 520 } 521 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_div_8_multipass_with_zero)522 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_multipass_with_zero) { 523 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 524 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 525 for (size_t channels = 16; channels < 64; channels += 8) { 526 for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) { 527 AvgPoolMicrokernelTester() 528 .pooling_elements(pooling_elements) 529 .pooling_tile(9, 8) 530 .channels(channels) 531 .input_offset(67) 532 .zero_index(zero_index) 533 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 534 } 535 } 536 } 537 } 538 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_div_8_multipass_with_qmin)539 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, 
channels_div_8_multipass_with_qmin) { 540 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 541 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 542 for (size_t channels = 16; channels < 64; channels += 8) { 543 AvgPoolMicrokernelTester() 544 .pooling_elements(pooling_elements) 545 .pooling_tile(9, 8) 546 .channels(channels) 547 .qmin(128) 548 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 549 } 550 } 551 } 552 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_div_8_multipass_with_qmax)553 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_div_8_multipass_with_qmax) { 554 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 555 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 556 for (size_t channels = 16; channels < 64; channels += 8) { 557 AvgPoolMicrokernelTester() 558 .pooling_elements(pooling_elements) 559 .pooling_tile(9, 8) 560 .channels(channels) 561 .qmax(128) 562 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 563 } 564 } 565 } 566 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_lt_8_multipass)567 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_multipass) { 568 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 569 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 570 for (size_t channels = 1; channels < 8; channels++) { 571 AvgPoolMicrokernelTester() 572 .pooling_elements(pooling_elements) 573 .pooling_tile(9, 8) 574 .channels(channels) 575 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 576 } 577 } 578 } 579 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_lt_8_multipass_with_input_offset)580 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_multipass_with_input_offset) { 581 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 582 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 
583 for (size_t channels = 1; channels < 8; channels++) { 584 AvgPoolMicrokernelTester() 585 .pooling_elements(pooling_elements) 586 .pooling_tile(9, 8) 587 .channels(channels) 588 .input_offset(8) 589 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 590 } 591 } 592 } 593 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_lt_8_multipass_with_zero)594 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_multipass_with_zero) { 595 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 596 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 597 for (size_t channels = 1; channels < 8; channels++) { 598 for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) { 599 AvgPoolMicrokernelTester() 600 .pooling_elements(pooling_elements) 601 .pooling_tile(9, 8) 602 .channels(channels) 603 .input_offset(8) 604 .zero_index(zero_index) 605 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 606 } 607 } 608 } 609 } 610 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_lt_8_multipass_with_qmin)611 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_multipass_with_qmin) { 612 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 613 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 614 for (size_t channels = 1; channels < 8; channels++) { 615 AvgPoolMicrokernelTester() 616 .pooling_elements(pooling_elements) 617 .pooling_tile(9, 8) 618 .channels(channels) 619 .qmin(128) 620 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 621 } 622 } 623 } 624 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_lt_8_multipass_with_qmax)625 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_lt_8_multipass_with_qmax) { 626 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 627 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 628 for (size_t channels = 1; 
channels < 8; channels++) { 629 AvgPoolMicrokernelTester() 630 .pooling_elements(pooling_elements) 631 .pooling_tile(9, 8) 632 .channels(channels) 633 .qmax(128) 634 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 635 } 636 } 637 } 638 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_gt_8_multipass)639 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_multipass) { 640 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 641 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 642 for (size_t channels = 9; channels < 16; channels++) { 643 AvgPoolMicrokernelTester() 644 .pooling_elements(pooling_elements) 645 .pooling_tile(9, 8) 646 .channels(channels) 647 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 648 } 649 } 650 } 651 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_gt_8_multipass_with_input_offset)652 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_multipass_with_input_offset) { 653 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 654 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 655 for (size_t channels = 9; channels < 16; channels++) { 656 AvgPoolMicrokernelTester() 657 .pooling_elements(pooling_elements) 658 .pooling_tile(9, 8) 659 .channels(channels) 660 .input_offset(17) 661 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 662 } 663 } 664 } 665 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_gt_8_multipass_with_zero)666 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_multipass_with_zero) { 667 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 668 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 669 for (size_t channels = 9; channels < 16; channels++) { 670 for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) { 671 AvgPoolMicrokernelTester() 672 
.pooling_elements(pooling_elements) 673 .pooling_tile(9, 8) 674 .channels(channels) 675 .input_offset(17) 676 .zero_index(zero_index) 677 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 678 } 679 } 680 } 681 } 682 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_gt_8_multipass_with_qmin)683 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_multipass_with_qmin) { 684 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 685 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 686 for (size_t channels = 9; channels < 16; channels++) { 687 AvgPoolMicrokernelTester() 688 .pooling_elements(pooling_elements) 689 .pooling_tile(9, 8) 690 .channels(channels) 691 .qmin(128) 692 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 693 } 694 } 695 } 696 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,channels_gt_8_multipass_with_qmax)697 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, channels_gt_8_multipass_with_qmax) { 698 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 699 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 700 for (size_t channels = 9; channels < 16; channels++) { 701 AvgPoolMicrokernelTester() 702 .pooling_elements(pooling_elements) 703 .pooling_tile(9, 8) 704 .channels(channels) 705 .qmax(128) 706 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 707 } 708 } 709 } 710 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,few_output_pixels)711 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels) { 712 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 713 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 714 for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) { 715 for (size_t channels = 1; channels <= 40; channels += 7) { 716 AvgPoolMicrokernelTester() 717 .output_pixels(output_pixels) 718 .pooling_elements(pooling_elements) 719 
.pooling_tile(9, 8) 720 .channels(channels) 721 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 722 } 723 } 724 } 725 } 726 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,few_output_pixels_with_input_offset)727 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels_with_input_offset) { 728 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 729 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 730 for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) { 731 for (size_t channels = 1; channels <= 40; channels += 7) { 732 AvgPoolMicrokernelTester() 733 .output_pixels(output_pixels) 734 .pooling_elements(pooling_elements) 735 .pooling_tile(9, 8) 736 .channels(channels) 737 .input_offset(43) 738 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 739 } 740 } 741 } 742 } 743 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,few_output_pixels_with_zero)744 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels_with_zero) { 745 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 746 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 747 for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) { 748 for (size_t channels = 1; channels <= 40; channels += 7) { 749 for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) { 750 AvgPoolMicrokernelTester() 751 .output_pixels(output_pixels) 752 .pooling_elements(pooling_elements) 753 .pooling_tile(9, 8) 754 .channels(channels) 755 .input_offset(43) 756 .zero_index(zero_index) 757 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 758 } 759 } 760 } 761 } 762 } 763 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,few_output_pixels_with_qmin)764 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels_with_qmin) { 765 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 766 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 767 
for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) { 768 for (size_t channels = 1; channels <= 40; channels += 7) { 769 AvgPoolMicrokernelTester() 770 .output_pixels(output_pixels) 771 .pooling_elements(pooling_elements) 772 .pooling_tile(9, 8) 773 .channels(channels) 774 .qmin(128) 775 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 776 } 777 } 778 } 779 } 780 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,few_output_pixels_with_qmax)781 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels_with_qmax) { 782 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 783 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 784 for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) { 785 for (size_t channels = 1; channels <= 40; channels += 7) { 786 AvgPoolMicrokernelTester() 787 .output_pixels(output_pixels) 788 .pooling_elements(pooling_elements) 789 .pooling_tile(9, 8) 790 .channels(channels) 791 .qmax(128) 792 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 793 } 794 } 795 } 796 } 797 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,few_output_pixels_with_output_stride)798 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, few_output_pixels_with_output_stride) { 799 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 800 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 801 for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) { 802 for (size_t channels = 1; channels <= 40; channels += 7) { 803 AvgPoolMicrokernelTester() 804 .output_pixels(output_pixels) 805 .pooling_elements(pooling_elements) 806 .pooling_tile(9, 8) 807 .channels(channels) 808 .output_stride(43) 809 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 810 } 811 } 812 } 813 } 814 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8,few_output_pixels_with_step)815 TEST(F16_PAVGPOOL_MINMAX_9P8X__NEONFP16ARITH_C8, 
few_output_pixels_with_step) { 816 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 817 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 818 for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) { 819 for (size_t channels = 1; channels <= 40; channels += 7) { 820 for (size_t step = 2; step <= pooling_elements; step++) { 821 AvgPoolMicrokernelTester() 822 .output_pixels(output_pixels) 823 .pooling_elements(pooling_elements) 824 .pooling_tile(9, 8) 825 .step(step) 826 .channels(channels) 827 .output_stride(43) 828 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 829 } 830 } 831 } 832 } 833 } 834 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 835 836 837 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_eq_8_unipass_fulltile)838 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_eq_8_unipass_fulltile) { 839 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 840 AvgPoolMicrokernelTester() 841 .pooling_elements(9) 842 .pooling_tile(9) 843 .channels(8) 844 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 845 } 846 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_eq_8_unipass_fulltile_with_input_offset)847 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_eq_8_unipass_fulltile_with_input_offset) { 848 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 849 AvgPoolMicrokernelTester() 850 .pooling_elements(9) 851 .pooling_tile(9) 852 .channels(8) 853 .input_offset(11) 854 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 855 } 856 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_eq_8_unipass_fulltile_with_zero)857 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_eq_8_unipass_fulltile_with_zero) { 858 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 859 for (size_t zero_index = 0; zero_index < 9; zero_index++) { 860 AvgPoolMicrokernelTester() 861 .pooling_elements(9) 862 .pooling_tile(9) 863 
.channels(8) 864 .input_offset(11) 865 .zero_index(zero_index) 866 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 867 } 868 } 869 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_eq_8_unipass_fulltile_with_qmin)870 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_eq_8_unipass_fulltile_with_qmin) { 871 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 872 AvgPoolMicrokernelTester() 873 .pooling_elements(9) 874 .pooling_tile(9) 875 .channels(8) 876 .qmin(128) 877 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 878 } 879 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_eq_8_unipass_fulltile_with_qmax)880 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_eq_8_unipass_fulltile_with_qmax) { 881 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 882 AvgPoolMicrokernelTester() 883 .pooling_elements(9) 884 .pooling_tile(9) 885 .channels(8) 886 .qmax(128) 887 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 888 } 889 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_eq_8_unipass_subtile)890 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_eq_8_unipass_subtile) { 891 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 892 for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) { 893 AvgPoolMicrokernelTester() 894 .pooling_elements(pooling_elements) 895 .pooling_tile(9) 896 .channels(8) 897 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 898 } 899 } 900 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_eq_8_unipass_subtile_with_input_offset)901 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_eq_8_unipass_subtile_with_input_offset) { 902 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 903 for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) { 904 AvgPoolMicrokernelTester() 905 .pooling_elements(pooling_elements) 906 .pooling_tile(9) 907 .channels(8) 908 
.input_offset(11) 909 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 910 } 911 } 912 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_eq_8_unipass_subtile_with_zero)913 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_eq_8_unipass_subtile_with_zero) { 914 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 915 for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) { 916 for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) { 917 AvgPoolMicrokernelTester() 918 .pooling_elements(pooling_elements) 919 .pooling_tile(9) 920 .channels(8) 921 .input_offset(11) 922 .zero_index(zero_index) 923 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 924 } 925 } 926 } 927 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_div_8_unipass_fulltile)928 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_div_8_unipass_fulltile) { 929 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 930 for (size_t channels = 16; channels < 64; channels += 8) { 931 AvgPoolMicrokernelTester() 932 .pooling_elements(9) 933 .pooling_tile(9) 934 .channels(channels) 935 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 936 } 937 } 938 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_div_8_unipass_fulltile_with_input_offset)939 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_div_8_unipass_fulltile_with_input_offset) { 940 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 941 for (size_t channels = 16; channels < 64; channels += 8) { 942 AvgPoolMicrokernelTester() 943 .pooling_elements(9) 944 .pooling_tile(9) 945 .channels(channels) 946 .input_offset(67) 947 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 948 } 949 } 950 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_div_8_unipass_fulltile_with_zero)951 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_div_8_unipass_fulltile_with_zero) { 952 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 953 for (size_t channels = 16; channels < 64; channels += 8) { 954 for (size_t zero_index = 0; zero_index < 9; zero_index++) { 955 AvgPoolMicrokernelTester() 956 .pooling_elements(9) 957 .pooling_tile(9) 958 .channels(channels) 959 .input_offset(67) 960 .zero_index(zero_index) 961 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 962 } 963 } 964 } 965 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_div_8_unipass_fulltile_with_qmin)966 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_div_8_unipass_fulltile_with_qmin) { 967 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 968 for (size_t channels = 16; channels < 64; channels += 8) { 969 AvgPoolMicrokernelTester() 970 .pooling_elements(9) 971 .pooling_tile(9) 972 .channels(channels) 973 .qmin(128) 974 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 975 } 976 } 977 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_div_8_unipass_fulltile_with_qmax)978 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_div_8_unipass_fulltile_with_qmax) { 979 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 980 for (size_t channels = 16; channels < 64; channels += 8) { 981 AvgPoolMicrokernelTester() 982 .pooling_elements(9) 983 .pooling_tile(9) 984 .channels(channels) 985 .qmax(128) 986 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 987 } 988 } 989 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_div_8_unipass_subtile)990 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_div_8_unipass_subtile) { 991 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 992 for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) { 993 for (size_t channels = 16; channels < 64; channels += 8) { 994 AvgPoolMicrokernelTester() 995 .pooling_elements(pooling_elements) 996 .pooling_tile(9) 997 .channels(channels) 998 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, 
xnn_init_f16_minmax_neon_params); 999 } 1000 } 1001 } 1002 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_div_8_unipass_subtile_with_input_offset)1003 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_div_8_unipass_subtile_with_input_offset) { 1004 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1005 for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) { 1006 for (size_t channels = 16; channels < 64; channels += 8) { 1007 AvgPoolMicrokernelTester() 1008 .pooling_elements(pooling_elements) 1009 .pooling_tile(9) 1010 .channels(channels) 1011 .input_offset(67) 1012 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 1013 } 1014 } 1015 } 1016 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_div_8_unipass_subtile_with_zero)1017 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_div_8_unipass_subtile_with_zero) { 1018 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1019 for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) { 1020 for (size_t channels = 16; channels < 64; channels += 8) { 1021 for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) { 1022 AvgPoolMicrokernelTester() 1023 .pooling_elements(pooling_elements) 1024 .pooling_tile(9) 1025 .channels(channels) 1026 .input_offset(67) 1027 .zero_index(zero_index) 1028 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 1029 } 1030 } 1031 } 1032 } 1033 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,channels_lt_8_unipass_fulltile)1034 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_lt_8_unipass_fulltile) { 1035 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1036 for (size_t channels = 1; channels < 8; channels++) { 1037 AvgPoolMicrokernelTester() 1038 .pooling_elements(9) 1039 .pooling_tile(9) 1040 .channels(channels) 1041 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 1042 } 1043 } 1044 
// 9 pooling elements, channels 1..7, input offset 11.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_lt_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(num_channels)
      .input_offset(11)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}

// 9 pooling elements, channels 1..7, input offset 11, zero_index swept over 0..8.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_lt_8_unipass_fulltile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
    for (size_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
      AvgPoolMicrokernelTester()
        .pooling_elements(9)
        .pooling_tile(9)
        .channels(num_channels)
        .input_offset(11)
        .zero_index(zero_idx)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}

// 9 pooling elements, channels 1..7, qmin = 128.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_lt_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}

// 9 pooling elements, channels 1..7, qmax = 128.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_lt_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}

// Pooling sizes 2..8, channels 1..7.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_lt_8_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 2; pool_size < 9; ++pool_size) {
    for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pool_size)
        .pooling_tile(9)
        .channels(num_channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}

// Pooling sizes 2..8, channels 1..7, input offset 11.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_lt_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 2; pool_size < 9; ++pool_size) {
    for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pool_size)
        .pooling_tile(9)
        .channels(num_channels)
        .input_offset(11)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}

// Pooling sizes 2..8, channels 1..7, input offset 11, zero_index over every pooling element.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_lt_8_unipass_subtile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 2; pool_size < 9; ++pool_size) {
    for (size_t num_channels = 1; num_channels < 8; ++num_channels) {
      for (size_t zero_idx = 0; zero_idx < pool_size; ++zero_idx) {
        AvgPoolMicrokernelTester()
          .pooling_elements(pool_size)
          .pooling_tile(9)
          .channels(num_channels)
          .input_offset(11)
          .zero_index(zero_idx)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}

// 9 pooling elements, channels 9..15.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_gt_8_unipass_fulltile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(num_channels)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}

// 9 pooling elements, channels 9..15, input offset 17.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_gt_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(num_channels)
      .input_offset(17)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}

// 9 pooling elements, channels 9..15, input offset 17, zero_index swept over 0..8.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_gt_8_unipass_fulltile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
    for (size_t zero_idx = 0; zero_idx < 9; ++zero_idx) {
      AvgPoolMicrokernelTester()
        .pooling_elements(9)
        .pooling_tile(9)
        .channels(num_channels)
        .input_offset(17)
        .zero_index(zero_idx)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}

// 9 pooling elements, channels 9..15, qmin = 128.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_gt_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(num_channels)
      .qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}

// 9 pooling elements, channels 9..15, qmax = 128.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_gt_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(num_channels)
      .qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
  }
}

// Pooling sizes 2..8, channels 9..15.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_gt_8_unipass_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 2; pool_size < 9; ++pool_size) {
    for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pool_size)
        .pooling_tile(9)
        .channels(num_channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}

// Pooling sizes 2..8, channels 9..15, input offset 17.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_gt_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 2; pool_size < 9; ++pool_size) {
    for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pool_size)
        .pooling_tile(9)
        .channels(num_channels)
        .input_offset(17)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
    }
  }
}

// Pooling sizes 2..8, channels 9..15, input offset 17, zero_index over every pooling element.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, channels_gt_8_unipass_subtile_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t pool_size = 2; pool_size < 9; ++pool_size) {
    for (size_t num_channels = 9; num_channels < 16; ++num_channels) {
      for (size_t zero_idx = 0; zero_idx < pool_size; ++zero_idx) {
        AvgPoolMicrokernelTester()
          .pooling_elements(pool_size)
          .pooling_tile(9)
          .channels(num_channels)
          .input_offset(17)
          .zero_index(zero_idx)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}

// 2-5 output pixels x pooling sizes {2, 8, 9} x channels 1..36 (step 7), pooling_tile(9, 0).
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, few_output_pixels) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const std::vector<size_t> pool_sizes{2, 8, 9};
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (const size_t pool_size : pool_sizes) {
      for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(num_pixels)
          .pooling_elements(pool_size)
          .pooling_tile(9, 0)
          .channels(num_channels)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}

// 2-5 output pixels x pooling sizes {2, 8, 9} x channels 1..36 (step 7), input offset 43.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, few_output_pixels_with_input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const std::vector<size_t> pool_sizes{2, 8, 9};
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (const size_t pool_size : pool_sizes) {
      for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(num_pixels)
          .pooling_elements(pool_size)
          .pooling_tile(9, 0)
          .channels(num_channels)
          .input_offset(43)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}

// Same grid with input offset 43, additionally sweeping zero_index over every pooling element.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, few_output_pixels_with_zero) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const std::vector<size_t> pool_sizes{2, 8, 9};
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (const size_t pool_size : pool_sizes) {
      for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
        for (size_t zero_idx = 0; zero_idx < pool_size; ++zero_idx) {
          AvgPoolMicrokernelTester()
            .output_pixels(num_pixels)
            .pooling_elements(pool_size)
            .pooling_tile(9, 0)
            .channels(num_channels)
            .input_offset(43)
            .zero_index(zero_idx)
            .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
        }
      }
    }
  }
}

// 2-5 output pixels x pooling sizes {2, 8, 9} x channels 1..36 (step 7), qmin = 128.
TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, few_output_pixels_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const std::vector<size_t> pool_sizes{2, 8, 9};
  for (size_t num_pixels = 2; num_pixels <= 5; ++num_pixels) {
    for (const size_t pool_size : pool_sizes) {
      for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
        AvgPoolMicrokernelTester()
          .output_pixels(num_pixels)
          .pooling_elements(pool_size)
          .pooling_tile(9, 0)
          .channels(num_channels)
          .qmin(128)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params);
      }
    }
  }
}

TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, few_output_pixels_with_qmax) { 1317 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1318 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 1319 for (size_t pooling_elements : std::vector<size_t>{{2, 8, 9}}) { 1320 for (size_t channels = 1; channels <= 40; channels += 7) { 1321 AvgPoolMicrokernelTester() 1322 .output_pixels(output_pixels) 1323 .pooling_elements(pooling_elements) 1324 .pooling_tile(9, 0) 1325 .channels(channels) 1326 .qmax(128) 1327 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 1328 } 1329 } 1330 } 1331 } 1332 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,few_output_pixels_with_output_stride)1333 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, few_output_pixels_with_output_stride) { 1334 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1335 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 1336 for (size_t pooling_elements : std::vector<size_t>{{2, 8, 9}}) { 1337 for (size_t channels = 1; channels <= 40; channels += 7) { 1338 AvgPoolMicrokernelTester() 1339 .output_pixels(output_pixels) 1340 .pooling_elements(pooling_elements) 1341 .pooling_tile(9, 0) 1342 .channels(channels) 1343 .output_stride(43) 1344 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 1345 } 1346 } 1347 } 1348 } 1349 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8,few_output_pixels_with_step)1350 TEST(F16_PAVGPOOL_MINMAX_9X__NEONFP16ARITH_C8, few_output_pixels_with_step) { 1351 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1352 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 1353 for (size_t pooling_elements : std::vector<size_t>{{2, 8, 9}}) { 1354 for (size_t channels = 1; channels <= 40; channels += 7) { 1355 for (size_t step = 2; step <= pooling_elements; step++) { 1356 AvgPoolMicrokernelTester() 1357 .output_pixels(output_pixels) 1358 .pooling_elements(pooling_elements) 1359 .pooling_tile(9, 0) 1360 .step(step) 1361 
.channels(channels) 1362 .output_stride(43) 1363 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__neonfp16arith_c8, xnn_init_f16_minmax_neon_params); 1364 } 1365 } 1366 } 1367 } 1368 } 1369 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 1370 1371 1372 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_eq_8_twopass_fulltile)1373 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_twopass_fulltile) { 1374 TEST_REQUIRES_X86_AVX2; 1375 AvgPoolMicrokernelTester() 1376 .pooling_elements(17) 1377 .pooling_tile(9, 8) 1378 .channels(8) 1379 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1380 } 1381 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_eq_8_twopass_fulltile_with_input_offset)1382 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_twopass_fulltile_with_input_offset) { 1383 TEST_REQUIRES_X86_AVX2; 1384 AvgPoolMicrokernelTester() 1385 .pooling_elements(17) 1386 .pooling_tile(9, 8) 1387 .channels(8) 1388 .input_offset(11) 1389 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1390 } 1391 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_eq_8_twopass_fulltile_with_zero)1392 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_twopass_fulltile_with_zero) { 1393 TEST_REQUIRES_X86_AVX2; 1394 for (size_t zero_index = 0; zero_index < 17; zero_index++) { 1395 AvgPoolMicrokernelTester() 1396 .pooling_elements(17) 1397 .pooling_tile(9, 8) 1398 .channels(8) 1399 .input_offset(11) 1400 .zero_index(zero_index) 1401 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1402 } 1403 } 1404 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_eq_8_twopass_fulltile_with_qmin)1405 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_twopass_fulltile_with_qmin) { 1406 TEST_REQUIRES_X86_AVX2; 1407 AvgPoolMicrokernelTester() 1408 .pooling_elements(17) 1409 .pooling_tile(9, 8) 1410 .channels(8) 1411 .qmin(128) 1412 
.Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1413 } 1414 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_eq_8_twopass_fulltile_with_qmax)1415 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_twopass_fulltile_with_qmax) { 1416 TEST_REQUIRES_X86_AVX2; 1417 AvgPoolMicrokernelTester() 1418 .pooling_elements(17) 1419 .pooling_tile(9, 8) 1420 .channels(8) 1421 .qmax(128) 1422 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1423 } 1424 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_eq_8_twopass_subtile)1425 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_twopass_subtile) { 1426 TEST_REQUIRES_X86_AVX2; 1427 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 1428 AvgPoolMicrokernelTester() 1429 .pooling_elements(pooling_elements) 1430 .pooling_tile(9, 8) 1431 .channels(8) 1432 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1433 } 1434 } 1435 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_eq_8_twopass_subtile_with_input_offset)1436 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_twopass_subtile_with_input_offset) { 1437 TEST_REQUIRES_X86_AVX2; 1438 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 1439 AvgPoolMicrokernelTester() 1440 .pooling_elements(pooling_elements) 1441 .pooling_tile(9, 8) 1442 .channels(8) 1443 .input_offset(11) 1444 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1445 } 1446 } 1447 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_eq_8_twopass_subtile_with_zero)1448 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_twopass_subtile_with_zero) { 1449 TEST_REQUIRES_X86_AVX2; 1450 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 1451 for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) { 1452 AvgPoolMicrokernelTester() 1453 .pooling_elements(pooling_elements) 1454 
.pooling_tile(9, 8) 1455 .channels(8) 1456 .input_offset(11) 1457 .zero_index(zero_index) 1458 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1459 } 1460 } 1461 } 1462 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_div_8_twopass_fulltile)1463 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_twopass_fulltile) { 1464 TEST_REQUIRES_X86_AVX2; 1465 for (size_t channels = 16; channels < 64; channels += 8) { 1466 AvgPoolMicrokernelTester() 1467 .pooling_elements(17) 1468 .pooling_tile(9, 8) 1469 .channels(channels) 1470 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1471 } 1472 } 1473 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_div_8_twopass_fulltile_with_input_offset)1474 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_twopass_fulltile_with_input_offset) { 1475 TEST_REQUIRES_X86_AVX2; 1476 for (size_t channels = 16; channels < 64; channels += 8) { 1477 AvgPoolMicrokernelTester() 1478 .pooling_elements(17) 1479 .pooling_tile(9, 8) 1480 .channels(channels) 1481 .input_offset(41) 1482 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1483 } 1484 } 1485 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_div_8_twopass_fulltile_with_zero)1486 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_twopass_fulltile_with_zero) { 1487 TEST_REQUIRES_X86_AVX2; 1488 for (size_t channels = 16; channels < 64; channels += 8) { 1489 for (size_t zero_index = 0; zero_index < 17; zero_index++) { 1490 AvgPoolMicrokernelTester() 1491 .pooling_elements(17) 1492 .pooling_tile(9, 8) 1493 .channels(channels) 1494 .input_offset(41) 1495 .zero_index(zero_index) 1496 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1497 } 1498 } 1499 } 1500 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_div_8_twopass_fulltile_with_qmin)1501 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_twopass_fulltile_with_qmin) { 1502 TEST_REQUIRES_X86_AVX2; 
1503 for (size_t channels = 16; channels < 64; channels += 8) { 1504 AvgPoolMicrokernelTester() 1505 .pooling_elements(17) 1506 .pooling_tile(9, 8) 1507 .channels(channels) 1508 .qmin(128) 1509 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1510 } 1511 } 1512 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_div_8_twopass_fulltile_with_qmax)1513 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_twopass_fulltile_with_qmax) { 1514 TEST_REQUIRES_X86_AVX2; 1515 for (size_t channels = 16; channels < 64; channels += 8) { 1516 AvgPoolMicrokernelTester() 1517 .pooling_elements(17) 1518 .pooling_tile(9, 8) 1519 .channels(channels) 1520 .qmax(128) 1521 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1522 } 1523 } 1524 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_div_8_twopass_subtile)1525 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_twopass_subtile) { 1526 TEST_REQUIRES_X86_AVX2; 1527 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 1528 for (size_t channels = 16; channels < 64; channels += 8) { 1529 AvgPoolMicrokernelTester() 1530 .pooling_elements(pooling_elements) 1531 .pooling_tile(9, 8) 1532 .channels(channels) 1533 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1534 } 1535 } 1536 } 1537 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_div_8_twopass_subtile_with_input_offset)1538 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_twopass_subtile_with_input_offset) { 1539 TEST_REQUIRES_X86_AVX2; 1540 for (size_t pooling_elements = 10; pooling_elements < 17; pooling_elements++) { 1541 for (size_t channels = 16; channels < 64; channels += 8) { 1542 AvgPoolMicrokernelTester() 1543 .pooling_elements(pooling_elements) 1544 .pooling_tile(9, 8) 1545 .channels(channels) 1546 .input_offset(67) 1547 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1548 } 1549 } 1550 } 1551 
// Cases for xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8; every case below
// configures the tester with pooling_tile(9, 8).

// Pooling sizes 10..16, channels 16..56 in steps of 8, every zero_index below
// the pooling size, input_offset 67.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_twopass_subtile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 10; pooling_elements < 17; ++pooling_elements) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      for (size_t zero_index = 0; zero_index < pooling_elements; ++zero_index) {
        AvgPoolMicrokernelTester()
            .pooling_elements(pooling_elements)
            .pooling_tile(9, 8)
            .channels(channels)
            .input_offset(67)
            .zero_index(zero_index)
            .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}

// 17 pooling elements, channels 1..7.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_twopass_fulltile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels < 8; ++channels) {
    AvgPoolMicrokernelTester()
        .pooling_elements(17)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// 17 pooling elements, channels 1..7, input_offset 11.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels < 8; ++channels) {
    AvgPoolMicrokernelTester()
        .pooling_elements(17)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(11)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// 17 pooling elements, channels 1..7, every zero_index in [0, 17),
// input_offset 11.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_twopass_fulltile_with_zero_index) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels < 8; ++channels) {
    for (size_t zero_index = 0; zero_index < 17; ++zero_index) {
      AvgPoolMicrokernelTester()
          .pooling_elements(17)
          .pooling_tile(9, 8)
          .channels(channels)
          .input_offset(11)
          .zero_index(zero_index)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// 17 pooling elements, channels 1..7, qmin set to 128.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels < 8; ++channels) {
    AvgPoolMicrokernelTester()
        .pooling_elements(17)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmin(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// 17 pooling elements, channels 1..7, qmax set to 128.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels < 8; ++channels) {
    AvgPoolMicrokernelTester()
        .pooling_elements(17)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmax(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// Pooling sizes 10..16, channels 1..7.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_twopass_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 10; pooling_elements < 17; ++pooling_elements) {
    for (size_t channels = 1; channels < 8; ++channels) {
      AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// Pooling sizes 10..16, channels 1..7, input_offset 11.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 10; pooling_elements < 17; ++pooling_elements) {
    for (size_t channels = 1; channels < 8; ++channels) {
      AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .input_offset(11)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// Pooling sizes 10..16, channels 1..7, every zero_index below the pooling
// size, input_offset 11.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_twopass_subtile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 10; pooling_elements < 17; ++pooling_elements) {
    for (size_t channels = 1; channels < 8; ++channels) {
      for (size_t zero_index = 0; zero_index < pooling_elements; ++zero_index) {
        AvgPoolMicrokernelTester()
            .pooling_elements(pooling_elements)
            .pooling_tile(9, 8)
            .channels(channels)
            .input_offset(11)
            .zero_index(zero_index)
            .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}

// 17 pooling elements, channels 9..15.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_twopass_fulltile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 9; channels < 16; ++channels) {
    AvgPoolMicrokernelTester()
        .pooling_elements(17)
        .pooling_tile(9, 8)
        .channels(channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// 17 pooling elements, channels 9..15, input_offset 17.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_twopass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 9; channels < 16; ++channels) {
    AvgPoolMicrokernelTester()
        .pooling_elements(17)
        .pooling_tile(9, 8)
        .channels(channels)
        .input_offset(17)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// 17 pooling elements, channels 9..15, every zero_index in [0, 17),
// input_offset 17.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_twopass_fulltile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 9; channels < 16; ++channels) {
    for (size_t zero_index = 0; zero_index < 17; ++zero_index) {
      AvgPoolMicrokernelTester()
          .pooling_elements(17)
          .pooling_tile(9, 8)
          .channels(channels)
          .input_offset(17)
          .zero_index(zero_index)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// 17 pooling elements, channels 9..15, qmin set to 128.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_twopass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 9; channels < 16; ++channels) {
    AvgPoolMicrokernelTester()
        .pooling_elements(17)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmin(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// 17 pooling elements, channels 9..15, qmax set to 128.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_twopass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 9; channels < 16; ++channels) {
    AvgPoolMicrokernelTester()
        .pooling_elements(17)
        .pooling_tile(9, 8)
        .channels(channels)
        .qmax(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// Pooling sizes 10..16, channels 9..15.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_twopass_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 10; pooling_elements < 17; ++pooling_elements) {
    for (size_t channels = 9; channels < 16; ++channels) {
      AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// Pooling sizes 10..16, channels 9..15, input_offset 17.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_twopass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 10; pooling_elements < 17; ++pooling_elements) {
    for (size_t channels = 9; channels < 16; ++channels) {
      AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .input_offset(17)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// Pooling sizes 10..16, channels 9..15, every zero_index below the pooling
// size, input_offset 17.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_twopass_subtile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 10; pooling_elements < 17; ++pooling_elements) {
    for (size_t channels = 9; channels < 16; ++channels) {
      for (size_t zero_index = 0; zero_index < pooling_elements; ++zero_index) {
        AvgPoolMicrokernelTester()
            .pooling_elements(pooling_elements)
            .pooling_tile(9, 8)
            .channels(channels)
            .input_offset(17)
            .zero_index(zero_index)
            .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}

// Pooling sizes 18..33 in steps of 3, 8 channels.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_multipass) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(8)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// Pooling sizes 18..33 in steps of 3, 8 channels, input_offset 11.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_multipass_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(8)
        .input_offset(11)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// Pooling sizes 18..33 in steps of 3, 8 channels, every zero_index below the
// pooling size, input_offset 11.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_multipass_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t zero_index = 0; zero_index < pooling_elements; ++zero_index) {
      AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(8)
          .input_offset(11)
          .zero_index(zero_index)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// Pooling sizes 18..33 in steps of 3, 8 channels, qmin set to 128.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_multipass_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(8)
        .qmin(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// Pooling sizes 18..33 in steps of 3, 8 channels, qmax set to 128.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_eq_8_multipass_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9, 8)
        .channels(8)
        .qmax(128)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// Pooling sizes 18..33 in steps of 3, channels 16..56 in steps of 8.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_multipass) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// Pooling sizes 18..33 in steps of 3, channels 16..56 in steps of 8,
// input_offset 67.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_multipass_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .input_offset(67)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// Pooling sizes 18..33 in steps of 3, channels 16..56 in steps of 8, every
// zero_index below the pooling size, input_offset 67.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_multipass_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      for (size_t zero_index = 0; zero_index < pooling_elements; ++zero_index) {
        AvgPoolMicrokernelTester()
            .pooling_elements(pooling_elements)
            .pooling_tile(9, 8)
            .channels(channels)
            .input_offset(67)
            .zero_index(zero_index)
            .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}

// Pooling sizes 18..33 in steps of 3, channels 16..56 in steps of 8, qmin set
// to 128.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_multipass_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .qmin(128)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// Pooling sizes 18..33 in steps of 3, channels 16..56 in steps of 8, qmax set
// to 128.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_div_8_multipass_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .qmax(128)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// Pooling sizes 18..33 in steps of 3, channels 1..7.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_multipass) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 8; ++channels) {
      AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// Pooling sizes 18..33 in steps of 3, channels 1..7, input_offset 8.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_multipass_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 8; ++channels) {
      AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .input_offset(8)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// Pooling sizes 18..33 in steps of 3, channels 1..7, every zero_index below
// the pooling size, input_offset 8.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_multipass_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 8; ++channels) {
      for (size_t zero_index = 0; zero_index < pooling_elements; ++zero_index) {
        AvgPoolMicrokernelTester()
            .pooling_elements(pooling_elements)
            .pooling_tile(9, 8)
            .channels(channels)
            .input_offset(8)
            .zero_index(zero_index)
            .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}

// Pooling sizes 18..33 in steps of 3, channels 1..7, qmin set to 128.
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_multipass_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) {
    for (size_t channels = 1; channels < 8; ++channels) {
      AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9, 8)
          .channels(channels)
          .qmin(128)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}
TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_lt_8_multipass_with_qmax)1973 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_lt_8_multipass_with_qmax) { 1974 TEST_REQUIRES_X86_AVX2; 1975 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 1976 for (size_t channels = 1; channels < 8; channels++) { 1977 AvgPoolMicrokernelTester() 1978 .pooling_elements(pooling_elements) 1979 .pooling_tile(9, 8) 1980 .channels(channels) 1981 .qmax(128) 1982 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1983 } 1984 } 1985 } 1986 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_gt_8_multipass)1987 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_multipass) { 1988 TEST_REQUIRES_X86_AVX2; 1989 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 1990 for (size_t channels = 9; channels < 16; channels++) { 1991 AvgPoolMicrokernelTester() 1992 .pooling_elements(pooling_elements) 1993 .pooling_tile(9, 8) 1994 .channels(channels) 1995 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 1996 } 1997 } 1998 } 1999 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_gt_8_multipass_with_input_offset)2000 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_multipass_with_input_offset) { 2001 TEST_REQUIRES_X86_AVX2; 2002 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 2003 for (size_t channels = 9; channels < 16; channels++) { 2004 AvgPoolMicrokernelTester() 2005 .pooling_elements(pooling_elements) 2006 .pooling_tile(9, 8) 2007 .channels(channels) 2008 .input_offset(17) 2009 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 2010 } 2011 } 2012 } 2013 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_gt_8_multipass_with_zero)2014 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_multipass_with_zero) { 2015 TEST_REQUIRES_X86_AVX2; 2016 for (size_t pooling_elements = 18; pooling_elements <= 
33; pooling_elements += 3) { 2017 for (size_t channels = 9; channels < 16; channels++) { 2018 for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) { 2019 AvgPoolMicrokernelTester() 2020 .pooling_elements(pooling_elements) 2021 .pooling_tile(9, 8) 2022 .channels(channels) 2023 .input_offset(17) 2024 .zero_index(zero_index) 2025 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 2026 } 2027 } 2028 } 2029 } 2030 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_gt_8_multipass_with_qmin)2031 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_multipass_with_qmin) { 2032 TEST_REQUIRES_X86_AVX2; 2033 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 2034 for (size_t channels = 9; channels < 16; channels++) { 2035 AvgPoolMicrokernelTester() 2036 .pooling_elements(pooling_elements) 2037 .pooling_tile(9, 8) 2038 .channels(channels) 2039 .qmin(128) 2040 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 2041 } 2042 } 2043 } 2044 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,channels_gt_8_multipass_with_qmax)2045 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, channels_gt_8_multipass_with_qmax) { 2046 TEST_REQUIRES_X86_AVX2; 2047 for (size_t pooling_elements = 18; pooling_elements <= 33; pooling_elements += 3) { 2048 for (size_t channels = 9; channels < 16; channels++) { 2049 AvgPoolMicrokernelTester() 2050 .pooling_elements(pooling_elements) 2051 .pooling_tile(9, 8) 2052 .channels(channels) 2053 .qmax(128) 2054 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 2055 } 2056 } 2057 } 2058 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,few_output_pixels)2059 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, few_output_pixels) { 2060 TEST_REQUIRES_X86_AVX2; 2061 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 2062 for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) { 2063 for (size_t channels = 1; channels <= 40; 
channels += 7) { 2064 AvgPoolMicrokernelTester() 2065 .output_pixels(output_pixels) 2066 .pooling_elements(pooling_elements) 2067 .pooling_tile(9, 8) 2068 .channels(channels) 2069 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 2070 } 2071 } 2072 } 2073 } 2074 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,few_output_pixels_with_input_offset)2075 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, few_output_pixels_with_input_offset) { 2076 TEST_REQUIRES_X86_AVX2; 2077 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 2078 for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) { 2079 for (size_t channels = 1; channels <= 40; channels += 7) { 2080 AvgPoolMicrokernelTester() 2081 .output_pixels(output_pixels) 2082 .pooling_elements(pooling_elements) 2083 .pooling_tile(9, 8) 2084 .channels(channels) 2085 .input_offset(43) 2086 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 2087 } 2088 } 2089 } 2090 } 2091 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,few_output_pixels_with_zero)2092 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, few_output_pixels_with_zero) { 2093 TEST_REQUIRES_X86_AVX2; 2094 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 2095 for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) { 2096 for (size_t channels = 1; channels <= 40; channels += 7) { 2097 for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) { 2098 AvgPoolMicrokernelTester() 2099 .output_pixels(output_pixels) 2100 .pooling_elements(pooling_elements) 2101 .pooling_tile(9, 8) 2102 .channels(channels) 2103 .input_offset(43) 2104 .zero_index(zero_index) 2105 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 2106 } 2107 } 2108 } 2109 } 2110 } 2111 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,few_output_pixels_with_qmin)2112 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, few_output_pixels_with_qmin) { 2113 TEST_REQUIRES_X86_AVX2; 2114 for (size_t 
output_pixels = 2; output_pixels <= 5; output_pixels++) { 2115 for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) { 2116 for (size_t channels = 1; channels <= 40; channels += 7) { 2117 AvgPoolMicrokernelTester() 2118 .output_pixels(output_pixels) 2119 .pooling_elements(pooling_elements) 2120 .pooling_tile(9, 8) 2121 .channels(channels) 2122 .qmin(128) 2123 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 2124 } 2125 } 2126 } 2127 } 2128 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,few_output_pixels_with_qmax)2129 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, few_output_pixels_with_qmax) { 2130 TEST_REQUIRES_X86_AVX2; 2131 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 2132 for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) { 2133 for (size_t channels = 1; channels <= 40; channels += 7) { 2134 AvgPoolMicrokernelTester() 2135 .output_pixels(output_pixels) 2136 .pooling_elements(pooling_elements) 2137 .pooling_tile(9, 8) 2138 .channels(channels) 2139 .qmax(128) 2140 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 2141 } 2142 } 2143 } 2144 } 2145 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,few_output_pixels_with_output_stride)2146 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, few_output_pixels_with_output_stride) { 2147 TEST_REQUIRES_X86_AVX2; 2148 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 2149 for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) { 2150 for (size_t channels = 1; channels <= 40; channels += 7) { 2151 AvgPoolMicrokernelTester() 2152 .output_pixels(output_pixels) 2153 .pooling_elements(pooling_elements) 2154 .pooling_tile(9, 8) 2155 .channels(channels) 2156 .output_stride(43) 2157 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 2158 } 2159 } 2160 } 2161 } 2162 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8,few_output_pixels_with_step)2163 TEST(F16_PAVGPOOL_MINMAX_9P8X__AVX2_C8, 
few_output_pixels_with_step) { 2164 TEST_REQUIRES_X86_AVX2; 2165 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 2166 for (size_t pooling_elements : std::vector<size_t>{{10, 16, 18}}) { 2167 for (size_t channels = 1; channels <= 40; channels += 7) { 2168 for (size_t step = 2; step <= pooling_elements; step++) { 2169 AvgPoolMicrokernelTester() 2170 .output_pixels(output_pixels) 2171 .pooling_elements(pooling_elements) 2172 .pooling_tile(9, 8) 2173 .step(step) 2174 .channels(channels) 2175 .output_stride(43) 2176 .Test(xnn_f16_pavgpool_minmax_ukernel_9p8x__avx2_c8, xnn_init_f16_minmax_avx_params); 2177 } 2178 } 2179 } 2180 } 2181 } 2182 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 2183 2184 2185 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8,channels_eq_8_unipass_fulltile)2186 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_eq_8_unipass_fulltile) { 2187 TEST_REQUIRES_X86_AVX2; 2188 AvgPoolMicrokernelTester() 2189 .pooling_elements(9) 2190 .pooling_tile(9) 2191 .channels(8) 2192 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params); 2193 } 2194 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8,channels_eq_8_unipass_fulltile_with_input_offset)2195 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_eq_8_unipass_fulltile_with_input_offset) { 2196 TEST_REQUIRES_X86_AVX2; 2197 AvgPoolMicrokernelTester() 2198 .pooling_elements(9) 2199 .pooling_tile(9) 2200 .channels(8) 2201 .input_offset(11) 2202 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params); 2203 } 2204 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8,channels_eq_8_unipass_fulltile_with_zero)2205 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_eq_8_unipass_fulltile_with_zero) { 2206 TEST_REQUIRES_X86_AVX2; 2207 for (size_t zero_index = 0; zero_index < 9; zero_index++) { 2208 AvgPoolMicrokernelTester() 2209 .pooling_elements(9) 2210 .pooling_tile(9) 2211 .channels(8) 2212 .input_offset(11) 2213 .zero_index(zero_index) 2214 
.Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params); 2215 } 2216 } 2217 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8,channels_eq_8_unipass_fulltile_with_qmin)2218 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_eq_8_unipass_fulltile_with_qmin) { 2219 TEST_REQUIRES_X86_AVX2; 2220 AvgPoolMicrokernelTester() 2221 .pooling_elements(9) 2222 .pooling_tile(9) 2223 .channels(8) 2224 .qmin(128) 2225 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params); 2226 } 2227 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8,channels_eq_8_unipass_fulltile_with_qmax)2228 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_eq_8_unipass_fulltile_with_qmax) { 2229 TEST_REQUIRES_X86_AVX2; 2230 AvgPoolMicrokernelTester() 2231 .pooling_elements(9) 2232 .pooling_tile(9) 2233 .channels(8) 2234 .qmax(128) 2235 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params); 2236 } 2237 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8,channels_eq_8_unipass_subtile)2238 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_eq_8_unipass_subtile) { 2239 TEST_REQUIRES_X86_AVX2; 2240 for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) { 2241 AvgPoolMicrokernelTester() 2242 .pooling_elements(pooling_elements) 2243 .pooling_tile(9) 2244 .channels(8) 2245 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params); 2246 } 2247 } 2248 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8,channels_eq_8_unipass_subtile_with_input_offset)2249 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_eq_8_unipass_subtile_with_input_offset) { 2250 TEST_REQUIRES_X86_AVX2; 2251 for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) { 2252 AvgPoolMicrokernelTester() 2253 .pooling_elements(pooling_elements) 2254 .pooling_tile(9) 2255 .channels(8) 2256 .input_offset(11) 2257 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params); 2258 } 2259 } 2260 
// Single-pass ("unipass") cases for xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8.
// Every test starts with TEST_REQUIRES_X86_AVX2 (presumably a skip-if-unsupported
// guard from xnnpack/isa-checks.h -- confirm) and then runs the kernel through
// AvgPoolMicrokernelTester with pooling_tile(9).

// 8 channels, pooling_elements 2..8, input_offset 11, zero_index swept over the window.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_eq_8_unipass_subtile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9)
        .channels(8)
        .input_offset(11)
        .zero_index(zero_index)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// Channels 16, 24, ..., 56 with all 9 pooling elements.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_div_8_unipass_fulltile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 16; channels < 64; channels += 8) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// Channels 16, 24, ..., 56 with all 9 pooling elements and input_offset 67.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_div_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 16; channels < 64; channels += 8) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .input_offset(67)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// Channels 16, 24, ..., 56, 9 pooling elements, input_offset 67, zero_index 0..8.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_div_8_unipass_fulltile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 16; channels < 64; channels += 8) {
    for (size_t zero_index = 0; zero_index < 9; zero_index++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(9)
        .pooling_tile(9)
        .channels(channels)
        .input_offset(67)
        .zero_index(zero_index)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// Channels 16, 24, ..., 56, 9 pooling elements, qmin set to 128.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_div_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 16; channels < 64; channels += 8) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// Channels 16, 24, ..., 56, 9 pooling elements, qmax set to 128.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_div_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 16; channels < 64; channels += 8) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// pooling_elements 2..8 crossed with channels 16, 24, ..., 56.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_div_8_unipass_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9)
        .channels(channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// pooling_elements 2..8 crossed with channels 16, 24, ..., 56, input_offset 67.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_div_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9)
        .channels(channels)
        .input_offset(67)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// pooling_elements 2..8, channels 16, 24, ..., 56, input_offset 67,
// zero_index swept over every position below pooling_elements.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_div_8_unipass_subtile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 16; channels < 64; channels += 8) {
      for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) {
        AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9)
          .channels(channels)
          .input_offset(67)
          .zero_index(zero_index)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}

// Channels 1..7 with all 9 pooling elements.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_lt_8_unipass_fulltile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels < 8; channels++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// Channels 1..7 with all 9 pooling elements and input_offset 11.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_lt_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels < 8; channels++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .input_offset(11)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// Channels 1..7, 9 pooling elements, input_offset 11, zero_index 0..8.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_lt_8_unipass_fulltile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels < 8; channels++) {
    for (size_t zero_index = 0; zero_index < 9; zero_index++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(9)
        .pooling_tile(9)
        .channels(channels)
        .input_offset(11)
        .zero_index(zero_index)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// Channels 1..7, 9 pooling elements, qmin set to 128.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_lt_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels < 8; channels++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// Channels 1..7, 9 pooling elements, qmax set to 128.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_lt_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 1; channels < 8; channels++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// pooling_elements 2..8 crossed with channels 1..7.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_lt_8_unipass_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 1; channels < 8; channels++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9)
        .channels(channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// pooling_elements 2..8 crossed with channels 1..7, input_offset 11.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_lt_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 1; channels < 8; channels++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9)
        .channels(channels)
        .input_offset(11)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// pooling_elements 2..8, channels 1..7, input_offset 11,
// zero_index swept over every position below pooling_elements.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_lt_8_unipass_subtile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 1; channels < 8; channels++) {
      for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) {
        AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9)
          .channels(channels)
          .input_offset(11)
          .zero_index(zero_index)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}

// Channels 9..15 with all 9 pooling elements.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_gt_8_unipass_fulltile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 9; channels < 16; channels++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}
// Single-pass ("unipass") cases for xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8
// with channel counts 9..15. Every test starts with TEST_REQUIRES_X86_AVX2
// (presumably a skip-if-unsupported guard from xnnpack/isa-checks.h -- confirm).

// Channels 9..15 with all 9 pooling elements and input_offset 17.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_gt_8_unipass_fulltile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 9; channels < 16; channels++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .input_offset(17)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// Channels 9..15, 9 pooling elements, input_offset 17, zero_index 0..8.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_gt_8_unipass_fulltile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 9; channels < 16; channels++) {
    for (size_t zero_index = 0; zero_index < 9; zero_index++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(9)
        .pooling_tile(9)
        .channels(channels)
        .input_offset(17)
        .zero_index(zero_index)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// Channels 9..15, 9 pooling elements, qmin set to 128.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_gt_8_unipass_fulltile_with_qmin) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 9; channels < 16; channels++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .qmin(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// Channels 9..15, 9 pooling elements, qmax set to 128.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_gt_8_unipass_fulltile_with_qmax) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t channels = 9; channels < 16; channels++) {
    AvgPoolMicrokernelTester()
      .pooling_elements(9)
      .pooling_tile(9)
      .channels(channels)
      .qmax(128)
      .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
  }
}

// pooling_elements 2..8 crossed with channels 9..15.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_gt_8_unipass_subtile) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 9; channels < 16; channels++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9)
        .channels(channels)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// pooling_elements 2..8 crossed with channels 9..15, input_offset 17.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_gt_8_unipass_subtile_with_input_offset) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 9; channels < 16; channels++) {
      AvgPoolMicrokernelTester()
        .pooling_elements(pooling_elements)
        .pooling_tile(9)
        .channels(channels)
        .input_offset(17)
        .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
    }
  }
}

// pooling_elements 2..8, channels 9..15, input_offset 17,
// zero_index swept over every position below pooling_elements.
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, channels_gt_8_unipass_subtile_with_zero) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t pooling_elements = 2; pooling_elements < 9; pooling_elements++) {
    for (size_t channels = 9; channels < 16; channels++) {
      for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) {
        AvgPoolMicrokernelTester()
          .pooling_elements(pooling_elements)
          .pooling_tile(9)
          .channels(channels)
          .input_offset(17)
          .zero_index(zero_index)
          .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params);
      }
    }
  }
}
TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8,few_output_pixels)2594 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, few_output_pixels) { 2595 TEST_REQUIRES_X86_AVX2; 2596 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 2597 for (size_t pooling_elements : std::vector<size_t>{{2, 8, 9}}) { 2598 for (size_t channels = 1; channels <= 40; channels += 7) { 2599 AvgPoolMicrokernelTester() 2600 .output_pixels(output_pixels) 2601 .pooling_elements(pooling_elements) 2602 .pooling_tile(9, 0) 2603 .channels(channels) 2604 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params); 2605 } 2606 } 2607 } 2608 } 2609 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8,few_output_pixels_with_input_offset)2610 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, few_output_pixels_with_input_offset) { 2611 TEST_REQUIRES_X86_AVX2; 2612 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 2613 for (size_t pooling_elements : std::vector<size_t>{{2, 8, 9}}) { 2614 for (size_t channels = 1; channels <= 40; channels += 7) { 2615 AvgPoolMicrokernelTester() 2616 .output_pixels(output_pixels) 2617 .pooling_elements(pooling_elements) 2618 .pooling_tile(9, 0) 2619 .channels(channels) 2620 .input_offset(43) 2621 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params); 2622 } 2623 } 2624 } 2625 } 2626 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8,few_output_pixels_with_zero)2627 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, few_output_pixels_with_zero) { 2628 TEST_REQUIRES_X86_AVX2; 2629 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 2630 for (size_t pooling_elements : std::vector<size_t>{{2, 8, 9}}) { 2631 for (size_t channels = 1; channels <= 40; channels += 7) { 2632 for (size_t zero_index = 0; zero_index < pooling_elements; zero_index++) { 2633 AvgPoolMicrokernelTester() 2634 .output_pixels(output_pixels) 2635 .pooling_elements(pooling_elements) 2636 .pooling_tile(9, 0) 2637 .channels(channels) 2638 .input_offset(43) 2639 .zero_index(zero_index) 
2640 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params); 2641 } 2642 } 2643 } 2644 } 2645 } 2646 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8,few_output_pixels_with_qmin)2647 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, few_output_pixels_with_qmin) { 2648 TEST_REQUIRES_X86_AVX2; 2649 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 2650 for (size_t pooling_elements : std::vector<size_t>{{2, 8, 9}}) { 2651 for (size_t channels = 1; channels <= 40; channels += 7) { 2652 AvgPoolMicrokernelTester() 2653 .output_pixels(output_pixels) 2654 .pooling_elements(pooling_elements) 2655 .pooling_tile(9, 0) 2656 .channels(channels) 2657 .qmin(128) 2658 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params); 2659 } 2660 } 2661 } 2662 } 2663 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8,few_output_pixels_with_qmax)2664 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, few_output_pixels_with_qmax) { 2665 TEST_REQUIRES_X86_AVX2; 2666 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 2667 for (size_t pooling_elements : std::vector<size_t>{{2, 8, 9}}) { 2668 for (size_t channels = 1; channels <= 40; channels += 7) { 2669 AvgPoolMicrokernelTester() 2670 .output_pixels(output_pixels) 2671 .pooling_elements(pooling_elements) 2672 .pooling_tile(9, 0) 2673 .channels(channels) 2674 .qmax(128) 2675 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params); 2676 } 2677 } 2678 } 2679 } 2680 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8,few_output_pixels_with_output_stride)2681 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, few_output_pixels_with_output_stride) { 2682 TEST_REQUIRES_X86_AVX2; 2683 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 2684 for (size_t pooling_elements : std::vector<size_t>{{2, 8, 9}}) { 2685 for (size_t channels = 1; channels <= 40; channels += 7) { 2686 AvgPoolMicrokernelTester() 2687 .output_pixels(output_pixels) 2688 .pooling_elements(pooling_elements) 2689 .pooling_tile(9, 
0) 2690 .channels(channels) 2691 .output_stride(43) 2692 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params); 2693 } 2694 } 2695 } 2696 } 2697 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8,few_output_pixels_with_step)2698 TEST(F16_PAVGPOOL_MINMAX_9X__AVX2_C8, few_output_pixels_with_step) { 2699 TEST_REQUIRES_X86_AVX2; 2700 for (size_t output_pixels = 2; output_pixels <= 5; output_pixels++) { 2701 for (size_t pooling_elements : std::vector<size_t>{{2, 8, 9}}) { 2702 for (size_t channels = 1; channels <= 40; channels += 7) { 2703 for (size_t step = 2; step <= pooling_elements; step++) { 2704 AvgPoolMicrokernelTester() 2705 .output_pixels(output_pixels) 2706 .pooling_elements(pooling_elements) 2707 .pooling_tile(9, 0) 2708 .step(step) 2709 .channels(channels) 2710 .output_stride(43) 2711 .Test(xnn_f16_pavgpool_minmax_ukernel_9x__avx2_c8, xnn_init_f16_minmax_avx_params); 2712 } 2713 } 2714 } 2715 } 2716 } 2717 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 2718