1 // Copyright 2019 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 // 6 // Auto-generated file. Do not edit! 7 // Specification: test/f16-prelu.yaml 8 // Generator: tools/generate-prelu-test.py 9 10 11 #include <gtest/gtest.h> 12 13 #include <xnnpack/common.h> 14 #include <xnnpack/isa-checks.h> 15 16 #include <xnnpack/prelu.h> 17 #include "prelu-microkernel-tester.h" 18 19 20 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_PRELU__NEONFP16ARITH_2X8,channels_eq_8)21 TEST(F16_PRELU__NEONFP16ARITH_2X8, channels_eq_8) { 22 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 23 PReLUMicrokernelTester() 24 .rows(2) 25 .channels(8) 26 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8); 27 } 28 TEST(F16_PRELU__NEONFP16ARITH_2X8,channels_div_8)29 TEST(F16_PRELU__NEONFP16ARITH_2X8, channels_div_8) { 30 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 31 for (size_t channels = 16; channels < 80; channels += 8) { 32 PReLUMicrokernelTester() 33 .rows(2) 34 .channels(channels) 35 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8); 36 } 37 } 38 TEST(F16_PRELU__NEONFP16ARITH_2X8,channels_lt_8)39 TEST(F16_PRELU__NEONFP16ARITH_2X8, channels_lt_8) { 40 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 41 for (size_t channels = 1; channels < 8; channels++) { 42 PReLUMicrokernelTester() 43 .rows(2) 44 .channels(channels) 45 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8); 46 } 47 } 48 TEST(F16_PRELU__NEONFP16ARITH_2X8,channels_gt_8)49 TEST(F16_PRELU__NEONFP16ARITH_2X8, channels_gt_8) { 50 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 51 for (size_t channels = 9; channels < 16; channels++) { 52 PReLUMicrokernelTester() 53 .rows(2) 54 .channels(channels) 55 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8); 56 } 57 } 58 TEST(F16_PRELU__NEONFP16ARITH_2X8,rows_lt_2)59 TEST(F16_PRELU__NEONFP16ARITH_2X8, rows_lt_2) { 60 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 61 for (size_t rows = 1; rows < 2; rows++) { 62 for (size_t channels = 1; channels <= 40; channels += 7) { 63 PReLUMicrokernelTester() 64 .rows(rows) 65 .channels(channels) 66 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8); 67 } 68 } 69 } 70 TEST(F16_PRELU__NEONFP16ARITH_2X8,rows_div_2)71 TEST(F16_PRELU__NEONFP16ARITH_2X8, rows_div_2) { 72 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 73 for (size_t rows = 4; rows <= 8; rows += 2) { 74 for (size_t channels = 1; channels <= 40; channels += 7) { 75 PReLUMicrokernelTester() 76 .rows(rows) 77 .channels(channels) 78 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8); 79 } 80 } 81 } 82 TEST(F16_PRELU__NEONFP16ARITH_2X8,rows_gt_2)83 TEST(F16_PRELU__NEONFP16ARITH_2X8, rows_gt_2) { 84 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 85 for (size_t rows = 3; rows < 4; rows++) { 86 for (size_t channels = 1; channels <= 40; channels += 7) { 87 PReLUMicrokernelTester() 88 .rows(rows) 89 .channels(channels) 90 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8); 91 } 92 } 93 } 94 TEST(F16_PRELU__NEONFP16ARITH_2X8,input_stride)95 TEST(F16_PRELU__NEONFP16ARITH_2X8, input_stride) { 96 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 97 for (size_t rows = 1; rows <= 6; rows += 1) { 98 for (size_t channels = 1; channels <= 40; channels += 7) { 99 PReLUMicrokernelTester() 100 .rows(rows) 101 .channels(channels) 102 .input_stride(43) 103 .iterations(1) 104 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8); 105 } 106 } 107 } 108 TEST(F16_PRELU__NEONFP16ARITH_2X8,output_stride)109 TEST(F16_PRELU__NEONFP16ARITH_2X8, output_stride) { 110 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 111 for (size_t rows = 1; rows <= 6; rows += 1) { 112 for (size_t channels = 1; channels <= 40; channels += 7) { 113 PReLUMicrokernelTester() 114 .rows(rows) 115 .channels(channels) 116 .output_stride(43) 117 .iterations(1) 118 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8); 119 } 120 } 121 } 122 TEST(F16_PRELU__NEONFP16ARITH_2X8,inplace)123 TEST(F16_PRELU__NEONFP16ARITH_2X8, inplace) { 124 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 125 for (size_t rows = 1; rows <= 6; rows += 1) { 126 for (size_t channels = 1; channels <= 40; channels += 7) { 127 PReLUMicrokernelTester() 128 .rows(rows) 129 .channels(channels) 130 .inplace(true) 131 .iterations(1) 132 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x8); 133 } 134 } 135 } 136 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 137 138 139 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_PRELU__NEONFP16ARITH_2X16,channels_eq_16)140 TEST(F16_PRELU__NEONFP16ARITH_2X16, channels_eq_16) { 141 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 142 PReLUMicrokernelTester() 143 .rows(2) 144 .channels(16) 145 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16); 146 } 147 TEST(F16_PRELU__NEONFP16ARITH_2X16,channels_div_16)148 TEST(F16_PRELU__NEONFP16ARITH_2X16, channels_div_16) { 149 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 150 for (size_t channels = 32; channels < 160; channels += 16) { 151 PReLUMicrokernelTester() 152 .rows(2) 153 .channels(channels) 154 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16); 155 } 156 } 157 TEST(F16_PRELU__NEONFP16ARITH_2X16,channels_lt_16)158 TEST(F16_PRELU__NEONFP16ARITH_2X16, channels_lt_16) { 159 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 160 for (size_t channels = 1; channels < 16; channels++) { 161 PReLUMicrokernelTester() 162 .rows(2) 163 .channels(channels) 164 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16); 165 } 166 } 167 TEST(F16_PRELU__NEONFP16ARITH_2X16,channels_gt_16)168 TEST(F16_PRELU__NEONFP16ARITH_2X16, channels_gt_16) { 169 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 170 for (size_t channels = 17; channels < 32; channels++) { 171 PReLUMicrokernelTester() 172 .rows(2) 173 .channels(channels) 174 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16); 175 } 176 } 177 TEST(F16_PRELU__NEONFP16ARITH_2X16,rows_lt_2)178 TEST(F16_PRELU__NEONFP16ARITH_2X16, rows_lt_2) { 179 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 180 for (size_t rows = 1; rows < 2; rows++) { 181 for (size_t channels = 1; channels <= 80; channels += 15) { 182 PReLUMicrokernelTester() 183 .rows(rows) 184 .channels(channels) 185 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16); 186 } 187 } 188 } 189 TEST(F16_PRELU__NEONFP16ARITH_2X16,rows_div_2)190 TEST(F16_PRELU__NEONFP16ARITH_2X16, rows_div_2) { 191 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 192 for (size_t rows = 4; rows <= 8; rows += 2) { 193 for (size_t channels = 1; channels <= 80; channels += 15) { 194 PReLUMicrokernelTester() 195 .rows(rows) 196 .channels(channels) 197 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16); 198 } 199 } 200 } 201 TEST(F16_PRELU__NEONFP16ARITH_2X16,rows_gt_2)202 TEST(F16_PRELU__NEONFP16ARITH_2X16, rows_gt_2) { 203 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 204 for (size_t rows = 3; rows < 4; rows++) { 205 for (size_t channels = 1; channels <= 80; channels += 15) { 206 PReLUMicrokernelTester() 207 .rows(rows) 208 .channels(channels) 209 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16); 210 } 211 } 212 } 213 TEST(F16_PRELU__NEONFP16ARITH_2X16,input_stride)214 TEST(F16_PRELU__NEONFP16ARITH_2X16, input_stride) { 215 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 216 for (size_t rows = 1; rows <= 6; rows += 1) { 217 for (size_t channels = 1; channels <= 80; channels += 15) { 218 PReLUMicrokernelTester() 219 .rows(rows) 220 .channels(channels) 221 .input_stride(83) 222 .iterations(1) 223 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16); 224 } 225 } 226 } 227 TEST(F16_PRELU__NEONFP16ARITH_2X16,output_stride)228 TEST(F16_PRELU__NEONFP16ARITH_2X16, output_stride) { 229 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 230 for (size_t rows = 1; rows <= 6; rows += 1) { 231 for (size_t channels = 1; channels <= 80; channels += 15) { 232 PReLUMicrokernelTester() 233 .rows(rows) 234 .channels(channels) 235 .output_stride(83) 236 .iterations(1) 237 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16); 238 } 239 } 240 } 241 TEST(F16_PRELU__NEONFP16ARITH_2X16,inplace)242 TEST(F16_PRELU__NEONFP16ARITH_2X16, inplace) { 243 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 244 for (size_t rows = 1; rows <= 6; rows += 1) { 245 for (size_t channels = 1; channels <= 80; channels += 15) { 246 PReLUMicrokernelTester() 247 .rows(rows) 248 .channels(channels) 249 .inplace(true) 250 .iterations(1) 251 .Test(xnn_f16_prelu_ukernel__neonfp16arith_2x16); 252 } 253 } 254 } 255 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 256 257 258 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_PRELU__F16C_2X8,channels_eq_8)259 TEST(F16_PRELU__F16C_2X8, channels_eq_8) { 260 TEST_REQUIRES_X86_F16C; 261 PReLUMicrokernelTester() 262 .rows(2) 263 .channels(8) 264 .Test(xnn_f16_prelu_ukernel__f16c_2x8); 265 } 266 TEST(F16_PRELU__F16C_2X8,channels_div_8)267 TEST(F16_PRELU__F16C_2X8, channels_div_8) { 268 TEST_REQUIRES_X86_F16C; 269 for (size_t channels = 16; channels < 80; channels += 8) { 270 PReLUMicrokernelTester() 271 .rows(2) 272 .channels(channels) 273 .Test(xnn_f16_prelu_ukernel__f16c_2x8); 274 } 275 } 276 TEST(F16_PRELU__F16C_2X8,channels_lt_8)277 TEST(F16_PRELU__F16C_2X8, channels_lt_8) { 278 TEST_REQUIRES_X86_F16C; 279 for (size_t channels = 1; channels < 8; channels++) { 280 PReLUMicrokernelTester() 281 .rows(2) 282 .channels(channels) 283 .Test(xnn_f16_prelu_ukernel__f16c_2x8); 284 } 285 } 286 TEST(F16_PRELU__F16C_2X8,channels_gt_8)287 TEST(F16_PRELU__F16C_2X8, channels_gt_8) { 288 TEST_REQUIRES_X86_F16C; 289 for (size_t channels = 9; channels < 16; channels++) { 290 PReLUMicrokernelTester() 291 .rows(2) 292 .channels(channels) 293 .Test(xnn_f16_prelu_ukernel__f16c_2x8); 294 } 295 } 296 TEST(F16_PRELU__F16C_2X8,rows_lt_2)297 TEST(F16_PRELU__F16C_2X8, rows_lt_2) { 298 TEST_REQUIRES_X86_F16C; 299 for (size_t rows = 1; rows < 2; rows++) { 300 for (size_t channels = 1; channels <= 40; channels += 7) { 301 PReLUMicrokernelTester() 302 .rows(rows) 303 .channels(channels) 304 .Test(xnn_f16_prelu_ukernel__f16c_2x8); 305 } 306 } 307 } 308 TEST(F16_PRELU__F16C_2X8,rows_div_2)309 TEST(F16_PRELU__F16C_2X8, rows_div_2) { 310 TEST_REQUIRES_X86_F16C; 311 for (size_t rows = 4; rows <= 8; rows += 2) { 312 for (size_t channels = 1; channels <= 40; channels += 7) { 313 PReLUMicrokernelTester() 314 .rows(rows) 315 .channels(channels) 316 .Test(xnn_f16_prelu_ukernel__f16c_2x8); 317 } 318 } 319 } 320 TEST(F16_PRELU__F16C_2X8,rows_gt_2)321 TEST(F16_PRELU__F16C_2X8, rows_gt_2) { 322 TEST_REQUIRES_X86_F16C; 323 for (size_t rows = 3; rows < 4; rows++) { 324 for (size_t channels = 1; channels <= 40; channels += 7) { 325 PReLUMicrokernelTester() 326 .rows(rows) 327 .channels(channels) 328 .Test(xnn_f16_prelu_ukernel__f16c_2x8); 329 } 330 } 331 } 332 TEST(F16_PRELU__F16C_2X8,input_stride)333 TEST(F16_PRELU__F16C_2X8, input_stride) { 334 TEST_REQUIRES_X86_F16C; 335 for (size_t rows = 1; rows <= 6; rows += 1) { 336 for (size_t channels = 1; channels <= 40; channels += 7) { 337 PReLUMicrokernelTester() 338 .rows(rows) 339 .channels(channels) 340 .input_stride(43) 341 .iterations(1) 342 .Test(xnn_f16_prelu_ukernel__f16c_2x8); 343 } 344 } 345 } 346 TEST(F16_PRELU__F16C_2X8,output_stride)347 TEST(F16_PRELU__F16C_2X8, output_stride) { 348 TEST_REQUIRES_X86_F16C; 349 for (size_t rows = 1; rows <= 6; rows += 1) { 350 for (size_t channels = 1; channels <= 40; channels += 7) { 351 PReLUMicrokernelTester() 352 .rows(rows) 353 .channels(channels) 354 .output_stride(43) 355 .iterations(1) 356 .Test(xnn_f16_prelu_ukernel__f16c_2x8); 357 } 358 } 359 } 360 TEST(F16_PRELU__F16C_2X8,inplace)361 TEST(F16_PRELU__F16C_2X8, inplace) { 362 TEST_REQUIRES_X86_F16C; 363 for (size_t rows = 1; rows <= 6; rows += 1) { 364 for (size_t channels = 1; channels <= 40; channels += 7) { 365 PReLUMicrokernelTester() 366 .rows(rows) 367 .channels(channels) 368 .inplace(true) 369 .iterations(1) 370 .Test(xnn_f16_prelu_ukernel__f16c_2x8); 371 } 372 } 373 } 374 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 375 376 377 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_PRELU__F16C_2X16,channels_eq_16)378 TEST(F16_PRELU__F16C_2X16, channels_eq_16) { 379 TEST_REQUIRES_X86_F16C; 380 PReLUMicrokernelTester() 381 .rows(2) 382 .channels(16) 383 .Test(xnn_f16_prelu_ukernel__f16c_2x16); 384 } 385 TEST(F16_PRELU__F16C_2X16,channels_div_16)386 TEST(F16_PRELU__F16C_2X16, channels_div_16) { 387 TEST_REQUIRES_X86_F16C; 388 for (size_t channels = 32; channels < 160; channels += 16) { 389 PReLUMicrokernelTester() 390 .rows(2) 391 .channels(channels) 392 .Test(xnn_f16_prelu_ukernel__f16c_2x16); 393 } 394 } 395 TEST(F16_PRELU__F16C_2X16,channels_lt_16)396 TEST(F16_PRELU__F16C_2X16, channels_lt_16) { 397 TEST_REQUIRES_X86_F16C; 398 for (size_t channels = 1; channels < 16; channels++) { 399 PReLUMicrokernelTester() 400 .rows(2) 401 .channels(channels) 402 .Test(xnn_f16_prelu_ukernel__f16c_2x16); 403 } 404 } 405 TEST(F16_PRELU__F16C_2X16,channels_gt_16)406 TEST(F16_PRELU__F16C_2X16, channels_gt_16) { 407 TEST_REQUIRES_X86_F16C; 408 for (size_t channels = 17; channels < 32; channels++) { 409 PReLUMicrokernelTester() 410 .rows(2) 411 .channels(channels) 412 .Test(xnn_f16_prelu_ukernel__f16c_2x16); 413 } 414 } 415 TEST(F16_PRELU__F16C_2X16,rows_lt_2)416 TEST(F16_PRELU__F16C_2X16, rows_lt_2) { 417 TEST_REQUIRES_X86_F16C; 418 for (size_t rows = 1; rows < 2; rows++) { 419 for (size_t channels = 1; channels <= 80; channels += 15) { 420 PReLUMicrokernelTester() 421 .rows(rows) 422 .channels(channels) 423 .Test(xnn_f16_prelu_ukernel__f16c_2x16); 424 } 425 } 426 } 427 TEST(F16_PRELU__F16C_2X16,rows_div_2)428 TEST(F16_PRELU__F16C_2X16, rows_div_2) { 429 TEST_REQUIRES_X86_F16C; 430 for (size_t rows = 4; rows <= 8; rows += 2) { 431 for (size_t channels = 1; channels <= 80; channels += 15) { 432 PReLUMicrokernelTester() 433 .rows(rows) 434 .channels(channels) 435 .Test(xnn_f16_prelu_ukernel__f16c_2x16); 436 } 437 } 438 } 439 TEST(F16_PRELU__F16C_2X16,rows_gt_2)440 TEST(F16_PRELU__F16C_2X16, rows_gt_2) { 441 TEST_REQUIRES_X86_F16C; 442 for (size_t rows = 3; rows < 4; rows++) { 443 for (size_t channels = 1; channels <= 80; channels += 15) { 444 PReLUMicrokernelTester() 445 .rows(rows) 446 .channels(channels) 447 .Test(xnn_f16_prelu_ukernel__f16c_2x16); 448 } 449 } 450 } 451 TEST(F16_PRELU__F16C_2X16,input_stride)452 TEST(F16_PRELU__F16C_2X16, input_stride) { 453 TEST_REQUIRES_X86_F16C; 454 for (size_t rows = 1; rows <= 6; rows += 1) { 455 for (size_t channels = 1; channels <= 80; channels += 15) { 456 PReLUMicrokernelTester() 457 .rows(rows) 458 .channels(channels) 459 .input_stride(83) 460 .iterations(1) 461 .Test(xnn_f16_prelu_ukernel__f16c_2x16); 462 } 463 } 464 } 465 TEST(F16_PRELU__F16C_2X16,output_stride)466 TEST(F16_PRELU__F16C_2X16, output_stride) { 467 TEST_REQUIRES_X86_F16C; 468 for (size_t rows = 1; rows <= 6; rows += 1) { 469 for (size_t channels = 1; channels <= 80; channels += 15) { 470 PReLUMicrokernelTester() 471 .rows(rows) 472 .channels(channels) 473 .output_stride(83) 474 .iterations(1) 475 .Test(xnn_f16_prelu_ukernel__f16c_2x16); 476 } 477 } 478 } 479 TEST(F16_PRELU__F16C_2X16,inplace)480 TEST(F16_PRELU__F16C_2X16, inplace) { 481 TEST_REQUIRES_X86_F16C; 482 for (size_t rows = 1; rows <= 6; rows += 1) { 483 for (size_t channels = 1; channels <= 80; channels += 15) { 484 PReLUMicrokernelTester() 485 .rows(rows) 486 .channels(channels) 487 .inplace(true) 488 .iterations(1) 489 .Test(xnn_f16_prelu_ukernel__f16c_2x16); 490 } 491 } 492 } 493 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 494