// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>


class GlobalAveragePoolingOperatorTester {
 public:
  inline GlobalAveragePoolingOperatorTester& channels(size_t channels) {
    assert(channels != 0);
    this->channels_ = channels;
    return *this;
  }

  inline size_t channels() const {
    return this->channels_;
  }

  inline GlobalAveragePoolingOperatorTester& width(size_t width) {
    assert(width != 0);
    this->width_ = width;
    return *this;
  }

  inline size_t width() const {
    return this->width_;
  }

  inline GlobalAveragePoolingOperatorTester& input_stride(size_t input_stride) {
    assert(input_stride != 0);
    this->input_stride_ = input_stride;
    return *this;
  }

  inline size_t input_stride() const {
    if (this->input_stride_ == 0) {
      return channels();
    } else {
      assert(this->input_stride_ >= channels());
      return this->input_stride_;
    }
  }

  inline GlobalAveragePoolingOperatorTester& output_stride(size_t output_stride) {
    assert(output_stride != 0);
    this->output_stride_ = output_stride;
    return *this;
  }

  inline size_t output_stride() const {
    if (this->output_stride_ == 0) {
      return channels();
    } else {
      assert(this->output_stride_ >= channels());
      return this->output_stride_;
    }
  }

  inline GlobalAveragePoolingOperatorTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline GlobalAveragePoolingOperatorTester& input_scale(float input_scale) {
    assert(input_scale > 0.0f);
    assert(std::isnormal(input_scale));
    this->input_scale_ = input_scale;
    return *this;
  }

  inline float input_scale() const {
    return this->input_scale_;
  }

  inline GlobalAveragePoolingOperatorTester& input_zero_point(uint8_t input_zero_point) {
    this->input_zero_point_ = input_zero_point;
    return *this;
  }

  inline uint8_t input_zero_point() const {
    return this->input_zero_point_;
  }

  inline GlobalAveragePoolingOperatorTester& output_scale(float output_scale) {
    assert(output_scale > 0.0f);
    assert(std::isnormal(output_scale));
    this->output_scale_ = output_scale;
    return *this;
  }

  inline float output_scale() const {
    return this->output_scale_;
  }

  inline GlobalAveragePoolingOperatorTester& output_zero_point(uint8_t output_zero_point) {
    this->output_zero_point_ = output_zero_point;
    return *this;
  }

  inline uint8_t output_zero_point() const {
    return this->output_zero_point_;
  }

  inline GlobalAveragePoolingOperatorTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline GlobalAveragePoolingOperatorTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline GlobalAveragePoolingOperatorTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

  void TestNWCxQU8() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> u8dist(
      std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());

    std::vector<uint8_t> input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> output(batch_size() * output_stride());
    std::vector<float> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
      std::fill(output.begin(), output.end(), UINT8_C(0xA5));

      // Compute reference results.
      const double scale = double(input_scale()) / (double(width()) * double(output_scale()));
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t j = 0; j < channels(); j++) {
          double acc = 0.0;
          for (size_t k = 0; k < width(); k++) {
            acc += double(int32_t(input[(i * width() + k) * input_stride() + j]) - int32_t(input_zero_point()));
          }
          output_ref[i * channels() + j] = float(acc * scale + double(output_zero_point()));
          output_ref[i * channels() + j] = std::min<float>(output_ref[i * channels() + j], float(qmax()));
          output_ref[i * channels() + j] = std::max<float>(output_ref[i * channels() + j], float(qmin()));
        }
      }

      // Create, setup, run, and destroy Global Average Pooling operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t global_average_pooling_op = nullptr;

      xnn_status status = xnn_create_global_average_pooling_nwc_qu8(
        channels(), input_stride(), output_stride(),
        input_zero_point(), input_scale(),
        output_zero_point(), output_scale(),
        qmin(), qmax(),
        0, &global_average_pooling_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, global_average_pooling_op);

      // Smart pointer to automatically delete global_average_pooling_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_global_average_pooling_nwc_qu8(
          global_average_pooling_op,
          batch_size(), width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(global_average_pooling_op, nullptr /* thread pool */));

      // Verify results.
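      // ASSERT_NEAR with a 0.80 tolerance accepts a bit less than one
      // quantization step of rounding error between each uint8 output value
      // and the real-valued reference.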
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_LE(uint32_t(output[i * output_stride() + c]), uint32_t(qmax()));
          ASSERT_GE(uint32_t(output[i * output_stride() + c]), uint32_t(qmin()));
          ASSERT_NEAR(float(int32_t(output[i * output_stride() + c])), output_ref[i * channels() + c], 0.80f)
            << "at batch index " << i << " / " << batch_size()
            << ", channel " << c << " / " << channels();
        }
      }
    }
  }

  void TestNWCxQS8() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> i8dist(
      std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());

    std::vector<int8_t> input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
    std::vector<int8_t> output(batch_size() * output_stride());
    std::vector<float> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
      std::fill(output.begin(), output.end(), INT8_C(0xA5));

      // Compute reference results.
      const double scale = double(input_scale()) / (double(width()) * double(output_scale()));
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t j = 0; j < channels(); j++) {
          double acc = 0.0;
          for (size_t k = 0; k < width(); k++) {
            acc += double(int32_t(input[(i * width() + k) * input_stride() + j]) - int32_t(input_zero_point() - 0x80));
          }
          output_ref[i * channels() + j] = float(acc * scale + double(output_zero_point() - 0x80));
          output_ref[i * channels() + j] = std::min<float>(output_ref[i * channels() + j], float(qmax() - 0x80));
          output_ref[i * channels() + j] = std::max<float>(output_ref[i * channels() + j], float(qmin() - 0x80));
        }
      }

      // Create, setup, run, and destroy Global Average Pooling operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t global_average_pooling_op = nullptr;

      xnn_status status = xnn_create_global_average_pooling_nwc_qs8(
        channels(), input_stride(), output_stride(),
        int8_t(input_zero_point() - 0x80), input_scale(),
        int8_t(output_zero_point() - 0x80), output_scale(),
        int8_t(qmin() - 0x80), int8_t(qmax() - 0x80),
        0, &global_average_pooling_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, global_average_pooling_op);

      // Smart pointer to automatically delete global_average_pooling_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_global_average_pooling_nwc_qs8(
          global_average_pooling_op,
          batch_size(), width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(global_average_pooling_op, nullptr /* thread pool */));

      // Verify results.
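      // The tester stores zero points and qmin/qmax as unsigned, QU8-style
      // values; the QS8 path shifts them into signed range by subtracting
      // 0x80, so the bounds checked below are the shifted equivalents.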
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_LE(int32_t(output[i * output_stride() + c]), int32_t(qmax() - 0x80));
          ASSERT_GE(int32_t(output[i * output_stride() + c]), int32_t(qmin() - 0x80));
          ASSERT_NEAR(float(int32_t(output[i * output_stride() + c])), output_ref[i * channels() + c], 0.80f)
            << "at batch index " << i << " / " << batch_size()
            << ", channel " << c << " / " << channels();
        }
      }
    }
  }

  void TestNWCxF16() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(1.0e-3f, 1.0f);

    std::vector<uint16_t> input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> output(batch_size() * output_stride());
    std::vector<float> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Compute reference results, without clamping.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t j = 0; j < channels(); j++) {
          float acc = 0.0f;
          for (size_t k = 0; k < width(); k++) {
            acc += fp16_ieee_to_fp32_value(input[(i * width() + k) * input_stride() + j]);
          }
          output_ref[i * channels() + j] = acc / float(width());
        }
      }

      // Compute clamping parameters.
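      // qmin() and qmax() choose how far the clamping bounds cut into the
      // observed range of averages: qmin() 255ths of the range above the
      // minimum and (255 - qmax()) 255ths below the maximum, so nontrivial
      // clamping is actually exercised. If rounding to fp16 collapses both
      // bounds to the same value, clamping is disabled via infinities.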
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float scaled_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + accumulated_range / 255.0f * float(qmin())));
      const float scaled_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - accumulated_range / 255.0f * float(255 - qmax())));
      const float output_min = scaled_min == scaled_max ? -std::numeric_limits<float>::infinity() : scaled_min;
      const float output_max = scaled_min == scaled_max ? +std::numeric_limits<float>::infinity() : scaled_max;

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Global Average Pooling operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t global_average_pooling_op = nullptr;

      xnn_status status = xnn_create_global_average_pooling_nwc_f16(
        channels(), input_stride(), output_stride(),
        output_min, output_max,
        0, &global_average_pooling_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, global_average_pooling_op);

      // Smart pointer to automatically delete global_average_pooling_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_global_average_pooling_nwc_f16(
          global_average_pooling_op,
          batch_size(), width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(global_average_pooling_op, nullptr /* thread pool */));

      // Verify results.
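      // fp16 carries an 11-bit significand, so the 1% relative tolerance
      // with a 1.0e-4 absolute floor below should comfortably cover
      // accumulation and rounding error.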
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_LE(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_max);
          ASSERT_GE(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_min);
          ASSERT_NEAR(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_ref[i * channels() + c], std::max(1.0e-4f, std::abs(output_ref[i * channels() + c]) * 1.0e-2f))
            << "at batch index " << i << " / " << batch_size()
            << ", channel " << c << " / " << channels();
        }
      }
    }
  }

  void TestNWCxF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist;

    std::vector<float> input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> output(batch_size() * output_stride());
    std::vector<float> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
      std::fill(output.begin(), output.end(), std::nanf(""));

      // Compute reference results, without clamping.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t j = 0; j < channels(); j++) {
          float acc = 0.0f;
          for (size_t k = 0; k < width(); k++) {
            acc += input[(i * width() + k) * input_stride() + j];
          }
          output_ref[i * channels() + j] = acc / float(width());
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float output_min = accumulated_range == 0.0f ?
        -std::numeric_limits<float>::infinity() :
        accumulated_min + accumulated_range / 255.0f * float(qmin());
      const float output_max = accumulated_range == 0.0f ?
        +std::numeric_limits<float>::infinity() :
        accumulated_max - accumulated_range / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Global Average Pooling operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t global_average_pooling_op = nullptr;

      xnn_status status = xnn_create_global_average_pooling_nwc_f32(
        channels(), input_stride(), output_stride(),
        output_min, output_max,
        0, &global_average_pooling_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, global_average_pooling_op);

      // Smart pointer to automatically delete global_average_pooling_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_global_average_pooling_nwc_f32(
          global_average_pooling_op,
          batch_size(), width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(global_average_pooling_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_LE(output[i * output_stride() + c], output_max);
          ASSERT_GE(output[i * output_stride() + c], output_min);
          ASSERT_NEAR(output[i * output_stride() + c], output_ref[i * channels() + c], std::abs(output_ref[i * channels() + c]) * 1.0e-6f)
            << "at batch index " << i << " / " << batch_size()
            << ", channel " << c << " / " << channels();
        }
      }
    }
  }

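  // Unlike the NWC tests above, this test uses channels-first (NCW) layout:
  // for every (batch, channel) pair the width elements are contiguous, i.e.
  // the input is indexed as input[(i * channels() + j) * width() + k].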
  void TestNCWxF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist;

    std::vector<float> input(batch_size() * channels() * width() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> output(batch_size() * channels());
    std::vector<float> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
      std::fill(output.begin(), output.end(), std::nanf(""));

      // Compute reference results, without clamping.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t j = 0; j < channels(); j++) {
          float acc = 0.0f;
          for (size_t k = 0; k < width(); k++) {
            acc += input[(i * channels() + j) * width() + k];
          }
          output_ref[i * channels() + j] = acc / float(width());
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float output_min = accumulated_range == 0.0f ?
        -std::numeric_limits<float>::infinity() :
        accumulated_min + accumulated_range / 255.0f * float(qmin());
      const float output_max = accumulated_range == 0.0f ?
        +std::numeric_limits<float>::infinity() :
        accumulated_max - accumulated_range / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Global Average Pooling operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t global_average_pooling_op = nullptr;

      xnn_status status = xnn_create_global_average_pooling_ncw_f32(
        channels(), output_min, output_max,
        0, &global_average_pooling_op);
      if (status == xnn_status_unsupported_parameter) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);

      // Smart pointer to automatically delete global_average_pooling_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_global_average_pooling_ncw_f32(
          global_average_pooling_op,
          batch_size(), width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(global_average_pooling_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_LE(output[i * channels() + c], output_max);
          ASSERT_GE(output[i * channels() + c], output_min);
          ASSERT_NEAR(output[i * channels() + c], output_ref[i * channels() + c], std::abs(output_ref[i * channels() + c]) * 1.0e-5f)
            << "at batch index " << i << " / " << batch_size()
            << ", channel " << c << " / " << channels();
        }
      }
    }
  }

 private:
  size_t batch_size_{1};
  size_t width_{1};
  size_t channels_{1};
  size_t input_stride_{0};
  size_t output_stride_{0};
  float input_scale_{1.0f};
  float output_scale_{1.0f};
  uint8_t input_zero_point_{121};
  uint8_t output_zero_point_{133};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  size_t iterations_{1};
};
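
// Illustrative usage, mirroring the builder pattern the tester exposes
// (the test suite and test name below are hypothetical):
//
//   TEST(GLOBAL_AVERAGE_POOLING_NWC_F32, example) {
//     GlobalAveragePoolingOperatorTester()
//       .batch_size(2)
//       .width(7)
//       .channels(19)
//       .TestNWCxF32();
//   }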