// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/aligned-allocator.h>
#include <xnnpack/microfnptr.h>
#include <xnnpack/microparams-init.h>
#include <xnnpack/requantization.h>


class GAvgPoolMicrokernelTester {
 public:
  inline GAvgPoolMicrokernelTester& rows(size_t rows) {
    assert(rows != 0);
    this->rows_ = rows;
    return *this;
  }

  inline size_t rows() const {
    return this->rows_;
  }

  inline GAvgPoolMicrokernelTester& channels(size_t channels) {
    assert(channels != 0);
    this->channels_ = channels;
    return *this;
  }

  inline size_t channels() const {
    return this->channels_;
  }

  inline GAvgPoolMicrokernelTester& channel_tile(size_t channel_tile) {
    assert(channel_tile != 0);
    this->channel_tile_ = channel_tile;
    return *this;
  }

  inline size_t channel_tile() const {
    return this->channel_tile_;
  }

  inline GAvgPoolMicrokernelTester& input_stride(size_t input_stride) {
    assert(input_stride != 0);
    this->input_stride_ = input_stride;
    return *this;
  }

  inline size_t input_stride() const {
    if (this->input_stride_ == 0) {
      return channels();
    } else {
      assert(this->input_stride_ >= channels());
      return this->input_stride_;
    }
  }

  inline GAvgPoolMicrokernelTester& input_scale(float input_scale) {
    assert(input_scale > 0.0f);
    assert(std::isnormal(input_scale));
    this->input_scale_ = input_scale;
    return *this;
  }

  inline float input_scale() const {
    return this->input_scale_;
  }

  inline GAvgPoolMicrokernelTester& input_zero_point(uint8_t input_zero_point) {
    this->input_zero_point_ = input_zero_point;
    return *this;
  }

  inline uint8_t input_zero_point() const {
    return this->input_zero_point_;
  }

  inline GAvgPoolMicrokernelTester& output_scale(float output_scale) {
    assert(output_scale > 0.0f);
    assert(std::isnormal(output_scale));
    this->output_scale_ = output_scale;
    return *this;
  }

  inline float output_scale() const {
    return this->output_scale_;
  }

  inline GAvgPoolMicrokernelTester& output_zero_point(uint8_t output_zero_point) {
    this->output_zero_point_ = output_zero_point;
    return *this;
  }

  inline uint8_t output_zero_point() const {
    return this->output_zero_point_;
  }

  inline GAvgPoolMicrokernelTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline GAvgPoolMicrokernelTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }
  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline GAvgPoolMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

  void Test(
    xnn_qu8_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,
    xnn_init_qu8_avgpool_minmax_params_fn init_params,
    xnn_qu8_requantize_fn requantize) const
  {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> u8dist(
      std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      (rows() - 1) * input_stride() + channels());
    std::vector<uint8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> output(channels());
    std::vector<uint8_t> output_ref(channels());
    std::vector<float> output_fp(channels());
    std::vector<int32_t> accumulators(channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
      std::fill(output.begin(), output.end(), UINT8_C(0xA5));

      // Prepare parameters.
      union xnn_qu8_avgpool_minmax_params params;
      init_params(
        &params,
        -int32_t(input_zero_point()) * int32_t(rows()),
        input_scale() / (output_scale() * float(rows())),
        output_zero_point(), qmin(), qmax());

      // Compute reference results.
      for (size_t c = 0; c < channels(); c++) {
        int32_t acc = 0;
        for (size_t n = 0; n < rows(); n++) {
          acc += int32_t(input[n * input_stride() + c]) - int32_t(input_zero_point());
        }
        accumulators[c] = acc;
        output_ref[c] = requantize(
          acc, input_scale() / (output_scale() * float(rows())), output_zero_point(), qmin(), qmax());
        output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point());
        output_fp[c] = std::min<float>(output_fp[c], float(qmax()));
        output_fp[c] = std::max<float>(output_fp[c], float(qmin()));
      }

      // Call optimized micro-kernel.
      gavgpool_minmax(rows(), channels(),
        input.data(), input_stride() * sizeof(uint8_t),
        zero.data(),
        output.data(),
        &params);

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_LE(uint32_t(output[c]), uint32_t(qmax()))
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_GE(uint32_t(output[c]), uint32_t(qmin()))
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.55f)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels()
          << ", acc = " << accumulators[c];
        ASSERT_EQ(uint32_t(output_ref[c]), uint32_t(output[c]))
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels()
          << ", acc = " << accumulators[c];
      }
    }
  }

  void Test(
    xnn_qu8_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,
    xnn_init_qu8_avgpool_minmax_params_fn init_params,
    xnn_qu8_requantize_fn requantize) const
  {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> u8dist(
      std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      (rows() - 1) * input_stride() + channels());
    std::vector<int32_t, AlignedAllocator<int32_t, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> output(channels());
    std::vector<uint8_t> output_ref(channels());
    std::vector<float> output_fp(channels());
    std::vector<int32_t> accumulators(channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
      std::fill(output.begin(), output.end(), UINT8_C(0xA5));

      // Prepare parameters.
      union xnn_qu8_avgpool_minmax_params params;
      init_params(
        &params,
        -int32_t(input_zero_point()) * int32_t(rows()),
        input_scale() / (output_scale() * float(rows())),
        output_zero_point(), qmin(), qmax());

      // Compute reference results.
      for (size_t c = 0; c < channels(); c++) {
        int32_t acc = 0;
        for (size_t n = 0; n < rows(); n++) {
          acc += int32_t(input[n * input_stride() + c]) - int32_t(input_zero_point());
        }

        accumulators[c] = acc;
        output_ref[c] = requantize(
          acc, input_scale() / (output_scale() * float(rows())), output_zero_point(), qmin(), qmax());
        output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point());
        output_fp[c] = std::min<float>(output_fp[c], float(qmax()));
        output_fp[c] = std::max<float>(output_fp[c], float(qmin()));
      }

      // Call optimized micro-kernel.
      gavgpool_minmax(rows(), channels(),
        input.data(), input_stride() * sizeof(uint8_t),
        zero.data(),
        buffer.data(),
        output.data(),
        &params);

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_LE(uint32_t(output[c]), uint32_t(qmax()))
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_GE(uint32_t(output[c]), uint32_t(qmin()))
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.55f)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels()
          << ", acc = " << accumulators[c];
        ASSERT_EQ(uint32_t(output_ref[c]), uint32_t(output[c]))
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels()
          << ", acc = " << accumulators[c];
      }
    }
  }

  void Test(
    xnn_qs8_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,
    xnn_init_qs8_avgpool_minmax_params_fn init_params,
    xnn_qs8_requantize_fn requantize) const
  {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> i8dist(
      std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());

    std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) +
      (rows() - 1) * input_stride() + channels());
    std::vector<int8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
    std::vector<int8_t> output(channels());
    std::vector<int8_t> output_ref(channels());
    std::vector<float> output_fp(channels());
    std::vector<int32_t> accumulators(channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
      std::fill(output.begin(), output.end(), INT8_C(0xA5));

      // Prepare parameters.
      union xnn_qs8_avgpool_minmax_params params;
      init_params(
        &params,
        -int32_t(input_zero_point() - 0x80) * int32_t(rows()),
        input_scale() / (output_scale() * float(rows())),
        int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));

      // Compute reference results.
      for (size_t c = 0; c < channels(); c++) {
        int32_t acc = 0;
        for (size_t n = 0; n < rows(); n++) {
          acc += int32_t(input[n * input_stride() + c]) - int32_t(input_zero_point() - 0x80);
        }
        accumulators[c] = acc;
        output_ref[c] = requantize(
          acc, input_scale() / (output_scale() * float(rows())), int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
        output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point() - 0x80);
        output_fp[c] = std::min<float>(output_fp[c], float(qmax() - 0x80));
        output_fp[c] = std::max<float>(output_fp[c], float(qmin() - 0x80));
      }

      // Call optimized micro-kernel.
      gavgpool_minmax(rows(), channels(),
        input.data(), input_stride() * sizeof(int8_t),
        zero.data(),
        output.data(),
        &params);

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_LE(int32_t(output[c]), int32_t(qmax() - 0x80))
          << "at channel " << c << " / " << channels() << ", rows = " << rows();
        ASSERT_GE(int32_t(output[c]), int32_t(qmin() - 0x80))
          << "at channel " << c << " / " << channels() << ", rows = " << rows();
        ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.55f)
          << "at channel " << c << " / " << channels() << ", rows = " << rows()
          << ", accumulator = " << accumulators[c];
        ASSERT_EQ(int32_t(output_ref[c]), int32_t(output[c]))
          << "at channel " << c << " / " << channels() << ", rows = " << rows()
          << ", accumulator = " << accumulators[c];
      }
    }
  }

  void Test(
    xnn_qs8_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,
    xnn_init_qs8_avgpool_minmax_params_fn init_params,
    xnn_qs8_requantize_fn requantize) const
  {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> i8dist(
      std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());

    std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) +
      (rows() - 1) * input_stride() + channels());
    std::vector<int32_t, AlignedAllocator<int32_t, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
    std::vector<int8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
    std::vector<int8_t> output(channels());
    std::vector<int8_t> output_ref(channels());
    std::vector<float> output_fp(channels());
    std::vector<int32_t> accumulators(channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
      std::fill(output.begin(), output.end(), INT8_C(0xA5));

      // Prepare parameters.
      union xnn_qs8_avgpool_minmax_params params;
      init_params(
        &params,
        -int32_t(input_zero_point() - 0x80) * int32_t(rows()),
        input_scale() / (output_scale() * float(rows())),
        int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));

      // Compute reference results.
      for (size_t c = 0; c < channels(); c++) {
        int32_t acc = 0;
        for (size_t n = 0; n < rows(); n++) {
          acc += int32_t(input[n * input_stride() + c]) - int32_t(input_zero_point() - 0x80);
        }
        accumulators[c] = acc;
        output_ref[c] = requantize(
          acc, input_scale() / (output_scale() * float(rows())), int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
        output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point() - 0x80);
        output_fp[c] = std::min<float>(output_fp[c], float(qmax() - 0x80));
        output_fp[c] = std::max<float>(output_fp[c], float(qmin() - 0x80));
      }

      // Call optimized micro-kernel.
      gavgpool_minmax(rows(), channels(),
        input.data(), input_stride() * sizeof(int8_t),
        zero.data(),
        buffer.data(),
        output.data(),
        &params);

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_LE(int32_t(output[c]), int32_t(qmax() - 0x80))
          << "at channel " << c << " / " << channels() << ", rows = " << rows();
        ASSERT_GE(int32_t(output[c]), int32_t(qmin() - 0x80))
          << "at channel " << c << " / " << channels() << ", rows = " << rows();
        ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.55f)
          << "at channel " << c << " / " << channels() << ", rows = " << rows()
          << ", accumulator = " << accumulators[c];
        ASSERT_EQ(int32_t(output_ref[c]), int32_t(output[c]))
          << "at channel " << c << " / " << channels() << ", rows = " << rows()
          << ", accumulator = " << accumulators[c];
      }
    }
  }

  void Test(xnn_f16_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax, xnn_init_f16_scaleminmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist;

    std::vector<uint16_t> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> output(channels());
    std::vector<float> output_ref(channels());

    std::fill(zero.begin(), zero.end(), 0);
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Compute reference results, without clamping.
      for (size_t c = 0; c < channels(); c++) {
        float acc = 0.0f;
        for (size_t n = 0; n < rows(); n++) {
          acc += fp16_ieee_to_fp32_value(input[n * input_stride() + c]);
        }
        output_ref[c] = acc / float(rows());
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + float(qmin()) / 255.0f * accumulated_range));
      const float output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range));

      // Clamp reference results.
      for (float& output_values : output_ref) {
        output_values = std::max(std::min(output_values, output_max), output_min);
      }

      // Prepare parameters.
      xnn_f16_scaleminmax_params params;
      init_params(&params,
        fp16_ieee_from_fp32_value(1.0f / float(rows())),
        fp16_ieee_from_fp32_value(output_min),
        fp16_ieee_from_fp32_value(output_max));

      // Call optimized micro-kernel.
      gavgpool_minmax(rows(), channels(),
        input.data(), input_stride() * sizeof(uint16_t),
        zero.data(),
        output.data(),
        &params);

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_LE(fp16_ieee_to_fp32_value(output[c]), output_max)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_GE(fp16_ieee_to_fp32_value(output[c]), output_min)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_NEAR(fp16_ieee_to_fp32_value(output[c]), output_ref[c], std::max(1.0e-4f, std::abs(output_ref[c]) * 1.0e-2f))
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
      }
    }
  }

  void Test(xnn_f16_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax, xnn_init_f16_scaleminmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist;

    std::vector<uint16_t> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> output(channels());
    std::vector<float> output_ref(channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Compute reference results, without clamping.
      for (size_t c = 0; c < channels(); c++) {
        float acc = 0.0f;
        for (size_t n = 0; n < rows(); n++) {
          acc += fp16_ieee_to_fp32_value(input[n * input_stride() + c]);
        }
        output_ref[c] = acc / float(rows());
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + float(qmin()) / 255.0f * accumulated_range));
      const float output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range));

      // Prepare parameters.
      xnn_f16_scaleminmax_params params;
      init_params(&params,
        fp16_ieee_from_fp32_value(1.0f / float(rows())),
        fp16_ieee_from_fp32_value(output_min),
        fp16_ieee_from_fp32_value(output_max));

      // Clamp reference results.
      for (float& output_values : output_ref) {
        output_values = std::max(std::min(output_values, output_max), output_min);
      }

      // Call optimized micro-kernel.
      gavgpool_minmax(rows(), channels(),
        input.data(), input_stride() * sizeof(uint16_t),
        zero.data(),
        buffer.data(),
        output.data(),
        &params);

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_LE(fp16_ieee_to_fp32_value(output[c]), output_max)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_GE(fp16_ieee_to_fp32_value(output[c]), output_min)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_NEAR(fp16_ieee_to_fp32_value(output[c]), output_ref[c], std::abs(output_ref[c]) * 1.0e-0f)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
      }
    }
  }

  void Test(xnn_f32_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax, xnn_init_f32_scaleminmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist;

    std::vector<float> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> zero(channels() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> output(channels());
    std::vector<float> output_ref(channels());

    std::fill(zero.begin(), zero.end(), 0.0f);
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
      std::fill(output.begin(), output.end(), std::nanf(""));

      // Compute reference results, without clamping.
      for (size_t c = 0; c < channels(); c++) {
        float acc = 0.0f;
        for (size_t n = 0; n < rows(); n++) {
          acc += input[n * input_stride() + c];
        }
        output_ref[c] = acc / float(rows());
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float output_min = accumulated_min + float(qmin()) / 255.0f * accumulated_range;
      const float output_max = accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range;

      // Clamp reference results.
      for (float& output_values : output_ref) {
        output_values = std::max(std::min(output_values, output_max), output_min);
      }

      // Prepare parameters.
      union xnn_f32_scaleminmax_params params;
      init_params(&params, 1.0f / float(rows()), output_min, output_max);

      // Call optimized micro-kernel.
      gavgpool_minmax(rows(), channels(),
        input.data(), input_stride() * sizeof(float),
        zero.data(),
        output.data(),
        &params);

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_LE(output[c], output_max)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_GE(output[c], output_min)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_NEAR(output[c], output_ref[c], std::abs(output_ref[c]) * 1.0e-6f)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
      }
    }
  }

  void Test(xnn_f32_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax, xnn_init_f32_scaleminmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist;

    std::vector<float> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float, AlignedAllocator<float, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> zero(channels() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> output(channels());
    std::vector<float> output_ref(channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
      std::fill(output.begin(), output.end(), std::nanf(""));

      // Compute reference results, without clamping.
      for (size_t c = 0; c < channels(); c++) {
        float acc = 0.0f;
        for (size_t n = 0; n < rows(); n++) {
          acc += input[n * input_stride() + c];
        }
        output_ref[c] = acc / float(rows());
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float output_min = accumulated_min + float(qmin()) / 255.0f * accumulated_range;
      const float output_max = accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range;

      // Prepare parameters.
      union xnn_f32_scaleminmax_params params;
      init_params(&params, 1.0f / float(rows()), output_min, output_max);

      // Clamp reference results.
      for (float& output_values : output_ref) {
        output_values = std::max(std::min(output_values, output_max), output_min);
      }

      // Call optimized micro-kernel.
      gavgpool_minmax(rows(), channels(),
        input.data(), input_stride() * sizeof(float),
        zero.data(),
        buffer.data(),
        output.data(),
        &params);

      // Verify results.
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_LE(output[c], output_max)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_GE(output[c], output_min)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
        ASSERT_NEAR(output[c], output_ref[c], std::abs(output_ref[c]) * 1.0e-6f)
          << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
      }
    }
  }

 private:
  size_t rows_{1};
  size_t channels_{1};
  size_t channel_tile_{1};
  size_t input_stride_{0};
  float input_scale_{1.25f};
  float output_scale_{0.75f};
  uint8_t input_zero_point_{121};
  uint8_t output_zero_point_{133};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  size_t iterations_{15};
};
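
// Illustrative usage sketch (not part of the original header): a gtest case
// would configure the tester with the fluent setters above and then call the
// Test() overload matching the micro-kernel's data type and pass count. The
// kernel, params-init, and requantize identifiers below are hypothetical
// placeholders for whichever QU8 unipass implementation is under test.
//
//   TEST(QU8_GAVGPOOL_MINMAX_UNIPASS, channels_gt_tile_with_qmin) {
//     GAvgPoolMicrokernelTester()
//       .rows(7)                 // number of input rows to average
//       .channels(11)            // more channels than the channel tile
//       .channel_tile(8)
//       .input_stride(13)        // row stride >= channels
//       .qmin(16)                // tighten the lower clamp bound
//       .Test(xnn_qu8_gavgpool_minmax_unipass_ukernel_under_test,   // hypothetical kernel
//             xnn_init_qu8_avgpool_minmax_params_under_test,        // hypothetical init fn
//             xnn_qu8_requantize_under_test);                       // hypothetical requantize fn
//   }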