1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 9 #pragma once 10 11 #include <gtest/gtest.h> 12 13 #include <algorithm> 14 #include <cfloat> 15 #include <cmath> 16 #include <cstddef> 17 #include <cstdlib> 18 #include <functional> 19 #include <limits> 20 #include <random> 21 #include <vector> 22 23 #include <xnnpack/requantization-stubs.h> 24 #include <xnnpack/requantization.h> 25 26 27 class RequantizationTester { 28 public: s(uint32_t s)29 inline RequantizationTester& s(uint32_t s) { 30 this->s_ = s; 31 return *this; 32 } 33 s()34 inline uint32_t s() const { 35 return this->s_; 36 } 37 scale()38 inline float scale() const { 39 return ldexpf(1.0f, -s()); 40 } 41 zero_point(int32_t zero_point)42 inline RequantizationTester& zero_point(int32_t zero_point) { 43 this->zero_point_ = zero_point; 44 return *this; 45 } 46 zero_point()47 inline int32_t zero_point() const { 48 return this->zero_point_; 49 } 50 qmin(int16_t qmin)51 inline RequantizationTester& qmin(int16_t qmin) { 52 this->qmin_ = qmin; 53 return *this; 54 } 55 qmin()56 inline int16_t qmin() const { 57 return this->qmin_; 58 } 59 qmax(int16_t qmax)60 inline RequantizationTester& qmax(int16_t qmax) { 61 this->qmax_ = qmax; 62 return *this; 63 } 64 qmax()65 inline int16_t qmax() const { 66 return this->qmax_; 67 } 68 iterations(size_t iterations)69 inline RequantizationTester& iterations(size_t iterations) { 70 this->iterations_ = iterations; 71 return *this; 72 } 73 iterations()74 inline size_t iterations() const { 75 return this->iterations_; 76 } 77 78 /* 79 * Test that requantization of numbers ((i - zero point) * 2**s) with 80 * - scale = exp2(-s) 81 * - zero point in [0, 255] 82 * - no output clamping 83 * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow. 84 */ TestExactDivideByPO2(xnn_qu8_requantization_function requantize)85 void TestExactDivideByPO2(xnn_qu8_requantization_function requantize) const { 86 ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min()); 87 ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max()); 88 ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min()); 89 ASSERT_LE(qmin(), std::numeric_limits<uint8_t>::max()); 90 ASSERT_GE(qmax(), std::numeric_limits<uint8_t>::min()); 91 ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max()); 92 ASSERT_LT(qmin(), qmax()); 93 94 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */ 95 ASSERT_GE(s(), 1); 96 ASSERT_LT(s(), 32); 97 98 std::vector<int32_t> inputs(256); 99 std::vector<uint8_t> outputs(inputs.size()); 100 const int32_t max_i = (uint32_t(std::numeric_limits<int32_t>::max()) >> s()) + zero_point(); 101 const int32_t min_i = -(-uint32_t(std::numeric_limits<int32_t>::min()) >> s()) + zero_point(); 102 for (int32_t i = 0; i <= std::numeric_limits<uint8_t>::max(); i++) { 103 const int32_t clamped_i = std::max(min_i, std::min(max_i, i)); 104 inputs[i] = int32_t(uint32_t(clamped_i - zero_point()) << s()); 105 } 106 requantize(inputs.size(), inputs.data(), 107 scale(), zero_point(), qmin(), qmax(), 108 outputs.data()); 109 for (int32_t i = 0; i <= std::numeric_limits<uint8_t>::max(); i++) { 110 const int32_t clamped_i = std::max(min_i, std::min(max_i, i)); 111 ASSERT_EQ(uint32_t(clamped_i), uint32_t(outputs[i])) 112 << "i = " << i << ", clamped i = " << clamped_i << ", input = " << inputs[i] 113 << ", min i = " << min_i << ", max i = " << max_i 114 << ", s = " << s() << ", zero point = " << zero_point(); 115 } 116 } 117 118 /* 119 * Test that requantization of numbers ((i - zero point) * 2**s) with 120 * - scale = exp2(-s) 121 * - zero point in [-128, 127] 122 * - no output clamping 123 * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow. 124 */ TestExactDivideByPO2(xnn_qs8_requantization_function requantize)125 void TestExactDivideByPO2(xnn_qs8_requantization_function requantize) const { 126 ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min()); 127 ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max()); 128 ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min()); 129 ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max()); 130 ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min()); 131 ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max()); 132 ASSERT_LT(qmin(), qmax()); 133 134 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */ 135 ASSERT_GE(s(), 1); 136 ASSERT_LT(s(), 32); 137 138 std::vector<int32_t> inputs(256); 139 std::vector<int8_t> outputs(inputs.size()); 140 const int32_t max_i = (uint32_t(std::numeric_limits<int32_t>::max()) >> s()) + zero_point(); 141 const int32_t min_i = -(-uint32_t(std::numeric_limits<int32_t>::min()) >> s()) + zero_point(); 142 for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) { 143 const int32_t clamped_i = std::max(min_i, std::min(max_i, i)); 144 inputs[i - std::numeric_limits<int8_t>::min()] = int32_t(uint32_t(clamped_i - zero_point()) << s()); 145 } 146 requantize(inputs.size(), inputs.data(), 147 scale(), zero_point(), qmin(), qmax(), 148 outputs.data()); 149 for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) { 150 const int32_t clamped_i = std::max(min_i, std::min(max_i, i)); 151 ASSERT_EQ(clamped_i, int32_t(outputs[i - std::numeric_limits<int8_t>::min()])) 152 << "i = " << i << ", clamped i = " << clamped_i 153 << ", input = " << inputs[i - std::numeric_limits<int8_t>::min()] 154 << ", min i = " << min_i << ", max i = " << max_i 155 << ", s = " << s() << ", zero point = " << zero_point(); 156 } 157 } 158 159 /* 160 * Test that requantization of numbers ((i - zero point) * 2**s - 2**(s-1) + 1) with 161 * - scale = exp2(-s) 162 * - zero point in [1, 255] 163 * - no output clamping 164 * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow. 165 */ TestDivideByPO2WithRoundingUp(xnn_qu8_requantization_function requantize)166 void TestDivideByPO2WithRoundingUp(xnn_qu8_requantization_function requantize) { 167 ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min()); 168 ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max()); 169 ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min()); 170 ASSERT_LE(qmin(), std::numeric_limits<uint8_t>::max()); 171 ASSERT_GE(qmax(), std::numeric_limits<uint8_t>::min()); 172 ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max()); 173 ASSERT_LT(qmin(), qmax()); 174 175 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */ 176 ASSERT_GE(s(), 1); 177 ASSERT_LT(s(), 32); 178 179 std::vector<int32_t> inputs(256); 180 std::vector<uint8_t> outputs(inputs.size()); 181 for (int32_t i = 0; i <= std::numeric_limits<uint8_t>::max(); i++) { 182 const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) - 183 (INT64_C(1) << (s() - 1)) + 1; 184 inputs[i] = int32_t(input); 185 } 186 requantize(inputs.size(), inputs.data(), 187 scale(), zero_point(), qmin(), qmax(), 188 outputs.data()); 189 for (int32_t i = 0; i <= std::numeric_limits<uint8_t>::max(); i++) { 190 const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) - 191 (INT64_C(1) << (s() - 1)) + 1; 192 if (int32_t(input) == input) { 193 ASSERT_EQ(i, int32_t(outputs[i])) 194 << "i = " << i << ", input = " << input 195 << ", s = " << s() << ", zero point = " << zero_point(); 196 } 197 } 198 } 199 200 /* 201 * Test that requantization of numbers ((i - zero point) * 2**s - 2**(s-1) + 1) with 202 * - scale = exp2(-s) 203 * - zero point in [-128, 127] 204 * - no output clamping 205 * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow. 206 */ TestDivideByPO2WithRoundingUp(xnn_qs8_requantization_function requantize)207 void TestDivideByPO2WithRoundingUp(xnn_qs8_requantization_function requantize) { 208 ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min()); 209 ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max()); 210 ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min()); 211 ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max()); 212 ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min()); 213 ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max()); 214 ASSERT_LT(qmin(), qmax()); 215 216 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */ 217 ASSERT_GE(s(), 1); 218 ASSERT_LT(s(), 32); 219 220 std::vector<int32_t> inputs(256); 221 std::vector<int8_t> outputs(inputs.size()); 222 for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) { 223 const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) - 224 (INT64_C(1) << (s() - 1)) + 1; 225 inputs[i - std::numeric_limits<int8_t>::min()] = int32_t(input); 226 } 227 requantize(inputs.size(), inputs.data(), 228 scale(), zero_point(), qmin(), qmax(), 229 outputs.data()); 230 for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) { 231 const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) - 232 (INT64_C(1) << (s() - 1)) + 1; 233 if (int32_t(input) == input) { 234 ASSERT_EQ(i, int32_t(outputs[i - std::numeric_limits<int8_t>::min()])) 235 << "i = " << i << ", input = " << input 236 << ", s = " << s() << ", zero point = " << zero_point(); 237 } 238 } 239 } 240 241 /* 242 * Test that requantization of numbers ((i - zero point) * 2**s + 2**(s-1) - 1) with 243 * - scale = exp2(-s) 244 * - zero point in [1, 255] 245 * - no output clamping 246 * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow. 247 */ TestDivideByPO2WithRoundingDown(xnn_qu8_requantization_function requantize)248 void TestDivideByPO2WithRoundingDown(xnn_qu8_requantization_function requantize) { 249 ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min()); 250 ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max()); 251 ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min()); 252 ASSERT_LE(qmin(), std::numeric_limits<uint8_t>::max()); 253 ASSERT_GE(qmax(), std::numeric_limits<uint8_t>::min()); 254 ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max()); 255 ASSERT_LT(qmin(), qmax()); 256 257 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */ 258 ASSERT_GE(s(), 1); 259 ASSERT_LT(s(), 32); 260 261 std::vector<int32_t> inputs(256); 262 std::vector<uint8_t> outputs(inputs.size()); 263 for (int32_t i = 0; i <= std::numeric_limits<uint8_t>::max(); i++) { 264 const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) + 265 (INT64_C(1) << (s() - 1)) - 1; 266 inputs[i] = int32_t(input); 267 } 268 requantize(inputs.size(), inputs.data(), 269 scale(), zero_point(), qmin(), qmax(), 270 outputs.data()); 271 for (int32_t i = 0; i <= std::numeric_limits<uint8_t>::max(); i++) { 272 const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) + 273 (INT64_C(1) << (s() - 1)) - 1; 274 if (int32_t(input) == input) { 275 ASSERT_EQ(i, int32_t(outputs[i])) 276 << "i = " << i << ", input = " << input 277 << ", s = " << s() << ", zero point = " << zero_point(); 278 } 279 } 280 } 281 282 /* 283 * Test that requantization of numbers ((i - zero point) * 2**s + 2**(s-1) - 1) with 284 * - scale = exp2(-s) 285 * - zero point in [-128, 127] 286 * - no output clamping 287 * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow. 288 */ TestDivideByPO2WithRoundingDown(xnn_qs8_requantization_function requantize)289 void TestDivideByPO2WithRoundingDown(xnn_qs8_requantization_function requantize) { 290 ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min()); 291 ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max()); 292 ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min()); 293 ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max()); 294 ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min()); 295 ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max()); 296 ASSERT_LT(qmin(), qmax()); 297 298 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */ 299 ASSERT_GE(s(), 1); 300 ASSERT_LT(s(), 32); 301 302 std::vector<int32_t> inputs(256); 303 std::vector<int8_t> outputs(inputs.size()); 304 for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) { 305 const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) + 306 (INT64_C(1) << (s() - 1)) - 1; 307 inputs[i - std::numeric_limits<int8_t>::min()] = int32_t(input); 308 } 309 requantize(inputs.size(), inputs.data(), 310 scale(), zero_point(), qmin(), qmax(), 311 outputs.data()); 312 for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) { 313 const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) + 314 (INT64_C(1) << (s() - 1)) - 1; 315 if (int32_t(input) == input) { 316 ASSERT_EQ(i, int32_t(outputs[i - std::numeric_limits<int8_t>::min()])) 317 << "i = " << i << ", input = " << input 318 << ", s = " << s() << ", zero point = " << zero_point(); 319 } 320 } 321 } 322 TestDivideByPO2WithRoundingTiesAway(xnn_qu8_requantization_function requantize)323 void TestDivideByPO2WithRoundingTiesAway(xnn_qu8_requantization_function requantize) { 324 ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min()); 325 ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max()); 326 ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min()); 327 ASSERT_LE(qmin(), std::numeric_limits<uint8_t>::max()); 328 ASSERT_GE(qmax(), std::numeric_limits<uint8_t>::min()); 329 ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max()); 330 ASSERT_LT(qmin(), qmax()); 331 332 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */ 333 ASSERT_GE(s(), 1); 334 ASSERT_LT(s(), 32); 335 336 std::vector<int32_t> inputs(256); 337 std::vector<uint8_t> outputs(inputs.size()); 338 for (int32_t i = 0; i <= std::numeric_limits<uint8_t>::max(); i++) { 339 int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()); 340 if (input > 0) { 341 input -= INT64_C(1) << (s() - 1); 342 } else if (input < 0) { 343 input += INT64_C(1) << (s() - 1); 344 } 345 inputs[i] = int32_t(input); 346 } 347 requantize(inputs.size(), inputs.data(), 348 scale(), zero_point(), qmin(), qmax(), 349 outputs.data()); 350 for (int32_t i = 0; i <= std::numeric_limits<uint8_t>::max(); i++) { 351 int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()); 352 if (input > 0) { 353 input -= INT64_C(1) << (s() - 1); 354 } else if (input < 0) { 355 input += INT64_C(1) << (s() - 1); 356 } 357 if (int32_t(input) == input) { 358 ASSERT_EQ(i, int32_t(outputs[i])) 359 << "i = " << i << ", input = " << input 360 << ", s = " << s() << ", zero point = " << zero_point(); 361 } 362 } 363 } 364 TestDivideByPO2WithRoundingTiesAway(xnn_qs8_requantization_function requantize)365 void TestDivideByPO2WithRoundingTiesAway(xnn_qs8_requantization_function requantize) { 366 ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min()); 367 ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max()); 368 ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min()); 369 ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max()); 370 ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min()); 371 ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max()); 372 ASSERT_LT(qmin(), qmax()); 373 374 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */ 375 ASSERT_GE(s(), 1); 376 ASSERT_LT(s(), 32); 377 378 std::vector<int32_t> inputs(256); 379 std::vector<int8_t> outputs(inputs.size()); 380 for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) { 381 int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()); 382 if (input > 0) { 383 input -= INT64_C(1) << (s() - 1); 384 } else if (input < 0) { 385 input += INT64_C(1) << (s() - 1); 386 } 387 inputs[i - std::numeric_limits<int8_t>::min()] = int32_t(input); 388 } 389 requantize(inputs.size(), inputs.data(), 390 scale(), zero_point(), qmin(), qmax(), 391 outputs.data()); 392 for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) { 393 int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()); 394 if (input > 0) { 395 input -= INT64_C(1) << (s() - 1); 396 } else if (input < 0) { 397 input += INT64_C(1) << (s() - 1); 398 } 399 if (int32_t(input) == input) { 400 ASSERT_EQ(i, int32_t(outputs[i - std::numeric_limits<int8_t>::min()])) 401 << "i = " << i << ", input = " << input 402 << ", s = " << s() << ", zero point = " << zero_point(); 403 } 404 } 405 } 406 TestDivideByPO2WithRoundingTiesUp(xnn_qs8_requantization_function requantize)407 void TestDivideByPO2WithRoundingTiesUp(xnn_qs8_requantization_function requantize) { 408 ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min()); 409 ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max()); 410 ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min()); 411 ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max()); 412 ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min()); 413 ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max()); 414 ASSERT_LT(qmin(), qmax()); 415 416 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */ 417 ASSERT_GE(s(), 1); 418 ASSERT_LT(s(), 32); 419 420 std::vector<int32_t> inputs(256); 421 std::vector<int8_t> outputs(inputs.size()); 422 for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) { 423 int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()); 424 input -= INT64_C(1) << (s() - 1); 425 inputs[i - std::numeric_limits<int8_t>::min()] = int32_t(input); 426 } 427 requantize(inputs.size(), inputs.data(), 428 scale(), zero_point(), qmin(), qmax(), 429 outputs.data()); 430 for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) { 431 int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()); 432 input -= INT64_C(1) << (s() - 1); 433 if (int32_t(input) == input) { 434 ASSERT_EQ(i, int32_t(outputs[i - std::numeric_limits<int8_t>::min()])) 435 << "i = " << i << ", input = " << input 436 << ", s = " << s() << ", zero point = " << zero_point(); 437 } 438 } 439 } 440 TestSpecialCases(xnn_qu8_requantization_function requantize)441 void TestSpecialCases(xnn_qu8_requantization_function requantize) { 442 ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min()); 443 ASSERT_LE(qmin(), std::numeric_limits<uint8_t>::max()); 444 ASSERT_GE(qmax(), std::numeric_limits<uint8_t>::min()); 445 ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max()); 446 ASSERT_LT(qmin(), qmax()); 447 448 std::vector<int32_t> inputs(256); 449 std::vector<uint8_t> outputs(inputs.size()); 450 451 std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::min()); 452 for (int32_t zero_point = 0; zero_point <= std::numeric_limits<uint8_t>::max(); zero_point++) { 453 requantize( 454 inputs.size(), 455 inputs.data(), 456 ldexpf(1.0f, -32) /* scale */, 457 zero_point /* zero point */, 458 std::numeric_limits<uint8_t>::min(), 459 std::numeric_limits<uint8_t>::max(), 460 outputs.data()); 461 for (size_t i = 0; i < outputs.size(); i++) { 462 ASSERT_EQ(std::max(int32_t(int32_t(std::numeric_limits<uint8_t>::min())), zero_point - 1), int32_t(outputs[i])); 463 } 464 } 465 466 std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::max()); 467 requantize( 468 inputs.size(), 469 inputs.data(), 470 0x1.FFFFFEp-1f /* scale */, 471 std::numeric_limits<uint8_t>::max() /* zero point */, 472 std::numeric_limits<uint8_t>::min(), 473 std::numeric_limits<uint8_t>::max(), 474 outputs.data()); 475 for (size_t i = 0; i < outputs.size(); i++) { 476 ASSERT_EQ(std::numeric_limits<uint8_t>::max(), int32_t(outputs[i])); 477 } 478 } 479 TestSpecialCases(xnn_qs8_requantization_function requantize)480 void TestSpecialCases(xnn_qs8_requantization_function requantize) { 481 ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min()); 482 ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max()); 483 ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min()); 484 ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max()); 485 ASSERT_LT(qmin(), qmax()); 486 487 std::vector<int32_t> inputs(256); 488 std::vector<int8_t> outputs(inputs.size()); 489 490 std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::min()); 491 for (int32_t zero_point = std::numeric_limits<int8_t>::min(); 492 zero_point <= std::numeric_limits<int8_t>::max(); 493 zero_point++) 494 { 495 requantize( 496 inputs.size(), 497 inputs.data(), 498 ldexpf(1.0f, -32) /* scale */, 499 zero_point, 500 std::numeric_limits<int8_t>::min(), 501 std::numeric_limits<int8_t>::max(), 502 outputs.data()); 503 for (size_t i = 0; i < outputs.size(); i++) { 504 ASSERT_EQ(std::max(int32_t(std::numeric_limits<int8_t>::min()), zero_point - 1), int32_t(outputs[i])); 505 } 506 } 507 508 std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::max()); 509 requantize( 510 inputs.size(), 511 inputs.data(), 512 0x1.FFFFFEp-1f /* scale */, 513 std::numeric_limits<int8_t>::max() /* zero point */, 514 std::numeric_limits<int8_t>::min(), 515 std::numeric_limits<int8_t>::max(), 516 outputs.data()); 517 for (size_t i = 0; i < outputs.size(); i++) { 518 ASSERT_EQ(std::numeric_limits<int8_t>::max(), int32_t(outputs[i])); 519 } 520 } 521 TestRandomCasesRoundToNearestTiesAway(xnn_qu8_requantization_function requantize)522 void TestRandomCasesRoundToNearestTiesAway(xnn_qu8_requantization_function requantize) { 523 ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min()); 524 ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max()); 525 ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min()); 526 ASSERT_LE(qmin(), std::numeric_limits<uint8_t>::max()); 527 ASSERT_GE(qmax(), std::numeric_limits<uint8_t>::min()); 528 ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max()); 529 ASSERT_LT(qmin(), qmax()); 530 531 std::random_device random_device; 532 std::mt19937 rng(random_device()); 533 for (size_t iteration = 0; iteration < iterations(); iteration++) { 534 auto u8rng = 535 std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng)); 536 537 std::vector<int32_t> inputs(4096); 538 std::vector<uint8_t> outputs(inputs.size()); 539 540 std::uniform_real_distribution<float> scale_distribution(0x1.000000p-23f, 0x1.FFFFFEp-1f); 541 const float scale = scale_distribution(rng); 542 for (size_t i = 0; i < inputs.size(); i++) { 543 const uint8_t approximate_output = std::min(std::max(uint8_t(u8rng()), uint8_t(qmin())), uint8_t(qmax())); 544 const int32_t input = int32_t(double(approximate_output) / double(scale)); 545 inputs[i] = input; 546 } 547 548 requantize( 549 inputs.size(), inputs.data(), scale, zero_point(), qmin(), qmax(), 550 outputs.data()); 551 552 /* Ensure that outputs are not all identical, as in this case the test doesn't validate much */ 553 ASSERT_NE( 554 *std::max_element(outputs.cbegin(), outputs.cend()), 555 *std::min_element(outputs.cbegin(), outputs.cend())); 556 557 for (size_t i = 0; i < inputs.size(); i++) { 558 const uint8_t reference_output = xnn_qu8_requantize_rndna( 559 inputs[i], scale, zero_point(), qmin(), qmax()); 560 ASSERT_EQ(uint32_t(reference_output), uint32_t(outputs[i])); 561 } 562 } 563 } 564 TestRandomCasesRoundToNearestTiesAway(xnn_qs8_requantization_function requantize)565 void TestRandomCasesRoundToNearestTiesAway(xnn_qs8_requantization_function requantize) { 566 ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min()); 567 ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max()); 568 ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min()); 569 ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max()); 570 ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min()); 571 ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max()); 572 ASSERT_LT(qmin(), qmax()); 573 574 std::random_device random_device; 575 std::mt19937 rng(random_device()); 576 for (size_t iteration = 0; iteration < iterations(); iteration++) { 577 auto i8rng = std::bind( 578 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), std::ref(rng)); 579 580 std::vector<int32_t> inputs(4096); 581 std::vector<int8_t> outputs(inputs.size()); 582 583 std::uniform_real_distribution<float> scale_distribution(0x1.000000p-23f, 0x1.FFFFFEp-1f); 584 const float scale = scale_distribution(rng); 585 for (size_t i = 0; i < inputs.size(); i++) { 586 const int8_t approximate_output = std::min(std::max(int8_t(i8rng()), int8_t(qmin())), int8_t(qmax())); 587 const int32_t input = int32_t(double(approximate_output) / double(scale)); 588 inputs[i] = input; 589 } 590 591 requantize( 592 inputs.size(), inputs.data(), scale, zero_point(), qmin(), qmax(), 593 outputs.data()); 594 595 /* Ensure that outputs are not all identical, as in this case the test doesn't validate much */ 596 ASSERT_NE( 597 *std::max_element(outputs.cbegin(), outputs.cend()), 598 *std::min_element(outputs.cbegin(), outputs.cend())); 599 600 for (size_t i = 0; i < inputs.size(); i++) { 601 const int8_t reference_output = xnn_qs8_requantize_rndna( 602 inputs[i], scale, zero_point(), qmin(), qmax()); 603 ASSERT_EQ(int32_t(reference_output), int32_t(outputs[i])); 604 } 605 } 606 } 607 TestRandomCasesRoundToNearestTiesUp(xnn_qs8_requantization_function requantize)608 void TestRandomCasesRoundToNearestTiesUp(xnn_qs8_requantization_function requantize) { 609 ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min()); 610 ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max()); 611 ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min()); 612 ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max()); 613 ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min()); 614 ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max()); 615 ASSERT_LT(qmin(), qmax()); 616 617 std::random_device random_device; 618 std::mt19937 rng(random_device()); 619 for (size_t iteration = 0; iteration < iterations(); iteration++) { 620 auto i8rng = std::bind( 621 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), std::ref(rng)); 622 623 std::vector<int32_t> inputs(4096); 624 std::vector<int8_t> outputs(inputs.size()); 625 626 std::uniform_real_distribution<float> scale_distribution(0x1.000000p-23f, 0x1.FFFFFEp-1f); 627 const float scale = scale_distribution(rng); 628 for (size_t i = 0; i < inputs.size(); i++) { 629 const int8_t approximate_output = std::min(std::max(int8_t(i8rng()), int8_t(qmin())), int8_t(qmax())); 630 const int32_t input = int32_t(double(approximate_output) / double(scale)); 631 inputs[i] = input; 632 } 633 634 requantize( 635 inputs.size(), inputs.data(), scale, zero_point(), qmin(), qmax(), 636 outputs.data()); 637 638 /* Ensure that outputs are not all identical, as in this case the test doesn't validate much */ 639 ASSERT_NE( 640 *std::max_element(outputs.cbegin(), outputs.cend()), 641 *std::min_element(outputs.cbegin(), outputs.cend())); 642 643 for (size_t i = 0; i < inputs.size(); i++) { 644 const int8_t reference_output = xnn_qs8_requantize_rndnu( 645 inputs[i], scale, zero_point(), qmin(), qmax()); 646 ASSERT_EQ(int32_t(reference_output), int32_t(outputs[i])); 647 } 648 } 649 } 650 TestRandomCasesApproximate(xnn_qu8_requantization_function requantize)651 void TestRandomCasesApproximate(xnn_qu8_requantization_function requantize) { 652 ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min()); 653 ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max()); 654 ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min()); 655 ASSERT_LE(qmin(), std::numeric_limits<uint8_t>::max()); 656 ASSERT_GE(qmax(), std::numeric_limits<uint8_t>::min()); 657 ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max()); 658 ASSERT_LT(qmin(), qmax()); 659 660 std::random_device random_device; 661 std::mt19937 rng(random_device()); 662 for (size_t iteration = 0; iteration < iterations(); iteration++) { 663 auto u8rng = 664 std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng)); 665 666 std::vector<int32_t> inputs(4096); 667 std::vector<uint8_t> outputs(inputs.size()); 668 669 std::uniform_real_distribution<float> scale_distribution(0x1.000000p-23f, 0x1.FFFFFEp-1f); 670 const float scale = scale_distribution(rng); 671 for (size_t i = 0; i < inputs.size(); i++) { 672 const uint8_t approximate_output = std::min(std::max(uint8_t(u8rng()), uint8_t(qmin())), uint8_t(qmax())); 673 const int32_t input = int32_t(double(approximate_output) / double(scale)); 674 inputs[i] = input; 675 } 676 677 requantize( 678 inputs.size(), inputs.data(), scale, zero_point(), qmin(), qmax(), 679 outputs.data()); 680 681 /* Ensure that outputs are not all identical, as in this case Test doesn't validate much */ 682 ASSERT_NE( 683 *std::max_element(outputs.cbegin(), outputs.cend()), 684 *std::min_element(outputs.cbegin(), outputs.cend())); 685 686 for (size_t i = 0; i < inputs.size(); i++) { 687 const double reference_output = RequantizationTester::RequantizeApproximate( 688 inputs[i], scale, uint8_t(zero_point()), uint8_t(qmin()), uint8_t(qmax())); 689 ASSERT_LE(std::abs(reference_output - double(outputs[i])), 0.55) 690 << "input = " << inputs[i] << ", output = " << int32_t(outputs[i]) 691 << ", reference output = " << reference_output; 692 } 693 } 694 } 695 TestRandomCasesApproximate(xnn_qs8_requantization_function requantize)696 void TestRandomCasesApproximate(xnn_qs8_requantization_function requantize) { 697 ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min()); 698 ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max()); 699 ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min()); 700 ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max()); 701 ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min()); 702 ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max()); 703 ASSERT_LT(qmin(), qmax()); 704 705 std::random_device random_device; 706 std::mt19937 rng(random_device()); 707 for (size_t iteration = 0; iteration < iterations(); iteration++) { 708 auto i8rng = std::bind( 709 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), std::ref(rng)); 710 711 std::vector<int32_t> inputs(4096); 712 std::vector<int8_t> outputs(inputs.size()); 713 714 std::uniform_real_distribution<float> scale_distribution(0x1.000000p-23f, 0x1.FFFFFEp-1f); 715 const float scale = scale_distribution(rng); 716 for (size_t i = 0; i < inputs.size(); i++) { 717 const int8_t approximate_output = std::min(std::max(int8_t(i8rng()), int8_t(qmin())), int8_t(qmax())); 718 const int32_t input = int32_t(double(approximate_output) / double(scale)); 719 inputs[i] = input; 720 } 721 722 requantize( 723 inputs.size(), inputs.data(), scale, zero_point(), qmin(), qmax(), 724 outputs.data()); 725 726 /* Ensure that outputs are not all identical, as in this case Test doesn't validate much */ 727 ASSERT_NE( 728 *std::max_element(outputs.cbegin(), outputs.cend()), 729 *std::min_element(outputs.cbegin(), outputs.cend())); 730 731 for (size_t i = 0; i < inputs.size(); i++) { 732 const double reference_output = RequantizationTester::RequantizeApproximate( 733 inputs[i], scale, int8_t(zero_point()), int8_t(qmin()), int8_t(qmax())); 734 ASSERT_LE(std::abs(reference_output - double(outputs[i])), 0.55) 735 << "input = " << inputs[i] << ", output = " << int32_t(outputs[i]) 736 << ", reference output = " << reference_output; 737 } 738 } 739 } 740 ShiftLeft(int64_t w,uint32_t n)741 static inline int64_t ShiftLeft(int64_t w, uint32_t n) { 742 return (int64_t) ((uint64_t) w << n); 743 } 744 RequantizeApproximate(int32_t value,float scale,uint8_t zero_point,uint8_t qmin,uint8_t qmax)745 static inline double RequantizeApproximate( 746 int32_t value, 747 float scale, 748 uint8_t zero_point, 749 uint8_t qmin, 750 uint8_t qmax) 751 { 752 assert(scale < 1.0f); 753 assert(scale >= 0x1.0p-32f); 754 755 return std::min(std::max(double(value) * double(scale) + double(zero_point), double(qmin)), double(qmax)); 756 } 757 RequantizeApproximate(int32_t value,float scale,int8_t zero_point,int8_t qmin,int8_t qmax)758 static inline double RequantizeApproximate( 759 int32_t value, 760 float scale, 761 int8_t zero_point, 762 int8_t qmin, 763 int8_t qmax) 764 { 765 assert(scale < 1.0f); 766 assert(scale >= 0x1.0p-32f); 767 768 return std::min(std::max(double(value) * double(scale) + double(zero_point), double(qmin)), double(qmax)); 769 } 770 771 private: 772 uint32_t s_{1}; 773 int32_t zero_point_{0}; 774 int16_t qmin_{std::numeric_limits<int16_t>::min()}; 775 int16_t qmax_{std::numeric_limits<int16_t>::max()}; 776 size_t iterations_{1}; 777 }; 778