1 // Copyright 2019 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #pragma once 7 8 #include <gtest/gtest.h> 9 10 #include <algorithm> 11 #include <cassert> 12 #include <cstddef> 13 #include <cstdlib> 14 #include <functional> 15 #include <random> 16 #include <vector> 17 18 #include <fp16.h> 19 20 #include <xnnpack.h> 21 #include <xnnpack/microfnptr.h> 22 #include <xnnpack/microparams-init.h> 23 24 25 class VBinaryCMicrokernelTester { 26 public: 27 enum class OpType { 28 AddC, 29 DivC, 30 RDivC, 31 MaxC, 32 MinC, 33 MulC, 34 SqrDiffC, 35 SubC, 36 RSubC, 37 }; 38 batch_size(size_t batch_size)39 inline VBinaryCMicrokernelTester& batch_size(size_t batch_size) { 40 assert(batch_size != 0); 41 this->batch_size_ = batch_size; 42 return *this; 43 } 44 batch_size()45 inline size_t batch_size() const { 46 return this->batch_size_; 47 } 48 inplace(bool inplace)49 inline VBinaryCMicrokernelTester& inplace(bool inplace) { 50 this->inplace_ = inplace; 51 return *this; 52 } 53 inplace()54 inline bool inplace() const { 55 return this->inplace_; 56 } 57 qmin(uint8_t qmin)58 inline VBinaryCMicrokernelTester& qmin(uint8_t qmin) { 59 this->qmin_ = qmin; 60 return *this; 61 } 62 qmin()63 inline uint8_t qmin() const { 64 return this->qmin_; 65 } 66 qmax(uint8_t qmax)67 inline VBinaryCMicrokernelTester& qmax(uint8_t qmax) { 68 this->qmax_ = qmax; 69 return *this; 70 } 71 qmax()72 inline uint8_t qmax() const { 73 return this->qmax_; 74 } 75 iterations(size_t iterations)76 inline VBinaryCMicrokernelTester& iterations(size_t iterations) { 77 this->iterations_ = iterations; 78 return *this; 79 } 80 iterations()81 inline size_t iterations() const { 82 return this->iterations_; 83 } 84 Test(xnn_f16_vbinary_ukernel_function vbinaryc,OpType op_type)85 void Test(xnn_f16_vbinary_ukernel_function vbinaryc, OpType op_type) const { 86 std::random_device random_device; 87 auto rng = std::mt19937(random_device()); 88 std::uniform_real_distribution<float> f32dist(0.01f, 1.0f); 89 90 std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t)); 91 const uint16_t b = fp16_ieee_from_fp32_value(f32dist(rng)); 92 std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0)); 93 std::vector<float> y_ref(batch_size()); 94 for (size_t iteration = 0; iteration < iterations(); iteration++) { 95 std::generate(a.begin(), a.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); }); 96 if (inplace()) { 97 std::generate(y.begin(), y.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); }); 98 } else { 99 std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */); 100 } 101 const uint16_t* a_data = inplace() ? y.data() : a.data(); 102 103 // Compute reference results. 104 for (size_t i = 0; i < batch_size(); i++) { 105 switch (op_type) { 106 case OpType::AddC: 107 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b); 108 break; 109 case OpType::DivC: 110 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b); 111 break; 112 case OpType::RDivC: 113 y_ref[i] = fp16_ieee_to_fp32_value(b) / fp16_ieee_to_fp32_value(a_data[i]); 114 break; 115 case OpType::MaxC: 116 y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b)); 117 break; 118 case OpType::MinC: 119 y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b)); 120 break; 121 case OpType::MulC: 122 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b); 123 break; 124 case OpType::SqrDiffC: 125 { 126 const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b); 127 y_ref[i] = diff * diff; 128 break; 129 } 130 case OpType::SubC: 131 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b); 132 break; 133 case OpType::RSubC: 134 y_ref[i] = fp16_ieee_to_fp32_value(b) - fp16_ieee_to_fp32_value(a_data[i]); 135 break; 136 } 137 } 138 // Call optimized micro-kernel. 139 vbinaryc(batch_size() * sizeof(uint16_t), a_data, &b, y.data(), nullptr); 140 141 // Verify results. 142 for (size_t i = 0; i < batch_size(); i++) { 143 ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f)) 144 << "at " << i << " / " << batch_size(); 145 } 146 } 147 } 148 Test(xnn_f16_vbinary_minmax_ukernel_function vbinaryc_minmax,OpType op_type,xnn_init_f16_minmax_params_fn init_params)149 void Test(xnn_f16_vbinary_minmax_ukernel_function vbinaryc_minmax, OpType op_type, xnn_init_f16_minmax_params_fn init_params) const { 150 std::random_device random_device; 151 auto rng = std::mt19937(random_device()); 152 std::uniform_real_distribution<float> f32dist(0.01f, 1.0f); 153 154 std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t)); 155 const uint16_t b = fp16_ieee_from_fp32_value(f32dist(rng)); 156 std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0)); 157 std::vector<float> y_ref(batch_size()); 158 for (size_t iteration = 0; iteration < iterations(); iteration++) { 159 std::generate(a.begin(), a.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); }); 160 if (inplace()) { 161 std::generate(y.begin(), y.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); }); 162 } else { 163 std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */); 164 } 165 const uint16_t* a_data = inplace() ? y.data() : a.data(); 166 167 // Compute reference results. 168 for (size_t i = 0; i < batch_size(); i++) { 169 switch (op_type) { 170 case OpType::AddC: 171 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b); 172 break; 173 case OpType::DivC: 174 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b); 175 break; 176 case OpType::RDivC: 177 y_ref[i] = fp16_ieee_to_fp32_value(b) / fp16_ieee_to_fp32_value(a_data[i]); 178 break; 179 case OpType::MaxC: 180 y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b)); 181 break; 182 case OpType::MinC: 183 y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b)); 184 break; 185 case OpType::MulC: 186 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b); 187 break; 188 case OpType::SqrDiffC: 189 { 190 const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b); 191 y_ref[i] = diff * diff; 192 break; 193 } 194 case OpType::SubC: 195 y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b); 196 break; 197 case OpType::RSubC: 198 y_ref[i] = fp16_ieee_to_fp32_value(b) - fp16_ieee_to_fp32_value(a_data[i]); 199 break; 200 } 201 } 202 const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend()); 203 const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend()); 204 const float accumulated_range = accumulated_max - accumulated_min; 205 const float y_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ? 206 (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) : 207 +std::numeric_limits<float>::infinity())); 208 const float y_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ? 209 (accumulated_min + accumulated_range / 255.0f * float(qmin())) : 210 -std::numeric_limits<float>::infinity())); 211 for (size_t i = 0; i < batch_size(); i++) { 212 y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min); 213 } 214 215 // Prepare parameters. 216 xnn_f16_minmax_params params; 217 init_params(¶ms, 218 fp16_ieee_from_fp32_value(y_min), fp16_ieee_from_fp32_value(y_max)); 219 220 // Call optimized micro-kernel. 221 vbinaryc_minmax(batch_size() * sizeof(uint16_t), a_data, &b, y.data(), ¶ms); 222 223 // Verify results. 224 for (size_t i = 0; i < batch_size(); i++) { 225 ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f)) 226 << "at " << i << " / " << batch_size(); 227 } 228 } 229 } 230 231 void Test(xnn_f32_vbinary_ukernel_function vbinaryc, OpType op_type, xnn_init_f32_default_params_fn init_params = nullptr) const { 232 std::random_device random_device; 233 auto rng = std::mt19937(random_device()); 234 std::uniform_real_distribution<float> f32dist(0.01f, 1.0f); 235 236 std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float)); 237 const float b = f32dist(rng); 238 std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0)); 239 std::vector<float> y_ref(batch_size()); 240 for (size_t iteration = 0; iteration < iterations(); iteration++) { 241 std::generate(a.begin(), a.end(), [&]() { return f32dist(rng); }); 242 if (inplace()) { 243 std::generate(y.begin(), y.end(), [&]() { return f32dist(rng); }); 244 } else { 245 std::fill(y.begin(), y.end(), nanf("")); 246 } 247 const float* a_data = inplace() ? y.data() : a.data(); 248 249 // Compute reference results. 250 for (size_t i = 0; i < batch_size(); i++) { 251 switch (op_type) { 252 case OpType::AddC: 253 y_ref[i] = a_data[i] + b; 254 break; 255 case OpType::DivC: 256 y_ref[i] = a_data[i] / b; 257 break; 258 case OpType::RDivC: 259 y_ref[i] = b / a_data[i]; 260 break; 261 case OpType::MaxC: 262 y_ref[i] = std::max<float>(a_data[i], b); 263 break; 264 case OpType::MinC: 265 y_ref[i] = std::min<float>(a_data[i], b); 266 break; 267 case OpType::MulC: 268 y_ref[i] = a_data[i] * b; 269 break; 270 case OpType::SqrDiffC: 271 { 272 const float diff = a_data[i] - b; 273 y_ref[i] = diff * diff; 274 break; 275 } 276 case OpType::SubC: 277 y_ref[i] = a_data[i] - b; 278 break; 279 case OpType::RSubC: 280 y_ref[i] = b - a_data[i]; 281 break; 282 } 283 } 284 285 // Prepare parameters. 286 xnn_f32_default_params params; 287 if (init_params) { 288 init_params(¶ms); 289 } 290 291 // Call optimized micro-kernel. 292 vbinaryc(batch_size() * sizeof(float), a_data, &b, y.data(), init_params != nullptr ? ¶ms : nullptr); 293 294 // Verify results. 295 for (size_t i = 0; i < batch_size(); i++) { 296 ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f) 297 << "at " << i << " / " << batch_size(); 298 } 299 } 300 } 301 Test(xnn_f32_vbinary_relu_ukernel_function vbinaryc_relu,OpType op_type)302 void Test(xnn_f32_vbinary_relu_ukernel_function vbinaryc_relu, OpType op_type) const { 303 std::random_device random_device; 304 auto rng = std::mt19937(random_device()); 305 std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f); 306 307 std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float)); 308 const float b = f32dist(rng); 309 std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0)); 310 std::vector<float> y_ref(batch_size()); 311 for (size_t iteration = 0; iteration < iterations(); iteration++) { 312 std::generate(a.begin(), a.end(), [&]() { return f32dist(rng); }); 313 if (inplace()) { 314 std::generate(y.begin(), y.end(), [&]() { return f32dist(rng); }); 315 } else { 316 std::fill(y.begin(), y.end(), nanf("")); 317 } 318 const float* a_data = inplace() ? y.data() : a.data(); 319 320 // Compute reference results. 321 for (size_t i = 0; i < batch_size(); i++) { 322 switch (op_type) { 323 case OpType::AddC: 324 y_ref[i] = a_data[i] + b; 325 break; 326 case OpType::DivC: 327 y_ref[i] = a_data[i] / b; 328 break; 329 case OpType::RDivC: 330 y_ref[i] = b / a_data[i]; 331 break; 332 case OpType::MaxC: 333 y_ref[i] = std::max<float>(a_data[i], b); 334 break; 335 case OpType::MinC: 336 y_ref[i] = std::min<float>(a_data[i], b); 337 break; 338 case OpType::MulC: 339 y_ref[i] = a_data[i] * b; 340 break; 341 case OpType::SqrDiffC: 342 { 343 const float diff = a_data[i] - b; 344 y_ref[i] = diff * diff; 345 break; 346 } 347 case OpType::SubC: 348 y_ref[i] = a_data[i] - b; 349 break; 350 case OpType::RSubC: 351 y_ref[i] = b - a_data[i]; 352 break; 353 } 354 } 355 for (size_t i = 0; i < batch_size(); i++) { 356 y_ref[i] = std::max(y_ref[i], 0.0f); 357 } 358 359 // Call optimized micro-kernel. 360 vbinaryc_relu(batch_size() * sizeof(float), a_data, &b, y.data(), nullptr); 361 362 // Verify results. 363 for (size_t i = 0; i < batch_size(); i++) { 364 ASSERT_GE(y[i], 0.0f) 365 << "at " << i << " / " << batch_size(); 366 ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f) 367 << "at " << i << " / " << batch_size(); 368 } 369 } 370 } 371 Test(xnn_f32_vbinary_minmax_ukernel_function vbinaryc_minmax,OpType op_type,xnn_init_f32_minmax_params_fn init_params)372 void Test(xnn_f32_vbinary_minmax_ukernel_function vbinaryc_minmax, OpType op_type, xnn_init_f32_minmax_params_fn init_params) const { 373 std::random_device random_device; 374 auto rng = std::mt19937(random_device()); 375 std::uniform_real_distribution<float> f32dist; 376 377 std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float)); 378 const float b = f32dist(rng); 379 std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0)); 380 std::vector<float> y_ref(batch_size()); 381 for (size_t iteration = 0; iteration < iterations(); iteration++) { 382 std::generate(a.begin(), a.end(), [&]() { return f32dist(rng); }); 383 if (inplace()) { 384 std::generate(y.begin(), y.end(), [&]() { return f32dist(rng); }); 385 } else { 386 std::fill(y.begin(), y.end(), nanf("")); 387 } 388 const float* a_data = inplace() ? y.data() : a.data(); 389 390 // Compute reference results. 391 for (size_t i = 0; i < batch_size(); i++) { 392 switch (op_type) { 393 case OpType::AddC: 394 y_ref[i] = a_data[i] + b; 395 break; 396 case OpType::DivC: 397 y_ref[i] = a_data[i] / b; 398 break; 399 case OpType::RDivC: 400 y_ref[i] = b / a_data[i]; 401 break; 402 case OpType::MaxC: 403 y_ref[i] = std::max<float>(a_data[i], b); 404 break; 405 case OpType::MinC: 406 y_ref[i] = std::min<float>(a_data[i], b); 407 break; 408 case OpType::MulC: 409 y_ref[i] = a_data[i] * b; 410 break; 411 case OpType::SqrDiffC: 412 { 413 const float diff = a_data[i] - b; 414 y_ref[i] = diff * diff; 415 break; 416 } 417 case OpType::SubC: 418 y_ref[i] = a_data[i] - b; 419 break; 420 case OpType::RSubC: 421 y_ref[i] = b - a_data[i]; 422 break; 423 } 424 } 425 const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend()); 426 const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend()); 427 const float accumulated_range = accumulated_max - accumulated_min; 428 const float y_max = accumulated_range > 0.0f ? 429 (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) : 430 +std::numeric_limits<float>::infinity(); 431 const float y_min = accumulated_range > 0.0f ? 432 (accumulated_min + accumulated_range / 255.0f * float(qmin())) : 433 -std::numeric_limits<float>::infinity(); 434 for (size_t i = 0; i < batch_size(); i++) { 435 y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min); 436 } 437 438 // Prepare parameters. 439 xnn_f32_minmax_params params; 440 init_params(¶ms, y_min, y_max); 441 442 // Call optimized micro-kernel. 443 vbinaryc_minmax(batch_size() * sizeof(float), a_data, &b, y.data(), ¶ms); 444 445 // Verify results. 446 for (size_t i = 0; i < batch_size(); i++) { 447 ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f) 448 << "at " << i << " / " << batch_size(); 449 } 450 } 451 } 452 453 private: 454 size_t batch_size_{1}; 455 bool inplace_{false}; 456 uint8_t qmin_{0}; 457 uint8_t qmax_{255}; 458 size_t iterations_{15}; 459 }; 460