1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 9 #pragma once 10 11 #include <gtest/gtest.h> 12 13 #include <algorithm> 14 #include <cassert> 15 #include <cstddef> 16 #include <cstdlib> 17 #include <limits> 18 #include <random> 19 #include <vector> 20 21 #include <fp16.h> 22 23 #include <xnnpack.h> 24 25 26 class ClampOperatorTester { 27 public: channels(size_t channels)28 inline ClampOperatorTester& channels(size_t channels) { 29 assert(channels != 0); 30 this->channels_ = channels; 31 return *this; 32 } 33 channels()34 inline size_t channels() const { 35 return this->channels_; 36 } 37 input_stride(size_t input_stride)38 inline ClampOperatorTester& input_stride(size_t input_stride) { 39 assert(input_stride != 0); 40 this->input_stride_ = input_stride; 41 return *this; 42 } 43 input_stride()44 inline size_t input_stride() const { 45 if (this->input_stride_ == 0) { 46 return this->channels_; 47 } else { 48 assert(this->input_stride_ >= this->channels_); 49 return this->input_stride_; 50 } 51 } 52 output_stride(size_t output_stride)53 inline ClampOperatorTester& output_stride(size_t output_stride) { 54 assert(output_stride != 0); 55 this->output_stride_ = output_stride; 56 return *this; 57 } 58 output_stride()59 inline size_t output_stride() const { 60 if (this->output_stride_ == 0) { 61 return this->channels_; 62 } else { 63 assert(this->output_stride_ >= this->channels_); 64 return this->output_stride_; 65 } 66 } 67 batch_size(size_t batch_size)68 inline ClampOperatorTester& batch_size(size_t batch_size) { 69 assert(batch_size != 0); 70 this->batch_size_ = batch_size; 71 return *this; 72 } 73 batch_size()74 inline size_t batch_size() const { 75 return this->batch_size_; 76 } 77 qmin(int16_t qmin)78 inline ClampOperatorTester& qmin(int16_t qmin) { 79 this->qmin_ = qmin; 80 return *this; 81 } 82 qmin()83 inline int16_t qmin() const { 84 return this->qmin_; 85 } 86 qmax(int16_t qmax)87 inline ClampOperatorTester& qmax(int16_t qmax) { 88 this->qmax_ = qmax; 89 return *this; 90 } 91 qmax()92 inline int16_t qmax() const { 93 return this->qmax_; 94 } 95 relu_activation(bool relu_activation)96 inline ClampOperatorTester& relu_activation(bool relu_activation) { 97 this->relu_activation_ = relu_activation; 98 return *this; 99 } 100 relu_activation()101 inline bool relu_activation() const { 102 return this->relu_activation_; 103 } 104 iterations(size_t iterations)105 inline ClampOperatorTester& iterations(size_t iterations) { 106 this->iterations_ = iterations; 107 return *this; 108 } 109 iterations()110 inline size_t iterations() const { 111 return this->iterations_; 112 } 113 TestF16()114 void TestF16() const { 115 ASSERT_LT(qmin(), qmax()); 116 ASSERT_FALSE(relu_activation()); 117 118 std::random_device random_device; 119 auto rng = std::mt19937(random_device()); 120 std::uniform_real_distribution<float> f32dist( 121 std::numeric_limits<int16_t>::min(), std::numeric_limits<int16_t>::max()); 122 123 std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + 124 (batch_size() - 1) * input_stride() + channels()); 125 std::vector<uint16_t> output((batch_size() - 1) * output_stride() + channels()); 126 std::vector<float> output_ref(batch_size() * channels()); 127 for (size_t iteration = 0; iteration < iterations(); iteration++) { 128 std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); }); 129 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */); 130 131 // Compute reference results. 132 const float output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(float(qmin()))); 133 const float output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(float(qmax()))); 134 for (size_t i = 0; i < batch_size(); i++) { 135 for (size_t c = 0; c < channels(); c++) { 136 const float x = fp16_ieee_to_fp32_value(input[i * input_stride() + c]); 137 const float y = relu_activation() ? std::max(x, 0.f) : std::min(std::max(x, output_min), output_max); 138 output_ref[i * channels() + c] = y; 139 } 140 } 141 142 // Create, setup, run, and destroy Clamp operator. 143 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 144 xnn_operator_t clamp_op = nullptr; 145 146 const xnn_status status = xnn_create_clamp_nc_f16( 147 channels(), input_stride(), output_stride(), 148 output_min, output_max, 149 0, &clamp_op); 150 if (status == xnn_status_unsupported_hardware) { 151 GTEST_SKIP(); 152 } 153 ASSERT_EQ(xnn_status_success, status); 154 ASSERT_NE(nullptr, clamp_op); 155 156 // Smart pointer to automatically delete clamp_op. 157 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_clamp_op(clamp_op, xnn_delete_operator); 158 159 ASSERT_EQ(xnn_status_success, 160 xnn_setup_clamp_nc_f16( 161 clamp_op, 162 batch_size(), 163 input.data(), output.data(), 164 nullptr /* thread pool */)); 165 166 ASSERT_EQ(xnn_status_success, 167 xnn_run_operator(clamp_op, nullptr /* thread pool */)); 168 169 // Verify results. 170 for (size_t i = 0; i < batch_size(); i++) { 171 for (size_t c = 0; c < channels(); c++) { 172 ASSERT_LE(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_max) 173 << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels(); 174 ASSERT_GE(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_min) 175 << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels(); 176 ASSERT_NEAR(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_ref[i * channels() + c], std::max(1.0e-4f, std::abs(output_ref[i * channels() + c]) * 1.0e-2f)) 177 << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels() 178 << ", min " << output_min << ", max " << output_max; 179 } 180 } 181 } 182 } 183 TestF32()184 void TestF32() const { 185 ASSERT_LT(qmin(), qmax()); 186 187 std::random_device random_device; 188 auto rng = std::mt19937(random_device()); 189 std::uniform_real_distribution<float> f32dist( 190 std::numeric_limits<int16_t>::min(), std::numeric_limits<int16_t>::max()); 191 192 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + 193 (batch_size() - 1) * input_stride() + channels()); 194 std::vector<float> output((batch_size() - 1) * output_stride() + channels()); 195 std::vector<float> output_ref(batch_size() * channels()); 196 for (size_t iteration = 0; iteration < iterations(); iteration++) { 197 std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); 198 std::fill(output.begin(), output.end(), std::nanf("")); 199 200 // Compute reference results. 201 for (size_t i = 0; i < batch_size(); i++) { 202 for (size_t c = 0; c < channels(); c++) { 203 const float x = input[i * input_stride() + c]; 204 const float y = relu_activation() ? std::max(x, 0.f) : 205 std::min(std::max(x, float(qmin())), float(qmax())); 206 output_ref[i * channels() + c] = y; 207 } 208 } 209 210 // Create, setup, run, and destroy Clamp operator. 211 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 212 xnn_operator_t clamp_op = nullptr; 213 214 const float output_min = relu_activation() ? 0.0f : float(qmin()); 215 const float output_max = relu_activation() ? std::numeric_limits<float>::infinity() : float(qmax()); 216 ASSERT_EQ(xnn_status_success, 217 xnn_create_clamp_nc_f32( 218 channels(), input_stride(), output_stride(), 219 output_min, output_max, 220 0, &clamp_op)); 221 ASSERT_NE(nullptr, clamp_op); 222 223 // Smart pointer to automatically delete clamp_op. 224 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_clamp_op(clamp_op, xnn_delete_operator); 225 226 ASSERT_EQ(xnn_status_success, 227 xnn_setup_clamp_nc_f32( 228 clamp_op, 229 batch_size(), 230 input.data(), output.data(), 231 nullptr /* thread pool */)); 232 233 ASSERT_EQ(xnn_status_success, 234 xnn_run_operator(clamp_op, nullptr /* thread pool */)); 235 236 // Verify results. 237 for (size_t i = 0; i < batch_size(); i++) { 238 for (size_t c = 0; c < channels(); c++) { 239 ASSERT_LE(output[i * output_stride() + c], output_max) 240 << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels(); 241 ASSERT_GE(output[i * output_stride() + c], output_min) 242 << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels(); 243 ASSERT_EQ(output_ref[i * channels() + c], output[i * output_stride() + c]) 244 << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels() 245 << ", min " << output_min << ", max " << output_max; 246 } 247 } 248 } 249 } 250 TestS8()251 void TestS8() const { 252 ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min()); 253 ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max()); 254 ASSERT_LT(qmin(), qmax()); 255 256 std::random_device random_device; 257 auto rng = std::mt19937(random_device()); 258 std::uniform_int_distribution<int32_t> i8dist( 259 std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()); 260 261 std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + 262 (batch_size() - 1) * input_stride() + channels()); 263 std::vector<int8_t> output((batch_size() - 1) * output_stride() + channels()); 264 std::vector<int8_t> output_ref(batch_size() * channels()); 265 for (size_t iteration = 0; iteration < iterations(); iteration++) { 266 std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); }); 267 std::fill(output.begin(), output.end(), INT8_C(0xA5)); 268 269 // Compute reference results. 270 for (size_t i = 0; i < batch_size(); i++) { 271 for (size_t c = 0; c < channels(); c++) { 272 const int8_t x = input[i * input_stride() + c]; 273 const int8_t y = std::min(std::max(x, int8_t(qmin())), int8_t(qmax())); 274 output_ref[i * channels() + c] = y; 275 } 276 } 277 278 // Create, setup, run, and destroy Clamp operator. 279 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 280 xnn_operator_t clamp_op = nullptr; 281 282 ASSERT_EQ(xnn_status_success, 283 xnn_create_clamp_nc_s8( 284 channels(), input_stride(), output_stride(), 285 int8_t(qmin()), int8_t(qmax()), 286 0, &clamp_op)); 287 ASSERT_NE(nullptr, clamp_op); 288 289 // Smart pointer to automatically delete clamp_op. 290 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_clamp_op(clamp_op, xnn_delete_operator); 291 292 ASSERT_EQ(xnn_status_success, 293 xnn_setup_clamp_nc_s8( 294 clamp_op, 295 batch_size(), 296 input.data(), output.data(), 297 nullptr /* thread pool */)); 298 299 ASSERT_EQ(xnn_status_success, 300 xnn_run_operator(clamp_op, nullptr /* thread pool */)); 301 302 // Verify results . 303 for (size_t i = 0; i < batch_size(); i++) { 304 for (size_t c = 0; c < channels(); c++) { 305 ASSERT_LE(int16_t(output[i * output_stride() + c]), qmax()) 306 << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels(); 307 ASSERT_GE(int16_t(output[i * output_stride() + c]), qmin()) 308 << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels(); 309 ASSERT_EQ(int16_t(output[i * output_stride() + c]), int16_t(output_ref[i * channels() + c])) 310 << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels() 311 << ", min " << qmin() << ", max " << qmax(); 312 } 313 } 314 } 315 } 316 TestU8()317 void TestU8() const { 318 ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min()); 319 ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max()); 320 ASSERT_LT(qmin(), qmax()); 321 322 std::random_device random_device; 323 auto rng = std::mt19937(random_device()); 324 std::uniform_int_distribution<int32_t> u8dist( 325 std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max()); 326 327 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + 328 (batch_size() - 1) * input_stride() + channels()); 329 std::vector<uint8_t> output((batch_size() - 1) * output_stride() + channels()); 330 std::vector<uint8_t> output_ref(batch_size() * channels()); 331 for (size_t iteration = 0; iteration < iterations(); iteration++) { 332 std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); }); 333 std::fill(output.begin(), output.end(), UINT8_C(0xA5)); 334 335 // Compute reference results. 336 for (size_t i = 0; i < batch_size(); i++) { 337 for (size_t c = 0; c < channels(); c++) { 338 const uint8_t x = input[i * input_stride() + c]; 339 const uint8_t y = std::min(std::max(x, uint8_t(qmin())), uint8_t(qmax())); 340 output_ref[i * channels() + c] = y; 341 } 342 } 343 344 // Create, setup, run, and destroy Clamp operator. 345 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 346 xnn_operator_t clamp_op = nullptr; 347 348 ASSERT_EQ(xnn_status_success, 349 xnn_create_clamp_nc_u8( 350 channels(), input_stride(), output_stride(), 351 uint8_t(qmin()), uint8_t(qmax()), 352 0, &clamp_op)); 353 ASSERT_NE(nullptr, clamp_op); 354 355 // Smart pointer to automatically delete clamp_op. 356 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_clamp_op(clamp_op, xnn_delete_operator); 357 358 ASSERT_EQ(xnn_status_success, 359 xnn_setup_clamp_nc_u8( 360 clamp_op, 361 batch_size(), 362 input.data(), output.data(), 363 nullptr /* thread pool */)); 364 365 ASSERT_EQ(xnn_status_success, 366 xnn_run_operator(clamp_op, nullptr /* thread pool */)); 367 368 // Verify results . 369 for (size_t i = 0; i < batch_size(); i++) { 370 for (size_t c = 0; c < channels(); c++) { 371 ASSERT_LE(int16_t(output[i * output_stride() + c]), qmax()) 372 << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels(); 373 ASSERT_GE(int16_t(output[i * output_stride() + c]), qmin()) 374 << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels(); 375 ASSERT_EQ(int16_t(output[i * output_stride() + c]), int16_t(output_ref[i * channels() + c])) 376 << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels() 377 << ", min " << qmin() << ", max " << qmax(); 378 } 379 } 380 } 381 } 382 383 private: 384 size_t batch_size_{1}; 385 size_t channels_{1}; 386 size_t input_stride_{0}; 387 size_t output_stride_{0}; 388 int16_t qmin_{std::numeric_limits<int16_t>::min()}; 389 int16_t qmax_{std::numeric_limits<int16_t>::max()}; 390 bool relu_activation_{false}; 391 size_t iterations_{15}; 392 }; 393