1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 9 #pragma once 10 11 #include <gtest/gtest.h> 12 13 #include <algorithm> 14 #include <cassert> 15 #include <cmath> 16 #include <cstddef> 17 #include <cstdlib> 18 #include <limits> 19 #include <random> 20 #include <vector> 21 22 #include <fp16.h> 23 24 #include <xnnpack.h> 25 26 27 class SoftMaxOperatorTester { 28 public: channels(size_t channels)29 inline SoftMaxOperatorTester& channels(size_t channels) { 30 assert(channels != 0); 31 this->channels_ = channels; 32 return *this; 33 } 34 channels()35 inline size_t channels() const { 36 return this->channels_; 37 } 38 input_stride(size_t input_stride)39 inline SoftMaxOperatorTester& input_stride(size_t input_stride) { 40 assert(input_stride != 0); 41 this->input_stride_ = input_stride; 42 return *this; 43 } 44 input_stride()45 inline size_t input_stride() const { 46 if (this->input_stride_ == 0) { 47 return this->channels_; 48 } else { 49 assert(this->input_stride_ >= this->channels_); 50 return this->input_stride_; 51 } 52 } 53 output_stride(size_t output_stride)54 inline SoftMaxOperatorTester& output_stride(size_t output_stride) { 55 assert(output_stride != 0); 56 this->output_stride_ = output_stride; 57 return *this; 58 } 59 output_stride()60 inline size_t output_stride() const { 61 if (this->output_stride_ == 0) { 62 return this->channels_; 63 } else { 64 assert(this->output_stride_ >= this->channels_); 65 return this->output_stride_; 66 } 67 } 68 batch_size(size_t batch_size)69 inline SoftMaxOperatorTester& batch_size(size_t batch_size) { 70 assert(batch_size != 0); 71 this->batch_size_ = batch_size; 72 return *this; 73 } 74 batch_size()75 inline size_t batch_size() const { 76 return this->batch_size_; 77 } 78 input_scale(float input_scale)79 inline SoftMaxOperatorTester& input_scale(float input_scale) { 80 assert(input_scale > 0.0f); 81 assert(std::isnormal(input_scale)); 82 this->input_scale_ = input_scale; 83 return *this; 84 } 85 input_scale()86 inline float input_scale() const { 87 return this->input_scale_; 88 } 89 input_zero_point(uint8_t input_zero_point)90 inline SoftMaxOperatorTester& input_zero_point(uint8_t input_zero_point) { 91 this->input_zero_point_ = input_zero_point; 92 return *this; 93 } 94 input_zero_point()95 inline uint8_t input_zero_point() const { 96 return this->input_zero_point_; 97 } 98 output_scale()99 inline float output_scale() const { 100 return 1.0f / 256.0f; 101 } 102 output_zero_point()103 inline uint8_t output_zero_point() const { 104 return 0; 105 } 106 iterations(size_t iterations)107 inline SoftMaxOperatorTester& iterations(size_t iterations) { 108 this->iterations_ = iterations; 109 return *this; 110 } 111 iterations()112 inline size_t iterations() const { 113 return this->iterations_; 114 } 115 TestF16()116 void TestF16() const { 117 std::random_device random_device; 118 auto rng = std::mt19937(random_device()); 119 // Choose such range that exph(x[i]) overflows, but exph(x[i] - x_max) doesn't. 120 // However, the range is still narrow enough that single-precision exp doesn't overflow. 121 std::uniform_real_distribution<float> f32dist(15.0f, 20.0f); 122 123 std::vector<uint16_t> input((batch_size() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t)); 124 std::vector<uint16_t> output((batch_size() - 1) * output_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t)); 125 std::vector<float> output_ref(batch_size() * channels()); 126 for (size_t iteration = 0; iteration < iterations(); iteration++) { 127 std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); }); 128 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */); 129 130 // Compute reference results. 131 for (size_t i = 0; i < batch_size(); i++) { 132 float sum_exp = 0.0; 133 for (size_t c = 0; c < channels(); c++) { 134 sum_exp += std::exp(fp16_ieee_to_fp32_value(input[i * input_stride() + c])); 135 } 136 for (size_t c = 0; c < channels(); c++) { 137 output_ref[i * channels() + c] = std::exp(fp16_ieee_to_fp32_value(input[i * input_stride() + c])) / sum_exp; 138 } 139 } 140 141 // Create, setup, run, and destroy SoftMax operator. 142 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 143 xnn_operator_t softmax_op = nullptr; 144 145 const xnn_status status = xnn_create_softmax_nc_f16( 146 channels(), input_stride(), output_stride(), 147 0, &softmax_op); 148 if (status == xnn_status_unsupported_hardware) { 149 GTEST_SKIP(); 150 } 151 ASSERT_EQ(xnn_status_success, status); 152 ASSERT_NE(nullptr, softmax_op); 153 154 // Smart pointer to automatically delete softmax_op. 155 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_softmax_op(softmax_op, xnn_delete_operator); 156 157 ASSERT_EQ(xnn_status_success, 158 xnn_setup_softmax_nc_f16( 159 softmax_op, 160 batch_size(), 161 input.data(), output.data(), 162 nullptr /* thread pool */)); 163 164 ASSERT_EQ(xnn_status_success, 165 xnn_run_operator(softmax_op, nullptr /* thread pool */)); 166 167 // Verify results. 168 for (size_t i = 0; i < batch_size(); i++) { 169 for (size_t c = 0; c < channels(); c++) { 170 ASSERT_NEAR( 171 fp16_ieee_to_fp32_value(output[i * output_stride() + c]), 172 output_ref[i * channels() + c], 173 std::max(1.0e-4f, std::abs(output_ref[i * channels() + c]) * 5.0e-3f)) 174 << "element " << i << " / " << batch_size() << ", channel " << c << " / " << channels(); 175 } 176 } 177 } 178 } 179 TestF32()180 void TestF32() const { 181 std::random_device random_device; 182 auto rng = std::mt19937(random_device()); 183 // Choose such range that expf(x[i]) overflows, but expf(x[i] - x_max) doesn't. 184 // However, the range is still narrow enough that single-precision exp doesn't overflow. 185 std::uniform_real_distribution<float> f32dist(90.0f, 100.0f); 186 187 std::vector<float> input((batch_size() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float)); 188 std::vector<float> output((batch_size() - 1) * output_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float)); 189 std::vector<double> output_ref(batch_size() * channels()); 190 for (size_t iteration = 0; iteration < iterations(); iteration++) { 191 std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); 192 std::fill(output.begin(), output.end(), std::nanf("")); 193 194 // Compute reference results. 195 for (size_t i = 0; i < batch_size(); i++) { 196 double sum_exp = 0.0; 197 for (size_t c = 0; c < channels(); c++) { 198 sum_exp += std::exp(double(input[i * input_stride() + c])); 199 } 200 for (size_t c = 0; c < channels(); c++) { 201 output_ref[i * channels() + c] = std::exp(double(input[i * input_stride() + c])) / sum_exp; 202 } 203 } 204 205 // Create, setup, run, and destroy SoftMax operator. 206 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 207 xnn_operator_t softmax_op = nullptr; 208 209 ASSERT_EQ(xnn_status_success, 210 xnn_create_softmax_nc_f32( 211 channels(), input_stride(), output_stride(), 212 0, &softmax_op)); 213 ASSERT_NE(nullptr, softmax_op); 214 215 // Smart pointer to automatically delete softmax_op. 216 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_softmax_op(softmax_op, xnn_delete_operator); 217 218 ASSERT_EQ(xnn_status_success, 219 xnn_setup_softmax_nc_f32( 220 softmax_op, 221 batch_size(), 222 input.data(), output.data(), 223 nullptr /* thread pool */)); 224 225 ASSERT_EQ(xnn_status_success, 226 xnn_run_operator(softmax_op, nullptr /* thread pool */)); 227 228 // Verify results. 229 for (size_t i = 0; i < batch_size(); i++) { 230 for (size_t c = 0; c < channels(); c++) { 231 ASSERT_NEAR( 232 double(output[i * output_stride() + c]), 233 output_ref[i * channels() + c], 234 output_ref[i * channels() + c] * 1.0e-5) 235 << "element " << i << " / " << batch_size() << ", channel " << c << " / " << channels(); 236 } 237 } 238 } 239 } 240 TestQU8()241 void TestQU8() const { 242 std::random_device random_device; 243 auto rng = std::mt19937(random_device()); 244 std::uniform_int_distribution<int32_t> u8dist( 245 std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max()); 246 247 std::vector<uint8_t> input((batch_size() - 1) * input_stride() + channels()); 248 std::vector<uint8_t> output((batch_size() - 1) * output_stride() + channels()); 249 std::vector<float> output_ref(batch_size() * channels()); 250 for (size_t iteration = 0; iteration < iterations(); iteration++) { 251 std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); }); 252 std::fill(output.begin(), output.end(), UINT8_C(0xA5)); 253 254 // Compute reference results. 255 for (size_t i = 0; i < batch_size(); i++) { 256 const int32_t max_input = *std::max_element( 257 input.data() + i * input_stride(), 258 input.data() + i * input_stride() + channels()); 259 float sum_exp = 0.0f; 260 for (size_t c = 0; c < channels(); c++) { 261 sum_exp += 262 std::exp((int32_t(input[i * input_stride() + c]) - max_input) * 263 input_scale()); 264 } 265 for (size_t c = 0; c < channels(); c++) { 266 output_ref[i * channels() + c] = 267 std::exp((int32_t(input[i * input_stride() + c]) - max_input) * 268 input_scale()) / 269 (sum_exp * output_scale()); 270 output_ref[i * channels() + c] = std::min(output_ref[i * channels() + c], 255.0f); 271 } 272 } 273 274 // Create, setup, run, and destroy SoftMax operator. 275 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */)); 276 xnn_operator_t softmax_op = nullptr; 277 278 ASSERT_EQ(xnn_status_success, 279 xnn_create_softmax_nc_qu8( 280 channels(), input_stride(), output_stride(), 281 input_scale(), 282 output_zero_point(), output_scale(), 283 0, &softmax_op)); 284 ASSERT_NE(nullptr, softmax_op); 285 286 // Smart pointer to automatically delete softmax_op. 287 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_softmax_op(softmax_op, xnn_delete_operator); 288 289 ASSERT_EQ(xnn_status_success, 290 xnn_setup_softmax_nc_qu8( 291 softmax_op, 292 batch_size(), 293 input.data(), output.data(), 294 nullptr /* thread pool */)); 295 296 ASSERT_EQ(xnn_status_success, 297 xnn_run_operator(softmax_op, nullptr /* thread pool */)); 298 299 // Verify results. 300 for (size_t i = 0; i < batch_size(); i++) { 301 for (size_t c = 0; c < channels(); c++) { 302 ASSERT_NEAR(float(int32_t(output[i * output_stride() + c])), output_ref[i * channels() + c], 0.6f); 303 } 304 } 305 } 306 } 307 308 private: 309 size_t batch_size_{1}; 310 size_t channels_{1}; 311 size_t input_stride_{0}; 312 size_t output_stride_{0}; 313 float input_scale_{0.176080093}; 314 uint8_t input_zero_point_{121}; 315 size_t iterations_{15}; 316 }; 317