// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>

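// Flushes an IEEE half-precision value whose exponent field is zero (i.e. a
// denormal or zero) to a signed zero; all other values pass through unchanged.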
static uint16_t flush_fp16_denormal_to_zero(uint16_t v) {
  return (v & UINT16_C(0x7C00)) == 0 ? v & UINT16_C(0x8000) : v;
}


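// Tester for the XNNPACK Leaky ReLU operator in NC layout. Builder-style
// setters describe the test case, and the Test*() methods create, set up, and
// run the operator, comparing its output against a scalar reference.
//
// Illustrative usage (a sketch only; the actual tests live in the gtest
// sources that include this header, and the test name below is hypothetical):
//
//   TEST(LEAKY_RELU_NC_F32, small_batch_with_negative_slope) {
//     LeakyReLUOperatorTester()
//       .batch_size(3)
//       .channels(17)
//       .negative_slope(0.5f)
//       .iterations(1)
//       .TestF32();
//   }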
class LeakyReLUOperatorTester {
 public:
  inline LeakyReLUOperatorTester& channels(size_t channels) {
    assert(channels != 0);
    this->channels_ = channels;
    return *this;
  }

  inline size_t channels() const {
    return this->channels_;
  }

  inline LeakyReLUOperatorTester& input_stride(size_t input_stride) {
    assert(input_stride != 0);
    this->input_stride_ = input_stride;
    return *this;
  }

  inline size_t input_stride() const {
    if (this->input_stride_ == 0) {
      return this->channels_;
    } else {
      assert(this->input_stride_ >= this->channels_);
      return this->input_stride_;
    }
  }

  inline LeakyReLUOperatorTester& output_stride(size_t output_stride) {
    assert(output_stride != 0);
    this->output_stride_ = output_stride;
    return *this;
  }

  inline size_t output_stride() const {
    if (this->output_stride_ == 0) {
      return this->channels_;
    } else {
      assert(this->output_stride_ >= this->channels_);
      return this->output_stride_;
    }
  }

  inline LeakyReLUOperatorTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline LeakyReLUOperatorTester& negative_slope(float negative_slope) {
    assert(std::isnormal(negative_slope));
    this->negative_slope_ = negative_slope;
    return *this;
  }

  inline float negative_slope() const {
    return this->negative_slope_;
  }

  inline LeakyReLUOperatorTester& input_scale(float input_scale) {
    assert(input_scale > 0.0f);
    assert(std::isnormal(input_scale));
    this->input_scale_ = input_scale;
    return *this;
  }

  inline float input_scale() const {
    return this->input_scale_;
  }

  inline LeakyReLUOperatorTester& input_zero_point(int16_t input_zero_point) {
    this->input_zero_point_ = input_zero_point;
    return *this;
  }

  inline int16_t input_zero_point() const {
    return this->input_zero_point_;
  }

  inline LeakyReLUOperatorTester& output_scale(float output_scale) {
    assert(output_scale > 0.0f);
    assert(std::isnormal(output_scale));
    this->output_scale_ = output_scale;
    return *this;
  }

  inline float output_scale() const {
    return this->output_scale_;
  }

  inline LeakyReLUOperatorTester& output_zero_point(int16_t output_zero_point) {
    this->output_zero_point_ = output_zero_point;
    return *this;
  }

  inline int16_t output_zero_point() const {
    return this->output_zero_point_;
  }

  inline LeakyReLUOperatorTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

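  // Runs the f16 operator on random inputs and checks the results against an
  // fp32 reference with a mixed absolute/relative tolerance.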
  void TestF16() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f);

    std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + (batch_size() - 1) * input_stride() + channels());
    std::vector<uint16_t> output((batch_size() - 1) * output_stride() + channels());
    std::vector<float> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() {
        return flush_fp16_denormal_to_zero(fp16_ieee_from_fp32_value(f32dist(rng)));
      });
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
      // The operator applies the negative slope in half precision, so
      // round-trip it through fp16 for the reference computation.
      const uint16_t negative_slope_as_half = fp16_ieee_from_fp32_value(negative_slope());
      const float negative_slope_as_float = fp16_ieee_to_fp32_value(negative_slope_as_half);

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float x = fp16_ieee_to_fp32_value(input[i * input_stride() + c]);
          const float y = std::signbit(x) ? x * negative_slope_as_float : x;
          output_ref[i * channels() + c] = y;
        }
      }

      // Create, setup, run, and destroy Leaky ReLU operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t leaky_relu_op = nullptr;

      const xnn_status status = xnn_create_leaky_relu_nc_f16(
        channels(), input_stride(), output_stride(),
        negative_slope(),
        0, &leaky_relu_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, leaky_relu_op);

      // Smart pointer to automatically delete leaky_relu_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_leaky_relu_op(leaky_relu_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_leaky_relu_nc_f16(
          leaky_relu_op,
          batch_size(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(leaky_relu_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_NEAR(
              fp16_ieee_to_fp32_value(output[i * output_stride() + c]),
              output_ref[i * channels() + c],
              std::max(2.0e-4f, std::abs(output_ref[i * channels() + c]) * 1.0e-3f))
            << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

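  // Runs the f32 operator on random inputs and checks the results against a
  // scalar reference; f32 results are expected to match exactly.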
  void TestF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + (batch_size() - 1) * input_stride() + channels());
    std::vector<float> output((batch_size() - 1) * output_stride() + channels());
    std::vector<float> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
      std::fill(output.begin(), output.end(), std::nanf(""));

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float x = input[i * input_stride() + c];
          const float y = std::signbit(x) ? x * negative_slope() : x;
          output_ref[i * channels() + c] = y;
        }
      }

      // Create, setup, run, and destroy Leaky ReLU operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t leaky_relu_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_leaky_relu_nc_f32(
          channels(), input_stride(), output_stride(),
          negative_slope(),
          0, &leaky_relu_op));
      ASSERT_NE(nullptr, leaky_relu_op);

      // Smart pointer to automatically delete leaky_relu_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_leaky_relu_op(leaky_relu_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_leaky_relu_nc_f32(
          leaky_relu_op,
          batch_size(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(leaky_relu_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_EQ(output[i * output_stride() + c], output_ref[i * channels() + c])
            << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels()
            << ", input " << input[i * input_stride() + c] << ", negative slope " << negative_slope();
        }
      }
    }
  }

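  // Runs the signed 8-bit quantized operator and checks the integer output
  // against a real-valued reference in the output's quantized domain, allowing
  // just under one quantization step of error.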
  void TestQS8() const {
    ASSERT_GE(input_zero_point(), std::numeric_limits<int8_t>::min());
    ASSERT_LE(input_zero_point(), std::numeric_limits<int8_t>::max());
    ASSERT_GE(output_zero_point(), std::numeric_limits<int8_t>::min());
    ASSERT_LE(output_zero_point(), std::numeric_limits<int8_t>::max());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> i8dist(
      std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());

    std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + (batch_size() - 1) * input_stride() + channels());
    std::vector<int8_t> output((batch_size() - 1) * output_stride() + channels());
    std::vector<float> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
      std::fill(output.begin(), output.end(), INT8_C(0xA5));

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float x = input_scale() * (int32_t(input[i * input_stride() + c]) - input_zero_point());
          float y = (x < 0.0f ? x * negative_slope() : x) / output_scale() + float(output_zero_point());
          y = std::max<float>(y, std::numeric_limits<int8_t>::min());
          y = std::min<float>(y, std::numeric_limits<int8_t>::max());
          output_ref[i * channels() + c] = y;
        }
      }

      // Create, setup, run, and destroy Leaky ReLU operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t leaky_relu_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_leaky_relu_nc_qs8(
          channels(), input_stride(), output_stride(),
          negative_slope(),
          input_zero_point(), input_scale(),
          output_zero_point(), output_scale(),
          0, &leaky_relu_op));
      ASSERT_NE(nullptr, leaky_relu_op);

      // Smart pointer to automatically delete leaky_relu_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_leaky_relu_op(leaky_relu_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_leaky_relu_nc_qs8(
          leaky_relu_op,
          batch_size(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(leaky_relu_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_NEAR(float(int32_t(output[i * output_stride() + c])), output_ref[i * channels() + c], 0.9f)
            << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels()
            << ", input " << int32_t(input[i * input_stride() + c])
            << ", input zero point " << input_zero_point() << ", output zero point " << output_zero_point()
            << ", positive input-to-output ratio " << (input_scale() / output_scale())
            << ", negative input-to-output ratio " << (input_scale() / output_scale() * negative_slope());
        }
      }
    }
  }

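  // Runs the unsigned 8-bit quantized operator; same check as TestQS8, but
  // with uint8_t data and zero points in the [0, 255] range.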
  void TestQU8() const {
    ASSERT_GE(input_zero_point(), std::numeric_limits<uint8_t>::min());
    ASSERT_LE(input_zero_point(), std::numeric_limits<uint8_t>::max());
    ASSERT_GE(output_zero_point(), std::numeric_limits<uint8_t>::min());
    ASSERT_LE(output_zero_point(), std::numeric_limits<uint8_t>::max());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> u8dist(
      std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + (batch_size() - 1) * input_stride() + channels());
    std::vector<uint8_t> output((batch_size() - 1) * output_stride() + channels());
    std::vector<float> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
      std::fill(output.begin(), output.end(), UINT8_C(0xA5));

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float x = input_scale() * (int32_t(input[i * input_stride() + c]) - input_zero_point());
          float y = (x < 0.0f ? x * negative_slope() : x) / output_scale() + float(output_zero_point());
          y = std::max<float>(y, std::numeric_limits<uint8_t>::min());
          y = std::min<float>(y, std::numeric_limits<uint8_t>::max());
          output_ref[i * channels() + c] = y;
        }
      }

      // Create, setup, run, and destroy Leaky ReLU operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t leaky_relu_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_leaky_relu_nc_qu8(
          channels(), input_stride(), output_stride(),
          negative_slope(),
          input_zero_point(), input_scale(),
          output_zero_point(), output_scale(),
          0, &leaky_relu_op));
      ASSERT_NE(nullptr, leaky_relu_op);

      // Smart pointer to automatically delete leaky_relu_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_leaky_relu_op(leaky_relu_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_leaky_relu_nc_qu8(
          leaky_relu_op,
          batch_size(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(leaky_relu_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_NEAR(float(int32_t(output[i * output_stride() + c])), output_ref[i * channels() + c], 0.9f)
            << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels()
            << ", input " << int32_t(input[i * input_stride() + c])
            << ", input zero point " << input_zero_point() << ", output zero point " << output_zero_point()
            << ", positive input-to-output ratio " << (input_scale() / output_scale())
            << ", negative input-to-output ratio " << (input_scale() / output_scale() * negative_slope());
        }
      }
    }
  }

 private:
  size_t batch_size_{1};
  size_t channels_{1};
  size_t input_stride_{0};
  size_t output_stride_{0};
  float negative_slope_{0.3f};
  float output_scale_{0.75f};
  int16_t output_zero_point_{53};
  float input_scale_{1.25f};
  int16_t input_zero_point_{41};
  size_t iterations_{15};
};