// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>

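// Half-precision inputs are sanitized with this helper: values with a zero
// exponent field (denormals) are replaced by a signed zero, so the test does
// not depend on whether the FP16 hardware under test flushes denormals.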
static uint16_t flush_fp16_denormal_to_zero(uint16_t v) {
  return (v & UINT16_C(0x7C00)) == 0 ? v & UINT16_C(0x8000) : v;
}

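// Builder-style tester for XNNPACK's Leaky ReLU operators in F16, F32, QS8,
// and QU8 variants. Setters return *this so calls chain; a typical (purely
// illustrative) use looks like:
//
//   LeakyReLUOperatorTester()
//     .batch_size(7)
//     .channels(19)
//     .negative_slope(0.5f)
//     .iterations(3)
//     .TestF32();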
class LeakyReLUOperatorTester {
 public:
  inline LeakyReLUOperatorTester& channels(size_t channels) {
    assert(channels != 0);
    this->channels_ = channels;
    return *this;
  }

  inline size_t channels() const {
    return this->channels_;
  }

  inline LeakyReLUOperatorTester& input_stride(size_t input_stride) {
    assert(input_stride != 0);
    this->input_stride_ = input_stride;
    return *this;
  }

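  // A stride of 0 means "not set"; the getters below then fall back to a
  // dense layout whose stride equals the channel count.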
  inline size_t input_stride() const {
    if (this->input_stride_ == 0) {
      return this->channels_;
    } else {
      assert(this->input_stride_ >= this->channels_);
      return this->input_stride_;
    }
  }

  inline LeakyReLUOperatorTester& output_stride(size_t output_stride) {
    assert(output_stride != 0);
    this->output_stride_ = output_stride;
    return *this;
  }

  inline size_t output_stride() const {
    if (this->output_stride_ == 0) {
      return this->channels_;
    } else {
      assert(this->output_stride_ >= this->channels_);
      return this->output_stride_;
    }
  }

  inline LeakyReLUOperatorTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline LeakyReLUOperatorTester& negative_slope(float negative_slope) {
    assert(std::isnormal(negative_slope));
    this->negative_slope_ = negative_slope;
    return *this;
  }

  inline float negative_slope() const {
    return this->negative_slope_;
  }

  inline LeakyReLUOperatorTester& input_scale(float input_scale) {
    assert(input_scale > 0.0f);
    assert(std::isnormal(input_scale));
    this->input_scale_ = input_scale;
    return *this;
  }

  inline float input_scale() const {
    return this->input_scale_;
  }

  inline LeakyReLUOperatorTester& input_zero_point(int16_t input_zero_point) {
    this->input_zero_point_ = input_zero_point;
    return *this;
  }

  inline int16_t input_zero_point() const {
    return this->input_zero_point_;
  }

  inline LeakyReLUOperatorTester& output_scale(float output_scale) {
    assert(output_scale > 0.0f);
    assert(std::isnormal(output_scale));
    this->output_scale_ = output_scale;
    return *this;
  }

  inline float output_scale() const {
    return this->output_scale_;
  }

  inline LeakyReLUOperatorTester& output_zero_point(int16_t output_zero_point) {
    this->output_zero_point_ = output_zero_point;
    return *this;
  }

  inline int16_t output_zero_point() const {
    return this->output_zero_point_;
  }

  inline LeakyReLUOperatorTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

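  // F16 path: generates random half-precision inputs, computes a float
  // reference using the negative slope rounded to half precision, and checks
  // the operator output against that reference.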
  void TestF16() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f);

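    // The input is over-allocated by XNN_EXTRA_BYTES because XNNPACK
    // microkernels are allowed to read (but not use) bytes past the end.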
    std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + (batch_size() - 1) * input_stride() + channels());
    std::vector<uint16_t> output((batch_size() - 1) * output_stride() + channels());
    std::vector<float> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() {
        return flush_fp16_denormal_to_zero(fp16_ieee_from_fp32_value(f32dist(rng)));
      });
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
      const uint16_t negative_slope_as_half = fp16_ieee_from_fp32_value(negative_slope());
      const float negative_slope_as_float = fp16_ieee_to_fp32_value(negative_slope_as_half);

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float x = fp16_ieee_to_fp32_value(input[i * input_stride() + c]);
          const float y = std::signbit(x) ? x * negative_slope_as_float : x;
          output_ref[i * channels() + c] = y;
        }
      }

      // Create, setup, run, and destroy Leaky ReLU operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t leaky_relu_op = nullptr;

      const xnn_status status = xnn_create_leaky_relu_nc_f16(
          channels(), input_stride(), output_stride(),
          negative_slope(),
          0, &leaky_relu_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, leaky_relu_op);

      // Smart pointer to automatically delete leaky_relu_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_leaky_relu_op(leaky_relu_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_leaky_relu_nc_f16(
          leaky_relu_op,
          batch_size(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(leaky_relu_op, nullptr /* thread pool */));

      // Verify results.
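      // Tolerance is the larger of an absolute 2e-4 and a relative 0.1%,
      // which absorbs half-precision rounding error.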
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_NEAR(
              fp16_ieee_to_fp32_value(output[i * output_stride() + c]),
              output_ref[i * channels() + c],
              std::max(2.0e-4f, std::abs(output_ref[i * channels() + c]) * 1.0e-3f))
            << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels();
        }
      }
    }
  }

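  // F32 path: the output is compared for exact equality, since each element
  // involves at most a single multiplication by the negative slope.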
  void TestF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + (batch_size() - 1) * input_stride() + channels());
    std::vector<float> output((batch_size() - 1) * output_stride() + channels());
    std::vector<float> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
      std::fill(output.begin(), output.end(), std::nanf(""));

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float x = input[i * input_stride() + c];
          const float y = std::signbit(x) ? x * negative_slope() : x;
          output_ref[i * channels() + c] = y;
        }
      }

      // Create, setup, run, and destroy Leaky ReLU operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t leaky_relu_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_leaky_relu_nc_f32(
          channels(), input_stride(), output_stride(),
          negative_slope(),
          0, &leaky_relu_op));
      ASSERT_NE(nullptr, leaky_relu_op);

      // Smart pointer to automatically delete leaky_relu_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_leaky_relu_op(leaky_relu_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_leaky_relu_nc_f32(
          leaky_relu_op,
          batch_size(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(leaky_relu_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_EQ(output[i * output_stride() + c], output_ref[i * channels() + c])
            << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels()
            << ", input " << input[i * input_stride() + c] << ", negative slope " << negative_slope();
        }
      }
    }
  }

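  // QS8 path: the reference is computed in float by dequantizing the input,
  // applying the leaky ReLU, requantizing with the output scale and zero
  // point, and clamping to the int8 range.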
  void TestQS8() const {
    ASSERT_GE(input_zero_point(), std::numeric_limits<int8_t>::min());
    ASSERT_LE(input_zero_point(), std::numeric_limits<int8_t>::max());
    ASSERT_GE(output_zero_point(), std::numeric_limits<int8_t>::min());
    ASSERT_LE(output_zero_point(), std::numeric_limits<int8_t>::max());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> i8dist(
      std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());

    std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + (batch_size() - 1) * input_stride() + channels());
    std::vector<int8_t> output((batch_size() - 1) * output_stride() + channels());
    std::vector<float> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
      std::fill(output.begin(), output.end(), INT8_C(0xA5));

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float x = input_scale() * (int32_t(input[i * input_stride() + c]) - input_zero_point());
          float y = (x < 0.0f ? x * negative_slope() : x) / output_scale() + float(output_zero_point());
          y = std::max<float>(y, std::numeric_limits<int8_t>::min());
          y = std::min<float>(y, std::numeric_limits<int8_t>::max());
          output_ref[i * channels() + c] = y;
        }
      }

      // Create, setup, run, and destroy Leaky ReLU operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t leaky_relu_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_leaky_relu_nc_qs8(
          channels(), input_stride(), output_stride(),
          negative_slope(),
          input_zero_point(), input_scale(),
          output_zero_point(), output_scale(),
          0, &leaky_relu_op));
      ASSERT_NE(nullptr, leaky_relu_op);

      // Smart pointer to automatically delete leaky_relu_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_leaky_relu_op(leaky_relu_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_leaky_relu_nc_qs8(
          leaky_relu_op,
          batch_size(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(leaky_relu_op, nullptr /* thread pool */));

      // Verify results.
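      // The float reference is not rounded to an integer, so allow slightly
      // less than one quantization step of difference.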
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_NEAR(float(int32_t(output[i * output_stride() + c])), output_ref[i * channels() + c], 0.9f)
            << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels()
            << ", input " << int32_t(input[i * input_stride() + c])
            << ", input zero point " << input_zero_point() << ", output zero point " << output_zero_point()
            << ", positive input-to-output ratio " << (input_scale() / output_scale())
            << ", negative input-to-output ratio " << (input_scale() / output_scale() * negative_slope());
        }
      }
    }
  }

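  // QU8 path: identical to the QS8 test above, except that the tensors are
  // unsigned 8-bit and the reference is clamped to the uint8 range.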
  void TestQU8() const {
    ASSERT_GE(input_zero_point(), std::numeric_limits<uint8_t>::min());
    ASSERT_LE(input_zero_point(), std::numeric_limits<uint8_t>::max());
    ASSERT_GE(output_zero_point(), std::numeric_limits<uint8_t>::min());
    ASSERT_LE(output_zero_point(), std::numeric_limits<uint8_t>::max());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    std::uniform_int_distribution<int32_t> u8dist(
      std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + (batch_size() - 1) * input_stride() + channels());
    std::vector<uint8_t> output((batch_size() - 1) * output_stride() + channels());
    std::vector<float> output_ref(batch_size() * channels());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
      std::fill(output.begin(), output.end(), UINT8_C(0xA5));

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          const float x = input_scale() * (int32_t(input[i * input_stride() + c]) - input_zero_point());
          float y = (x < 0.0f ? x * negative_slope() : x) / output_scale() + float(output_zero_point());
          y = std::max<float>(y, std::numeric_limits<uint8_t>::min());
          y = std::min<float>(y, std::numeric_limits<uint8_t>::max());
          output_ref[i * channels() + c] = y;
        }
      }

      // Create, setup, run, and destroy Leaky ReLU operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t leaky_relu_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_leaky_relu_nc_qu8(
          channels(), input_stride(), output_stride(),
          negative_slope(),
          input_zero_point(), input_scale(),
          output_zero_point(), output_scale(),
          0, &leaky_relu_op));
      ASSERT_NE(nullptr, leaky_relu_op);

      // Smart pointer to automatically delete leaky_relu_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_leaky_relu_op(leaky_relu_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_leaky_relu_nc_qu8(
          leaky_relu_op,
          batch_size(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(leaky_relu_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_NEAR(float(int32_t(output[i * output_stride() + c])), output_ref[i * channels() + c], 0.9f)
            << "at batch " << i << " / " << batch_size() << ", channel " << c << " / " << channels()
            << ", input " << int32_t(input[i * input_stride() + c])
            << ", input zero point " << input_zero_point() << ", output zero point " << output_zero_point()
            << ", positive input-to-output ratio " << (input_scale() / output_scale())
            << ", negative input-to-output ratio " << (input_scale() / output_scale() * negative_slope());
        }
      }
    }
  }

 private:
  size_t batch_size_{1};
  size_t channels_{1};
  size_t input_stride_{0};
  size_t output_stride_{0};
  float negative_slope_{0.3f};
  float output_scale_{0.75f};
  int16_t output_zero_point_{53};
  float input_scale_{1.25f};
  int16_t input_zero_point_{41};
  size_t iterations_{15};
};