// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>


26 class ClampOperatorTester {
27  public:
channels(size_t channels)28   inline ClampOperatorTester& channels(size_t channels) {
29     assert(channels != 0);
30     this->channels_ = channels;
31     return *this;
32   }
33 
channels()34   inline size_t channels() const {
35     return this->channels_;
36   }
37 
input_stride(size_t input_stride)38   inline ClampOperatorTester& input_stride(size_t input_stride) {
39     assert(input_stride != 0);
40     this->input_stride_ = input_stride;
41     return *this;
42   }
43 
input_stride()44   inline size_t input_stride() const {
45     if (this->input_stride_ == 0) {
46       return this->channels_;
47     } else {
48       assert(this->input_stride_ >= this->channels_);
49       return this->input_stride_;
50     }
51   }
52 
output_stride(size_t output_stride)53   inline ClampOperatorTester& output_stride(size_t output_stride) {
54     assert(output_stride != 0);
55     this->output_stride_ = output_stride;
56     return *this;
57   }
58 
output_stride()59   inline size_t output_stride() const {
60     if (this->output_stride_ == 0) {
61       return this->channels_;
62     } else {
63       assert(this->output_stride_ >= this->channels_);
64       return this->output_stride_;
65     }
66   }
67 
batch_size(size_t batch_size)68   inline ClampOperatorTester& batch_size(size_t batch_size) {
69     assert(batch_size != 0);
70     this->batch_size_ = batch_size;
71     return *this;
72   }
73 
batch_size()74   inline size_t batch_size() const {
75     return this->batch_size_;
76   }
77 
qmin(int16_t qmin)78   inline ClampOperatorTester& qmin(int16_t qmin) {
79     this->qmin_ = qmin;
80     return *this;
81   }
82 
qmin()83   inline int16_t qmin() const {
84     return this->qmin_;
85   }
86 
qmax(int16_t qmax)87   inline ClampOperatorTester& qmax(int16_t qmax) {
88     this->qmax_ = qmax;
89     return *this;
90   }
91 
qmax()92   inline int16_t qmax() const {
93     return this->qmax_;
94   }
95 
relu_activation(bool relu_activation)96   inline ClampOperatorTester& relu_activation(bool relu_activation) {
97     this->relu_activation_ = relu_activation;
98     return *this;
99   }
100 
relu_activation()101   inline bool relu_activation() const {
102     return this->relu_activation_;
103   }
104 
iterations(size_t iterations)105   inline ClampOperatorTester& iterations(size_t iterations) {
106     this->iterations_ = iterations;
107     return *this;
108   }
109 
iterations()110   inline size_t iterations() const {
111     return this->iterations_;
112   }
113 
TestF16()114   void TestF16() const {
115     ASSERT_LT(qmin(), qmax());
116     ASSERT_FALSE(relu_activation());
117 
118     std::random_device random_device;
119     auto rng = std::mt19937(random_device());
120     std::uniform_real_distribution<float> f32dist(
121       std::numeric_limits<int16_t>::min(), std::numeric_limits<int16_t>::max());
122 
123     std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) +
124       (batch_size() - 1) * input_stride() + channels());
125     std::vector<uint16_t> output((batch_size() - 1) * output_stride() + channels());
126     std::vector<float> output_ref(batch_size() * channels());
127     for (size_t iteration = 0; iteration < iterations(); iteration++) {
128       std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
129       std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
130 
131       // Compute reference results.
132       const float output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(float(qmin())));
133       const float output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(float(qmax())));
134       for (size_t i = 0; i < batch_size(); i++) {
135         for (size_t c = 0; c < channels(); c++) {
136           const float x = fp16_ieee_to_fp32_value(input[i * input_stride() + c]);
137           const float y = relu_activation() ? std::max(x, 0.f) : std::min(std::max(x, output_min), output_max);
138           output_ref[i * channels() + c] = y;
139         }
140       }
141 
142       // Create, setup, run, and destroy Clamp operator.
143       ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
144       xnn_operator_t clamp_op = nullptr;
145 
146       const xnn_status status = xnn_create_clamp_nc_f16(
147         channels(), input_stride(), output_stride(),
148         output_min, output_max,
149         0, &clamp_op);
150       if (status == xnn_status_unsupported_hardware) {
151         GTEST_SKIP();
152       }
153       ASSERT_EQ(xnn_status_success, status);
154       ASSERT_NE(nullptr, clamp_op);
155 
156       // Smart pointer to automatically delete clamp_op.
157       std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_clamp_op(clamp_op, xnn_delete_operator);
158 
159       ASSERT_EQ(xnn_status_success,
160         xnn_setup_clamp_nc_f16(
161           clamp_op,
162           batch_size(),
163           input.data(), output.data(),
164           nullptr /* thread pool */));
165 
166       ASSERT_EQ(xnn_status_success,
167         xnn_run_operator(clamp_op, nullptr /* thread pool */));
168 
169       // Verify results.
170       for (size_t i = 0; i < batch_size(); i++) {
171         for (size_t c = 0; c < channels(); c++) {
172           ASSERT_LE(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_max)
173             << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels();
174           ASSERT_GE(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_min)
175             << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels();
176           ASSERT_NEAR(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_ref[i * channels() + c], std::max(1.0e-4f, std::abs(output_ref[i * channels() + c]) * 1.0e-2f))
177             << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels()
178             << ", min " << output_min << ", max " << output_max;
179         }
180       }
181     }
182   }
183 
TestF32()184   void TestF32() const {
185     ASSERT_LT(qmin(), qmax());
186 
187     std::random_device random_device;
188     auto rng = std::mt19937(random_device());
189     std::uniform_real_distribution<float> f32dist(
190       std::numeric_limits<int16_t>::min(), std::numeric_limits<int16_t>::max());
191 
192     std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
193       (batch_size() - 1) * input_stride() + channels());
194     std::vector<float> output((batch_size() - 1) * output_stride() + channels());
195     std::vector<float> output_ref(batch_size() * channels());
196     for (size_t iteration = 0; iteration < iterations(); iteration++) {
197       std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
198       std::fill(output.begin(), output.end(), std::nanf(""));
199 
200       // Compute reference results.
201       for (size_t i = 0; i < batch_size(); i++) {
202         for (size_t c = 0; c < channels(); c++) {
203           const float x = input[i * input_stride() + c];
204           const float y = relu_activation() ? std::max(x, 0.f) :
205             std::min(std::max(x, float(qmin())), float(qmax()));
206           output_ref[i * channels() + c] = y;
207         }
208       }
209 
210       // Create, setup, run, and destroy Clamp operator.
211       ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
212       xnn_operator_t clamp_op = nullptr;
213 
214       const float output_min = relu_activation() ? 0.0f : float(qmin());
215       const float output_max = relu_activation() ? std::numeric_limits<float>::infinity() : float(qmax());
216       ASSERT_EQ(xnn_status_success,
217         xnn_create_clamp_nc_f32(
218           channels(), input_stride(), output_stride(),
219           output_min, output_max,
220           0, &clamp_op));
221       ASSERT_NE(nullptr, clamp_op);
222 
223       // Smart pointer to automatically delete clamp_op.
224       std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_clamp_op(clamp_op, xnn_delete_operator);
225 
226       ASSERT_EQ(xnn_status_success,
227         xnn_setup_clamp_nc_f32(
228           clamp_op,
229           batch_size(),
230           input.data(), output.data(),
231           nullptr /* thread pool */));
232 
233       ASSERT_EQ(xnn_status_success,
234         xnn_run_operator(clamp_op, nullptr /* thread pool */));
235 
236       // Verify results.
237       for (size_t i = 0; i < batch_size(); i++) {
238         for (size_t c = 0; c < channels(); c++) {
239           ASSERT_LE(output[i * output_stride() + c], output_max)
240             << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels();
241           ASSERT_GE(output[i * output_stride() + c], output_min)
242             << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels();
243           ASSERT_EQ(output_ref[i * channels() + c], output[i * output_stride() + c])
244             << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels()
245             << ", min " << output_min << ", max " << output_max;
246         }
247       }
248     }
249   }
250 
TestS8()251   void TestS8() const {
252     ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min());
253     ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max());
254     ASSERT_LT(qmin(), qmax());
255 
256     std::random_device random_device;
257     auto rng = std::mt19937(random_device());
258     std::uniform_int_distribution<int32_t> i8dist(
259       std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());
260 
261     std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) +
262       (batch_size() - 1) * input_stride() + channels());
263     std::vector<int8_t> output((batch_size() - 1) * output_stride() + channels());
264     std::vector<int8_t> output_ref(batch_size() * channels());
265     for (size_t iteration = 0; iteration < iterations(); iteration++) {
266       std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
267       std::fill(output.begin(), output.end(), INT8_C(0xA5));
268 
269       // Compute reference results.
270       for (size_t i = 0; i < batch_size(); i++) {
271         for (size_t c = 0; c < channels(); c++) {
272           const int8_t x = input[i * input_stride() + c];
273           const int8_t y = std::min(std::max(x, int8_t(qmin())), int8_t(qmax()));
274           output_ref[i * channels() + c] = y;
275         }
276       }
277 
278       // Create, setup, run, and destroy Clamp operator.
279       ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
280       xnn_operator_t clamp_op = nullptr;
281 
282       ASSERT_EQ(xnn_status_success,
283         xnn_create_clamp_nc_s8(
284           channels(), input_stride(), output_stride(),
285           int8_t(qmin()), int8_t(qmax()),
286           0, &clamp_op));
287       ASSERT_NE(nullptr, clamp_op);
288 
289       // Smart pointer to automatically delete clamp_op.
290       std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_clamp_op(clamp_op, xnn_delete_operator);
291 
292       ASSERT_EQ(xnn_status_success,
293         xnn_setup_clamp_nc_s8(
294           clamp_op,
295           batch_size(),
296           input.data(), output.data(),
297           nullptr /* thread pool */));
298 
299       ASSERT_EQ(xnn_status_success,
300         xnn_run_operator(clamp_op, nullptr /* thread pool */));
301 
302       // Verify results .
303       for (size_t i = 0; i < batch_size(); i++) {
304         for (size_t c = 0; c < channels(); c++) {
305           ASSERT_LE(int16_t(output[i * output_stride() + c]), qmax())
306             << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels();
307           ASSERT_GE(int16_t(output[i * output_stride() + c]), qmin())
308             << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels();
309           ASSERT_EQ(int16_t(output[i * output_stride() + c]), int16_t(output_ref[i * channels() + c]))
310             << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels()
311             << ", min " << qmin() << ", max " << qmax();
312         }
313       }
314     }
315   }
316 
TestU8()317   void TestU8() const {
318     ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min());
319     ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max());
320     ASSERT_LT(qmin(), qmax());
321 
322     std::random_device random_device;
323     auto rng = std::mt19937(random_device());
324     std::uniform_int_distribution<int32_t> u8dist(
325       std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());
326 
327     std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
328       (batch_size() - 1) * input_stride() + channels());
329     std::vector<uint8_t> output((batch_size() - 1) * output_stride() + channels());
330     std::vector<uint8_t> output_ref(batch_size() * channels());
331     for (size_t iteration = 0; iteration < iterations(); iteration++) {
332       std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
333       std::fill(output.begin(), output.end(), UINT8_C(0xA5));
334 
335       // Compute reference results.
336       for (size_t i = 0; i < batch_size(); i++) {
337         for (size_t c = 0; c < channels(); c++) {
338           const uint8_t x = input[i * input_stride() + c];
339           const uint8_t y = std::min(std::max(x, uint8_t(qmin())), uint8_t(qmax()));
340           output_ref[i * channels() + c] = y;
341         }
342       }
343 
344       // Create, setup, run, and destroy Clamp operator.
345       ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
346       xnn_operator_t clamp_op = nullptr;
347 
348       ASSERT_EQ(xnn_status_success,
349         xnn_create_clamp_nc_u8(
350           channels(), input_stride(), output_stride(),
351           uint8_t(qmin()), uint8_t(qmax()),
352           0, &clamp_op));
353       ASSERT_NE(nullptr, clamp_op);
354 
355       // Smart pointer to automatically delete clamp_op.
356       std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_clamp_op(clamp_op, xnn_delete_operator);
357 
358       ASSERT_EQ(xnn_status_success,
359         xnn_setup_clamp_nc_u8(
360           clamp_op,
361           batch_size(),
362           input.data(), output.data(),
363           nullptr /* thread pool */));
364 
365       ASSERT_EQ(xnn_status_success,
366         xnn_run_operator(clamp_op, nullptr /* thread pool */));
367 
368       // Verify results .
369       for (size_t i = 0; i < batch_size(); i++) {
370         for (size_t c = 0; c < channels(); c++) {
371           ASSERT_LE(int16_t(output[i * output_stride() + c]), qmax())
372             << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels();
373           ASSERT_GE(int16_t(output[i * output_stride() + c]), qmin())
374             << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels();
375           ASSERT_EQ(int16_t(output[i * output_stride() + c]), int16_t(output_ref[i * channels() + c]))
376             << "at position " << i << " / " << batch_size() << ", channel " << c << " / " << channels()
377             << ", min " << qmin() << ", max " << qmax();
378         }
379       }
380     }
381   }
382 
383  private:
384   size_t batch_size_{1};
385   size_t channels_{1};
386   size_t input_stride_{0};
387   size_t output_stride_{0};
388   int16_t qmin_{std::numeric_limits<int16_t>::min()};
389   int16_t qmax_{std::numeric_limits<int16_t>::max()};
390   bool relu_activation_{false};
391   size_t iterations_{15};
392 };
393