xref: /aosp_15_r20/external/XNNPACK/test/requantization-tester.h (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #pragma once
10 
11 #include <gtest/gtest.h>
12 
13 #include <algorithm>
14 #include <cfloat>
15 #include <cmath>
16 #include <cstddef>
17 #include <cstdlib>
18 #include <functional>
19 #include <limits>
20 #include <random>
21 #include <vector>
22 
23 #include <xnnpack/requantization-stubs.h>
24 #include <xnnpack/requantization.h>
25 
26 
27 class RequantizationTester {
28  public:
s(uint32_t s)29   inline RequantizationTester& s(uint32_t s) {
30     this->s_ = s;
31     return *this;
32   }
33 
s()34   inline uint32_t s() const {
35     return this->s_;
36   }
37 
scale()38   inline float scale() const {
39     return ldexpf(1.0f, -s());
40   }
41 
zero_point(int32_t zero_point)42   inline RequantizationTester& zero_point(int32_t zero_point) {
43     this->zero_point_ = zero_point;
44     return *this;
45   }
46 
zero_point()47   inline int32_t zero_point() const {
48     return this->zero_point_;
49   }
50 
qmin(int16_t qmin)51   inline RequantizationTester& qmin(int16_t qmin) {
52     this->qmin_ = qmin;
53     return *this;
54   }
55 
qmin()56   inline int16_t qmin() const {
57     return this->qmin_;
58   }
59 
qmax(int16_t qmax)60   inline RequantizationTester& qmax(int16_t qmax) {
61     this->qmax_ = qmax;
62     return *this;
63   }
64 
qmax()65   inline int16_t qmax() const {
66     return this->qmax_;
67   }
68 
iterations(size_t iterations)69   inline RequantizationTester& iterations(size_t iterations) {
70     this->iterations_ = iterations;
71     return *this;
72   }
73 
iterations()74   inline size_t iterations() const {
75     return this->iterations_;
76   }
77 
  /*
   * Test that requantization of numbers ((i - zero point) * 2**s) with
   * - scale = exp2(-s)
   * - zero point in [0, 255]
   * - no output clamping
   * produces exactly i. Each i is first clamped to the range in which
   * ((i - zero point) * 2**s) does not overflow int32.
   */
TestExactDivideByPO2(xnn_qu8_requantization_function requantize)85   void TestExactDivideByPO2(xnn_qu8_requantization_function requantize) const {
86     ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min());
87     ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max());
88     ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min());
89     ASSERT_LE(qmin(), std::numeric_limits<uint8_t>::max());
90     ASSERT_GE(qmax(), std::numeric_limits<uint8_t>::min());
91     ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max());
92     ASSERT_LT(qmin(), qmax());
93 
94     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
95     ASSERT_GE(s(), 1);
96     ASSERT_LT(s(), 32);
97 
98     std::vector<int32_t> inputs(256);
99     std::vector<uint8_t> outputs(inputs.size());
100     const int32_t max_i = (uint32_t(std::numeric_limits<int32_t>::max()) >> s()) + zero_point();
101     const int32_t min_i = -(-uint32_t(std::numeric_limits<int32_t>::min()) >> s()) + zero_point();
102     for (int32_t i = 0; i <= std::numeric_limits<uint8_t>::max(); i++) {
103       const int32_t clamped_i = std::max(min_i, std::min(max_i, i));
104       inputs[i] = int32_t(uint32_t(clamped_i - zero_point()) << s());
105     }
106     requantize(inputs.size(), inputs.data(),
107         scale(), zero_point(), qmin(), qmax(),
108         outputs.data());
109     for (int32_t i = 0; i <= std::numeric_limits<uint8_t>::max(); i++) {
110       const int32_t clamped_i = std::max(min_i, std::min(max_i, i));
111       ASSERT_EQ(uint32_t(clamped_i), uint32_t(outputs[i]))
112         << "i = " << i << ", clamped i = " << clamped_i << ", input = " << inputs[i]
113         << ", min i = " << min_i << ", max i = " << max_i
114         << ", s = " << s() << ", zero point = " << zero_point();
115     }
116   }
117 
  /*
   * Test that requantization of numbers ((i - zero point) * 2**s) with
   * - scale = exp2(-s)
   * - zero point in [-128, 127]
   * - no output clamping
   * produces exactly i. Each i is first clamped to the range in which
   * ((i - zero point) * 2**s) does not overflow int32.
   */
TestExactDivideByPO2(xnn_qs8_requantization_function requantize)125   void TestExactDivideByPO2(xnn_qs8_requantization_function requantize) const {
126     ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min());
127     ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max());
128     ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min());
129     ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max());
130     ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min());
131     ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max());
132     ASSERT_LT(qmin(), qmax());
133 
134     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
135     ASSERT_GE(s(), 1);
136     ASSERT_LT(s(), 32);
137 
138     std::vector<int32_t> inputs(256);
139     std::vector<int8_t> outputs(inputs.size());
140     const int32_t max_i = (uint32_t(std::numeric_limits<int32_t>::max()) >> s()) + zero_point();
141     const int32_t min_i = -(-uint32_t(std::numeric_limits<int32_t>::min()) >> s()) + zero_point();
142     for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) {
143       const int32_t clamped_i = std::max(min_i, std::min(max_i, i));
144       inputs[i - std::numeric_limits<int8_t>::min()] = int32_t(uint32_t(clamped_i - zero_point()) << s());
145     }
146     requantize(inputs.size(), inputs.data(),
147         scale(), zero_point(), qmin(), qmax(),
148         outputs.data());
149     for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) {
150       const int32_t clamped_i = std::max(min_i, std::min(max_i, i));
151       ASSERT_EQ(clamped_i, int32_t(outputs[i - std::numeric_limits<int8_t>::min()]))
152         << "i = " << i << ", clamped i = " << clamped_i
153         << ", input = " << inputs[i - std::numeric_limits<int8_t>::min()]
154         << ", min i = " << min_i << ", max i = " << max_i
155         << ", s = " << s() << ", zero point = " << zero_point();
156     }
157   }
158 
  /*
   * Test that requantization of numbers ((i - zero point) * 2**s - 2**(s-1) + 1) with
   * - scale = exp2(-s)
   * - zero point in [0, 255]
   * - no output clamping
   * produces exactly i, provided that the input number fits into int32
   * (inputs that overflow are not checked).
   */
TestDivideByPO2WithRoundingUp(xnn_qu8_requantization_function requantize)166   void TestDivideByPO2WithRoundingUp(xnn_qu8_requantization_function requantize) {
167     ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min());
168     ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max());
169     ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min());
170     ASSERT_LE(qmin(), std::numeric_limits<uint8_t>::max());
171     ASSERT_GE(qmax(), std::numeric_limits<uint8_t>::min());
172     ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max());
173     ASSERT_LT(qmin(), qmax());
174 
175     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
176     ASSERT_GE(s(), 1);
177     ASSERT_LT(s(), 32);
178 
179     std::vector<int32_t> inputs(256);
180     std::vector<uint8_t> outputs(inputs.size());
181     for (int32_t i = 0; i <= std::numeric_limits<uint8_t>::max(); i++) {
182       const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) -
183         (INT64_C(1) << (s() - 1)) + 1;
184       inputs[i] = int32_t(input);
185     }
186     requantize(inputs.size(), inputs.data(),
187         scale(), zero_point(), qmin(), qmax(),
188         outputs.data());
189     for (int32_t i = 0; i <= std::numeric_limits<uint8_t>::max(); i++) {
190       const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) -
191         (INT64_C(1) << (s() - 1)) + 1;
192       if (int32_t(input) == input) {
193         ASSERT_EQ(i, int32_t(outputs[i]))
194           << "i = " << i << ", input = " << input
195           << ", s = " << s() << ", zero point = " << zero_point();
196       }
197     }
198   }
199 
  /*
   * Test that requantization of numbers ((i - zero point) * 2**s - 2**(s-1) + 1) with
   * - scale = exp2(-s)
   * - zero point in [-128, 127]
   * - no output clamping
   * produces exactly i, provided that the input number fits into int32
   * (inputs that overflow are not checked).
   */
TestDivideByPO2WithRoundingUp(xnn_qs8_requantization_function requantize)207   void TestDivideByPO2WithRoundingUp(xnn_qs8_requantization_function requantize) {
208     ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min());
209     ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max());
210     ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min());
211     ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max());
212     ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min());
213     ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max());
214     ASSERT_LT(qmin(), qmax());
215 
216     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
217     ASSERT_GE(s(), 1);
218     ASSERT_LT(s(), 32);
219 
220     std::vector<int32_t> inputs(256);
221     std::vector<int8_t> outputs(inputs.size());
222     for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) {
223       const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) -
224         (INT64_C(1) << (s() - 1)) + 1;
225       inputs[i - std::numeric_limits<int8_t>::min()] = int32_t(input);
226     }
227     requantize(inputs.size(), inputs.data(),
228         scale(), zero_point(), qmin(), qmax(),
229         outputs.data());
230     for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) {
231       const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) -
232         (INT64_C(1) << (s() - 1)) + 1;
233       if (int32_t(input) == input) {
234         ASSERT_EQ(i, int32_t(outputs[i - std::numeric_limits<int8_t>::min()]))
235           << "i = " << i << ", input = " << input
236           << ", s = " << s() << ", zero point = " << zero_point();
237       }
238     }
239   }
240 
  /*
   * Test that requantization of numbers ((i - zero point) * 2**s + 2**(s-1) - 1) with
   * - scale = exp2(-s)
   * - zero point in [0, 255]
   * - no output clamping
   * produces exactly i, provided that the input number fits into int32
   * (inputs that overflow are not checked).
   */
TestDivideByPO2WithRoundingDown(xnn_qu8_requantization_function requantize)248   void TestDivideByPO2WithRoundingDown(xnn_qu8_requantization_function requantize) {
249     ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min());
250     ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max());
251     ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min());
252     ASSERT_LE(qmin(), std::numeric_limits<uint8_t>::max());
253     ASSERT_GE(qmax(), std::numeric_limits<uint8_t>::min());
254     ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max());
255     ASSERT_LT(qmin(), qmax());
256 
257     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
258     ASSERT_GE(s(), 1);
259     ASSERT_LT(s(), 32);
260 
261     std::vector<int32_t> inputs(256);
262     std::vector<uint8_t> outputs(inputs.size());
263     for (int32_t i = 0; i <= std::numeric_limits<uint8_t>::max(); i++) {
264       const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) +
265         (INT64_C(1) << (s() - 1)) - 1;
266       inputs[i] = int32_t(input);
267     }
268     requantize(inputs.size(), inputs.data(),
269         scale(), zero_point(), qmin(), qmax(),
270         outputs.data());
271     for (int32_t i = 0; i <= std::numeric_limits<uint8_t>::max(); i++) {
272       const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) +
273         (INT64_C(1) << (s() - 1)) - 1;
274       if (int32_t(input) == input) {
275         ASSERT_EQ(i, int32_t(outputs[i]))
276           << "i = " << i << ", input = " << input
277           << ", s = " << s() << ", zero point = " << zero_point();
278       }
279     }
280   }
281 
  /*
   * Test that requantization of numbers ((i - zero point) * 2**s + 2**(s-1) - 1) with
   * - scale = exp2(-s)
   * - zero point in [-128, 127]
   * - no output clamping
   * produces exactly i, provided that the input number fits into int32
   * (inputs that overflow are not checked).
   */
TestDivideByPO2WithRoundingDown(xnn_qs8_requantization_function requantize)289   void TestDivideByPO2WithRoundingDown(xnn_qs8_requantization_function requantize) {
290     ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min());
291     ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max());
292     ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min());
293     ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max());
294     ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min());
295     ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max());
296     ASSERT_LT(qmin(), qmax());
297 
298     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
299     ASSERT_GE(s(), 1);
300     ASSERT_LT(s(), 32);
301 
302     std::vector<int32_t> inputs(256);
303     std::vector<int8_t> outputs(inputs.size());
304     for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) {
305       const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) +
306         (INT64_C(1) << (s() - 1)) - 1;
307       inputs[i - std::numeric_limits<int8_t>::min()] = int32_t(input);
308     }
309     requantize(inputs.size(), inputs.data(),
310         scale(), zero_point(), qmin(), qmax(),
311         outputs.data());
312     for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) {
313       const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) +
314         (INT64_C(1) << (s() - 1)) - 1;
315       if (int32_t(input) == input) {
316         ASSERT_EQ(i, int32_t(outputs[i - std::numeric_limits<int8_t>::min()]))
317           << "i = " << i << ", input = " << input
318           << ", s = " << s() << ", zero point = " << zero_point();
319       }
320     }
321   }
322 
TestDivideByPO2WithRoundingTiesAway(xnn_qu8_requantization_function requantize)323   void TestDivideByPO2WithRoundingTiesAway(xnn_qu8_requantization_function requantize) {
324     ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min());
325     ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max());
326     ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min());
327     ASSERT_LE(qmin(), std::numeric_limits<uint8_t>::max());
328     ASSERT_GE(qmax(), std::numeric_limits<uint8_t>::min());
329     ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max());
330     ASSERT_LT(qmin(), qmax());
331 
332     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
333     ASSERT_GE(s(), 1);
334     ASSERT_LT(s(), 32);
335 
336     std::vector<int32_t> inputs(256);
337     std::vector<uint8_t> outputs(inputs.size());
338     for (int32_t i = 0; i <= std::numeric_limits<uint8_t>::max(); i++) {
339       int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s());
340       if (input > 0) {
341         input -= INT64_C(1) << (s() - 1);
342       } else if (input < 0) {
343         input += INT64_C(1) << (s() - 1);
344       }
345       inputs[i] = int32_t(input);
346     }
347     requantize(inputs.size(), inputs.data(),
348         scale(), zero_point(), qmin(), qmax(),
349         outputs.data());
350     for (int32_t i = 0; i <= std::numeric_limits<uint8_t>::max(); i++) {
351       int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s());
352       if (input > 0) {
353         input -= INT64_C(1) << (s() - 1);
354       } else if (input < 0) {
355         input += INT64_C(1) << (s() - 1);
356       }
357       if (int32_t(input) == input) {
358         ASSERT_EQ(i, int32_t(outputs[i]))
359           << "i = " << i << ", input = " << input
360           << ", s = " << s() << ", zero point = " << zero_point();
361       }
362     }
363   }
364 
TestDivideByPO2WithRoundingTiesAway(xnn_qs8_requantization_function requantize)365   void TestDivideByPO2WithRoundingTiesAway(xnn_qs8_requantization_function requantize) {
366     ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min());
367     ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max());
368     ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min());
369     ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max());
370     ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min());
371     ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max());
372     ASSERT_LT(qmin(), qmax());
373 
374     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
375     ASSERT_GE(s(), 1);
376     ASSERT_LT(s(), 32);
377 
378     std::vector<int32_t> inputs(256);
379     std::vector<int8_t> outputs(inputs.size());
380     for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) {
381       int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s());
382       if (input > 0) {
383         input -= INT64_C(1) << (s() - 1);
384       } else if (input < 0) {
385         input += INT64_C(1) << (s() - 1);
386       }
387       inputs[i - std::numeric_limits<int8_t>::min()] = int32_t(input);
388     }
389     requantize(inputs.size(), inputs.data(),
390         scale(), zero_point(), qmin(), qmax(),
391         outputs.data());
392     for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) {
393       int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s());
394       if (input > 0) {
395         input -= INT64_C(1) << (s() - 1);
396       } else if (input < 0) {
397         input += INT64_C(1) << (s() - 1);
398       }
399       if (int32_t(input) == input) {
400         ASSERT_EQ(i, int32_t(outputs[i - std::numeric_limits<int8_t>::min()]))
401           << "i = " << i << ", input = " << input
402           << ", s = " << s() << ", zero point = " << zero_point();
403       }
404     }
405   }
406 
TestDivideByPO2WithRoundingTiesUp(xnn_qs8_requantization_function requantize)407   void TestDivideByPO2WithRoundingTiesUp(xnn_qs8_requantization_function requantize) {
408     ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min());
409     ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max());
410     ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min());
411     ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max());
412     ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min());
413     ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max());
414     ASSERT_LT(qmin(), qmax());
415 
416     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
417     ASSERT_GE(s(), 1);
418     ASSERT_LT(s(), 32);
419 
420     std::vector<int32_t> inputs(256);
421     std::vector<int8_t> outputs(inputs.size());
422     for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) {
423       int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s());
424       input -= INT64_C(1) << (s() - 1);
425       inputs[i - std::numeric_limits<int8_t>::min()] = int32_t(input);
426     }
427     requantize(inputs.size(), inputs.data(),
428         scale(), zero_point(), qmin(), qmax(),
429         outputs.data());
430     for (int32_t i = std::numeric_limits<int8_t>::min(); i <= std::numeric_limits<int8_t>::max(); i++) {
431       int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s());
432       input -= INT64_C(1) << (s() - 1);
433       if (int32_t(input) == input) {
434         ASSERT_EQ(i, int32_t(outputs[i - std::numeric_limits<int8_t>::min()]))
435           << "i = " << i << ", input = " << input
436           << ", s = " << s() << ", zero point = " << zero_point();
437       }
438     }
439   }
440 
TestSpecialCases(xnn_qu8_requantization_function requantize)441   void TestSpecialCases(xnn_qu8_requantization_function requantize) {
442     ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min());
443     ASSERT_LE(qmin(), std::numeric_limits<uint8_t>::max());
444     ASSERT_GE(qmax(), std::numeric_limits<uint8_t>::min());
445     ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max());
446     ASSERT_LT(qmin(), qmax());
447 
448     std::vector<int32_t> inputs(256);
449     std::vector<uint8_t> outputs(inputs.size());
450 
451     std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::min());
452     for (int32_t zero_point = 0; zero_point <= std::numeric_limits<uint8_t>::max(); zero_point++) {
453       requantize(
454           inputs.size(),
455           inputs.data(),
456           ldexpf(1.0f, -32) /* scale */,
457           zero_point /* zero point */,
458           std::numeric_limits<uint8_t>::min(),
459           std::numeric_limits<uint8_t>::max(),
460           outputs.data());
461       for (size_t i = 0; i < outputs.size(); i++) {
462         ASSERT_EQ(std::max(int32_t(int32_t(std::numeric_limits<uint8_t>::min())), zero_point - 1), int32_t(outputs[i]));
463       }
464     }
465 
466     std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::max());
467     requantize(
468         inputs.size(),
469         inputs.data(),
470         0x1.FFFFFEp-1f /* scale */,
471         std::numeric_limits<uint8_t>::max() /* zero point */,
472         std::numeric_limits<uint8_t>::min(),
473         std::numeric_limits<uint8_t>::max(),
474         outputs.data());
475     for (size_t i = 0; i < outputs.size(); i++) {
476       ASSERT_EQ(std::numeric_limits<uint8_t>::max(), int32_t(outputs[i]));
477     }
478   }
479 
TestSpecialCases(xnn_qs8_requantization_function requantize)480   void TestSpecialCases(xnn_qs8_requantization_function requantize) {
481     ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min());
482     ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max());
483     ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min());
484     ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max());
485     ASSERT_LT(qmin(), qmax());
486 
487     std::vector<int32_t> inputs(256);
488     std::vector<int8_t> outputs(inputs.size());
489 
490     std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::min());
491     for (int32_t zero_point = std::numeric_limits<int8_t>::min();
492          zero_point <= std::numeric_limits<int8_t>::max();
493          zero_point++)
494     {
495       requantize(
496           inputs.size(),
497           inputs.data(),
498           ldexpf(1.0f, -32) /* scale */,
499           zero_point,
500           std::numeric_limits<int8_t>::min(),
501           std::numeric_limits<int8_t>::max(),
502           outputs.data());
503       for (size_t i = 0; i < outputs.size(); i++) {
504         ASSERT_EQ(std::max(int32_t(std::numeric_limits<int8_t>::min()), zero_point - 1), int32_t(outputs[i]));
505       }
506     }
507 
508     std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::max());
509     requantize(
510         inputs.size(),
511         inputs.data(),
512         0x1.FFFFFEp-1f /* scale */,
513         std::numeric_limits<int8_t>::max() /* zero point */,
514         std::numeric_limits<int8_t>::min(),
515         std::numeric_limits<int8_t>::max(),
516         outputs.data());
517     for (size_t i = 0; i < outputs.size(); i++) {
518       ASSERT_EQ(std::numeric_limits<int8_t>::max(), int32_t(outputs[i]));
519     }
520   }
521 
TestRandomCasesRoundToNearestTiesAway(xnn_qu8_requantization_function requantize)522   void TestRandomCasesRoundToNearestTiesAway(xnn_qu8_requantization_function requantize) {
523     ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min());
524     ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max());
525     ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min());
526     ASSERT_LE(qmin(), std::numeric_limits<uint8_t>::max());
527     ASSERT_GE(qmax(), std::numeric_limits<uint8_t>::min());
528     ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max());
529     ASSERT_LT(qmin(), qmax());
530 
531     std::random_device random_device;
532     std::mt19937 rng(random_device());
533     for (size_t iteration = 0; iteration < iterations(); iteration++) {
534       auto u8rng =
535         std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
536 
537       std::vector<int32_t> inputs(4096);
538       std::vector<uint8_t> outputs(inputs.size());
539 
540       std::uniform_real_distribution<float> scale_distribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
541       const float scale = scale_distribution(rng);
542       for (size_t i = 0; i < inputs.size(); i++) {
543         const uint8_t approximate_output = std::min(std::max(uint8_t(u8rng()), uint8_t(qmin())), uint8_t(qmax()));
544         const int32_t input = int32_t(double(approximate_output) / double(scale));
545         inputs[i] = input;
546       }
547 
548       requantize(
549         inputs.size(), inputs.data(), scale, zero_point(), qmin(), qmax(),
550         outputs.data());
551 
552       /* Ensure that outputs are not all identical, as in this case the test doesn't validate much */
553       ASSERT_NE(
554         *std::max_element(outputs.cbegin(), outputs.cend()),
555         *std::min_element(outputs.cbegin(), outputs.cend()));
556 
557       for (size_t i = 0; i < inputs.size(); i++) {
558         const uint8_t reference_output = xnn_qu8_requantize_rndna(
559           inputs[i], scale, zero_point(), qmin(), qmax());
560         ASSERT_EQ(uint32_t(reference_output), uint32_t(outputs[i]));
561       }
562     }
563   }
564 
TestRandomCasesRoundToNearestTiesAway(xnn_qs8_requantization_function requantize)565   void TestRandomCasesRoundToNearestTiesAway(xnn_qs8_requantization_function requantize) {
566     ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min());
567     ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max());
568     ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min());
569     ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max());
570     ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min());
571     ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max());
572     ASSERT_LT(qmin(), qmax());
573 
574     std::random_device random_device;
575     std::mt19937 rng(random_device());
576     for (size_t iteration = 0; iteration < iterations(); iteration++) {
577       auto i8rng = std::bind(
578         std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), std::ref(rng));
579 
580       std::vector<int32_t> inputs(4096);
581       std::vector<int8_t> outputs(inputs.size());
582 
583       std::uniform_real_distribution<float> scale_distribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
584       const float scale = scale_distribution(rng);
585       for (size_t i = 0; i < inputs.size(); i++) {
586         const int8_t approximate_output = std::min(std::max(int8_t(i8rng()), int8_t(qmin())), int8_t(qmax()));
587         const int32_t input = int32_t(double(approximate_output) / double(scale));
588         inputs[i] = input;
589       }
590 
591       requantize(
592         inputs.size(), inputs.data(), scale, zero_point(), qmin(), qmax(),
593         outputs.data());
594 
595       /* Ensure that outputs are not all identical, as in this case the test doesn't validate much */
596       ASSERT_NE(
597         *std::max_element(outputs.cbegin(), outputs.cend()),
598         *std::min_element(outputs.cbegin(), outputs.cend()));
599 
600       for (size_t i = 0; i < inputs.size(); i++) {
601         const int8_t reference_output = xnn_qs8_requantize_rndna(
602           inputs[i], scale, zero_point(), qmin(), qmax());
603         ASSERT_EQ(int32_t(reference_output), int32_t(outputs[i]));
604       }
605     }
606   }
607 
TestRandomCasesRoundToNearestTiesUp(xnn_qs8_requantization_function requantize)608   void TestRandomCasesRoundToNearestTiesUp(xnn_qs8_requantization_function requantize) {
609     ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min());
610     ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max());
611     ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min());
612     ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max());
613     ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min());
614     ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max());
615     ASSERT_LT(qmin(), qmax());
616 
617     std::random_device random_device;
618     std::mt19937 rng(random_device());
619     for (size_t iteration = 0; iteration < iterations(); iteration++) {
620       auto i8rng = std::bind(
621         std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), std::ref(rng));
622 
623       std::vector<int32_t> inputs(4096);
624       std::vector<int8_t> outputs(inputs.size());
625 
626       std::uniform_real_distribution<float> scale_distribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
627       const float scale = scale_distribution(rng);
628       for (size_t i = 0; i < inputs.size(); i++) {
629         const int8_t approximate_output = std::min(std::max(int8_t(i8rng()), int8_t(qmin())), int8_t(qmax()));
630         const int32_t input = int32_t(double(approximate_output) / double(scale));
631         inputs[i] = input;
632       }
633 
634       requantize(
635         inputs.size(), inputs.data(), scale, zero_point(), qmin(), qmax(),
636         outputs.data());
637 
638       /* Ensure that outputs are not all identical, as in this case the test doesn't validate much */
639       ASSERT_NE(
640         *std::max_element(outputs.cbegin(), outputs.cend()),
641         *std::min_element(outputs.cbegin(), outputs.cend()));
642 
643       for (size_t i = 0; i < inputs.size(); i++) {
644         const int8_t reference_output = xnn_qs8_requantize_rndnu(
645           inputs[i], scale, zero_point(), qmin(), qmax());
646         ASSERT_EQ(int32_t(reference_output), int32_t(outputs[i]));
647       }
648     }
649   }
650 
TestRandomCasesApproximate(xnn_qu8_requantization_function requantize)651   void TestRandomCasesApproximate(xnn_qu8_requantization_function requantize) {
652     ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min());
653     ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max());
654     ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min());
655     ASSERT_LE(qmin(), std::numeric_limits<uint8_t>::max());
656     ASSERT_GE(qmax(), std::numeric_limits<uint8_t>::min());
657     ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max());
658     ASSERT_LT(qmin(), qmax());
659 
660     std::random_device random_device;
661     std::mt19937 rng(random_device());
662     for (size_t iteration = 0; iteration < iterations(); iteration++) {
663       auto u8rng =
664         std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
665 
666       std::vector<int32_t> inputs(4096);
667       std::vector<uint8_t> outputs(inputs.size());
668 
669       std::uniform_real_distribution<float> scale_distribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
670       const float scale = scale_distribution(rng);
671       for (size_t i = 0; i < inputs.size(); i++) {
672         const uint8_t approximate_output = std::min(std::max(uint8_t(u8rng()), uint8_t(qmin())), uint8_t(qmax()));
673         const int32_t input = int32_t(double(approximate_output) / double(scale));
674         inputs[i] = input;
675       }
676 
677       requantize(
678         inputs.size(), inputs.data(), scale, zero_point(), qmin(), qmax(),
679         outputs.data());
680 
681       /* Ensure that outputs are not all identical, as in this case Test doesn't validate much */
682       ASSERT_NE(
683         *std::max_element(outputs.cbegin(), outputs.cend()),
684         *std::min_element(outputs.cbegin(), outputs.cend()));
685 
686       for (size_t i = 0; i < inputs.size(); i++) {
687         const double reference_output = RequantizationTester::RequantizeApproximate(
688           inputs[i], scale, uint8_t(zero_point()), uint8_t(qmin()), uint8_t(qmax()));
689         ASSERT_LE(std::abs(reference_output - double(outputs[i])), 0.55)
690           << "input = " << inputs[i] << ", output = " << int32_t(outputs[i])
691           << ", reference output = " << reference_output;
692       }
693     }
694   }
695 
TestRandomCasesApproximate(xnn_qs8_requantization_function requantize)696   void TestRandomCasesApproximate(xnn_qs8_requantization_function requantize) {
697     ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min());
698     ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max());
699     ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min());
700     ASSERT_LE(qmin(), std::numeric_limits<int8_t>::max());
701     ASSERT_GE(qmax(), std::numeric_limits<int8_t>::min());
702     ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max());
703     ASSERT_LT(qmin(), qmax());
704 
705     std::random_device random_device;
706     std::mt19937 rng(random_device());
707     for (size_t iteration = 0; iteration < iterations(); iteration++) {
708       auto i8rng = std::bind(
709         std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), std::ref(rng));
710 
711       std::vector<int32_t> inputs(4096);
712       std::vector<int8_t> outputs(inputs.size());
713 
714       std::uniform_real_distribution<float> scale_distribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
715       const float scale = scale_distribution(rng);
716       for (size_t i = 0; i < inputs.size(); i++) {
717         const int8_t approximate_output = std::min(std::max(int8_t(i8rng()), int8_t(qmin())), int8_t(qmax()));
718         const int32_t input = int32_t(double(approximate_output) / double(scale));
719         inputs[i] = input;
720       }
721 
722       requantize(
723         inputs.size(), inputs.data(), scale, zero_point(), qmin(), qmax(),
724         outputs.data());
725 
726       /* Ensure that outputs are not all identical, as in this case Test doesn't validate much */
727       ASSERT_NE(
728         *std::max_element(outputs.cbegin(), outputs.cend()),
729         *std::min_element(outputs.cbegin(), outputs.cend()));
730 
731       for (size_t i = 0; i < inputs.size(); i++) {
732         const double reference_output = RequantizationTester::RequantizeApproximate(
733           inputs[i], scale, int8_t(zero_point()), int8_t(qmin()), int8_t(qmax()));
734         ASSERT_LE(std::abs(reference_output - double(outputs[i])), 0.55)
735           << "input = " << inputs[i] << ", output = " << int32_t(outputs[i])
736           << ", reference output = " << reference_output;
737       }
738     }
739   }
740 
ShiftLeft(int64_t w,uint32_t n)741   static inline int64_t ShiftLeft(int64_t w, uint32_t n) {
742     return (int64_t) ((uint64_t) w << n);
743   }
744 
RequantizeApproximate(int32_t value,float scale,uint8_t zero_point,uint8_t qmin,uint8_t qmax)745   static inline double RequantizeApproximate(
746     int32_t value,
747     float scale,
748     uint8_t zero_point,
749     uint8_t qmin,
750     uint8_t qmax)
751   {
752     assert(scale < 1.0f);
753     assert(scale >= 0x1.0p-32f);
754 
755     return std::min(std::max(double(value) * double(scale) + double(zero_point), double(qmin)), double(qmax));
756   }
757 
RequantizeApproximate(int32_t value,float scale,int8_t zero_point,int8_t qmin,int8_t qmax)758   static inline double RequantizeApproximate(
759     int32_t value,
760     float scale,
761     int8_t zero_point,
762     int8_t qmin,
763     int8_t qmax)
764   {
765     assert(scale < 1.0f);
766     assert(scale >= 0x1.0p-32f);
767 
768     return std::min(std::max(double(value) * double(scale) + double(zero_point), double(qmin)), double(qmax));
769   }
770 
771  private:
  /* Exponent used by scale(), which returns ldexpf(1.0f, -s()); defaults to 1. */
772   uint32_t s_{1};
  /* Zero point passed to the requantization functions; defaults to 0. */
773   int32_t zero_point_{0};
  /*
   * Output clamping bounds. They are stored as int16_t and default to the
   * full int16_t range; the TestRandomCasesApproximate methods assert that
   * they fit the 8-bit output type before running.
   */
774   int16_t qmin_{std::numeric_limits<int16_t>::min()};
775   int16_t qmax_{std::numeric_limits<int16_t>::max()};
  /* Number of randomized rounds run by the TestRandomCasesApproximate methods; defaults to 1. */
776   size_t iterations_{1};
777 };
778