xref: /aosp_15_r20/external/XNNPACK/test/gavgpool-microkernel-tester.h (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #pragma once
10 
11 #include <gtest/gtest.h>
12 
13 #include <algorithm>
14 #include <cassert>
15 #include <cmath>
16 #include <cstddef>
17 #include <cstdlib>
18 #include <limits>
19 #include <random>
20 #include <vector>
21 
22 #include <fp16.h>
23 
24 #include <xnnpack.h>
25 #include <xnnpack/aligned-allocator.h>
26 #include <xnnpack/microfnptr.h>
27 #include <xnnpack/microparams-init.h>
28 #include <xnnpack/requantization.h>
29 
30 
31 class GAvgPoolMicrokernelTester {
32  public:
rows(size_t rows)33   inline GAvgPoolMicrokernelTester& rows(size_t rows) {
34     assert(rows != 0);
35     this->rows_ = rows;
36     return *this;
37   }
38 
rows()39   inline size_t rows() const {
40     return this->rows_;
41   }
42 
channels(size_t channels)43   inline GAvgPoolMicrokernelTester& channels(size_t channels) {
44     assert(channels != 0);
45     this->channels_ = channels;
46     return *this;
47   }
48 
channels()49   inline size_t channels() const {
50     return this->channels_;
51   }
52 
channel_tile(size_t channel_tile)53   inline GAvgPoolMicrokernelTester& channel_tile(size_t channel_tile) {
54     assert(channel_tile != 0);
55     this->channel_tile_ = channel_tile;
56     return *this;
57   }
58 
channel_tile()59   inline size_t channel_tile() const {
60     return this->channel_tile_;
61   }
62 
input_stride(size_t input_stride)63   inline GAvgPoolMicrokernelTester& input_stride(size_t input_stride) {
64     assert(input_stride != 0);
65     this->input_stride_ = input_stride;
66     return *this;
67   }
68 
input_stride()69   inline size_t input_stride() const {
70     if (this->input_stride_ == 0) {
71       return channels();
72     } else {
73       assert(this->input_stride_ >= channels());
74       return this->input_stride_;
75     }
76   }
77 
input_scale(float input_scale)78   inline GAvgPoolMicrokernelTester& input_scale(float input_scale) {
79     assert(input_scale > 0.0f);
80     assert(std::isnormal(input_scale));
81     this->input_scale_ = input_scale;
82     return *this;
83   }
84 
input_scale()85   inline float input_scale() const {
86     return this->input_scale_;
87   }
88 
input_zero_point(uint8_t input_zero_point)89   inline GAvgPoolMicrokernelTester& input_zero_point(uint8_t input_zero_point) {
90     this->input_zero_point_ = input_zero_point;
91     return *this;
92   }
93 
input_zero_point()94   inline uint8_t input_zero_point() const {
95     return this->input_zero_point_;
96   }
97 
output_scale(float output_scale)98   inline GAvgPoolMicrokernelTester& output_scale(float output_scale) {
99     assert(output_scale > 0.0f);
100     assert(std::isnormal(output_scale));
101     this->output_scale_ = output_scale;
102     return *this;
103   }
104 
output_scale()105   inline float output_scale() const {
106     return this->output_scale_;
107   }
108 
output_zero_point(uint8_t output_zero_point)109   inline GAvgPoolMicrokernelTester& output_zero_point(uint8_t output_zero_point) {
110     this->output_zero_point_ = output_zero_point;
111     return *this;
112   }
113 
output_zero_point()114   inline uint8_t output_zero_point() const {
115     return this->output_zero_point_;
116   }
117 
qmin(uint8_t qmin)118   inline GAvgPoolMicrokernelTester& qmin(uint8_t qmin) {
119     this->qmin_ = qmin;
120     return *this;
121   }
122 
qmin()123   inline uint8_t qmin() const {
124     return this->qmin_;
125   }
126 
qmax(uint8_t qmax)127   inline GAvgPoolMicrokernelTester& qmax(uint8_t qmax) {
128     this->qmax_ = qmax;
129     return *this;
130   }
131 
qmax()132   inline uint8_t qmax() const {
133     return this->qmax_;
134   }
135 
iterations(size_t iterations)136   inline GAvgPoolMicrokernelTester& iterations(size_t iterations) {
137     this->iterations_ = iterations;
138     return *this;
139   }
140 
iterations()141   inline size_t iterations() const {
142     return this->iterations_;
143   }
144 
Test(xnn_qu8_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,xnn_init_qu8_avgpool_minmax_params_fn init_params,xnn_qu8_requantize_fn requantize)145   void Test(
146       xnn_qu8_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,
147       xnn_init_qu8_avgpool_minmax_params_fn init_params,
148       xnn_qu8_requantize_fn requantize) const
149   {
150     std::random_device random_device;
151     auto rng = std::mt19937(random_device());
152     std::uniform_int_distribution<int32_t> u8dist(
153       std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());
154 
155     std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
156       (rows() - 1) * input_stride() + channels());
157     std::vector<uint8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
158     std::vector<uint8_t> output(channels());
159     std::vector<uint8_t> output_ref(channels());
160     std::vector<float> output_fp(channels());
161     std::vector<int32_t> accumulators(channels());
162     for (size_t iteration = 0; iteration < iterations(); iteration++) {
163       std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
164       std::fill(output.begin(), output.end(), UINT8_C(0xA5));
165 
166       // Prepare parameters.
167       union xnn_qu8_avgpool_minmax_params params;
168       init_params(
169         &params,
170         -int32_t(input_zero_point()) * int32_t(rows()),
171         input_scale() / (output_scale() * float(rows())),
172         output_zero_point(), qmin(), qmax());
173 
174       // Compute reference results.
175       for (size_t c = 0; c < channels(); c++) {
176         int32_t acc = 0;
177         for (size_t n = 0; n < rows(); n++) {
178           acc += int32_t(input[n * input_stride() + c]) - int32_t(input_zero_point());
179         }
180         accumulators[c] = acc;
181         output_ref[c] = requantize(
182           acc, input_scale() / (output_scale() * float(rows())), output_zero_point(), qmin(), qmax());
183         output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point());
184         output_fp[c] = std::min<float>(output_fp[c], float(qmax()));
185         output_fp[c] = std::max<float>(output_fp[c], float(qmin()));
186       }
187 
188       // Call optimized micro-kernel.
189       gavgpool_minmax(rows(), channels(),
190         input.data(), input_stride() * sizeof(uint8_t),
191         zero.data(),
192         output.data(),
193         &params);
194 
195       // Verify results.
196       for (size_t c = 0; c < channels(); c++) {
197         ASSERT_LE(uint32_t(output[c]), uint32_t(qmax()))
198           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
199         ASSERT_GE(uint32_t(output[c]), uint32_t(qmin()))
200           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
201         ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.55f)
202           << "at position " << c << ", rows = " << rows() << ", channels = " << channels()
203           << ", acc = " << accumulators[c];
204         ASSERT_EQ(uint32_t(output_ref[c]), uint32_t(output[c]))
205           << "at position " << c << ", rows = " << rows() << ", channels = " << channels()
206           << ", acc = " << accumulators[c];
207       }
208     }
209   }
210 
Test(xnn_qu8_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,xnn_init_qu8_avgpool_minmax_params_fn init_params,xnn_qu8_requantize_fn requantize)211   void Test(
212       xnn_qu8_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,
213       xnn_init_qu8_avgpool_minmax_params_fn init_params,
214       xnn_qu8_requantize_fn requantize) const
215   {
216     std::random_device random_device;
217     auto rng = std::mt19937(random_device());
218     std::uniform_int_distribution<int32_t> u8dist(
219       std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());
220 
221     std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
222       (rows() - 1) * input_stride() + channels());
223     std::vector<int32_t, AlignedAllocator<int32_t, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
224     std::vector<uint8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
225     std::vector<uint8_t> output(channels());
226     std::vector<uint8_t> output_ref(channels());
227     std::vector<float> output_fp(channels());
228     std::vector<int32_t> accumulators(channels());
229     for (size_t iteration = 0; iteration < iterations(); iteration++) {
230       std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
231       std::fill(output.begin(), output.end(), UINT8_C(0xA5));
232 
233       // Prepare parameters.
234       union xnn_qu8_avgpool_minmax_params params;
235       init_params(
236         &params,
237         -int32_t(input_zero_point()) * int32_t(rows()),
238         input_scale() / (output_scale() * float(rows())),
239         output_zero_point(), qmin(), qmax());
240 
241       // Compute reference results.
242       for (size_t c = 0; c < channels(); c++) {
243         int32_t acc = 0;
244         for (size_t n = 0; n < rows(); n++) {
245           acc += int32_t(input[n * input_stride() + c]) - int32_t(input_zero_point());
246         }
247 
248         accumulators[c] = acc;
249         output_ref[c] = requantize(
250           acc, input_scale() / (output_scale() * float(rows())), output_zero_point(), qmin(), qmax());
251         output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point());
252         output_fp[c] = std::min<float>(output_fp[c], float(qmax()));
253         output_fp[c] = std::max<float>(output_fp[c], float(qmin()));
254       }
255 
256       // Call optimized micro-kernel.
257       gavgpool_minmax(rows(), channels(),
258         input.data(), input_stride() * sizeof(uint8_t),
259         zero.data(),
260         buffer.data(),
261         output.data(),
262         &params);
263 
264       // Verify results.
265       for (size_t c = 0; c < channels(); c++) {
266         ASSERT_LE(uint32_t(output[c]), uint32_t(qmax()))
267           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
268         ASSERT_GE(uint32_t(output[c]), uint32_t(qmin()))
269           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
270         ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.55f)
271           << "at position " << c << ", rows = " << rows() << ", channels = " << channels()
272           << ", acc = " << accumulators[c];
273         ASSERT_EQ(uint32_t(output_ref[c]), uint32_t(output[c]))
274           << "at position " << c << ", rows = " << rows() << ", channels = " << channels()
275           << ", acc = " << accumulators[c];
276       }
277     }
278   }
279 
Test(xnn_qs8_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,xnn_init_qs8_avgpool_minmax_params_fn init_params,xnn_qs8_requantize_fn requantize)280   void Test(
281       xnn_qs8_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,
282       xnn_init_qs8_avgpool_minmax_params_fn init_params,
283       xnn_qs8_requantize_fn requantize) const
284   {
285     std::random_device random_device;
286     auto rng = std::mt19937(random_device());
287     std::uniform_int_distribution<int32_t> i8dist(
288       std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());
289 
290     std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) +
291       (rows() - 1) * input_stride() + channels());
292     std::vector<int8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
293     std::vector<int8_t> output(channels());
294     std::vector<int8_t> output_ref(channels());
295     std::vector<float> output_fp(channels());
296     std::vector<int32_t> accumulators(channels());
297     for (size_t iteration = 0; iteration < iterations(); iteration++) {
298       std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
299       std::fill(output.begin(), output.end(), INT8_C(0xA5));
300 
301       // Prepare parameters.
302       union xnn_qs8_avgpool_minmax_params params;
303       init_params(
304         &params,
305         -int32_t(input_zero_point() - 0x80) * int32_t(rows()),
306         input_scale() / (output_scale() * float(rows())),
307         int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
308 
309       // Compute reference results.
310       for (size_t c = 0; c < channels(); c++) {
311         int32_t acc = 0;
312         for (size_t n = 0; n < rows(); n++) {
313           acc += int32_t(input[n * input_stride() + c]) - int32_t(input_zero_point() - 0x80);
314         }
315         accumulators[c] = acc;
316         output_ref[c] = requantize(
317           acc, input_scale() / (output_scale() * float(rows())), int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
318         output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point() - 0x80);
319         output_fp[c] = std::min<float>(output_fp[c], float(qmax() - 0x80));
320         output_fp[c] = std::max<float>(output_fp[c], float(qmin() - 0x80));
321       }
322 
323       // Call optimized micro-kernel.
324       gavgpool_minmax(rows(), channels(),
325         input.data(), input_stride() * sizeof(int8_t),
326         zero.data(),
327         output.data(),
328         &params);
329 
330       // Verify results.
331       for (size_t c = 0; c < channels(); c++) {
332         ASSERT_LE(int32_t(output[c]), int32_t(qmax() - 0x80))
333           << "at channel " << c << " / " << channels() << ", rows = " << rows();
334         ASSERT_GE(int32_t(output[c]), int32_t(qmin() - 0x80))
335           << "at channel " << c << " / " << channels() << ", rows = " << rows();
336         ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.55f)
337           << "at channel " << c << " / " << channels() << ", rows = " << rows()
338           << ", accumulator = " << accumulators[c];
339         ASSERT_EQ(int32_t(output_ref[c]), int32_t(output[c]))
340           << "at channel " << c << " / " << channels() << ", rows = " << rows()
341           << ", accumulator = " << accumulators[c];
342       }
343     }
344   }
345 
Test(xnn_qs8_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,xnn_init_qs8_avgpool_minmax_params_fn init_params,xnn_qs8_requantize_fn requantize)346   void Test(
347       xnn_qs8_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,
348       xnn_init_qs8_avgpool_minmax_params_fn init_params,
349       xnn_qs8_requantize_fn requantize) const
350   {
351     std::random_device random_device;
352     auto rng = std::mt19937(random_device());
353     std::uniform_int_distribution<int32_t> i8dist(
354       std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());
355 
356     std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) +
357       (rows() - 1) * input_stride() + channels());
358     std::vector<int32_t, AlignedAllocator<int32_t, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
359     std::vector<int8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
360     std::vector<int8_t> output(channels());
361     std::vector<int8_t> output_ref(channels());
362     std::vector<float> output_fp(channels());
363     std::vector<int32_t> accumulators(channels());
364     for (size_t iteration = 0; iteration < iterations(); iteration++) {
365       std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
366       std::fill(output.begin(), output.end(), INT8_C(0xA5));
367 
368       // Prepare parameters.
369       union xnn_qs8_avgpool_minmax_params params;
370       init_params(
371         &params,
372         -int32_t(input_zero_point() - 0x80) * int32_t(rows()),
373         input_scale() / (output_scale() * float(rows())),
374         int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
375 
376       // Compute reference results.
377       for (size_t c = 0; c < channels(); c++) {
378         int32_t acc = 0;
379         for (size_t n = 0; n < rows(); n++) {
380           acc += int32_t(input[n * input_stride() + c]) - int32_t(input_zero_point() - 0x80);
381         }
382         accumulators[c] = acc;
383         output_ref[c] = requantize(
384           acc, input_scale() / (output_scale() * float(rows())), int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
385         output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point() - 0x80);
386         output_fp[c] = std::min<float>(output_fp[c], float(qmax() - 0x80));
387         output_fp[c] = std::max<float>(output_fp[c], float(qmin() - 0x80));
388       }
389 
390       // Call optimized micro-kernel.
391       gavgpool_minmax(rows(), channels(),
392         input.data(), input_stride() * sizeof(int8_t),
393         zero.data(),
394         buffer.data(),
395         output.data(),
396         &params);
397 
398       // Verify results.
399       for (size_t c = 0; c < channels(); c++) {
400         ASSERT_LE(int32_t(output[c]), int32_t(qmax() - 0x80))
401           << "at channel " << c << " / " << channels() << ", rows = " << rows();
402         ASSERT_GE(int32_t(output[c]), int32_t(qmin() - 0x80))
403           << "at channel " << c << " / " << channels() << ", rows = " << rows();
404         ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.55f)
405           << "at channel " << c << " / " << channels() << ", rows = " << rows()
406           << ", accumulator = " << accumulators[c];
407         ASSERT_EQ(int32_t(output_ref[c]), int32_t(output[c]))
408           << "at channel " << c << " / " << channels() << ", rows = " << rows()
409           << ", accumulator = " << accumulators[c];
410       }
411     }
412   }
413 
Test(xnn_f16_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,xnn_init_f16_scaleminmax_params_fn init_params)414   void Test(xnn_f16_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax, xnn_init_f16_scaleminmax_params_fn init_params) const {
415     std::random_device random_device;
416     auto rng = std::mt19937(random_device());
417     std::uniform_real_distribution<float> f32dist;
418 
419     std::vector<uint16_t> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
420     std::vector<uint16_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
421     std::vector<uint16_t> output(channels());
422     std::vector<float> output_ref(channels());
423 
424     std::fill(zero.begin(), zero.end(), 0);
425     for (size_t iteration = 0; iteration < iterations(); iteration++) {
426       std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
427       std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
428 
429       // Compute reference results, without clamping.
430       for (size_t c = 0; c < channels(); c++) {
431         float acc = 0.0f;
432         for (size_t n = 0; n < rows(); n++) {
433           acc += fp16_ieee_to_fp32_value(input[n * input_stride() + c]);
434         }
435         output_ref[c] = acc / float(rows());
436       }
437 
438       // Compute clamping parameters.
439       const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
440       const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
441       const float accumulated_range = accumulated_max - accumulated_min;
442       const float output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + float(qmin()) / 255.0f * accumulated_range));
443       const float output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range));
444 
445       // Clamp reference results.
446       for (float& output_values : output_ref) {
447         output_values = std::max(std::min(output_values, output_max), output_min);
448       }
449 
450       // Prepare parameters.
451       xnn_f16_scaleminmax_params params;
452       init_params(&params,
453         fp16_ieee_from_fp32_value(1.0f / float(rows())),
454         fp16_ieee_from_fp32_value(output_min),
455         fp16_ieee_from_fp32_value(output_max));
456 
457       // Call optimized micro-kernel.
458       gavgpool_minmax(rows(), channels(),
459         input.data(), input_stride() * sizeof(uint16_t),
460         zero.data(),
461         output.data(),
462         &params);
463 
464       // Verify results.
465       for (size_t c = 0; c < channels(); c++) {
466         ASSERT_LE(fp16_ieee_to_fp32_value(output[c]), output_max)
467           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
468         ASSERT_GE(fp16_ieee_to_fp32_value(output[c]), output_min)
469           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
470         ASSERT_NEAR(fp16_ieee_to_fp32_value(output[c]), output_ref[c], std::max(1.0e-4f, std::abs(output_ref[c]) * 1.0e-2f))
471           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
472       }
473     }
474   }
475 
Test(xnn_f16_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,xnn_init_f16_scaleminmax_params_fn init_params)476   void Test(xnn_f16_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax, xnn_init_f16_scaleminmax_params_fn init_params) const {
477     std::random_device random_device;
478     auto rng = std::mt19937(random_device());
479     std::uniform_real_distribution<float> f32dist;
480 
481     std::vector<uint16_t> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
482     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
483     std::vector<uint16_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
484     std::vector<uint16_t> output(channels());
485     std::vector<float> output_ref(channels());
486     for (size_t iteration = 0; iteration < iterations(); iteration++) {
487       std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
488       std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
489 
490       // Compute reference results, without clamping.
491       for (size_t c = 0; c < channels(); c++) {
492         float acc = 0.0f;
493         for (size_t n = 0; n < rows(); n++) {
494           acc += fp16_ieee_to_fp32_value(input[n * input_stride() + c]);
495         }
496         output_ref[c] = acc / float(rows());
497       }
498 
499       // Compute clamping parameters.
500       const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
501       const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
502       const float accumulated_range = accumulated_max - accumulated_min;
503       const float output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + float(qmin()) / 255.0f * accumulated_range));
504       const float output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range));
505 
506       // Prepare parameters.
507       xnn_f16_scaleminmax_params params;
508       init_params(&params,
509         fp16_ieee_from_fp32_value(1.0f / float(rows())),
510         fp16_ieee_from_fp32_value(output_min),
511         fp16_ieee_from_fp32_value(output_max));
512 
513       // Clamp reference results.
514       for (float& output_values : output_ref) {
515         output_values = std::max(std::min(output_values, output_max), output_min);
516       }
517 
518       // Call optimized micro-kernel.
519       gavgpool_minmax(rows(), channels(),
520         input.data(), input_stride() * sizeof(uint16_t),
521         zero.data(),
522         buffer.data(),
523         output.data(),
524         &params);
525 
526       // Verify results.
527       for (size_t c = 0; c < channels(); c++) {
528         ASSERT_LE(fp16_ieee_to_fp32_value(output[c]), output_max)
529           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
530         ASSERT_GE(fp16_ieee_to_fp32_value(output[c]), output_min)
531           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
532         ASSERT_NEAR(fp16_ieee_to_fp32_value(output[c]), output_ref[c], std::abs(output_ref[c]) * 1.0e-0f)
533           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
534       }
535     }
536   }
537 
Test(xnn_f32_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,xnn_init_f32_scaleminmax_params_fn init_params)538   void Test(xnn_f32_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax, xnn_init_f32_scaleminmax_params_fn init_params) const {
539     std::random_device random_device;
540     auto rng = std::mt19937(random_device());
541     std::uniform_real_distribution<float> f32dist;
542 
543     std::vector<float> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float));
544     std::vector<float> zero(channels() + XNN_EXTRA_BYTES / sizeof(float));
545     std::vector<float> output(channels());
546     std::vector<float> output_ref(channels());
547 
548     std::fill(zero.begin(), zero.end(), 0.0f);
549     for (size_t iteration = 0; iteration < iterations(); iteration++) {
550       std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
551       std::fill(output.begin(), output.end(), std::nanf(""));
552 
553       // Compute reference results, without clamping.
554       for (size_t c = 0; c < channels(); c++) {
555         float acc = 0.0f;
556         for (size_t n = 0; n < rows(); n++) {
557           acc += input[n * input_stride() + c];
558         }
559         output_ref[c] = acc / float(rows());
560       }
561 
562       // Compute clamping parameters.
563       const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
564       const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
565       const float accumulated_range = accumulated_max - accumulated_min;
566       const float output_min = accumulated_min + float(qmin()) / 255.0f * accumulated_range;
567       const float output_max = accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range;
568 
569       // Clamp reference results.
570       for (float& output_values : output_ref) {
571         output_values = std::max(std::min(output_values, output_max), output_min);
572       }
573 
574       // Prepare parameters.
575       union xnn_f32_scaleminmax_params params;
576       init_params(&params, 1.0f / float(rows()), output_min, output_max);
577 
578       // Call optimized micro-kernel.
579       gavgpool_minmax(rows(), channels(),
580         input.data(), input_stride() * sizeof(float),
581         zero.data(),
582         output.data(),
583         &params);
584 
585       // Verify results.
586       for (size_t c = 0; c < channels(); c++) {
587         ASSERT_LE(output[c], output_max)
588           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
589         ASSERT_GE(output[c], output_min)
590           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
591         ASSERT_NEAR(output[c], output_ref[c], std::abs(output_ref[c]) * 1.0e-6f)
592           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
593       }
594     }
595   }
596 
Test(xnn_f32_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,xnn_init_f32_scaleminmax_params_fn init_params)597   void Test(xnn_f32_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax, xnn_init_f32_scaleminmax_params_fn init_params) const {
598     std::random_device random_device;
599     auto rng = std::mt19937(random_device());
600     std::uniform_real_distribution<float> f32dist;
601 
602     std::vector<float> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float));
603     std::vector<float, AlignedAllocator<float, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(float));
604     std::vector<float> zero(channels() + XNN_EXTRA_BYTES / sizeof(float));
605     std::vector<float> output(channels());
606     std::vector<float> output_ref(channels());
607     for (size_t iteration = 0; iteration < iterations(); iteration++) {
608       std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
609       std::fill(output.begin(), output.end(), std::nanf(""));
610 
611       // Compute reference results, without clamping.
612       for (size_t c = 0; c < channels(); c++) {
613         float acc = 0.0f;
614         for (size_t n = 0; n < rows(); n++) {
615           acc += input[n * input_stride() + c];
616         }
617         output_ref[c] = acc / float(rows());
618       }
619 
620       // Compute clamping parameters.
621       const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
622       const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
623       const float accumulated_range = accumulated_max - accumulated_min;
624       const float output_min = accumulated_min + float(qmin()) / 255.0f * accumulated_range;
625       const float output_max = accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range;
626 
627       // Prepare parameters.
628       union xnn_f32_scaleminmax_params params;
629       init_params(&params, 1.0f / float(rows()), output_min, output_max);
630 
631       // Clamp reference results.
632       for (float& output_values : output_ref) {
633         output_values = std::max(std::min(output_values, output_max), output_min);
634       }
635 
636       // Call optimized micro-kernel.
637       gavgpool_minmax(rows(), channels(),
638         input.data(), input_stride() * sizeof(float),
639         zero.data(),
640         buffer.data(),
641         output.data(),
642         &params);
643 
644       // Verify results.
645       for (size_t c = 0; c < channels(); c++) {
646         ASSERT_LE(output[c], output_max)
647           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
648         ASSERT_GE(output[c], output_min)
649           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
650         ASSERT_NEAR(output[c], output_ref[c], std::abs(output_ref[c]) * 1.0e-6f)
651           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
652       }
653     }
654   }
655 
656  private:
// Pooling geometry. The setters assert that these three are non-zero.
size_t rows_ = 1;
size_t channels_ = 1;
size_t channel_tile_ = 1;
// 0 means "not set": input_stride() then falls back to channels().
size_t input_stride_ = 0;
// Quantization scales, read only by the QU8/QS8 tests.
float input_scale_ = 1.25f;
float output_scale_ = 0.75f;
// Zero points on the uint8_t scale; the QS8 tests subtract 0x80 before use.
uint8_t input_zero_point_ = 121;
uint8_t output_zero_point_ = 133;
// Clamping bounds on the 0..255 scale; the defaults span the full range.
uint8_t qmin_ = 0;
uint8_t qmax_ = 255;
// Number of randomized rounds performed by each Test() call.
size_t iterations_ = 15;
668 };
669