xref: /aosp_15_r20/external/XNNPACK/test/global-average-pooling-operator-tester.h (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #pragma once
10 
#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
24 
25 
26 class GlobalAveragePoolingOperatorTester {
27  public:
channels(size_t channels)28   inline GlobalAveragePoolingOperatorTester& channels(size_t channels) {
29     assert(channels != 0);
30     this->channels_ = channels;
31     return *this;
32   }
33 
channels()34   inline size_t channels() const {
35     return this->channels_;
36   }
37 
width(size_t width)38   inline GlobalAveragePoolingOperatorTester& width(size_t width) {
39     assert(width != 0);
40     this->width_ = width;
41     return *this;
42   }
43 
width()44   inline size_t width() const {
45     return this->width_;
46   }
47 
input_stride(size_t input_stride)48   inline GlobalAveragePoolingOperatorTester& input_stride(size_t input_stride) {
49     assert(input_stride != 0);
50     this->input_stride_ = input_stride;
51     return *this;
52   }
53 
input_stride()54   inline size_t input_stride() const {
55     if (this->input_stride_ == 0) {
56       return channels();
57     } else {
58       assert(this->input_stride_ >= channels());
59       return this->input_stride_;
60     }
61   }
62 
output_stride(size_t output_stride)63   inline GlobalAveragePoolingOperatorTester& output_stride(size_t output_stride) {
64     assert(output_stride != 0);
65     this->output_stride_ = output_stride;
66     return *this;
67   }
68 
output_stride()69   inline size_t output_stride() const {
70     if (this->output_stride_ == 0) {
71       return channels();
72     } else {
73       assert(this->output_stride_ >= channels());
74       return this->output_stride_;
75     }
76   }
77 
batch_size(size_t batch_size)78   inline GlobalAveragePoolingOperatorTester& batch_size(size_t batch_size) {
79     assert(batch_size != 0);
80     this->batch_size_ = batch_size;
81     return *this;
82   }
83 
batch_size()84   inline size_t batch_size() const {
85     return this->batch_size_;
86   }
87 
input_scale(float input_scale)88   inline GlobalAveragePoolingOperatorTester& input_scale(float input_scale) {
89     assert(input_scale > 0.0f);
90     assert(std::isnormal(input_scale));
91     this->input_scale_ = input_scale;
92     return *this;
93   }
94 
input_scale()95   inline float input_scale() const {
96     return this->input_scale_;
97   }
98 
input_zero_point(uint8_t input_zero_point)99   inline GlobalAveragePoolingOperatorTester& input_zero_point(uint8_t input_zero_point) {
100     this->input_zero_point_ = input_zero_point;
101     return *this;
102   }
103 
input_zero_point()104   inline uint8_t input_zero_point() const {
105     return this->input_zero_point_;
106   }
107 
output_scale(float output_scale)108   inline GlobalAveragePoolingOperatorTester& output_scale(float output_scale) {
109     assert(output_scale > 0.0f);
110     assert(std::isnormal(output_scale));
111     this->output_scale_ = output_scale;
112     return *this;
113   }
114 
output_scale()115   inline float output_scale() const {
116     return this->output_scale_;
117   }
118 
output_zero_point(uint8_t output_zero_point)119   inline GlobalAveragePoolingOperatorTester& output_zero_point(uint8_t output_zero_point) {
120     this->output_zero_point_ = output_zero_point;
121     return *this;
122   }
123 
output_zero_point()124   inline uint8_t output_zero_point() const {
125     return this->output_zero_point_;
126   }
127 
qmin(uint8_t qmin)128   inline GlobalAveragePoolingOperatorTester& qmin(uint8_t qmin) {
129     this->qmin_ = qmin;
130     return *this;
131   }
132 
qmin()133   inline uint8_t qmin() const {
134     return this->qmin_;
135   }
136 
qmax(uint8_t qmax)137   inline GlobalAveragePoolingOperatorTester& qmax(uint8_t qmax) {
138     this->qmax_ = qmax;
139     return *this;
140   }
141 
qmax()142   inline uint8_t qmax() const {
143     return this->qmax_;
144   }
145 
iterations(size_t iterations)146   inline GlobalAveragePoolingOperatorTester& iterations(size_t iterations) {
147     this->iterations_ = iterations;
148     return *this;
149   }
150 
iterations()151   inline size_t iterations() const {
152     return this->iterations_;
153   }
154 
TestNWCxQU8()155   void TestNWCxQU8() const {
156     std::random_device random_device;
157     auto rng = std::mt19937(random_device());
158     std::uniform_int_distribution<int32_t> u8dist(
159       std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());
160 
161     std::vector<uint8_t> input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
162     std::vector<uint8_t> output(batch_size() * output_stride());
163     std::vector<float> output_ref(batch_size() * channels());
164     for (size_t iteration = 0; iteration < iterations(); iteration++) {
165       std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); });
166       std::fill(output.begin(), output.end(), UINT8_C(0xA5));
167 
168       // Compute reference results.
169       const double scale = double(input_scale()) / (double(width()) * double(output_scale()));
170       for (size_t i = 0; i < batch_size(); i++) {
171         for (size_t j = 0; j < channels(); j++) {
172           double acc = 0.0f;
173           for (size_t k = 0; k < width(); k++) {
174             acc += double(int32_t(input[(i * width() + k) * input_stride() + j]) - int32_t(input_zero_point()));
175           }
176           output_ref[i * channels() + j] = float(acc * scale + double(output_zero_point()));
177           output_ref[i * channels() + j] = std::min<float>(output_ref[i * channels() + j], float(qmax()));
178           output_ref[i * channels() + j] = std::max<float>(output_ref[i * channels() + j], float(qmin()));
179         }
180       }
181 
182       // Create, setup, run, and destroy Global Average Pooling operator.
183       ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
184       xnn_operator_t global_average_pooling_op = nullptr;
185 
186       xnn_status status = xnn_create_global_average_pooling_nwc_qu8(
187           channels(), input_stride(), output_stride(),
188           input_zero_point(), input_scale(),
189           output_zero_point(), output_scale(),
190           qmin(), qmax(),
191           0, &global_average_pooling_op);
192       if (status == xnn_status_unsupported_hardware) {
193         GTEST_SKIP();
194       }
195       ASSERT_EQ(xnn_status_success, status);
196       ASSERT_NE(nullptr, global_average_pooling_op);
197 
198       // Smart pointer to automatically delete global_average_pooling_op.
199       std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator);
200 
201       ASSERT_EQ(xnn_status_success,
202         xnn_setup_global_average_pooling_nwc_qu8(
203           global_average_pooling_op,
204           batch_size(), width(),
205           input.data(), output.data(),
206           nullptr /* thread pool */));
207 
208       ASSERT_EQ(xnn_status_success,
209         xnn_run_operator(global_average_pooling_op, nullptr /* thread pool */));
210 
211       // Verify results.
212       for (size_t i = 0; i < batch_size(); i++) {
213         for (size_t c = 0; c < channels(); c++) {
214           ASSERT_LE(uint32_t(output[i * output_stride() + c]), uint32_t(qmax()));
215           ASSERT_GE(uint32_t(output[i * output_stride() + c]), uint32_t(qmin()));
216           ASSERT_NEAR(float(int32_t(output[i * output_stride() + c])), output_ref[i * channels() + c], 0.80f)
217             << "at batch index " << i << " / " << batch_size()
218             << ", channel " << c << " / " << channels();
219         }
220       }
221     }
222   }
223 
TestNWCxQS8()224   void TestNWCxQS8() const {
225     std::random_device random_device;
226     auto rng = std::mt19937(random_device());
227     std::uniform_int_distribution<int32_t> i8dist(
228       std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());
229 
230     std::vector<int8_t> input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
231     std::vector<int8_t> output(batch_size() * output_stride());
232     std::vector<float> output_ref(batch_size() * channels());
233     for (size_t iteration = 0; iteration < iterations(); iteration++) {
234       std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); });
235       std::fill(output.begin(), output.end(), INT8_C(0xA5));
236 
237       // Compute reference results.
238       const double scale = double(input_scale()) / (double(width()) * double(output_scale()));
239       for (size_t i = 0; i < batch_size(); i++) {
240         for (size_t j = 0; j < channels(); j++) {
241           double acc = 0.0f;
242           for (size_t k = 0; k < width(); k++) {
243             acc += double(int32_t(input[(i * width() + k) * input_stride() + j]) - int32_t(input_zero_point() - 0x80));
244           }
245           output_ref[i * channels() + j] = float(acc * scale + double(output_zero_point() - 0x80));
246           output_ref[i * channels() + j] = std::min<float>(output_ref[i * channels() + j], float(qmax() - 0x80));
247           output_ref[i * channels() + j] = std::max<float>(output_ref[i * channels() + j], float(qmin() - 0x80));
248         }
249       }
250 
251       // Create, setup, run, and destroy Global Average Pooling operator.
252       ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
253       xnn_operator_t global_average_pooling_op = nullptr;
254 
255       xnn_status status = xnn_create_global_average_pooling_nwc_qs8(
256           channels(), input_stride(), output_stride(),
257           int8_t(input_zero_point() - 0x80), input_scale(),
258           int8_t(output_zero_point() - 0x80), output_scale(),
259           int8_t(qmin() - 0x80), int8_t(qmax() - 0x80),
260           0, &global_average_pooling_op);
261       if (status == xnn_status_unsupported_hardware) {
262         GTEST_SKIP();
263       }
264       ASSERT_EQ(xnn_status_success, status);
265       ASSERT_NE(nullptr, global_average_pooling_op);
266 
267       // Smart pointer to automatically delete global_average_pooling_op.
268       std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator);
269 
270       ASSERT_EQ(xnn_status_success,
271         xnn_setup_global_average_pooling_nwc_qs8(
272           global_average_pooling_op,
273           batch_size(), width(),
274           input.data(), output.data(),
275           nullptr /* thread pool */));
276 
277       ASSERT_EQ(xnn_status_success,
278         xnn_run_operator(global_average_pooling_op, nullptr /* thread pool */));
279 
280       // Verify results.
281       for (size_t i = 0; i < batch_size(); i++) {
282         for (size_t c = 0; c < channels(); c++) {
283           ASSERT_LE(int32_t(output[i * output_stride() + c]), int32_t(qmax() - 0x80));
284           ASSERT_GE(int32_t(output[i * output_stride() + c]), int32_t(qmin() - 0x80));
285           ASSERT_NEAR(float(int32_t(output[i * output_stride() + c])), output_ref[i * channels() + c], 0.80f)
286             << "at batch index " << i << " / " << batch_size()
287             << ", channel " << c << " / " << channels();
288         }
289       }
290     }
291   }
292 
TestNWCxF16()293   void TestNWCxF16() const {
294     std::random_device random_device;
295     auto rng = std::mt19937(random_device());
296     std::uniform_real_distribution<float> f32dist(1.0e-3f, 1.0f);
297 
298     std::vector<uint16_t> input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
299     std::vector<uint16_t> output(batch_size() * output_stride());
300     std::vector<float> output_ref(batch_size() * channels());
301     for (size_t iteration = 0; iteration < iterations(); iteration++) {
302       std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
303       std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
304 
305       // Compute reference results, without clamping.
306       for (size_t i = 0; i < batch_size(); i++) {
307         for (size_t j = 0; j < channels(); j++) {
308           float acc = 0.0f;
309           for (size_t k = 0; k < width(); k++) {
310             acc += fp16_ieee_to_fp32_value(input[(i * width() + k) * input_stride() + j]);
311           }
312           output_ref[i * channels() + j] = acc / float(width());
313         }
314       }
315 
316       // Compute clamping parameters.
317       const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
318       const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
319       const float accumulated_range = accumulated_max - accumulated_min;
320       const float scaled_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + accumulated_range / 255.0f * float(qmin())));
321       const float scaled_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - accumulated_range / 255.0f * float(255 - qmax())));
322       const float output_min = scaled_min == scaled_max ? -std::numeric_limits<float>::infinity() : scaled_min;
323       const float output_max = scaled_min == scaled_max ? +std::numeric_limits<float>::infinity() : scaled_max;
324 
325       // Clamp reference results.
326       for (float& value : output_ref) {
327         value = std::max(std::min(value, output_max), output_min);
328       }
329 
330       // Create, setup, run, and destroy Global Average Pooling operator.
331       ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
332       xnn_operator_t global_average_pooling_op = nullptr;
333 
334       xnn_status status = xnn_create_global_average_pooling_nwc_f16(
335           channels(), input_stride(), output_stride(),
336           output_min, output_max,
337           0, &global_average_pooling_op);
338       if (status == xnn_status_unsupported_hardware) {
339         GTEST_SKIP();
340       }
341       ASSERT_EQ(xnn_status_success, status);
342       ASSERT_NE(nullptr, global_average_pooling_op);
343 
344       // Smart pointer to automatically delete global_average_pooling_op.
345       std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator);
346 
347       ASSERT_EQ(xnn_status_success,
348         xnn_setup_global_average_pooling_nwc_f16(
349           global_average_pooling_op,
350           batch_size(), width(),
351           input.data(), output.data(),
352           nullptr /* thread pool */));
353 
354       ASSERT_EQ(xnn_status_success,
355         xnn_run_operator(global_average_pooling_op, nullptr /* thread pool */));
356 
357       // Verify results.
358       for (size_t i = 0; i < batch_size(); i++) {
359         for (size_t c = 0; c < channels(); c++) {
360           ASSERT_LE(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_max);
361           ASSERT_GE(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_min);
362           ASSERT_NEAR(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_ref[i * channels() + c], std::max(1.0e-4f, std::abs(output_ref[i * channels() + c]) * 1.0e-2f))
363             << "at batch index " << i << " / " << batch_size()
364             << ", channel " << c << " / " << channels();
365         }
366       }
367     }
368   }
369 
TestNWCxF32()370   void TestNWCxF32() const {
371     std::random_device random_device;
372     auto rng = std::mt19937(random_device());
373     std::uniform_real_distribution<float> f32dist;
374 
375     std::vector<float> input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float));
376     std::vector<float> output(batch_size() * output_stride());
377     std::vector<float> output_ref(batch_size() * channels());
378     for (size_t iteration = 0; iteration < iterations(); iteration++) {
379       std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
380       std::fill(output.begin(), output.end(), std::nanf(""));
381 
382       // Compute reference results, without clamping.
383       for (size_t i = 0; i < batch_size(); i++) {
384         for (size_t j = 0; j < channels(); j++) {
385           float acc = 0.0f;
386           for (size_t k = 0; k < width(); k++) {
387             acc += input[(i * width() + k) * input_stride() + j];
388           }
389           output_ref[i * channels() + j] = acc / float(width());
390         }
391       }
392 
393       // Compute clamping parameters.
394       const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
395       const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
396       const float accumulated_range = accumulated_max - accumulated_min;
397       const float output_min = accumulated_range == 0.0f ?
398         -std::numeric_limits<float>::infinity() :
399         accumulated_min + accumulated_range / 255.0f * float(qmin());
400       const float output_max = accumulated_range == 0.0f ?
401         +std::numeric_limits<float>::infinity() :
402         accumulated_max - accumulated_range / 255.0f * float(255 - qmax());
403 
404       // Clamp reference results.
405       for (float& value : output_ref) {
406         value = std::max(std::min(value, output_max), output_min);
407       }
408 
409       // Create, setup, run, and destroy Global Average Pooling operator.
410       ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
411       xnn_operator_t global_average_pooling_op = nullptr;
412 
413       xnn_status status = xnn_create_global_average_pooling_nwc_f32(
414           channels(), input_stride(), output_stride(),
415           output_min, output_max,
416           0, &global_average_pooling_op);
417       if (status == xnn_status_unsupported_hardware) {
418         GTEST_SKIP();
419       }
420       ASSERT_EQ(xnn_status_success, status);
421       ASSERT_NE(nullptr, global_average_pooling_op);
422 
423       // Smart pointer to automatically delete global_average_pooling_op.
424       std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator);
425 
426       ASSERT_EQ(xnn_status_success,
427         xnn_setup_global_average_pooling_nwc_f32(
428           global_average_pooling_op,
429           batch_size(), width(),
430           input.data(), output.data(),
431           nullptr /* thread pool */));
432 
433       ASSERT_EQ(xnn_status_success,
434         xnn_run_operator(global_average_pooling_op, nullptr /* thread pool */));
435 
436       // Verify results.
437       for (size_t i = 0; i < batch_size(); i++) {
438         for (size_t c = 0; c < channels(); c++) {
439           ASSERT_LE(output[i * output_stride() + c], output_max);
440           ASSERT_GE(output[i * output_stride() + c], output_min);
441           ASSERT_NEAR(output[i * output_stride() + c], output_ref[i * channels() + c], std::abs(output_ref[i * channels() + c]) * 1.0e-6f)
442             << "at batch index " << i << " / " << batch_size()
443             << ", channel " << c << " / " << channels();
444         }
445       }
446     }
447   }
448 
TestNCWxF32()449   void TestNCWxF32() const {
450     std::random_device random_device;
451     auto rng = std::mt19937(random_device());
452     std::uniform_real_distribution<float> f32dist;
453 
454     std::vector<float> input(batch_size() * channels() * width() + XNN_EXTRA_BYTES / sizeof(float));
455     std::vector<float> output(batch_size() * channels());
456     std::vector<float> output_ref(batch_size() * channels());
457     for (size_t iteration = 0; iteration < iterations(); iteration++) {
458       std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); });
459       std::fill(output.begin(), output.end(), std::nanf(""));
460 
461       // Compute reference results, without clamping.
462       for (size_t i = 0; i < batch_size(); i++) {
463         for (size_t j = 0; j < channels(); j++) {
464           float acc = 0.0f;
465           for (size_t k = 0; k < width(); k++) {
466             acc += input[(i * channels() + j) * width() + k];
467           }
468           output_ref[i * channels() + j] = acc / float(width());
469         }
470       }
471 
472       // Compute clamping parameters.
473       const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
474       const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
475       const float accumulated_range = accumulated_max - accumulated_min;
476       const float output_min = accumulated_range == 0.0f ?
477         -std::numeric_limits<float>::infinity() :
478         accumulated_min + accumulated_range / 255.0f * float(qmin());
479       const float output_max = accumulated_range == 0.0f ?
480         +std::numeric_limits<float>::infinity() :
481         accumulated_max - accumulated_range / 255.0f * float(255 - qmax());
482 
483       // Clamp reference results.
484       for (float& value : output_ref) {
485         value = std::max(std::min(value, output_max), output_min);
486       }
487 
488       // Create, setup, run, and destroy Global Average Pooling operator.
489       ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
490       xnn_operator_t global_average_pooling_op = nullptr;
491 
492       xnn_status status = xnn_create_global_average_pooling_ncw_f32(
493         channels(), output_min, output_max,
494         0, &global_average_pooling_op);
495       if (status == xnn_status_unsupported_parameter) {
496         GTEST_SKIP();
497       }
498       ASSERT_EQ(xnn_status_success, status);
499 
500       // Smart pointer to automatically delete global_average_pooling_op.
501       std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_global_average_pooling_op(global_average_pooling_op, xnn_delete_operator);
502 
503       ASSERT_EQ(xnn_status_success,
504         xnn_setup_global_average_pooling_ncw_f32(
505           global_average_pooling_op,
506           batch_size(), width(),
507           input.data(), output.data(),
508           nullptr /* thread pool */));
509 
510       ASSERT_EQ(xnn_status_success,
511         xnn_run_operator(global_average_pooling_op, nullptr /* thread pool */));
512 
513       // Verify results.
514       for (size_t i = 0; i < batch_size(); i++) {
515         for (size_t c = 0; c < channels(); c++) {
516           ASSERT_LE(output[i * channels() + c], output_max);
517           ASSERT_GE(output[i * channels() + c], output_min);
518           ASSERT_NEAR(output[i * channels() + c], output_ref[i * channels() + c], std::abs(output_ref[i * channels() + c]) * 1.0e-5f)
519             << "at batch index " << i << " / " << batch_size()
520             << ", channel " << c << " / " << channels();
521         }
522       }
523     }
524   }
525 
526  private:
527   size_t batch_size_{1};
528   size_t width_{1};
529   size_t channels_{1};
530   size_t input_stride_{0};
531   size_t output_stride_{0};
532   float input_scale_{1.0f};
533   float output_scale_{1.0f};
534   uint8_t input_zero_point_{121};
535   uint8_t output_zero_point_{133};
536   uint8_t qmin_{0};
537   uint8_t qmax_{255};
538   size_t iterations_{1};
539 };
540