xref: /aosp_15_r20/external/XNNPACK/test/vbinaryc-microkernel-tester.h (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #pragma once
7 
#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/microfnptr.h>
#include <xnnpack/microparams-init.h>
23 
24 
25 class VBinaryCMicrokernelTester {
26  public:
27   enum class OpType {
28     AddC,
29     DivC,
30     RDivC,
31     MaxC,
32     MinC,
33     MulC,
34     SqrDiffC,
35     SubC,
36     RSubC,
37   };
38 
batch_size(size_t batch_size)39   inline VBinaryCMicrokernelTester& batch_size(size_t batch_size) {
40     assert(batch_size != 0);
41     this->batch_size_ = batch_size;
42     return *this;
43   }
44 
batch_size()45   inline size_t batch_size() const {
46     return this->batch_size_;
47   }
48 
inplace(bool inplace)49   inline VBinaryCMicrokernelTester& inplace(bool inplace) {
50     this->inplace_ = inplace;
51     return *this;
52   }
53 
inplace()54   inline bool inplace() const {
55     return this->inplace_;
56   }
57 
qmin(uint8_t qmin)58   inline VBinaryCMicrokernelTester& qmin(uint8_t qmin) {
59     this->qmin_ = qmin;
60     return *this;
61   }
62 
qmin()63   inline uint8_t qmin() const {
64     return this->qmin_;
65   }
66 
qmax(uint8_t qmax)67   inline VBinaryCMicrokernelTester& qmax(uint8_t qmax) {
68     this->qmax_ = qmax;
69     return *this;
70   }
71 
qmax()72   inline uint8_t qmax() const {
73     return this->qmax_;
74   }
75 
iterations(size_t iterations)76   inline VBinaryCMicrokernelTester& iterations(size_t iterations) {
77     this->iterations_ = iterations;
78     return *this;
79   }
80 
iterations()81   inline size_t iterations() const {
82     return this->iterations_;
83   }
84 
Test(xnn_f16_vbinary_ukernel_function vbinaryc,OpType op_type)85   void Test(xnn_f16_vbinary_ukernel_function vbinaryc, OpType op_type) const {
86     std::random_device random_device;
87     auto rng = std::mt19937(random_device());
88     std::uniform_real_distribution<float> f32dist(0.01f, 1.0f);
89 
90     std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
91     const uint16_t b = fp16_ieee_from_fp32_value(f32dist(rng));
92     std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
93     std::vector<float> y_ref(batch_size());
94     for (size_t iteration = 0; iteration < iterations(); iteration++) {
95       std::generate(a.begin(), a.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
96       if (inplace()) {
97         std::generate(y.begin(), y.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
98       } else {
99         std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
100       }
101       const uint16_t* a_data = inplace() ? y.data() : a.data();
102 
103       // Compute reference results.
104       for (size_t i = 0; i < batch_size(); i++) {
105         switch (op_type) {
106           case OpType::AddC:
107             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b);
108             break;
109           case OpType::DivC:
110             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b);
111             break;
112           case OpType::RDivC:
113             y_ref[i] = fp16_ieee_to_fp32_value(b) / fp16_ieee_to_fp32_value(a_data[i]);
114             break;
115           case OpType::MaxC:
116             y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b));
117             break;
118           case OpType::MinC:
119             y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b));
120             break;
121           case OpType::MulC:
122             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b);
123             break;
124           case OpType::SqrDiffC:
125           {
126             const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b);
127             y_ref[i] = diff * diff;
128             break;
129           }
130           case OpType::SubC:
131             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b);
132             break;
133           case OpType::RSubC:
134             y_ref[i] = fp16_ieee_to_fp32_value(b) - fp16_ieee_to_fp32_value(a_data[i]);
135             break;
136         }
137       }
138       // Call optimized micro-kernel.
139       vbinaryc(batch_size() * sizeof(uint16_t), a_data, &b, y.data(), nullptr);
140 
141       // Verify results.
142       for (size_t i = 0; i < batch_size(); i++) {
143         ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f))
144           << "at " << i << " / " << batch_size();
145       }
146     }
147   }
148 
Test(xnn_f16_vbinary_minmax_ukernel_function vbinaryc_minmax,OpType op_type,xnn_init_f16_minmax_params_fn init_params)149   void Test(xnn_f16_vbinary_minmax_ukernel_function vbinaryc_minmax, OpType op_type, xnn_init_f16_minmax_params_fn init_params) const {
150     std::random_device random_device;
151     auto rng = std::mt19937(random_device());
152     std::uniform_real_distribution<float> f32dist(0.01f, 1.0f);
153 
154     std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
155     const uint16_t b = fp16_ieee_from_fp32_value(f32dist(rng));
156     std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
157     std::vector<float> y_ref(batch_size());
158     for (size_t iteration = 0; iteration < iterations(); iteration++) {
159       std::generate(a.begin(), a.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
160       if (inplace()) {
161         std::generate(y.begin(), y.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
162       } else {
163         std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
164       }
165       const uint16_t* a_data = inplace() ? y.data() : a.data();
166 
167       // Compute reference results.
168       for (size_t i = 0; i < batch_size(); i++) {
169         switch (op_type) {
170           case OpType::AddC:
171             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b);
172             break;
173           case OpType::DivC:
174             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b);
175             break;
176           case OpType::RDivC:
177             y_ref[i] = fp16_ieee_to_fp32_value(b) / fp16_ieee_to_fp32_value(a_data[i]);
178             break;
179           case OpType::MaxC:
180             y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b));
181             break;
182           case OpType::MinC:
183             y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b));
184             break;
185           case OpType::MulC:
186             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b);
187             break;
188           case OpType::SqrDiffC:
189           {
190             const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b);
191             y_ref[i] = diff * diff;
192             break;
193           }
194           case OpType::SubC:
195             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b);
196             break;
197           case OpType::RSubC:
198             y_ref[i] = fp16_ieee_to_fp32_value(b) - fp16_ieee_to_fp32_value(a_data[i]);
199             break;
200         }
201       }
202       const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend());
203       const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend());
204       const float accumulated_range = accumulated_max - accumulated_min;
205       const float y_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ?
206         (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) :
207         +std::numeric_limits<float>::infinity()));
208       const float y_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ?
209         (accumulated_min + accumulated_range / 255.0f * float(qmin())) :
210         -std::numeric_limits<float>::infinity()));
211       for (size_t i = 0; i < batch_size(); i++) {
212         y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min);
213       }
214 
215       // Prepare parameters.
216       xnn_f16_minmax_params params;
217       init_params(&params,
218         fp16_ieee_from_fp32_value(y_min), fp16_ieee_from_fp32_value(y_max));
219 
220       // Call optimized micro-kernel.
221       vbinaryc_minmax(batch_size() * sizeof(uint16_t), a_data, &b, y.data(), &params);
222 
223       // Verify results.
224       for (size_t i = 0; i < batch_size(); i++) {
225         ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f))
226           << "at " << i << " / " << batch_size();
227       }
228     }
229   }
230 
231   void Test(xnn_f32_vbinary_ukernel_function vbinaryc, OpType op_type, xnn_init_f32_default_params_fn init_params = nullptr) const {
232     std::random_device random_device;
233     auto rng = std::mt19937(random_device());
234     std::uniform_real_distribution<float> f32dist(0.01f, 1.0f);
235 
236     std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
237     const float b = f32dist(rng);
238     std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
239     std::vector<float> y_ref(batch_size());
240     for (size_t iteration = 0; iteration < iterations(); iteration++) {
241       std::generate(a.begin(), a.end(), [&]() { return f32dist(rng); });
242       if (inplace()) {
243         std::generate(y.begin(), y.end(), [&]() { return f32dist(rng); });
244       } else {
245         std::fill(y.begin(), y.end(), nanf(""));
246       }
247       const float* a_data = inplace() ? y.data() : a.data();
248 
249       // Compute reference results.
250       for (size_t i = 0; i < batch_size(); i++) {
251         switch (op_type) {
252           case OpType::AddC:
253             y_ref[i] = a_data[i] + b;
254             break;
255           case OpType::DivC:
256             y_ref[i] = a_data[i] / b;
257             break;
258           case OpType::RDivC:
259             y_ref[i] = b / a_data[i];
260             break;
261           case OpType::MaxC:
262             y_ref[i] = std::max<float>(a_data[i], b);
263             break;
264           case OpType::MinC:
265             y_ref[i] = std::min<float>(a_data[i], b);
266             break;
267           case OpType::MulC:
268             y_ref[i] = a_data[i] * b;
269             break;
270           case OpType::SqrDiffC:
271           {
272             const float diff = a_data[i] - b;
273             y_ref[i] = diff * diff;
274             break;
275           }
276           case OpType::SubC:
277             y_ref[i] = a_data[i] - b;
278             break;
279           case OpType::RSubC:
280             y_ref[i] = b - a_data[i];
281             break;
282         }
283       }
284 
285       // Prepare parameters.
286       xnn_f32_default_params params;
287       if (init_params) {
288         init_params(&params);
289       }
290 
291       // Call optimized micro-kernel.
292       vbinaryc(batch_size() * sizeof(float), a_data, &b, y.data(), init_params != nullptr ? &params : nullptr);
293 
294       // Verify results.
295       for (size_t i = 0; i < batch_size(); i++) {
296         ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
297           << "at " << i << " / " << batch_size();
298       }
299     }
300   }
301 
Test(xnn_f32_vbinary_relu_ukernel_function vbinaryc_relu,OpType op_type)302   void Test(xnn_f32_vbinary_relu_ukernel_function vbinaryc_relu, OpType op_type) const {
303     std::random_device random_device;
304     auto rng = std::mt19937(random_device());
305     std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f);
306 
307     std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
308     const float b = f32dist(rng);
309     std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
310     std::vector<float> y_ref(batch_size());
311     for (size_t iteration = 0; iteration < iterations(); iteration++) {
312       std::generate(a.begin(), a.end(), [&]() { return f32dist(rng); });
313       if (inplace()) {
314         std::generate(y.begin(), y.end(), [&]() { return f32dist(rng); });
315       } else {
316         std::fill(y.begin(), y.end(), nanf(""));
317       }
318       const float* a_data = inplace() ? y.data() : a.data();
319 
320       // Compute reference results.
321       for (size_t i = 0; i < batch_size(); i++) {
322         switch (op_type) {
323           case OpType::AddC:
324             y_ref[i] = a_data[i] + b;
325             break;
326           case OpType::DivC:
327             y_ref[i] = a_data[i] / b;
328             break;
329           case OpType::RDivC:
330             y_ref[i] = b / a_data[i];
331             break;
332           case OpType::MaxC:
333             y_ref[i] = std::max<float>(a_data[i], b);
334             break;
335           case OpType::MinC:
336             y_ref[i] = std::min<float>(a_data[i], b);
337             break;
338           case OpType::MulC:
339             y_ref[i] = a_data[i] * b;
340             break;
341           case OpType::SqrDiffC:
342           {
343             const float diff = a_data[i] - b;
344             y_ref[i] = diff * diff;
345             break;
346           }
347           case OpType::SubC:
348             y_ref[i] = a_data[i] - b;
349             break;
350           case OpType::RSubC:
351             y_ref[i] = b - a_data[i];
352             break;
353         }
354       }
355       for (size_t i = 0; i < batch_size(); i++) {
356         y_ref[i] = std::max(y_ref[i], 0.0f);
357       }
358 
359       // Call optimized micro-kernel.
360       vbinaryc_relu(batch_size() * sizeof(float), a_data, &b, y.data(), nullptr);
361 
362       // Verify results.
363       for (size_t i = 0; i < batch_size(); i++) {
364         ASSERT_GE(y[i], 0.0f)
365           << "at " << i << " / " << batch_size();
366         ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
367           << "at " << i << " / " << batch_size();
368       }
369     }
370   }
371 
Test(xnn_f32_vbinary_minmax_ukernel_function vbinaryc_minmax,OpType op_type,xnn_init_f32_minmax_params_fn init_params)372   void Test(xnn_f32_vbinary_minmax_ukernel_function vbinaryc_minmax, OpType op_type, xnn_init_f32_minmax_params_fn init_params) const {
373     std::random_device random_device;
374     auto rng = std::mt19937(random_device());
375     std::uniform_real_distribution<float> f32dist;
376 
377     std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
378     const float b = f32dist(rng);
379     std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
380     std::vector<float> y_ref(batch_size());
381     for (size_t iteration = 0; iteration < iterations(); iteration++) {
382       std::generate(a.begin(), a.end(), [&]() { return f32dist(rng); });
383       if (inplace()) {
384         std::generate(y.begin(), y.end(), [&]() { return f32dist(rng); });
385       } else {
386         std::fill(y.begin(), y.end(), nanf(""));
387       }
388       const float* a_data = inplace() ? y.data() : a.data();
389 
390       // Compute reference results.
391       for (size_t i = 0; i < batch_size(); i++) {
392         switch (op_type) {
393           case OpType::AddC:
394             y_ref[i] = a_data[i] + b;
395             break;
396           case OpType::DivC:
397             y_ref[i] = a_data[i] / b;
398             break;
399           case OpType::RDivC:
400             y_ref[i] = b / a_data[i];
401             break;
402           case OpType::MaxC:
403             y_ref[i] = std::max<float>(a_data[i], b);
404             break;
405           case OpType::MinC:
406             y_ref[i] = std::min<float>(a_data[i], b);
407             break;
408           case OpType::MulC:
409             y_ref[i] = a_data[i] * b;
410             break;
411           case OpType::SqrDiffC:
412           {
413             const float diff = a_data[i] - b;
414             y_ref[i] = diff * diff;
415             break;
416           }
417           case OpType::SubC:
418             y_ref[i] = a_data[i] - b;
419             break;
420           case OpType::RSubC:
421             y_ref[i] = b - a_data[i];
422             break;
423         }
424       }
425       const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend());
426       const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend());
427       const float accumulated_range = accumulated_max - accumulated_min;
428       const float y_max = accumulated_range > 0.0f ?
429         (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) :
430         +std::numeric_limits<float>::infinity();
431       const float y_min = accumulated_range > 0.0f ?
432         (accumulated_min + accumulated_range / 255.0f * float(qmin())) :
433         -std::numeric_limits<float>::infinity();
434       for (size_t i = 0; i < batch_size(); i++) {
435         y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min);
436       }
437 
438       // Prepare parameters.
439       xnn_f32_minmax_params params;
440       init_params(&params, y_min, y_max);
441 
442       // Call optimized micro-kernel.
443       vbinaryc_minmax(batch_size() * sizeof(float), a_data, &b, y.data(), &params);
444 
445       // Verify results.
446       for (size_t i = 0; i < batch_size(); i++) {
447         ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
448           << "at " << i << " / " << batch_size();
449       }
450     }
451   }
452 
453  private:
454   size_t batch_size_{1};
455   bool inplace_{false};
456   uint8_t qmin_{0};
457   uint8_t qmax_{255};
458   size_t iterations_{15};
459 };
460