xref: /aosp_15_r20/external/XNNPACK/test/transpose-microkernel-tester.h (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #pragma once
7 
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <numeric>
#include <vector>

#include <gtest/gtest.h>

#include <xnnpack.h>
#include <xnnpack/microfnptr.h>
19 
20 
21 class TransposeMicrokernelTester {
22  public:
element_size(size_t element_size)23   inline TransposeMicrokernelTester& element_size(size_t element_size) {
24     assert(element_size != 0);
25     this->element_size_ = element_size;
26     return *this;
27   }
28 
element_size()29   inline size_t element_size() const { return this->element_size_; }
30 
block_height(size_t block_height)31   inline TransposeMicrokernelTester& block_height(size_t block_height) {
32     assert(block_height != 0);
33     this->block_height_ = block_height;
34     return *this;
35   }
36 
block_height()37   inline size_t block_height() const { return this->block_height_; }
38 
block_width(size_t block_width)39   inline TransposeMicrokernelTester& block_width(size_t block_width) {
40     assert(block_width != 0);
41     this->block_width_ = block_width;
42     return *this;
43   }
44 
block_width()45   inline size_t block_width() const { return this->block_width_; }
46 
input_stride(size_t input_stride)47   inline TransposeMicrokernelTester& input_stride(size_t input_stride) {
48     this->input_stride_ = input_stride;
49     return *this;
50   }
51 
input_stride()52   inline size_t input_stride() const { return this->input_stride_; }
53 
output_stride(size_t output_stride)54   inline TransposeMicrokernelTester& output_stride(size_t output_stride) {
55     this->output_stride_ = output_stride;
56     return *this;
57   }
58 
output_stride()59   inline size_t output_stride() const { return this->output_stride_; }
60 
input_element_stride(size_t input_element_stride)61   inline TransposeMicrokernelTester& input_element_stride(size_t input_element_stride) {
62     assert(input_element_stride >=  element_size_);
63     this->input_element_stride_ = input_element_stride;
64     return *this;
65   }
66 
input_element_stride()67   inline size_t input_element_stride() const {
68     if (input_element_stride_ == 0) {
69       return element_size_;
70     } else {
71       return input_element_stride_;
72     }
73   }
74 
output_element_stride(size_t output_element_stride)75   inline TransposeMicrokernelTester& output_element_stride(size_t output_element_stride) {
76     assert(output_element_stride >=  element_size_);
77     this->output_element_stride_ = output_element_stride;
78     return *this;
79   }
80 
output_element_stride()81   inline size_t output_element_stride() const {
82     if (output_element_stride_ == 0) {
83       return element_size_;
84     } else {
85       return output_element_stride_;
86     }
87   }
88 
iterations(size_t iterations)89   inline TransposeMicrokernelTester& iterations(size_t iterations) {
90     this->iterations_ = iterations;
91     return *this;
92   }
93 
iterations()94   inline size_t iterations() const { return this->iterations_; }
95 
Test(xnn_transposev_ukernel_function transpose)96   void Test(xnn_transposev_ukernel_function transpose) const {
97     std::vector<uint8_t> input(input_stride() * block_height() * input_element_stride() + XNN_EXTRA_BYTES);
98     std::vector<uint8_t> output(output_stride() * block_width() * output_element_stride());
99     std::iota(input.begin(), input.end(), 0);
100     std::fill(output.begin(), output.end(), UINT8_C(0xA5));
101 
102     // Call optimized micro-kernel.
103     transpose(input.data(),
104               output.data(),
105               input_stride() * input_element_stride(),
106               output_stride() * output_element_stride(),
107               input_element_stride(),
108               output_element_stride(),
109               element_size(),
110               block_width(),
111               block_height());
112 
113     // Verify results.
114     for (size_t c = 0; c < block_width(); c++) {
115       for (size_t r = 0; r < block_height(); r++) {
116         ASSERT_EQ(std::memcmp(&input[input_element_stride() * (c+ r * input_stride())],
117                               &output[output_element_stride() * (r + c * output_stride())],
118                               element_size()), 0)
119             << "at row " << r << " / " << block_height()
120             << ", at column " << c << " / " << block_width();
121       }
122     }
123   }
124 
Test(xnn_x64_transposec_ukernel_function transpose)125   void Test(xnn_x64_transposec_ukernel_function transpose) const {
126     std::vector<uint64_t> input(input_stride() * output_stride() + XNN_EXTRA_BYTES / sizeof(uint64_t));
127     std::vector<uint64_t> output(input_stride() * output_stride());
128     for (size_t iteration = 0; iteration < iterations(); iteration++) {
129       std::iota(input.begin(), input.end(), 0);
130       std::fill(output.begin(), output.end(), UINT64_C(0xBADC0FFEE0DDF00D));
131 
132       // Call optimized micro-kernel.
133       transpose(input.data(),
134                 output.data(),
135                 input_stride() * sizeof(uint64_t),
136                 output_stride() * sizeof(uint64_t),
137                 block_width(),
138                 block_height());
139 
140       // Verify results.
141       for (size_t c = 0; c < block_width(); c++) {
142         for (size_t r = 0; r < block_height(); r++) {
143           ASSERT_EQ(input[c + r * input_stride()], output[r + c * output_stride()])
144               << "at row " << r << " / " << block_height()
145               << ", at column " << c << " / " << block_width();
146         }
147       }
148     }
149   }
150 
Test(xnn_x32_transposec_ukernel_function transpose)151   void Test(xnn_x32_transposec_ukernel_function transpose) const {
152     std::vector<uint32_t> input(input_stride() * output_stride() + XNN_EXTRA_BYTES / sizeof(uint32_t));
153     std::vector<uint32_t> output(input_stride() * output_stride());
154     for (size_t iteration = 0; iteration < iterations(); iteration++) {
155       std::iota(input.begin(), input.end(), 0);
156       std::fill(output.begin(), output.end(), UINT32_C(0xDEADBEEF));
157 
158       // Call optimized micro-kernel.
159       transpose(input.data(),
160                 output.data(),
161                 input_stride() * sizeof(uint32_t),
162                 output_stride() * sizeof(uint32_t),
163                 block_width(),
164                 block_height());
165 
166       // Verify results.
167       for (size_t c = 0; c < block_width(); c++) {
168         for (size_t r = 0; r < block_height(); r++) {
169           ASSERT_EQ(input[c + r * input_stride()], output[r + c * output_stride()])
170               << "at row " << r << " / " << block_height()
171               << ", at column " << c << " / " << block_width();
172         }
173       }
174     }
175   }
176 
Test(xnn_x24_transposec_ukernel_function transpose)177   void Test(xnn_x24_transposec_ukernel_function transpose) const {
178     std::vector<uint8_t> input(input_stride() * output_stride() * element_size() + XNN_EXTRA_BYTES);
179     std::vector<uint8_t> output(input_stride() * output_stride() * element_size());
180     std::iota(input.begin(), input.end(), 0);
181     std::fill(output.begin(), output.end(), UINT8_C(0xA5));
182 
183     // Call optimized micro-kernel.
184     transpose(input.data(),
185               output.data(),
186               input_stride() * element_size(),
187               output_stride() * element_size(),
188               block_width(),
189               block_height());
190 
191     // Verify results.
192     for (size_t c = 0; c < block_width(); c++) {
193       for (size_t r = 0; r < block_height(); r++) {
194         ASSERT_EQ(std::memcmp(&input[element_size() * (c+ r * input_stride())],
195                               &output[element_size() * (r + c * output_stride())],
196                               element_size()), 0)
197             << "at row " << r << " / " << block_height()
198             << ", at column " << c << " / " << block_width();
199       }
200     }
201   }
202 
Test(xnn_x16_transposec_ukernel_function transpose)203   void Test(xnn_x16_transposec_ukernel_function transpose) const {
204     std::vector<uint16_t> input(input_stride() * output_stride() + XNN_EXTRA_BYTES / sizeof(uint16_t));
205     std::vector<uint16_t> output(input_stride() * output_stride());
206     for (size_t iteration = 0; iteration < iterations(); iteration++) {
207       std::iota(input.begin(), input.end(), 0);
208       std::fill(output.begin(), output.end(), UINT16_C(0xDEAD));
209 
210       // Call optimized micro-kernel.
211       transpose(input.data(),
212                 output.data(),
213                 input_stride() * sizeof(uint16_t),
214                 output_stride() * sizeof(uint16_t),
215                 block_width(),
216                 block_height());
217 
218       // Verify results.
219       for (size_t c = 0; c < block_width(); c++) {
220         for (size_t r = 0; r < block_height(); r++) {
221           ASSERT_EQ(input[c + r * input_stride()], output[r + c * output_stride()])
222               << "at row " << r << " / " << block_height()
223               << ", at column " << c << " / " << block_width();
224         }
225       }
226     }
227   }
228 
Test(xnn_x8_transposec_ukernel_function transpose)229   void Test(xnn_x8_transposec_ukernel_function transpose) const {
230     std::vector<uint8_t> input(input_stride() * output_stride() + XNN_EXTRA_BYTES);
231     std::vector<uint8_t> output(input_stride() * output_stride());
232     for (size_t iteration = 0; iteration < iterations(); iteration++) {
233       std::iota(input.begin(), input.end(), 0);
234       std::fill(output.begin(), output.end(), UINT8_C(0xA5));
235 
236       // Call optimized micro-kernel.
237       transpose(input.data(),
238                 output.data(),
239                 input_stride() * sizeof(uint8_t),
240                 output_stride() * sizeof(uint8_t),
241                 block_width(),
242                 block_height());
243 
244       // Verify results.
245       for (size_t c = 0; c < block_width(); c++) {
246         for (size_t r = 0; r < block_height(); r++) {
247           ASSERT_EQ((int)input[c + r * input_stride()], (int)output[r + c * output_stride()])
248               << "at row " << r << " / " << block_height()
249               << ", at column " << c << " / " << block_width();
250         }
251       }
252     }
253   }
254 
255  private:
256   size_t element_size_ = 1;
257   size_t input_stride_ = 1;
258   size_t output_stride_ = 1;
259   size_t input_element_stride_ = 0;
260   size_t output_element_stride_ = 0;
261   size_t block_height_ = 1;
262   size_t block_width_ = 1;
263   size_t iterations_ = 15;
264 };
265