1 // Copyright 2021 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #pragma once 7 8 #include <numeric> 9 #include <cassert> 10 #include <cstddef> 11 #include <cstdlib> 12 #include <cstring> 13 #include <vector> 14 15 #include <gtest/gtest.h> 16 17 #include <xnnpack.h> 18 #include <xnnpack/microfnptr.h> 19 20 21 class TransposeMicrokernelTester { 22 public: element_size(size_t element_size)23 inline TransposeMicrokernelTester& element_size(size_t element_size) { 24 assert(element_size != 0); 25 this->element_size_ = element_size; 26 return *this; 27 } 28 element_size()29 inline size_t element_size() const { return this->element_size_; } 30 block_height(size_t block_height)31 inline TransposeMicrokernelTester& block_height(size_t block_height) { 32 assert(block_height != 0); 33 this->block_height_ = block_height; 34 return *this; 35 } 36 block_height()37 inline size_t block_height() const { return this->block_height_; } 38 block_width(size_t block_width)39 inline TransposeMicrokernelTester& block_width(size_t block_width) { 40 assert(block_width != 0); 41 this->block_width_ = block_width; 42 return *this; 43 } 44 block_width()45 inline size_t block_width() const { return this->block_width_; } 46 input_stride(size_t input_stride)47 inline TransposeMicrokernelTester& input_stride(size_t input_stride) { 48 this->input_stride_ = input_stride; 49 return *this; 50 } 51 input_stride()52 inline size_t input_stride() const { return this->input_stride_; } 53 output_stride(size_t output_stride)54 inline TransposeMicrokernelTester& output_stride(size_t output_stride) { 55 this->output_stride_ = output_stride; 56 return *this; 57 } 58 output_stride()59 inline size_t output_stride() const { return this->output_stride_; } 60 input_element_stride(size_t input_element_stride)61 inline TransposeMicrokernelTester& input_element_stride(size_t input_element_stride) { 62 assert(input_element_stride >= element_size_); 63 this->input_element_stride_ = input_element_stride; 64 return *this; 65 } 66 input_element_stride()67 inline size_t input_element_stride() const { 68 if (input_element_stride_ == 0) { 69 return element_size_; 70 } else { 71 return input_element_stride_; 72 } 73 } 74 output_element_stride(size_t output_element_stride)75 inline TransposeMicrokernelTester& output_element_stride(size_t output_element_stride) { 76 assert(output_element_stride >= element_size_); 77 this->output_element_stride_ = output_element_stride; 78 return *this; 79 } 80 output_element_stride()81 inline size_t output_element_stride() const { 82 if (output_element_stride_ == 0) { 83 return element_size_; 84 } else { 85 return output_element_stride_; 86 } 87 } 88 iterations(size_t iterations)89 inline TransposeMicrokernelTester& iterations(size_t iterations) { 90 this->iterations_ = iterations; 91 return *this; 92 } 93 iterations()94 inline size_t iterations() const { return this->iterations_; } 95 Test(xnn_transposev_ukernel_function transpose)96 void Test(xnn_transposev_ukernel_function transpose) const { 97 std::vector<uint8_t> input(input_stride() * block_height() * input_element_stride() + XNN_EXTRA_BYTES); 98 std::vector<uint8_t> output(output_stride() * block_width() * output_element_stride()); 99 std::iota(input.begin(), input.end(), 0); 100 std::fill(output.begin(), output.end(), UINT8_C(0xA5)); 101 102 // Call optimized micro-kernel. 103 transpose(input.data(), 104 output.data(), 105 input_stride() * input_element_stride(), 106 output_stride() * output_element_stride(), 107 input_element_stride(), 108 output_element_stride(), 109 element_size(), 110 block_width(), 111 block_height()); 112 113 // Verify results. 114 for (size_t c = 0; c < block_width(); c++) { 115 for (size_t r = 0; r < block_height(); r++) { 116 ASSERT_EQ(std::memcmp(&input[input_element_stride() * (c+ r * input_stride())], 117 &output[output_element_stride() * (r + c * output_stride())], 118 element_size()), 0) 119 << "at row " << r << " / " << block_height() 120 << ", at column " << c << " / " << block_width(); 121 } 122 } 123 } 124 Test(xnn_x64_transposec_ukernel_function transpose)125 void Test(xnn_x64_transposec_ukernel_function transpose) const { 126 std::vector<uint64_t> input(input_stride() * output_stride() + XNN_EXTRA_BYTES / sizeof(uint64_t)); 127 std::vector<uint64_t> output(input_stride() * output_stride()); 128 for (size_t iteration = 0; iteration < iterations(); iteration++) { 129 std::iota(input.begin(), input.end(), 0); 130 std::fill(output.begin(), output.end(), UINT64_C(0xBADC0FFEE0DDF00D)); 131 132 // Call optimized micro-kernel. 133 transpose(input.data(), 134 output.data(), 135 input_stride() * sizeof(uint64_t), 136 output_stride() * sizeof(uint64_t), 137 block_width(), 138 block_height()); 139 140 // Verify results. 141 for (size_t c = 0; c < block_width(); c++) { 142 for (size_t r = 0; r < block_height(); r++) { 143 ASSERT_EQ(input[c + r * input_stride()], output[r + c * output_stride()]) 144 << "at row " << r << " / " << block_height() 145 << ", at column " << c << " / " << block_width(); 146 } 147 } 148 } 149 } 150 Test(xnn_x32_transposec_ukernel_function transpose)151 void Test(xnn_x32_transposec_ukernel_function transpose) const { 152 std::vector<uint32_t> input(input_stride() * output_stride() + XNN_EXTRA_BYTES / sizeof(uint32_t)); 153 std::vector<uint32_t> output(input_stride() * output_stride()); 154 for (size_t iteration = 0; iteration < iterations(); iteration++) { 155 std::iota(input.begin(), input.end(), 0); 156 std::fill(output.begin(), output.end(), UINT32_C(0xDEADBEEF)); 157 158 // Call optimized micro-kernel. 159 transpose(input.data(), 160 output.data(), 161 input_stride() * sizeof(uint32_t), 162 output_stride() * sizeof(uint32_t), 163 block_width(), 164 block_height()); 165 166 // Verify results. 167 for (size_t c = 0; c < block_width(); c++) { 168 for (size_t r = 0; r < block_height(); r++) { 169 ASSERT_EQ(input[c + r * input_stride()], output[r + c * output_stride()]) 170 << "at row " << r << " / " << block_height() 171 << ", at column " << c << " / " << block_width(); 172 } 173 } 174 } 175 } 176 Test(xnn_x24_transposec_ukernel_function transpose)177 void Test(xnn_x24_transposec_ukernel_function transpose) const { 178 std::vector<uint8_t> input(input_stride() * output_stride() * element_size() + XNN_EXTRA_BYTES); 179 std::vector<uint8_t> output(input_stride() * output_stride() * element_size()); 180 std::iota(input.begin(), input.end(), 0); 181 std::fill(output.begin(), output.end(), UINT8_C(0xA5)); 182 183 // Call optimized micro-kernel. 184 transpose(input.data(), 185 output.data(), 186 input_stride() * element_size(), 187 output_stride() * element_size(), 188 block_width(), 189 block_height()); 190 191 // Verify results. 192 for (size_t c = 0; c < block_width(); c++) { 193 for (size_t r = 0; r < block_height(); r++) { 194 ASSERT_EQ(std::memcmp(&input[element_size() * (c+ r * input_stride())], 195 &output[element_size() * (r + c * output_stride())], 196 element_size()), 0) 197 << "at row " << r << " / " << block_height() 198 << ", at column " << c << " / " << block_width(); 199 } 200 } 201 } 202 Test(xnn_x16_transposec_ukernel_function transpose)203 void Test(xnn_x16_transposec_ukernel_function transpose) const { 204 std::vector<uint16_t> input(input_stride() * output_stride() + XNN_EXTRA_BYTES / sizeof(uint16_t)); 205 std::vector<uint16_t> output(input_stride() * output_stride()); 206 for (size_t iteration = 0; iteration < iterations(); iteration++) { 207 std::iota(input.begin(), input.end(), 0); 208 std::fill(output.begin(), output.end(), UINT16_C(0xDEAD)); 209 210 // Call optimized micro-kernel. 211 transpose(input.data(), 212 output.data(), 213 input_stride() * sizeof(uint16_t), 214 output_stride() * sizeof(uint16_t), 215 block_width(), 216 block_height()); 217 218 // Verify results. 219 for (size_t c = 0; c < block_width(); c++) { 220 for (size_t r = 0; r < block_height(); r++) { 221 ASSERT_EQ(input[c + r * input_stride()], output[r + c * output_stride()]) 222 << "at row " << r << " / " << block_height() 223 << ", at column " << c << " / " << block_width(); 224 } 225 } 226 } 227 } 228 Test(xnn_x8_transposec_ukernel_function transpose)229 void Test(xnn_x8_transposec_ukernel_function transpose) const { 230 std::vector<uint8_t> input(input_stride() * output_stride() + XNN_EXTRA_BYTES); 231 std::vector<uint8_t> output(input_stride() * output_stride()); 232 for (size_t iteration = 0; iteration < iterations(); iteration++) { 233 std::iota(input.begin(), input.end(), 0); 234 std::fill(output.begin(), output.end(), UINT8_C(0xA5)); 235 236 // Call optimized micro-kernel. 237 transpose(input.data(), 238 output.data(), 239 input_stride() * sizeof(uint8_t), 240 output_stride() * sizeof(uint8_t), 241 block_width(), 242 block_height()); 243 244 // Verify results. 245 for (size_t c = 0; c < block_width(); c++) { 246 for (size_t r = 0; r < block_height(); r++) { 247 ASSERT_EQ((int)input[c + r * input_stride()], (int)output[r + c * output_stride()]) 248 << "at row " << r << " / " << block_height() 249 << ", at column " << c << " / " << block_width(); 250 } 251 } 252 } 253 } 254 255 private: 256 size_t element_size_ = 1; 257 size_t input_stride_ = 1; 258 size_t output_stride_ = 1; 259 size_t input_element_stride_ = 0; 260 size_t output_element_stride_ = 0; 261 size_t block_height_ = 1; 262 size_t block_width_ = 1; 263 size_t iterations_ = 15; 264 }; 265