xref: /aosp_15_r20/external/libvpx/test/hadamard_test.cc (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1 /*
2  *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <algorithm>
12 
13 #include "gtest/gtest.h"
14 
15 #include "./vpx_dsp_rtcd.h"
16 #include "vpx_ports/vpx_timer.h"
17 
18 #include "test/acm_random.h"
19 #include "test/register_state_check.h"
20 #include "vpx_config.h"
21 
22 namespace {
23 
24 using ::libvpx_test::ACMRandom;
25 
26 typedef void (*HadamardFunc)(const int16_t *a, ptrdiff_t a_stride,
27                              tran_low_t *b);
28 
hadamard_loop(const tran_low_t * a,tran_low_t * out)29 void hadamard_loop(const tran_low_t *a, tran_low_t *out) {
30   tran_low_t b[8];
31   for (int i = 0; i < 8; i += 2) {
32     b[i + 0] = a[i * 8] + a[(i + 1) * 8];
33     b[i + 1] = a[i * 8] - a[(i + 1) * 8];
34   }
35   tran_low_t c[8];
36   for (int i = 0; i < 8; i += 4) {
37     c[i + 0] = b[i + 0] + b[i + 2];
38     c[i + 1] = b[i + 1] + b[i + 3];
39     c[i + 2] = b[i + 0] - b[i + 2];
40     c[i + 3] = b[i + 1] - b[i + 3];
41   }
42   out[0] = c[0] + c[4];
43   out[7] = c[1] + c[5];
44   out[3] = c[2] + c[6];
45   out[4] = c[3] + c[7];
46   out[2] = c[0] - c[4];
47   out[6] = c[1] - c[5];
48   out[1] = c[2] - c[6];
49   out[5] = c[3] - c[7];
50 }
51 
reference_hadamard8x8(const int16_t * a,int a_stride,tran_low_t * b)52 void reference_hadamard8x8(const int16_t *a, int a_stride, tran_low_t *b) {
53   tran_low_t input[64];
54   tran_low_t buf[64];
55   for (int i = 0; i < 8; ++i) {
56     for (int j = 0; j < 8; ++j) {
57       input[i * 8 + j] = static_cast<tran_low_t>(a[i * a_stride + j]);
58     }
59   }
60   for (int i = 0; i < 8; ++i) hadamard_loop(input + i, buf + i * 8);
61   for (int i = 0; i < 8; ++i) hadamard_loop(buf + i, b + i * 8);
62 }
63 
reference_hadamard16x16(const int16_t * a,int a_stride,tran_low_t * b)64 void reference_hadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) {
65   /* The source is a 16x16 block. The destination is rearranged to 8x32.
66    * Input is 9 bit. */
67   reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
68   reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
69   reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
70   reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);
71 
72   /* Overlay the 8x8 blocks and combine. */
73   for (int i = 0; i < 64; ++i) {
74     /* 8x8 steps the range up to 15 bits. */
75     const tran_low_t a0 = b[0];
76     const tran_low_t a1 = b[64];
77     const tran_low_t a2 = b[128];
78     const tran_low_t a3 = b[192];
79 
80     /* Prevent the result from escaping int16_t. */
81     const tran_low_t b0 = (a0 + a1) >> 1;
82     const tran_low_t b1 = (a0 - a1) >> 1;
83     const tran_low_t b2 = (a2 + a3) >> 1;
84     const tran_low_t b3 = (a2 - a3) >> 1;
85 
86     /* Store a 16 bit value. */
87     b[0] = b0 + b2;
88     b[64] = b1 + b3;
89     b[128] = b0 - b2;
90     b[192] = b1 - b3;
91 
92     ++b;
93   }
94 }
95 
reference_hadamard32x32(const int16_t * a,int a_stride,tran_low_t * b)96 void reference_hadamard32x32(const int16_t *a, int a_stride, tran_low_t *b) {
97   reference_hadamard16x16(a + 0 + 0 * a_stride, a_stride, b + 0);
98   reference_hadamard16x16(a + 16 + 0 * a_stride, a_stride, b + 256);
99   reference_hadamard16x16(a + 0 + 16 * a_stride, a_stride, b + 512);
100   reference_hadamard16x16(a + 16 + 16 * a_stride, a_stride, b + 768);
101 
102   for (int i = 0; i < 256; ++i) {
103     const tran_low_t a0 = b[0];
104     const tran_low_t a1 = b[256];
105     const tran_low_t a2 = b[512];
106     const tran_low_t a3 = b[768];
107 
108     const tran_low_t b0 = (a0 + a1) >> 2;
109     const tran_low_t b1 = (a0 - a1) >> 2;
110     const tran_low_t b2 = (a2 + a3) >> 2;
111     const tran_low_t b3 = (a2 - a3) >> 2;
112 
113     b[0] = b0 + b2;
114     b[256] = b1 + b3;
115     b[512] = b0 - b2;
116     b[768] = b1 - b3;
117 
118     ++b;
119   }
120 }
121 
122 struct HadamardFuncWithSize {
HadamardFuncWithSize__anon54e2e8ff0111::HadamardFuncWithSize123   HadamardFuncWithSize(HadamardFunc f, int s) : func(f), block_size(s) {}
124   HadamardFunc func;
125   int block_size;
126 };
127 
operator <<(std::ostream & os,const HadamardFuncWithSize & hfs)128 std::ostream &operator<<(std::ostream &os, const HadamardFuncWithSize &hfs) {
129   return os << "block size: " << hfs.block_size;
130 }
131 
132 class HadamardTestBase : public ::testing::TestWithParam<HadamardFuncWithSize> {
133  public:
SetUp()134   void SetUp() override {
135     h_func_ = GetParam().func;
136     bwh_ = GetParam().block_size;
137     block_size_ = bwh_ * bwh_;
138     rnd_.Reset(ACMRandom::DeterministicSeed());
139   }
140 
141   // The Rand() function generates values in the range [-((1 << BitDepth) - 1),
142   // (1 << BitDepth) - 1]. This is because the input to the Hadamard transform
143   // is the residual pixel, which is defined as 'source pixel - predicted
144   // pixel'. Source pixel and predicted pixel take values in the range
145   // [0, (1 << BitDepth) - 1] and thus the residual pixel ranges from
146   // -((1 << BitDepth) - 1) to ((1 << BitDepth) - 1).
147   virtual int16_t Rand() = 0;
148 
ReferenceHadamard(const int16_t * a,int a_stride,tran_low_t * b,int bwh)149   void ReferenceHadamard(const int16_t *a, int a_stride, tran_low_t *b,
150                          int bwh) {
151     if (bwh == 32)
152       reference_hadamard32x32(a, a_stride, b);
153     else if (bwh == 16)
154       reference_hadamard16x16(a, a_stride, b);
155     else
156       reference_hadamard8x8(a, a_stride, b);
157   }
158 
CompareReferenceRandom()159   void CompareReferenceRandom() {
160     const int kMaxBlockSize = 32 * 32;
161     DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize]);
162     DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]);
163     memset(a, 0, sizeof(a));
164     memset(b, 0, sizeof(b));
165 
166     tran_low_t b_ref[kMaxBlockSize];
167     memset(b_ref, 0, sizeof(b_ref));
168 
169     for (int i = 0; i < block_size_; ++i) a[i] = Rand();
170 
171     ReferenceHadamard(a, bwh_, b_ref, bwh_);
172     ASM_REGISTER_STATE_CHECK(h_func_(a, bwh_, b));
173 
174     // The order of the output is not important. Sort before checking.
175     std::sort(b, b + block_size_);
176     std::sort(b_ref, b_ref + block_size_);
177     EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
178   }
179 
ExtremeValuesTest()180   void ExtremeValuesTest() {
181     const int kMaxBlockSize = 32 * 32;
182     DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxBlockSize]);
183     DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]);
184     memset(b, 0, sizeof(b));
185 
186     tran_low_t b_ref[kMaxBlockSize];
187     memset(b_ref, 0, sizeof(b_ref));
188 
189     for (int i = 0; i < 2; ++i) {
190       // Initialize a test block with input range [-mask_, mask_].
191       const int sign = (i == 0) ? 1 : -1;
192       for (int j = 0; j < kMaxBlockSize; ++j)
193         input_extreme_block[j] = sign * 255;
194 
195       ReferenceHadamard(input_extreme_block, bwh_, b_ref, bwh_);
196       ASM_REGISTER_STATE_CHECK(h_func_(input_extreme_block, bwh_, b));
197 
198       // The order of the output is not important. Sort before checking.
199       std::sort(b, b + block_size_);
200       std::sort(b_ref, b_ref + block_size_);
201       EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
202     }
203   }
204 
VaryStride()205   void VaryStride() {
206     const int kMaxBlockSize = 32 * 32;
207     DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize * 8]);
208     DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]);
209     memset(a, 0, sizeof(a));
210     for (int i = 0; i < block_size_ * 8; ++i) a[i] = Rand();
211 
212     tran_low_t b_ref[kMaxBlockSize];
213     for (int i = 8; i < 64; i += 8) {
214       memset(b, 0, sizeof(b));
215       memset(b_ref, 0, sizeof(b_ref));
216 
217       ReferenceHadamard(a, i, b_ref, bwh_);
218       ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
219 
220       // The order of the output is not important. Sort before checking.
221       std::sort(b, b + block_size_);
222       std::sort(b_ref, b_ref + block_size_);
223       EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
224     }
225   }
226 
SpeedTest(int times)227   void SpeedTest(int times) {
228     const int kMaxBlockSize = 32 * 32;
229     DECLARE_ALIGNED(16, int16_t, input[kMaxBlockSize]);
230     DECLARE_ALIGNED(16, tran_low_t, output[kMaxBlockSize]);
231     memset(input, 1, sizeof(input));
232     memset(output, 0, sizeof(output));
233 
234     vpx_usec_timer timer;
235     vpx_usec_timer_start(&timer);
236     for (int i = 0; i < times; ++i) {
237       h_func_(input, bwh_, output);
238     }
239     vpx_usec_timer_mark(&timer);
240 
241     const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
242     printf("Hadamard%dx%d[%12d runs]: %d us\n", bwh_, bwh_, times,
243            elapsed_time);
244   }
245 
246  protected:
247   int bwh_;
248   int block_size_;
249   HadamardFunc h_func_;
250   ACMRandom rnd_;
251 };
252 
253 class HadamardLowbdTest : public HadamardTestBase {
254  protected:
255   // Use values between -255 (0xFF01) and 255 (0x00FF)
Rand()256   int16_t Rand() override {
257     int16_t src = rnd_.Rand8();
258     int16_t pred = rnd_.Rand8();
259     return src - pred;
260   }
261 };
262 
// Random low-bitdepth residuals: function under test vs. reference.
TEST_P(HadamardLowbdTest, CompareReferenceRandom) {
  this->CompareReferenceRandom();
}
264 
// Constant blocks of +255 and -255: function under test vs. reference.
TEST_P(HadamardLowbdTest, ExtremeValuesTest) {
  this->ExtremeValuesTest();
}
266 
// Several input strides: function under test vs. reference.
TEST_P(HadamardLowbdTest, VaryStride) {
  this->VaryStride();
}
268 
// Timing runs at three iteration counts. The DISABLED_ prefix keeps this out
// of the default test run.
TEST_P(HadamardLowbdTest, DISABLED_Speed) {
  const int kRunCounts[] = { 10, 10000, 10000000 };
  for (const int runs : kRunCounts) {
    SpeedTest(runs);
  }
}
274 
275 INSTANTIATE_TEST_SUITE_P(
276     C, HadamardLowbdTest,
277     ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_c, 8),
278                       HadamardFuncWithSize(&vpx_hadamard_16x16_c, 16),
279                       HadamardFuncWithSize(&vpx_hadamard_32x32_c, 32)));
280 
#if HAVE_SSE2
// SSE2 implementations: 8x8, 16x16 and 32x32.
INSTANTIATE_TEST_SUITE_P(
    SSE2, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(vpx_hadamard_8x8_sse2, 8),
                      HadamardFuncWithSize(vpx_hadamard_16x16_sse2, 16),
                      HadamardFuncWithSize(vpx_hadamard_32x32_sse2, 32)));
#endif  // HAVE_SSE2
288 
#if HAVE_AVX2
// AVX2 implementations: only 16x16 and 32x32 are registered.
INSTANTIATE_TEST_SUITE_P(
    AVX2, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(vpx_hadamard_16x16_avx2, 16),
                      HadamardFuncWithSize(vpx_hadamard_32x32_avx2, 32)));
#endif  // HAVE_AVX2
295 
#if HAVE_SSSE3 && VPX_ARCH_X86_64
// SSSE3 implementation: only 8x8 is registered, and only on x86-64 builds.
INSTANTIATE_TEST_SUITE_P(
    SSSE3, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(vpx_hadamard_8x8_ssse3, 8)));
#endif  // HAVE_SSSE3 && VPX_ARCH_X86_64
301 
#if HAVE_NEON
// NEON implementations: 8x8, 16x16 and 32x32.
INSTANTIATE_TEST_SUITE_P(
    NEON, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(vpx_hadamard_8x8_neon, 8),
                      HadamardFuncWithSize(vpx_hadamard_16x16_neon, 16),
                      HadamardFuncWithSize(vpx_hadamard_32x32_neon, 32)));
#endif  // HAVE_NEON
309 
// TODO(jingning): Remove highbitdepth flag when the SIMD functions are
// in place and turn on the unit test.
#if !CONFIG_VP9_HIGHBITDEPTH && HAVE_MSA
// MSA implementations: only 8x8 and 16x16 are registered, and only when high
// bitdepth is disabled.
INSTANTIATE_TEST_SUITE_P(
    MSA, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(vpx_hadamard_8x8_msa, 8),
                      HadamardFuncWithSize(vpx_hadamard_16x16_msa, 16)));
#endif  // !CONFIG_VP9_HIGHBITDEPTH && HAVE_MSA
320 
#if HAVE_VSX
// VSX implementations: only 8x8 and 16x16 are registered.
INSTANTIATE_TEST_SUITE_P(
    VSX, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(vpx_hadamard_8x8_vsx, 8),
                      HadamardFuncWithSize(vpx_hadamard_16x16_vsx, 16)));
#endif  // HAVE_VSX
327 
#if HAVE_LSX
// LSX implementations: only 8x8 and 16x16 are registered.
INSTANTIATE_TEST_SUITE_P(
    LSX, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(vpx_hadamard_8x8_lsx, 8),
                      HadamardFuncWithSize(vpx_hadamard_16x16_lsx, 16)));
#endif  // HAVE_LSX
334 
335 #if CONFIG_VP9_HIGHBITDEPTH
336 class HadamardHighbdTest : public HadamardTestBase {
337  protected:
338   // Use values between -4095 (0xF001) and 4095 (0x0FFF)
Rand()339   int16_t Rand() override {
340     int16_t src = rnd_.Rand12();
341     int16_t pred = rnd_.Rand12();
342     return src - pred;
343   }
344 };
345 
// Random high-bitdepth residuals: function under test vs. reference.
TEST_P(HadamardHighbdTest, CompareReferenceRandom) {
  this->CompareReferenceRandom();
}
347 
// Several input strides: function under test vs. reference.
TEST_P(HadamardHighbdTest, VaryStride) {
  this->VaryStride();
}
349 
// Timing runs at three iteration counts. The DISABLED_ prefix keeps this out
// of the default test run.
TEST_P(HadamardHighbdTest, DISABLED_Speed) {
  const int kRunCounts[] = { 10, 10000, 10000000 };
  for (const int runs : kRunCounts) {
    SpeedTest(runs);
  }
}
355 
356 INSTANTIATE_TEST_SUITE_P(
357     C, HadamardHighbdTest,
358     ::testing::Values(HadamardFuncWithSize(&vpx_highbd_hadamard_8x8_c, 8),
359                       HadamardFuncWithSize(&vpx_highbd_hadamard_16x16_c, 16),
360                       HadamardFuncWithSize(&vpx_highbd_hadamard_32x32_c, 32)));
361 
#if HAVE_AVX2
// High-bitdepth AVX2 implementations: 8x8, 16x16 and 32x32.
INSTANTIATE_TEST_SUITE_P(
    AVX2, HadamardHighbdTest,
    ::testing::Values(HadamardFuncWithSize(vpx_highbd_hadamard_8x8_avx2, 8),
                      HadamardFuncWithSize(vpx_highbd_hadamard_16x16_avx2, 16),
                      HadamardFuncWithSize(vpx_highbd_hadamard_32x32_avx2,
                                           32)));
#endif  // HAVE_AVX2
370 
#if HAVE_NEON
// High-bitdepth NEON implementations: 8x8, 16x16 and 32x32.
INSTANTIATE_TEST_SUITE_P(
    NEON, HadamardHighbdTest,
    ::testing::Values(HadamardFuncWithSize(vpx_highbd_hadamard_8x8_neon, 8),
                      HadamardFuncWithSize(vpx_highbd_hadamard_16x16_neon, 16),
                      HadamardFuncWithSize(vpx_highbd_hadamard_32x32_neon,
                                           32)));
#endif  // HAVE_NEON
379 
380 #endif  // CONFIG_VP9_HIGHBITDEPTH
381 }  // namespace
382