1 /*
2 * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <algorithm>
12
13 #include "gtest/gtest.h"
14
15 #include "./vpx_dsp_rtcd.h"
16 #include "vpx_ports/vpx_timer.h"
17
18 #include "test/acm_random.h"
19 #include "test/register_state_check.h"
20 #include "vpx_config.h"
21
22 namespace {
23
24 using ::libvpx_test::ACMRandom;
25
26 typedef void (*HadamardFunc)(const int16_t *a, ptrdiff_t a_stride,
27 tran_low_t *b);
28
hadamard_loop(const tran_low_t * a,tran_low_t * out)29 void hadamard_loop(const tran_low_t *a, tran_low_t *out) {
30 tran_low_t b[8];
31 for (int i = 0; i < 8; i += 2) {
32 b[i + 0] = a[i * 8] + a[(i + 1) * 8];
33 b[i + 1] = a[i * 8] - a[(i + 1) * 8];
34 }
35 tran_low_t c[8];
36 for (int i = 0; i < 8; i += 4) {
37 c[i + 0] = b[i + 0] + b[i + 2];
38 c[i + 1] = b[i + 1] + b[i + 3];
39 c[i + 2] = b[i + 0] - b[i + 2];
40 c[i + 3] = b[i + 1] - b[i + 3];
41 }
42 out[0] = c[0] + c[4];
43 out[7] = c[1] + c[5];
44 out[3] = c[2] + c[6];
45 out[4] = c[3] + c[7];
46 out[2] = c[0] - c[4];
47 out[6] = c[1] - c[5];
48 out[1] = c[2] - c[6];
49 out[5] = c[3] - c[7];
50 }
51
reference_hadamard8x8(const int16_t * a,int a_stride,tran_low_t * b)52 void reference_hadamard8x8(const int16_t *a, int a_stride, tran_low_t *b) {
53 tran_low_t input[64];
54 tran_low_t buf[64];
55 for (int i = 0; i < 8; ++i) {
56 for (int j = 0; j < 8; ++j) {
57 input[i * 8 + j] = static_cast<tran_low_t>(a[i * a_stride + j]);
58 }
59 }
60 for (int i = 0; i < 8; ++i) hadamard_loop(input + i, buf + i * 8);
61 for (int i = 0; i < 8; ++i) hadamard_loop(buf + i, b + i * 8);
62 }
63
reference_hadamard16x16(const int16_t * a,int a_stride,tran_low_t * b)64 void reference_hadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) {
65 /* The source is a 16x16 block. The destination is rearranged to 8x32.
66 * Input is 9 bit. */
67 reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
68 reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
69 reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
70 reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);
71
72 /* Overlay the 8x8 blocks and combine. */
73 for (int i = 0; i < 64; ++i) {
74 /* 8x8 steps the range up to 15 bits. */
75 const tran_low_t a0 = b[0];
76 const tran_low_t a1 = b[64];
77 const tran_low_t a2 = b[128];
78 const tran_low_t a3 = b[192];
79
80 /* Prevent the result from escaping int16_t. */
81 const tran_low_t b0 = (a0 + a1) >> 1;
82 const tran_low_t b1 = (a0 - a1) >> 1;
83 const tran_low_t b2 = (a2 + a3) >> 1;
84 const tran_low_t b3 = (a2 - a3) >> 1;
85
86 /* Store a 16 bit value. */
87 b[0] = b0 + b2;
88 b[64] = b1 + b3;
89 b[128] = b0 - b2;
90 b[192] = b1 - b3;
91
92 ++b;
93 }
94 }
95
reference_hadamard32x32(const int16_t * a,int a_stride,tran_low_t * b)96 void reference_hadamard32x32(const int16_t *a, int a_stride, tran_low_t *b) {
97 reference_hadamard16x16(a + 0 + 0 * a_stride, a_stride, b + 0);
98 reference_hadamard16x16(a + 16 + 0 * a_stride, a_stride, b + 256);
99 reference_hadamard16x16(a + 0 + 16 * a_stride, a_stride, b + 512);
100 reference_hadamard16x16(a + 16 + 16 * a_stride, a_stride, b + 768);
101
102 for (int i = 0; i < 256; ++i) {
103 const tran_low_t a0 = b[0];
104 const tran_low_t a1 = b[256];
105 const tran_low_t a2 = b[512];
106 const tran_low_t a3 = b[768];
107
108 const tran_low_t b0 = (a0 + a1) >> 2;
109 const tran_low_t b1 = (a0 - a1) >> 2;
110 const tran_low_t b2 = (a2 + a3) >> 2;
111 const tran_low_t b3 = (a2 - a3) >> 2;
112
113 b[0] = b0 + b2;
114 b[256] = b1 + b3;
115 b[512] = b0 - b2;
116 b[768] = b1 - b3;
117
118 ++b;
119 }
120 }
121
122 struct HadamardFuncWithSize {
HadamardFuncWithSize__anon54e2e8ff0111::HadamardFuncWithSize123 HadamardFuncWithSize(HadamardFunc f, int s) : func(f), block_size(s) {}
124 HadamardFunc func;
125 int block_size;
126 };
127
operator <<(std::ostream & os,const HadamardFuncWithSize & hfs)128 std::ostream &operator<<(std::ostream &os, const HadamardFuncWithSize &hfs) {
129 return os << "block size: " << hfs.block_size;
130 }
131
132 class HadamardTestBase : public ::testing::TestWithParam<HadamardFuncWithSize> {
133 public:
SetUp()134 void SetUp() override {
135 h_func_ = GetParam().func;
136 bwh_ = GetParam().block_size;
137 block_size_ = bwh_ * bwh_;
138 rnd_.Reset(ACMRandom::DeterministicSeed());
139 }
140
141 // The Rand() function generates values in the range [-((1 << BitDepth) - 1),
142 // (1 << BitDepth) - 1]. This is because the input to the Hadamard transform
143 // is the residual pixel, which is defined as 'source pixel - predicted
144 // pixel'. Source pixel and predicted pixel take values in the range
145 // [0, (1 << BitDepth) - 1] and thus the residual pixel ranges from
146 // -((1 << BitDepth) - 1) to ((1 << BitDepth) - 1).
147 virtual int16_t Rand() = 0;
148
ReferenceHadamard(const int16_t * a,int a_stride,tran_low_t * b,int bwh)149 void ReferenceHadamard(const int16_t *a, int a_stride, tran_low_t *b,
150 int bwh) {
151 if (bwh == 32)
152 reference_hadamard32x32(a, a_stride, b);
153 else if (bwh == 16)
154 reference_hadamard16x16(a, a_stride, b);
155 else
156 reference_hadamard8x8(a, a_stride, b);
157 }
158
CompareReferenceRandom()159 void CompareReferenceRandom() {
160 const int kMaxBlockSize = 32 * 32;
161 DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize]);
162 DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]);
163 memset(a, 0, sizeof(a));
164 memset(b, 0, sizeof(b));
165
166 tran_low_t b_ref[kMaxBlockSize];
167 memset(b_ref, 0, sizeof(b_ref));
168
169 for (int i = 0; i < block_size_; ++i) a[i] = Rand();
170
171 ReferenceHadamard(a, bwh_, b_ref, bwh_);
172 ASM_REGISTER_STATE_CHECK(h_func_(a, bwh_, b));
173
174 // The order of the output is not important. Sort before checking.
175 std::sort(b, b + block_size_);
176 std::sort(b_ref, b_ref + block_size_);
177 EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
178 }
179
ExtremeValuesTest()180 void ExtremeValuesTest() {
181 const int kMaxBlockSize = 32 * 32;
182 DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxBlockSize]);
183 DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]);
184 memset(b, 0, sizeof(b));
185
186 tran_low_t b_ref[kMaxBlockSize];
187 memset(b_ref, 0, sizeof(b_ref));
188
189 for (int i = 0; i < 2; ++i) {
190 // Initialize a test block with input range [-mask_, mask_].
191 const int sign = (i == 0) ? 1 : -1;
192 for (int j = 0; j < kMaxBlockSize; ++j)
193 input_extreme_block[j] = sign * 255;
194
195 ReferenceHadamard(input_extreme_block, bwh_, b_ref, bwh_);
196 ASM_REGISTER_STATE_CHECK(h_func_(input_extreme_block, bwh_, b));
197
198 // The order of the output is not important. Sort before checking.
199 std::sort(b, b + block_size_);
200 std::sort(b_ref, b_ref + block_size_);
201 EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
202 }
203 }
204
VaryStride()205 void VaryStride() {
206 const int kMaxBlockSize = 32 * 32;
207 DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize * 8]);
208 DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]);
209 memset(a, 0, sizeof(a));
210 for (int i = 0; i < block_size_ * 8; ++i) a[i] = Rand();
211
212 tran_low_t b_ref[kMaxBlockSize];
213 for (int i = 8; i < 64; i += 8) {
214 memset(b, 0, sizeof(b));
215 memset(b_ref, 0, sizeof(b_ref));
216
217 ReferenceHadamard(a, i, b_ref, bwh_);
218 ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
219
220 // The order of the output is not important. Sort before checking.
221 std::sort(b, b + block_size_);
222 std::sort(b_ref, b_ref + block_size_);
223 EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
224 }
225 }
226
SpeedTest(int times)227 void SpeedTest(int times) {
228 const int kMaxBlockSize = 32 * 32;
229 DECLARE_ALIGNED(16, int16_t, input[kMaxBlockSize]);
230 DECLARE_ALIGNED(16, tran_low_t, output[kMaxBlockSize]);
231 memset(input, 1, sizeof(input));
232 memset(output, 0, sizeof(output));
233
234 vpx_usec_timer timer;
235 vpx_usec_timer_start(&timer);
236 for (int i = 0; i < times; ++i) {
237 h_func_(input, bwh_, output);
238 }
239 vpx_usec_timer_mark(&timer);
240
241 const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
242 printf("Hadamard%dx%d[%12d runs]: %d us\n", bwh_, bwh_, times,
243 elapsed_time);
244 }
245
246 protected:
247 int bwh_;
248 int block_size_;
249 HadamardFunc h_func_;
250 ACMRandom rnd_;
251 };
252
253 class HadamardLowbdTest : public HadamardTestBase {
254 protected:
255 // Use values between -255 (0xFF01) and 255 (0x00FF)
Rand()256 int16_t Rand() override {
257 int16_t src = rnd_.Rand8();
258 int16_t pred = rnd_.Rand8();
259 return src - pred;
260 }
261 };
262
// Each test forwards to the fixture method of the same name.
TEST_P(HadamardLowbdTest, CompareReferenceRandom) {
  CompareReferenceRandom();
}

TEST_P(HadamardLowbdTest, ExtremeValuesTest) {
  ExtremeValuesTest();
}

TEST_P(HadamardLowbdTest, VaryStride) {
  VaryStride();
}

// Disabled by default; times the transform at three iteration counts.
TEST_P(HadamardLowbdTest, DISABLED_Speed) {
  const int kRunCounts[] = { 10, 10000, 10000000 };
  for (const int runs : kRunCounts) {
    SpeedTest(runs);
  }
}
274
275 INSTANTIATE_TEST_SUITE_P(
276 C, HadamardLowbdTest,
277 ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_c, 8),
278 HadamardFuncWithSize(&vpx_hadamard_16x16_c, 16),
279 HadamardFuncWithSize(&vpx_hadamard_32x32_c, 32)));
280
// x86 SIMD implementations; each group is built only when its HAVE_* flag is
// set.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(vpx_hadamard_8x8_sse2, 8),
                      HadamardFuncWithSize(vpx_hadamard_16x16_sse2, 16),
                      HadamardFuncWithSize(vpx_hadamard_32x32_sse2, 32)));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(vpx_hadamard_16x16_avx2, 16),
                      HadamardFuncWithSize(vpx_hadamard_32x32_avx2, 32)));
#endif  // HAVE_AVX2

#if HAVE_SSSE3 && VPX_ARCH_X86_64
INSTANTIATE_TEST_SUITE_P(
    SSSE3, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(vpx_hadamard_8x8_ssse3, 8)));
#endif  // HAVE_SSSE3 && VPX_ARCH_X86_64
301
// Implementations for the remaining architectures; each group is built only
// when its HAVE_* flag is set.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(vpx_hadamard_8x8_neon, 8),
                      HadamardFuncWithSize(vpx_hadamard_16x16_neon, 16),
                      HadamardFuncWithSize(vpx_hadamard_32x32_neon, 32)));
#endif  // HAVE_NEON

// TODO(jingning): Remove highbitdepth flag when the SIMD functions are
// in place and turn on the unit test.
#if !CONFIG_VP9_HIGHBITDEPTH && HAVE_MSA
INSTANTIATE_TEST_SUITE_P(
    MSA, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(vpx_hadamard_8x8_msa, 8),
                      HadamardFuncWithSize(vpx_hadamard_16x16_msa, 16)));
#endif  // !CONFIG_VP9_HIGHBITDEPTH && HAVE_MSA

#if HAVE_VSX
INSTANTIATE_TEST_SUITE_P(
    VSX, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(vpx_hadamard_8x8_vsx, 8),
                      HadamardFuncWithSize(vpx_hadamard_16x16_vsx, 16)));
#endif  // HAVE_VSX

#if HAVE_LSX
INSTANTIATE_TEST_SUITE_P(
    LSX, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(vpx_hadamard_8x8_lsx, 8),
                      HadamardFuncWithSize(vpx_hadamard_16x16_lsx, 16)));
#endif  // HAVE_LSX
334
335 #if CONFIG_VP9_HIGHBITDEPTH
336 class HadamardHighbdTest : public HadamardTestBase {
337 protected:
338 // Use values between -4095 (0xF001) and 4095 (0x0FFF)
Rand()339 int16_t Rand() override {
340 int16_t src = rnd_.Rand12();
341 int16_t pred = rnd_.Rand12();
342 return src - pred;
343 }
344 };
345
// Each test forwards to the fixture method of the same name.
TEST_P(HadamardHighbdTest, CompareReferenceRandom) {
  CompareReferenceRandom();
}

TEST_P(HadamardHighbdTest, VaryStride) {
  VaryStride();
}

// Disabled by default; times the transform at three iteration counts.
TEST_P(HadamardHighbdTest, DISABLED_Speed) {
  const int kRunCounts[] = { 10, 10000, 10000000 };
  for (const int runs : kRunCounts) {
    SpeedTest(runs);
  }
}
355
356 INSTANTIATE_TEST_SUITE_P(
357 C, HadamardHighbdTest,
358 ::testing::Values(HadamardFuncWithSize(&vpx_highbd_hadamard_8x8_c, 8),
359 HadamardFuncWithSize(&vpx_highbd_hadamard_16x16_c, 16),
360 HadamardFuncWithSize(&vpx_highbd_hadamard_32x32_c, 32)));
361
362 #if HAVE_AVX2
363 INSTANTIATE_TEST_SUITE_P(
364 AVX2, HadamardHighbdTest,
365 ::testing::Values(HadamardFuncWithSize(&vpx_highbd_hadamard_8x8_avx2, 8),
366 HadamardFuncWithSize(&vpx_highbd_hadamard_16x16_avx2, 16),
367 HadamardFuncWithSize(&vpx_highbd_hadamard_32x32_avx2,
368 32)));
369 #endif // HAVE_AVX2
370
371 #if HAVE_NEON
372 INSTANTIATE_TEST_SUITE_P(
373 NEON, HadamardHighbdTest,
374 ::testing::Values(HadamardFuncWithSize(&vpx_highbd_hadamard_8x8_neon, 8),
375 HadamardFuncWithSize(&vpx_highbd_hadamard_16x16_neon, 16),
376 HadamardFuncWithSize(&vpx_highbd_hadamard_32x32_neon,
377 32)));
378 #endif
379
380 #endif // CONFIG_VP9_HIGHBITDEPTH
381 } // namespace
382