xref: /aosp_15_r20/external/libaom/test/sum_squares_test.cc (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <cmath>
13 #include <cstdlib>
14 #include <string>
15 #include <tuple>
16 
17 #include "gtest/gtest.h"
18 
19 #include "config/aom_config.h"
20 #include "config/aom_dsp_rtcd.h"
21 
22 #include "aom_ports/mem.h"
23 #include "av1/common/common_data.h"
24 #include "test/acm_random.h"
25 #include "test/register_state_check.h"
26 #include "test/util.h"
27 #include "test/function_equivalence_test.h"
28 
29 using libaom_test::ACMRandom;
30 using libaom_test::FunctionEquivalenceTest;
31 using ::testing::Combine;
32 using ::testing::Range;
33 using ::testing::Values;
34 using ::testing::ValuesIn;
35 
36 namespace {
37 const int kNumIterations = 10000;
38 
39 static const int16_t kInt13Max = (1 << 12) - 1;
40 
41 typedef uint64_t (*SSI16Func)(const int16_t *src, int stride, int width,
42                               int height);
43 typedef libaom_test::FuncParam<SSI16Func> TestFuncs;
44 
45 class SumSquaresTest : public ::testing::TestWithParam<TestFuncs> {
46  public:
47   ~SumSquaresTest() override = default;
SetUp()48   void SetUp() override {
49     params_ = this->GetParam();
50     rnd_.Reset(ACMRandom::DeterministicSeed());
51     src_ = reinterpret_cast<int16_t *>(aom_memalign(16, 256 * 256 * 2));
52     ASSERT_NE(src_, nullptr);
53   }
54 
TearDown()55   void TearDown() override { aom_free(src_); }
56   void RunTest(bool is_random);
57   void RunSpeedTest();
58 
GenRandomData(int width,int height,int stride)59   void GenRandomData(int width, int height, int stride) {
60     const int msb = 11;  // Up to 12 bit input
61     const int limit = 1 << (msb + 1);
62     for (int ii = 0; ii < height; ii++) {
63       for (int jj = 0; jj < width; jj++) {
64         src_[ii * stride + jj] = rnd_(2) ? rnd_(limit) : -rnd_(limit);
65       }
66     }
67   }
68 
GenExtremeData(int width,int height,int stride)69   void GenExtremeData(int width, int height, int stride) {
70     const int msb = 11;  // Up to 12 bit input
71     const int limit = 1 << (msb + 1);
72     const int val = rnd_(2) ? limit - 1 : -(limit - 1);
73     for (int ii = 0; ii < height; ii++) {
74       for (int jj = 0; jj < width; jj++) {
75         src_[ii * stride + jj] = val;
76       }
77     }
78   }
79 
80  protected:
81   TestFuncs params_;
82   int16_t *src_;
83   ACMRandom rnd_;
84 };
85 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SumSquaresTest);
86 
RunTest(bool is_random)87 void SumSquaresTest::RunTest(bool is_random) {
88   int failed = 0;
89   for (int k = 0; k < kNumIterations; k++) {
90     const int width = 4 * (rnd_(31) + 1);   // Up to 128x128
91     const int height = 4 * (rnd_(31) + 1);  // Up to 128x128
92     int stride = 4 << rnd_(7);              // Up to 256 stride
93     while (stride < width) {                // Make sure it's valid
94       stride = 4 << rnd_(7);
95     }
96     if (is_random) {
97       GenRandomData(width, height, stride);
98     } else {
99       GenExtremeData(width, height, stride);
100     }
101     const uint64_t res_ref = params_.ref_func(src_, stride, width, height);
102     uint64_t res_tst;
103     API_REGISTER_STATE_CHECK(res_tst =
104                                  params_.tst_func(src_, stride, width, height));
105 
106     if (!failed) {
107       failed = res_ref != res_tst;
108       EXPECT_EQ(res_ref, res_tst)
109           << "Error: Sum Squares Test [" << width << "x" << height
110           << "] C output does not match optimized output.";
111     }
112   }
113 }
114 
RunSpeedTest()115 void SumSquaresTest::RunSpeedTest() {
116   for (int block = BLOCK_4X4; block < BLOCK_SIZES_ALL; block++) {
117     const int width = block_size_wide[block];   // Up to 128x128
118     const int height = block_size_high[block];  // Up to 128x128
119     int stride = 4 << rnd_(7);                  // Up to 256 stride
120     while (stride < width) {                    // Make sure it's valid
121       stride = 4 << rnd_(7);
122     }
123     GenExtremeData(width, height, stride);
124     const int num_loops = 1000000000 / (width + height);
125     aom_usec_timer timer;
126     aom_usec_timer_start(&timer);
127 
128     for (int i = 0; i < num_loops; ++i)
129       params_.ref_func(src_, stride, width, height);
130 
131     aom_usec_timer_mark(&timer);
132     const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
133     printf("SumSquaresTest C %3dx%-3d: %7.2f ns\n", width, height,
134            1000.0 * elapsed_time / num_loops);
135 
136     aom_usec_timer timer1;
137     aom_usec_timer_start(&timer1);
138     for (int i = 0; i < num_loops; ++i)
139       params_.tst_func(src_, stride, width, height);
140     aom_usec_timer_mark(&timer1);
141     const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
142     printf("SumSquaresTest Test %3dx%-3d: %7.2f ns\n", width, height,
143            1000.0 * elapsed_time1 / num_loops);
144   }
145 }
146 
// Equivalence check on randomly generated input (GenRandomData).
TEST_P(SumSquaresTest, OperationCheck) {
  const bool use_random_data = true;
  RunTest(use_random_data);
}

// Equivalence check on saturated input (GenExtremeData).
TEST_P(SumSquaresTest, ExtremeValues) {
  const bool use_random_data = false;
  RunTest(use_random_data);
}

// Benchmark; disabled by default through the DISABLED_ prefix.
TEST_P(SumSquaresTest, DISABLED_Speed) {
  RunSpeedTest();
}
156 
// Each instantiation pairs the C reference with one SIMD implementation and
// is compiled only when that instruction set is enabled.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, SumSquaresTest,
                         Values(TestFuncs(aom_sum_squares_2d_i16_c,
                                          aom_sum_squares_2d_i16_sse2)));
#endif  // HAVE_SSE2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, SumSquaresTest,
                         Values(TestFuncs(aom_sum_squares_2d_i16_c,
                                          aom_sum_squares_2d_i16_neon)));
#endif  // HAVE_NEON

#if HAVE_SVE
INSTANTIATE_TEST_SUITE_P(SVE, SumSquaresTest,
                         Values(TestFuncs(aom_sum_squares_2d_i16_c,
                                          aom_sum_squares_2d_i16_sve)));
#endif  // HAVE_SVE

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, SumSquaresTest,
                         Values(TestFuncs(aom_sum_squares_2d_i16_c,
                                          aom_sum_squares_2d_i16_avx2)));
#endif  // HAVE_AVX2
189 
190 //////////////////////////////////////////////////////////////////////////////
191 // 1D version
192 //////////////////////////////////////////////////////////////////////////////
193 
194 typedef uint64_t (*F1D)(const int16_t *src, uint32_t n);
195 typedef libaom_test::FuncParam<F1D> TestFuncs1D;
196 
197 class SumSquares1DTest : public FunctionEquivalenceTest<F1D> {
198  protected:
199   static const int kIterations = 1000;
200   static const int kMaxSize = 256;
201 };
202 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SumSquares1DTest);
203 
// Compares the reference and tested 1D functions on random data for
// kIterations randomly chosen lengths.
TEST_P(SumSquares1DTest, RandomValues) {
  DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]);

  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
    // Centre the draw on zero so both signs are exercised.
    for (int i = 0; i < kMaxSize * kMaxSize; ++i)
      src[i] = rng_(kInt13Max * 2 + 1) - kInt13Max;

    // Block size is between 64 and 128 * 128 and is always a multiple of 64.
    // rng_(n) is expected to return a value below n, so rng_(256) is needed
    // for the multiplier to reach 256 (n == 128 * 128); rng_(255) stopped at
    // 255 * 64.
    const int n = (rng_(256) + 1) * 64;

    const uint64_t ref_res = params_.ref_func(src, n);
    uint64_t tst_res;
    API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, n));

    ASSERT_EQ(ref_res, tst_res);
  }
}
221 
// Compares the reference and tested 1D functions on buffers filled entirely
// with +kInt13Max or -kInt13Max.
TEST_P(SumSquares1DTest, ExtremeValues) {
  DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]);

  for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
    // One draw per iteration decides the sign of the whole buffer.
    const int16_t fill =
        static_cast<int16_t>(rng_(2) ? kInt13Max : -kInt13Max);
    for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = fill;

    // Block size is between 64 and 128 * 128 and is always a multiple of 64.
    // rng_(n) is expected to return a value below n, so rng_(256) is needed
    // for the multiplier to reach 256 (n == 128 * 128); rng_(255) stopped at
    // 255 * 64.
    const int n = (rng_(256) + 1) * 64;

    const uint64_t ref_res = params_.ref_func(src, n);
    uint64_t tst_res;
    API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, n));

    ASSERT_EQ(ref_res, tst_res);
  }
}
242 
// 1D instantiations: C reference against each enabled SIMD implementation.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, SumSquares1DTest,
    Values(TestFuncs1D(&aom_sum_squares_i16_c, &aom_sum_squares_i16_sse2)));
#endif  // HAVE_SSE2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, SumSquares1DTest,
    Values(TestFuncs1D(&aom_sum_squares_i16_c, &aom_sum_squares_i16_neon)));
#endif  // HAVE_NEON

#if HAVE_SVE
INSTANTIATE_TEST_SUITE_P(
    SVE, SumSquares1DTest,
    Values(TestFuncs1D(&aom_sum_squares_i16_c, &aom_sum_squares_i16_sve)));
#endif  // HAVE_SVE
263 
264 typedef int64_t (*SSEFunc)(const uint8_t *a, int a_stride, const uint8_t *b,
265                            int b_stride, int width, int height);
266 typedef libaom_test::FuncParam<SSEFunc> TestSSEFuncs;
267 
268 typedef std::tuple<TestSSEFuncs, int> SSETestParam;
269 
270 class SSETest : public ::testing::TestWithParam<SSETestParam> {
271  public:
272   ~SSETest() override = default;
SetUp()273   void SetUp() override {
274     params_ = GET_PARAM(0);
275     width_ = GET_PARAM(1);
276     is_hbd_ =
277 #if CONFIG_AV1_HIGHBITDEPTH
278         params_.ref_func == aom_highbd_sse_c;
279 #else
280         false;
281 #endif
282     rnd_.Reset(ACMRandom::DeterministicSeed());
283     src_ = reinterpret_cast<uint8_t *>(aom_memalign(32, 256 * 256 * 2));
284     ref_ = reinterpret_cast<uint8_t *>(aom_memalign(32, 256 * 256 * 2));
285     ASSERT_NE(src_, nullptr);
286     ASSERT_NE(ref_, nullptr);
287   }
288 
TearDown()289   void TearDown() override {
290     aom_free(src_);
291     aom_free(ref_);
292   }
293   void RunTest(bool is_random, int width, int height, int run_times);
294 
GenRandomData(int width,int height,int stride)295   void GenRandomData(int width, int height, int stride) {
296     uint16_t *src16 = reinterpret_cast<uint16_t *>(src_);
297     uint16_t *ref16 = reinterpret_cast<uint16_t *>(ref_);
298     const int msb = 11;  // Up to 12 bit input
299     const int limit = 1 << (msb + 1);
300     for (int ii = 0; ii < height; ii++) {
301       for (int jj = 0; jj < width; jj++) {
302         if (!is_hbd_) {
303           src_[ii * stride + jj] = rnd_.Rand8();
304           ref_[ii * stride + jj] = rnd_.Rand8();
305         } else {
306           src16[ii * stride + jj] = rnd_(limit);
307           ref16[ii * stride + jj] = rnd_(limit);
308         }
309       }
310     }
311   }
312 
GenExtremeData(int width,int height,int stride,uint8_t * data,int16_t val)313   void GenExtremeData(int width, int height, int stride, uint8_t *data,
314                       int16_t val) {
315     uint16_t *data16 = reinterpret_cast<uint16_t *>(data);
316     for (int ii = 0; ii < height; ii++) {
317       for (int jj = 0; jj < width; jj++) {
318         if (!is_hbd_) {
319           data[ii * stride + jj] = static_cast<uint8_t>(val);
320         } else {
321           data16[ii * stride + jj] = val;
322         }
323       }
324     }
325   }
326 
327  protected:
328   bool is_hbd_;
329   int width_;
330   TestSSEFuncs params_;
331   uint8_t *src_;
332   uint8_t *ref_;
333   ACMRandom rnd_;
334 };
335 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SSETest);
336 
RunTest(bool is_random,int width,int height,int run_times)337 void SSETest::RunTest(bool is_random, int width, int height, int run_times) {
338   int failed = 0;
339   aom_usec_timer ref_timer, test_timer;
340   for (int k = 0; k < 3; k++) {
341     int stride = 4 << rnd_(7);  // Up to 256 stride
342     while (stride < width) {    // Make sure it's valid
343       stride = 4 << rnd_(7);
344     }
345     if (is_random) {
346       GenRandomData(width, height, stride);
347     } else {
348       const int msb = is_hbd_ ? 12 : 8;  // Up to 12 bit input
349       const int limit = (1 << msb) - 1;
350       if (k == 0) {
351         GenExtremeData(width, height, stride, src_, 0);
352         GenExtremeData(width, height, stride, ref_, limit);
353       } else {
354         GenExtremeData(width, height, stride, src_, limit);
355         GenExtremeData(width, height, stride, ref_, 0);
356       }
357     }
358     int64_t res_ref, res_tst;
359     uint8_t *src = src_;
360     uint8_t *ref = ref_;
361     if (is_hbd_) {
362       src = CONVERT_TO_BYTEPTR(src_);
363       ref = CONVERT_TO_BYTEPTR(ref_);
364     }
365     res_ref = params_.ref_func(src, stride, ref, stride, width, height);
366     res_tst = params_.tst_func(src, stride, ref, stride, width, height);
367     if (run_times > 1) {
368       aom_usec_timer_start(&ref_timer);
369       for (int j = 0; j < run_times; j++) {
370         params_.ref_func(src, stride, ref, stride, width, height);
371       }
372       aom_usec_timer_mark(&ref_timer);
373       const int elapsed_time_c =
374           static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
375 
376       aom_usec_timer_start(&test_timer);
377       for (int j = 0; j < run_times; j++) {
378         params_.tst_func(src, stride, ref, stride, width, height);
379       }
380       aom_usec_timer_mark(&test_timer);
381       const int elapsed_time_simd =
382           static_cast<int>(aom_usec_timer_elapsed(&test_timer));
383 
384       printf(
385           "c_time=%d \t simd_time=%d \t "
386           "gain=%d\n",
387           elapsed_time_c, elapsed_time_simd,
388           (elapsed_time_c / elapsed_time_simd));
389     } else {
390       if (!failed) {
391         failed = res_ref != res_tst;
392         EXPECT_EQ(res_ref, res_tst)
393             << "Error:" << (is_hbd_ ? "hbd " : " ") << k << " SSE Test ["
394             << width << "x" << height
395             << "] C output does not match optimized output.";
396       }
397     }
398   }
399 }
400 
// Random data, every height from 4 to 128 in steps of 4.
TEST_P(SSETest, OperationCheck) {
  for (int rows = 1; rows <= 32; ++rows) {
    RunTest(/*is_random=*/true, width_, 4 * rows, /*run_times=*/1);
  }
}

// 0-versus-max data, every height from 4 to 128 in steps of 4.
TEST_P(SSETest, ExtremeValues) {
  for (int rows = 1; rows <= 32; ++rows) {
    RunTest(/*is_random=*/false, width_, 4 * rows, /*run_times=*/1);
  }
}

// Benchmark (100 calls per measurement); disabled by default.
TEST_P(SSETest, DISABLED_Speed) {
  for (int rows = 1; rows <= 32; ++rows) {
    RunTest(/*is_random=*/true, width_, 4 * rows, /*run_times=*/100);
  }
}
418 
// Every SSETest instantiation combines a table of function pairs with the
// block widths 4, 8, ..., 128. High-bit-depth pairs are added only when
// CONFIG_AV1_HIGHBITDEPTH is enabled.
#if HAVE_NEON
const TestSSEFuncs sse_neon[] = {
  TestSSEFuncs(aom_sse_c, aom_sse_neon),
#if CONFIG_AV1_HIGHBITDEPTH
  TestSSEFuncs(aom_highbd_sse_c, aom_highbd_sse_neon)
#endif
};
INSTANTIATE_TEST_SUITE_P(NEON, SSETest,
                         Combine(ValuesIn(sse_neon), Range(4, 129, 4)));
#endif  // HAVE_NEON

#if HAVE_NEON_DOTPROD
const TestSSEFuncs sse_neon_dotprod[] = {
  TestSSEFuncs(aom_sse_c, aom_sse_neon_dotprod),
};
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SSETest,
                         Combine(ValuesIn(sse_neon_dotprod), Range(4, 129, 4)));
#endif  // HAVE_NEON_DOTPROD

#if HAVE_SSE4_1
const TestSSEFuncs sse_sse4[] = {
  TestSSEFuncs(aom_sse_c, aom_sse_sse4_1),
#if CONFIG_AV1_HIGHBITDEPTH
  TestSSEFuncs(aom_highbd_sse_c, aom_highbd_sse_sse4_1)
#endif
};
INSTANTIATE_TEST_SUITE_P(SSE4_1, SSETest,
                         Combine(ValuesIn(sse_sse4), Range(4, 129, 4)));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
const TestSSEFuncs sse_avx2[] = {
  TestSSEFuncs(aom_sse_c, aom_sse_avx2),
#if CONFIG_AV1_HIGHBITDEPTH
  TestSSEFuncs(aom_highbd_sse_c, aom_highbd_sse_avx2)
#endif
};
INSTANTIATE_TEST_SUITE_P(AVX2, SSETest,
                         Combine(ValuesIn(sse_avx2), Range(4, 129, 4)));
#endif  // HAVE_AVX2

#if HAVE_SVE
#if CONFIG_AV1_HIGHBITDEPTH
// Only a high-bit-depth SVE implementation is listed here.
const TestSSEFuncs sse_sve[] = { TestSSEFuncs(aom_highbd_sse_c,
                                              aom_highbd_sse_sve) };
INSTANTIATE_TEST_SUITE_P(SVE, SSETest,
                         Combine(ValuesIn(sse_sve), Range(4, 129, 4)));
#endif  // CONFIG_AV1_HIGHBITDEPTH
#endif  // HAVE_SVE
469 
470 //////////////////////////////////////////////////////////////////////////////
471 // get_blk sum squares test functions
472 //////////////////////////////////////////////////////////////////////////////
473 
474 typedef void (*sse_sum_func)(const int16_t *data, int stride, int bw, int bh,
475                              int *x_sum, int64_t *x2_sum);
476 typedef libaom_test::FuncParam<sse_sum_func> TestSSE_SumFuncs;
477 
478 typedef std::tuple<TestSSE_SumFuncs, TX_SIZE> SSE_SumTestParam;
479 
480 class SSE_Sum_Test : public ::testing::TestWithParam<SSE_SumTestParam> {
481  public:
482   ~SSE_Sum_Test() override = default;
SetUp()483   void SetUp() override {
484     params_ = GET_PARAM(0);
485     rnd_.Reset(ACMRandom::DeterministicSeed());
486     src_ = reinterpret_cast<int16_t *>(aom_memalign(32, 256 * 256 * 2));
487     ASSERT_NE(src_, nullptr);
488   }
489 
TearDown()490   void TearDown() override { aom_free(src_); }
491   void RunTest(bool is_random, int tx_size, int run_times);
492 
GenRandomData(int width,int height,int stride)493   void GenRandomData(int width, int height, int stride) {
494     const int msb = 11;  // Up to 12 bit input
495     const int limit = 1 << (msb + 1);
496     for (int ii = 0; ii < height; ii++) {
497       for (int jj = 0; jj < width; jj++) {
498         src_[ii * stride + jj] = rnd_(limit);
499       }
500     }
501   }
502 
GenExtremeData(int width,int height,int stride,int16_t * data,int16_t val)503   void GenExtremeData(int width, int height, int stride, int16_t *data,
504                       int16_t val) {
505     for (int ii = 0; ii < height; ii++) {
506       for (int jj = 0; jj < width; jj++) {
507         data[ii * stride + jj] = val;
508       }
509     }
510   }
511 
512  protected:
513   TestSSE_SumFuncs params_;
514   int16_t *src_;
515   ACMRandom rnd_;
516 };
517 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SSE_Sum_Test);
518 
RunTest(bool is_random,int tx_size,int run_times)519 void SSE_Sum_Test::RunTest(bool is_random, int tx_size, int run_times) {
520   aom_usec_timer ref_timer, test_timer;
521   int width = tx_size_wide[tx_size];
522   int height = tx_size_high[tx_size];
523   for (int k = 0; k < 3; k++) {
524     int stride = 4 << rnd_(7);  // Up to 256 stride
525     while (stride < width) {    // Make sure it's valid
526       stride = 4 << rnd_(7);
527     }
528     if (is_random) {
529       GenRandomData(width, height, stride);
530     } else {
531       const int msb = 12;  // Up to 12 bit input
532       const int limit = (1 << msb) - 1;
533       if (k == 0) {
534         GenExtremeData(width, height, stride, src_, limit);
535       } else {
536         GenExtremeData(width, height, stride, src_, -limit);
537       }
538     }
539     int sum_c = 0;
540     int64_t sse_intr = 0;
541     int sum_intr = 0;
542     int64_t sse_c = 0;
543 
544     params_.ref_func(src_, stride, width, height, &sum_c, &sse_c);
545     params_.tst_func(src_, stride, width, height, &sum_intr, &sse_intr);
546 
547     if (run_times > 1) {
548       aom_usec_timer_start(&ref_timer);
549       for (int j = 0; j < run_times; j++) {
550         params_.ref_func(src_, stride, width, height, &sum_c, &sse_c);
551       }
552       aom_usec_timer_mark(&ref_timer);
553       const int elapsed_time_c =
554           static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
555 
556       aom_usec_timer_start(&test_timer);
557       for (int j = 0; j < run_times; j++) {
558         params_.tst_func(src_, stride, width, height, &sum_intr, &sse_intr);
559       }
560       aom_usec_timer_mark(&test_timer);
561       const int elapsed_time_simd =
562           static_cast<int>(aom_usec_timer_elapsed(&test_timer));
563 
564       printf(
565           "c_time=%d \t simd_time=%d \t "
566           "gain=%f\t width=%d\t height=%d \n",
567           elapsed_time_c, elapsed_time_simd,
568           (float)((float)elapsed_time_c / (float)elapsed_time_simd), width,
569           height);
570 
571     } else {
572       EXPECT_EQ(sum_c, sum_intr)
573           << "Error:" << k << " SSE Sum Test [" << width << "x" << height
574           << "] C output does not match optimized output.";
575       EXPECT_EQ(sse_c, sse_intr)
576           << "Error:" << k << " SSE Sum Test [" << width << "x" << height
577           << "] C output does not match optimized output.";
578     }
579   }
580 }
581 
// Random data, single run, results compared.
TEST_P(SSE_Sum_Test, OperationCheck) {
  const TX_SIZE tx_size = GET_PARAM(1);
  RunTest(/*is_random=*/true, tx_size, /*run_times=*/1);
}

// Constant extreme data, single run, results compared.
TEST_P(SSE_Sum_Test, ExtremeValues) {
  const TX_SIZE tx_size = GET_PARAM(1);
  RunTest(/*is_random=*/false, tx_size, /*run_times=*/1);
}

// Benchmark (10000 calls per measurement); disabled by default.
TEST_P(SSE_Sum_Test, DISABLED_Speed) {
  const TX_SIZE tx_size = GET_PARAM(1);
  RunTest(/*is_random=*/true, tx_size, /*run_times=*/10000);
}
589 
// Transform sizes exercised by SSE_Sum_Test. The guard lists every
// instruction set whose instantiation below uses the table; HAVE_SVE is
// included because the SVE instantiation references it as well.
#if HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON || HAVE_SVE
const TX_SIZE kValidBlockSize[] = { TX_4X4,   TX_8X8,   TX_16X16, TX_32X32,
                                    TX_64X64, TX_4X8,   TX_8X4,   TX_8X16,
                                    TX_16X8,  TX_16X32, TX_32X16, TX_64X32,
                                    TX_32X64, TX_4X16,  TX_16X4,  TX_8X32,
                                    TX_32X8,  TX_16X64, TX_64X16 };
#endif  // HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON || HAVE_SVE

#if HAVE_SSE2
TestSSE_SumFuncs sse_sum_sse2[] = { TestSSE_SumFuncs(
    &aom_get_blk_sse_sum_c, &aom_get_blk_sse_sum_sse2) };
INSTANTIATE_TEST_SUITE_P(SSE2, SSE_Sum_Test,
                         Combine(ValuesIn(sse_sum_sse2),
                                 ValuesIn(kValidBlockSize)));
#endif  // HAVE_SSE2

#if HAVE_AVX2
TestSSE_SumFuncs sse_sum_avx2[] = { TestSSE_SumFuncs(
    &aom_get_blk_sse_sum_c, &aom_get_blk_sse_sum_avx2) };
INSTANTIATE_TEST_SUITE_P(AVX2, SSE_Sum_Test,
                         Combine(ValuesIn(sse_sum_avx2),
                                 ValuesIn(kValidBlockSize)));
#endif  // HAVE_AVX2

#if HAVE_NEON
TestSSE_SumFuncs sse_sum_neon[] = { TestSSE_SumFuncs(
    &aom_get_blk_sse_sum_c, &aom_get_blk_sse_sum_neon) };
INSTANTIATE_TEST_SUITE_P(NEON, SSE_Sum_Test,
                         Combine(ValuesIn(sse_sum_neon),
                                 ValuesIn(kValidBlockSize)));
#endif  // HAVE_NEON

#if HAVE_SVE
TestSSE_SumFuncs sse_sum_sve[] = { TestSSE_SumFuncs(&aom_get_blk_sse_sum_c,
                                                    &aom_get_blk_sse_sum_sve) };
INSTANTIATE_TEST_SUITE_P(SVE, SSE_Sum_Test,
                         Combine(ValuesIn(sse_sum_sve),
                                 ValuesIn(kValidBlockSize)));
#endif  // HAVE_SVE
629 
630 //////////////////////////////////////////////////////////////////////////////
631 // 2D Variance test functions
632 //////////////////////////////////////////////////////////////////////////////
633 
634 typedef uint64_t (*Var2DFunc)(uint8_t *src, int stride, int width, int height);
635 typedef libaom_test::FuncParam<Var2DFunc> TestFuncVar2D;
636 
637 const uint16_t test_block_size[2] = { 128, 256 };
638 
639 class Lowbd2dVarTest : public ::testing::TestWithParam<TestFuncVar2D> {
640  public:
641   ~Lowbd2dVarTest() override = default;
SetUp()642   void SetUp() override {
643     params_ = this->GetParam();
644     rnd_.Reset(ACMRandom::DeterministicSeed());
645     src_ = reinterpret_cast<uint8_t *>(
646         aom_memalign(16, 512 * 512 * sizeof(uint8_t)));
647     ASSERT_NE(src_, nullptr);
648   }
649 
TearDown()650   void TearDown() override { aom_free(src_); }
651   void RunTest(bool is_random);
652   void RunSpeedTest();
653 
GenRandomData(int width,int height,int stride)654   void GenRandomData(int width, int height, int stride) {
655     const int msb = 7;  // Up to 8 bit input
656     const int limit = 1 << (msb + 1);
657     for (int ii = 0; ii < height; ii++) {
658       for (int jj = 0; jj < width; jj++) {
659         src_[ii * stride + jj] = rnd_(limit);
660       }
661     }
662   }
663 
GenExtremeData(int width,int height,int stride)664   void GenExtremeData(int width, int height, int stride) {
665     const int msb = 7;  // Up to 8 bit input
666     const int limit = 1 << (msb + 1);
667     const int val = rnd_(2) ? limit - 1 : 0;
668     for (int ii = 0; ii < height; ii++) {
669       for (int jj = 0; jj < width; jj++) {
670         src_[ii * stride + jj] = val;
671       }
672     }
673   }
674 
675  protected:
676   TestFuncVar2D params_;
677   uint8_t *src_;
678   ACMRandom rnd_;
679 };
680 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Lowbd2dVarTest);
681 
RunTest(bool is_random)682 void Lowbd2dVarTest::RunTest(bool is_random) {
683   int failed = 0;
684   for (int k = 0; k < kNumIterations; k++) {
685     const int width = 4 * (rnd_(63) + 1);   // Up to 256x256
686     const int height = 4 * (rnd_(63) + 1);  // Up to 256x256
687     int stride = 4 << rnd_(8);              // Up to 512 stride
688     while (stride < width) {                // Make sure it's valid
689       stride = 4 << rnd_(8);
690     }
691     if (is_random) {
692       GenRandomData(width, height, stride);
693     } else {
694       GenExtremeData(width, height, stride);
695     }
696 
697     const uint64_t res_ref = params_.ref_func(src_, stride, width, height);
698     uint64_t res_tst;
699     API_REGISTER_STATE_CHECK(res_tst =
700                                  params_.tst_func(src_, stride, width, height));
701 
702     if (!failed) {
703       failed = res_ref != res_tst;
704       EXPECT_EQ(res_ref, res_tst)
705           << "Error: Sum Squares Test [" << width << "x" << height
706           << "] C output does not match optimized output.";
707     }
708   }
709 }
710 
RunSpeedTest()711 void Lowbd2dVarTest::RunSpeedTest() {
712   for (int block = 0; block < 2; block++) {
713     const int width = test_block_size[block];
714     const int height = test_block_size[block];
715     int stride = 4 << rnd_(8);  // Up to 512 stride
716     while (stride < width) {    // Make sure it's valid
717       stride = 4 << rnd_(8);
718     }
719     GenExtremeData(width, height, stride);
720     const int num_loops = 1000000000 / (width + height);
721     aom_usec_timer timer;
722     aom_usec_timer_start(&timer);
723 
724     for (int i = 0; i < num_loops; ++i)
725       params_.ref_func(src_, stride, width, height);
726 
727     aom_usec_timer_mark(&timer);
728     const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
729 
730     aom_usec_timer timer1;
731     aom_usec_timer_start(&timer1);
732     for (int i = 0; i < num_loops; ++i)
733       params_.tst_func(src_, stride, width, height);
734     aom_usec_timer_mark(&timer1);
735     const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
736     printf("%3dx%-3d: Scaling = %.2f\n", width, height,
737            (double)elapsed_time / elapsed_time1);
738   }
739 }
740 
// Equivalence check on randomly generated input (GenRandomData).
TEST_P(Lowbd2dVarTest, OperationCheck) {
  const bool use_random_data = true;
  RunTest(use_random_data);
}

// Equivalence check on constant extreme input (GenExtremeData).
TEST_P(Lowbd2dVarTest, ExtremeValues) {
  const bool use_random_data = false;
  RunTest(use_random_data);
}

// Benchmark; disabled by default through the DISABLED_ prefix.
TEST_P(Lowbd2dVarTest, DISABLED_Speed) {
  RunSpeedTest();
}
750 
// 8-bit variance instantiations: C reference against each enabled SIMD
// implementation.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, Lowbd2dVarTest,
    Values(TestFuncVar2D(aom_var_2d_u8_c, aom_var_2d_u8_sse2)));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, Lowbd2dVarTest,
    Values(TestFuncVar2D(aom_var_2d_u8_c, aom_var_2d_u8_avx2)));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, Lowbd2dVarTest,
    Values(TestFuncVar2D(aom_var_2d_u8_c, aom_var_2d_u8_neon)));
#endif  // HAVE_NEON

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(
    NEON_DOTPROD, Lowbd2dVarTest,
    Values(TestFuncVar2D(aom_var_2d_u8_c, aom_var_2d_u8_neon_dotprod)));
#endif  // HAVE_NEON_DOTPROD
782 
783 #if CONFIG_AV1_HIGHBITDEPTH
784 class Highbd2dVarTest : public ::testing::TestWithParam<TestFuncVar2D> {
785  public:
786   ~Highbd2dVarTest() override = default;
SetUp()787   void SetUp() override {
788     params_ = this->GetParam();
789     rnd_.Reset(ACMRandom::DeterministicSeed());
790     src_ = reinterpret_cast<uint16_t *>(
791         aom_memalign(16, 512 * 512 * sizeof(uint16_t)));
792     ASSERT_NE(src_, nullptr);
793   }
794 
TearDown()795   void TearDown() override { aom_free(src_); }
796   void RunTest(bool is_random);
797   void RunSpeedTest();
798 
GenRandomData(int width,int height,int stride)799   void GenRandomData(int width, int height, int stride) {
800     const int msb = 11;  // Up to 12 bit input
801     const int limit = 1 << (msb + 1);
802     for (int ii = 0; ii < height; ii++) {
803       for (int jj = 0; jj < width; jj++) {
804         src_[ii * stride + jj] = rnd_(limit);
805       }
806     }
807   }
808 
GenExtremeData(int width,int height,int stride)809   void GenExtremeData(int width, int height, int stride) {
810     const int msb = 11;  // Up to 12 bit input
811     const int limit = 1 << (msb + 1);
812     const int val = rnd_(2) ? limit - 1 : 0;
813     for (int ii = 0; ii < height; ii++) {
814       for (int jj = 0; jj < width; jj++) {
815         src_[ii * stride + jj] = val;
816       }
817     }
818   }
819 
820  protected:
821   TestFuncVar2D params_;
822   uint16_t *src_;
823   ACMRandom rnd_;
824 };
825 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Highbd2dVarTest);
826 
RunTest(bool is_random)827 void Highbd2dVarTest::RunTest(bool is_random) {
828   int failed = 0;
829   for (int k = 0; k < kNumIterations; k++) {
830     const int width = 4 * (rnd_(63) + 1);   // Up to 256x256
831     const int height = 4 * (rnd_(63) + 1);  // Up to 256x256
832     int stride = 4 << rnd_(8);              // Up to 512 stride
833     while (stride < width) {                // Make sure it's valid
834       stride = 4 << rnd_(8);
835     }
836     if (is_random) {
837       GenRandomData(width, height, stride);
838     } else {
839       GenExtremeData(width, height, stride);
840     }
841 
842     const uint64_t res_ref =
843         params_.ref_func(CONVERT_TO_BYTEPTR(src_), stride, width, height);
844     uint64_t res_tst;
845     API_REGISTER_STATE_CHECK(
846         res_tst =
847             params_.tst_func(CONVERT_TO_BYTEPTR(src_), stride, width, height));
848 
849     if (!failed) {
850       failed = res_ref != res_tst;
851       EXPECT_EQ(res_ref, res_tst)
852           << "Error: Sum Squares Test [" << width << "x" << height
853           << "] C output does not match optimized output.";
854     }
855   }
856 }
857 
RunSpeedTest()858 void Highbd2dVarTest::RunSpeedTest() {
859   for (int block = 0; block < 2; block++) {
860     const int width = test_block_size[block];
861     const int height = test_block_size[block];
862     int stride = 4 << rnd_(8);  // Up to 512 stride
863     while (stride < width) {    // Make sure it's valid
864       stride = 4 << rnd_(8);
865     }
866     GenExtremeData(width, height, stride);
867     const int num_loops = 1000000000 / (width + height);
868     aom_usec_timer timer;
869     aom_usec_timer_start(&timer);
870 
871     for (int i = 0; i < num_loops; ++i)
872       params_.ref_func(CONVERT_TO_BYTEPTR(src_), stride, width, height);
873 
874     aom_usec_timer_mark(&timer);
875     const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
876 
877     aom_usec_timer timer1;
878     aom_usec_timer_start(&timer1);
879     for (int i = 0; i < num_loops; ++i)
880       params_.tst_func(CONVERT_TO_BYTEPTR(src_), stride, width, height);
881     aom_usec_timer_mark(&timer1);
882     const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
883     printf("%3dx%-3d: Scaling = %.2f\n", width, height,
884            (double)elapsed_time / elapsed_time1);
885   }
886 }
887 
// Equivalence check on randomly generated input (GenRandomData).
TEST_P(Highbd2dVarTest, OperationCheck) {
  const bool use_random_data = true;
  RunTest(use_random_data);
}

// Equivalence check on constant extreme input (GenExtremeData).
TEST_P(Highbd2dVarTest, ExtremeValues) {
  const bool use_random_data = false;
  RunTest(use_random_data);
}

// Benchmark; disabled by default through the DISABLED_ prefix.
TEST_P(Highbd2dVarTest, DISABLED_Speed) {
  RunSpeedTest();
}
897 
// 16-bit variance instantiations: C reference against each enabled SIMD
// implementation.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, Highbd2dVarTest,
    Values(TestFuncVar2D(aom_var_2d_u16_c, aom_var_2d_u16_sse2)));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, Highbd2dVarTest,
    Values(TestFuncVar2D(aom_var_2d_u16_c, aom_var_2d_u16_avx2)));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, Highbd2dVarTest,
    Values(TestFuncVar2D(aom_var_2d_u16_c, aom_var_2d_u16_neon)));
#endif  // HAVE_NEON

#if HAVE_SVE
INSTANTIATE_TEST_SUITE_P(
    SVE, Highbd2dVarTest,
    Values(TestFuncVar2D(aom_var_2d_u16_c, aom_var_2d_u16_sve)));
#endif  // HAVE_SVE
929 #endif  // CONFIG_AV1_HIGHBITDEPTH
930 }  // namespace
931