xref: /aosp_15_r20/external/libaom/test/avg_test.cc (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <stdlib.h>
13 #include <ostream>
14 #include <string>
15 #include <tuple>
16 
17 #include "gtest/gtest.h"
18 
19 #include "config/aom_config.h"
20 #include "config/aom_dsp_rtcd.h"
21 
22 #include "aom_ports/aom_timer.h"
23 #include "aom_ports/mem.h"
24 #include "test/acm_random.h"
25 #include "test/register_state_check.h"
26 #include "test/util.h"
27 
28 namespace {
29 
30 using libaom_test::ACMRandom;
31 
32 template <typename Pixel>
33 class AverageTestBase : public ::testing::Test {
34  public:
AverageTestBase(int width,int height,int bit_depth=8)35   AverageTestBase(int width, int height, int bit_depth = 8)
36       : width_(width), height_(height), source_data_(nullptr),
37         source_stride_(0), bit_depth_(bit_depth) {}
38 
TearDown()39   void TearDown() override {
40     aom_free(source_data_);
41     source_data_ = nullptr;
42   }
43 
44  protected:
45   // Handle blocks up to 4 blocks 64x64 with stride up to 128
46   static const int kDataAlignment = 16;
47   static const int kDataBlockWidth = 128;
48   static const int kDataBlockHeight = 128;
49   static const int kDataBlockSize = kDataBlockWidth * kDataBlockHeight;
50 
SetUp()51   void SetUp() override {
52     const testing::TestInfo *const test_info =
53         testing::UnitTest::GetInstance()->current_test_info();
54     // Skip the speed test for C code as the baseline uses the same function.
55     if (std::string(test_info->test_suite_name()).find("C/") == 0 &&
56         std::string(test_info->name()).find("DISABLED_Speed") !=
57             std::string::npos) {
58       GTEST_SKIP();
59     }
60 
61     source_data_ = static_cast<Pixel *>(
62         aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
63     ASSERT_NE(source_data_, nullptr);
64     memset(source_data_, 0, kDataBlockSize * sizeof(source_data_[0]));
65     source_stride_ = (width_ + 31) & ~31;
66     bit_depth_ = 8;
67     rnd_.Reset(ACMRandom::DeterministicSeed());
68   }
69 
70   // Sum Pixels
ReferenceAverage8x8(const Pixel * source,int pitch)71   static unsigned int ReferenceAverage8x8(const Pixel *source, int pitch) {
72     unsigned int average = 0;
73     for (int h = 0; h < 8; ++h) {
74       for (int w = 0; w < 8; ++w) average += source[h * pitch + w];
75     }
76     return (average + 32) >> 6;
77   }
78 
ReferenceAverage8x8_quad(const uint8_t * source,int pitch,int x16_idx,int y16_idx,int * avg)79   static void ReferenceAverage8x8_quad(const uint8_t *source, int pitch,
80                                        int x16_idx, int y16_idx, int *avg) {
81     for (int k = 0; k < 4; k++) {
82       int average = 0;
83       int x8_idx = x16_idx + ((k & 1) << 3);
84       int y8_idx = y16_idx + ((k >> 1) << 3);
85       for (int h = 0; h < 8; ++h) {
86         for (int w = 0; w < 8; ++w)
87           average += source[(h + y8_idx) * pitch + w + x8_idx];
88       }
89       avg[k] = (average + 32) >> 6;
90     }
91   }
92 
ReferenceAverage4x4(const Pixel * source,int pitch)93   static unsigned int ReferenceAverage4x4(const Pixel *source, int pitch) {
94     unsigned int average = 0;
95     for (int h = 0; h < 4; ++h) {
96       for (int w = 0; w < 4; ++w) average += source[h * pitch + w];
97     }
98     return (average + 8) >> 4;
99   }
100 
FillConstant(Pixel fill_constant)101   void FillConstant(Pixel fill_constant) {
102     for (int i = 0; i < width_ * height_; ++i) {
103       source_data_[i] = fill_constant;
104     }
105   }
106 
FillRandom()107   void FillRandom() {
108     for (int i = 0; i < width_ * height_; ++i) {
109       source_data_[i] = rnd_.Rand16() & ((1 << bit_depth_) - 1);
110     }
111   }
112 
113   int width_, height_;
114   Pixel *source_data_;
115   int source_stride_;
116   int bit_depth_;
117 
118   ACMRandom rnd_;
119 };
// Signature shared by the single-block average functions: a pointer to the
// first sample and the row pitch in; the block average out.
using AverageFunction = unsigned int (*)(const uint8_t *s, int pitch);

// Test parameters, in order: width, height, bit_depth, buffer start offset,
// block size, avg function.
using AvgFunc = std::tuple<int, int, int, int, int, AverageFunction>;
125 
126 template <typename Pixel>
127 class AverageTest : public AverageTestBase<Pixel>,
128                     public ::testing::WithParamInterface<AvgFunc> {
129  public:
AverageTest()130   AverageTest()
131       : AverageTestBase<Pixel>(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)) {}
132 
133  protected:
134   using AverageTestBase<Pixel>::source_data_;
135   using AverageTestBase<Pixel>::source_stride_;
136   using AverageTestBase<Pixel>::ReferenceAverage8x8;
137   using AverageTestBase<Pixel>::ReferenceAverage4x4;
138   using AverageTestBase<Pixel>::FillConstant;
139   using AverageTestBase<Pixel>::FillRandom;
140 
CheckAverages()141   void CheckAverages() {
142     const int block_size = GET_PARAM(4);
143     unsigned int expected = 0;
144 
145     // The reference frame, but not the source frame, may be unaligned for
146     // certain types of searches.
147     const Pixel *const src = source_data_ + GET_PARAM(3);
148     if (block_size == 8) {
149       expected = ReferenceAverage8x8(src, source_stride_);
150     } else if (block_size == 4) {
151       expected = ReferenceAverage4x4(src, source_stride_);
152     }
153 
154     aom_usec_timer timer;
155     unsigned int actual;
156     if (sizeof(Pixel) == 2) {
157 #if CONFIG_AV1_HIGHBITDEPTH
158       AverageFunction avg_c =
159           (block_size == 8) ? aom_highbd_avg_8x8_c : aom_highbd_avg_4x4_c;
160       // To avoid differences in optimization with the local Reference*()
161       // functions the C implementation is used as a baseline.
162       aom_usec_timer_start(&timer);
163       avg_c(CONVERT_TO_BYTEPTR(src), source_stride_);
164       aom_usec_timer_mark(&timer);
165       ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
166 
167       AverageFunction avg_opt = GET_PARAM(5);
168       API_REGISTER_STATE_CHECK(
169           aom_usec_timer_start(&timer);
170           actual = avg_opt(CONVERT_TO_BYTEPTR(src), source_stride_);
171           aom_usec_timer_mark(&timer));
172 #endif  // CONFIG_AV1_HIGHBITDEPTH
173     } else {
174       ASSERT_EQ(sizeof(Pixel), 1u);
175 
176       AverageFunction avg_c = (block_size == 8) ? aom_avg_8x8_c : aom_avg_4x4_c;
177       aom_usec_timer_start(&timer);
178       avg_c(reinterpret_cast<const uint8_t *>(src), source_stride_);
179       aom_usec_timer_mark(&timer);
180       ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
181 
182       AverageFunction avg_opt = GET_PARAM(5);
183       API_REGISTER_STATE_CHECK(
184           aom_usec_timer_start(&timer);
185           actual =
186               avg_opt(reinterpret_cast<const uint8_t *>(src), source_stride_);
187           aom_usec_timer_mark(&timer));
188     }
189     opt_elapsed_time_ += aom_usec_timer_elapsed(&timer);
190 
191     EXPECT_EQ(expected, actual);
192   }
193 
TestConstantValue(Pixel value)194   void TestConstantValue(Pixel value) {
195     FillConstant(value);
196     CheckAverages();
197   }
198 
TestRandom(int iterations=1000)199   void TestRandom(int iterations = 1000) {
200     for (int i = 0; i < iterations; i++) {
201       FillRandom();
202       CheckAverages();
203     }
204   }
205 
PrintTimingStats() const206   void PrintTimingStats() const {
207     printf(
208         "block_size = %d \t ref_time = %d \t simd_time = %d \t Gain = %4.2f\n",
209         GET_PARAM(4), static_cast<int>(ref_elapsed_time_),
210         static_cast<int>(opt_elapsed_time_),
211         (static_cast<float>(ref_elapsed_time_) /
212          static_cast<float>(opt_elapsed_time_)));
213   }
214 
215   int64_t ref_elapsed_time_ = 0;
216   int64_t opt_elapsed_time_ = 0;
217 };
218 
// Signature of the quad average functions: they write four 8x8 averages for
// the 16x16 area at (x_idx, y_idx) into `avg`.
using AverageFunction_8x8_quad = void (*)(const uint8_t *s, int pitch,
                                          int x_idx, int y_idx, int *avg);

// Test parameters, in order: width, height, bit_depth, buffer start offset,
// block size, avg function.
using AvgFunc_8x8_quad =
    std::tuple<int, int, int, int, int, AverageFunction_8x8_quad>;
226 
227 template <typename Pixel>
228 class AverageTest_8x8_quad
229     : public AverageTestBase<Pixel>,
230       public ::testing::WithParamInterface<AvgFunc_8x8_quad> {
231  public:
AverageTest_8x8_quad()232   AverageTest_8x8_quad()
233       : AverageTestBase<Pixel>(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)) {}
234 
235  protected:
236   using AverageTestBase<Pixel>::source_data_;
237   using AverageTestBase<Pixel>::source_stride_;
238   using AverageTestBase<Pixel>::ReferenceAverage8x8_quad;
239   using AverageTestBase<Pixel>::FillConstant;
240   using AverageTestBase<Pixel>::FillRandom;
241 
CheckAveragesAt(int iterations,int x16_idx,int y16_idx)242   void CheckAveragesAt(int iterations, int x16_idx, int y16_idx) {
243     ASSERT_EQ(sizeof(Pixel), 1u);
244     const int block_size = GET_PARAM(4);
245     (void)block_size;
246     int expected[4] = { 0 };
247 
248     // The reference frame, but not the source frame, may be unaligned for
249     // certain types of searches.
250     const Pixel *const src = source_data_ + GET_PARAM(3);
251     ReferenceAverage8x8_quad(src, source_stride_, x16_idx, y16_idx, expected);
252 
253     aom_usec_timer timer;
254     int expected_c[4] = { 0 };
255     int actual[4] = { 0 };
256     AverageFunction_8x8_quad avg_c = aom_avg_8x8_quad_c;
257     aom_usec_timer_start(&timer);
258     for (int i = 0; i < iterations; i++) {
259       avg_c(reinterpret_cast<const uint8_t *>(src), source_stride_, x16_idx,
260             y16_idx, expected_c);
261     }
262     aom_usec_timer_mark(&timer);
263     ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
264 
265     AverageFunction_8x8_quad avg_opt = GET_PARAM(5);
266     aom_usec_timer_start(&timer);
267     for (int i = 0; i < iterations; i++) {
268       avg_opt(reinterpret_cast<const uint8_t *>(src), source_stride_, x16_idx,
269               y16_idx, actual);
270     }
271     aom_usec_timer_mark(&timer);
272     opt_elapsed_time_ += aom_usec_timer_elapsed(&timer);
273 
274     for (int k = 0; k < 4; k++) {
275       EXPECT_EQ(expected[k], actual[k]);
276       EXPECT_EQ(expected_c[k], actual[k]);
277     }
278 
279     // Print scaling information only when Speed test is called.
280     if (iterations > 1) {
281       printf("ref_time = %d \t simd_time = %d \t Gain = %4.2f\n",
282              static_cast<int>(ref_elapsed_time_),
283              static_cast<int>(opt_elapsed_time_),
284              (static_cast<float>(ref_elapsed_time_) /
285               static_cast<float>(opt_elapsed_time_)));
286     }
287   }
288 
CheckAverages()289   void CheckAverages() {
290     for (int x16_idx = 0; x16_idx < this->kDataBlockWidth / 8; x16_idx += 2)
291       for (int y16_idx = 0; y16_idx < this->kDataBlockHeight / 8; y16_idx += 2)
292         CheckAveragesAt(1, x16_idx, y16_idx);
293   }
294 
TestConstantValue(Pixel value)295   void TestConstantValue(Pixel value) {
296     FillConstant(value);
297     CheckAverages();
298   }
299 
TestRandom()300   void TestRandom() {
301     FillRandom();
302     CheckAverages();
303   }
304 
TestSpeed()305   void TestSpeed() {
306     FillRandom();
307     CheckAveragesAt(1000000, 0, 0);
308   }
309 
310   int64_t ref_elapsed_time_ = 0;
311   int64_t opt_elapsed_time_ = 0;
312 };
313 
314 using AverageTest8bpp = AverageTest<uint8_t>;
315 
TEST_P(AverageTest8bpp,MinValue)316 TEST_P(AverageTest8bpp, MinValue) { TestConstantValue(0); }
317 
TEST_P(AverageTest8bpp,MaxValue)318 TEST_P(AverageTest8bpp, MaxValue) { TestConstantValue(255); }
319 
TEST_P(AverageTest8bpp,Random)320 TEST_P(AverageTest8bpp, Random) { TestRandom(); }
321 
TEST_P(AverageTest8bpp,DISABLED_Speed)322 TEST_P(AverageTest8bpp, DISABLED_Speed) {
323   TestRandom(1000000);
324   PrintTimingStats();
325 }
326 
327 using AvgTest8bpp_avg_8x8_quad = AverageTest_8x8_quad<uint8_t>;
328 
TEST_P(AvgTest8bpp_avg_8x8_quad,MinValue)329 TEST_P(AvgTest8bpp_avg_8x8_quad, MinValue) { TestConstantValue(0); }
330 
TEST_P(AvgTest8bpp_avg_8x8_quad,MaxValue)331 TEST_P(AvgTest8bpp_avg_8x8_quad, MaxValue) { TestConstantValue(255); }
332 
TEST_P(AvgTest8bpp_avg_8x8_quad,Random)333 TEST_P(AvgTest8bpp_avg_8x8_quad, Random) { TestRandom(); }
334 
TEST_P(AvgTest8bpp_avg_8x8_quad,DISABLED_Speed)335 TEST_P(AvgTest8bpp_avg_8x8_quad, DISABLED_Speed) { TestSpeed(); }
336 
#if CONFIG_AV1_HIGHBITDEPTH
// 16-bit-sample instantiation of the single-block average fixture.
using AverageTestHbd = AverageTest<uint16_t>;

// All samples zero.
TEST_P(AverageTestHbd, MinValue) {
  TestConstantValue(0);
}

// All samples at the largest 10-bit value.
TEST_P(AverageTestHbd, MaxValue10bit) {
  TestConstantValue(1023);
}

// All samples at the largest 12-bit value.
TEST_P(AverageTestHbd, MaxValue12bit) {
  TestConstantValue(4095);
}

// Random samples, default iteration count.
TEST_P(AverageTestHbd, Random) {
  TestRandom();
}

// Benchmark; disabled by default.
TEST_P(AverageTestHbd, DISABLED_Speed) {
  const int iterations = 1000000;
  TestRandom(iterations);
  PrintTimingStats();
}
#endif  // CONFIG_AV1_HIGHBITDEPTH
352 
// Signature of the integer row-projection functions exercised by
// IntProRowTest.
using IntProRowFunc = void (*)(int16_t *hbuf, uint8_t const *ref,
                               const int ref_stride, const int width,
                               const int height, int norm_factor);

// Params: width, height, asm function, c function.
using IntProRowParam = std::tuple<int, int, IntProRowFunc, IntProRowFunc>;
359 
360 class IntProRowTest : public AverageTestBase<uint8_t>,
361                       public ::testing::WithParamInterface<IntProRowParam> {
362  public:
IntProRowTest()363   IntProRowTest()
364       : AverageTestBase(GET_PARAM(0), GET_PARAM(1)), hbuf_asm_(nullptr),
365         hbuf_c_(nullptr) {
366     asm_func_ = GET_PARAM(2);
367     c_func_ = GET_PARAM(3);
368   }
369 
set_norm_factor()370   void set_norm_factor() {
371     if (height_ == 128)
372       norm_factor_ = 6;
373     else if (height_ == 64)
374       norm_factor_ = 5;
375     else if (height_ == 32)
376       norm_factor_ = 4;
377     else if (height_ == 16)
378       norm_factor_ = 3;
379   }
380 
381  protected:
SetUp()382   void SetUp() override {
383     source_data_ = static_cast<uint8_t *>(
384         aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
385     ASSERT_NE(source_data_, nullptr);
386 
387     hbuf_asm_ = static_cast<int16_t *>(
388         aom_memalign(kDataAlignment, sizeof(*hbuf_asm_) * width_));
389     ASSERT_NE(hbuf_asm_, nullptr);
390     hbuf_c_ = static_cast<int16_t *>(
391         aom_memalign(kDataAlignment, sizeof(*hbuf_c_) * width_));
392     ASSERT_NE(hbuf_c_, nullptr);
393   }
394 
TearDown()395   void TearDown() override {
396     aom_free(source_data_);
397     source_data_ = nullptr;
398     aom_free(hbuf_c_);
399     hbuf_c_ = nullptr;
400     aom_free(hbuf_asm_);
401     hbuf_asm_ = nullptr;
402   }
403 
RunComparison()404   void RunComparison() {
405     set_norm_factor();
406     API_REGISTER_STATE_CHECK(
407         c_func_(hbuf_c_, source_data_, width_, width_, height_, norm_factor_));
408     API_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, width_, width_,
409                                        height_, norm_factor_));
410     EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * width_))
411         << "Output mismatch\n";
412   }
413 
RunSpeedTest()414   void RunSpeedTest() {
415     const int numIter = 5000000;
416     set_norm_factor();
417     printf("Blk_Size=%dx%d: number of iteration is %d \n", width_, height_,
418            numIter);
419     aom_usec_timer c_timer_;
420     aom_usec_timer_start(&c_timer_);
421     for (int i = 0; i < numIter; i++) {
422       c_func_(hbuf_c_, source_data_, width_, width_, height_, norm_factor_);
423     }
424     aom_usec_timer_mark(&c_timer_);
425 
426     aom_usec_timer asm_timer_;
427     aom_usec_timer_start(&asm_timer_);
428 
429     for (int i = 0; i < numIter; i++) {
430       asm_func_(hbuf_asm_, source_data_, width_, width_, height_, norm_factor_);
431     }
432     aom_usec_timer_mark(&asm_timer_);
433 
434     const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
435     const int asm_sum_time =
436         static_cast<int>(aom_usec_timer_elapsed(&asm_timer_));
437 
438     printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
439            asm_sum_time,
440            (static_cast<float>(c_sum_time) / static_cast<float>(asm_sum_time)));
441 
442     EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * width_))
443         << "Output mismatch\n";
444   }
445 
446  private:
447   IntProRowFunc asm_func_;
448   IntProRowFunc c_func_;
449   int16_t *hbuf_asm_;
450   int16_t *hbuf_c_;
451   int norm_factor_;
452 };
453 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(IntProRowTest);
454 
// Signature of the integer column-projection functions exercised by
// IntProColTest.
using IntProColFunc = void (*)(int16_t *vbuf, uint8_t const *ref,
                               const int ref_stride, const int width,
                               const int height, int norm_factor);

// Params: width, height, asm function, c function.
using IntProColParam = std::tuple<int, int, IntProColFunc, IntProColFunc>;
461 
462 class IntProColTest : public AverageTestBase<uint8_t>,
463                       public ::testing::WithParamInterface<IntProColParam> {
464  public:
IntProColTest()465   IntProColTest()
466       : AverageTestBase(GET_PARAM(0), GET_PARAM(1)), vbuf_asm_(nullptr),
467         vbuf_c_(nullptr) {
468     asm_func_ = GET_PARAM(2);
469     c_func_ = GET_PARAM(3);
470   }
471 
472  protected:
SetUp()473   void SetUp() override {
474     source_data_ = static_cast<uint8_t *>(
475         aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
476     ASSERT_NE(source_data_, nullptr);
477 
478     vbuf_asm_ = static_cast<int16_t *>(
479         aom_memalign(kDataAlignment, sizeof(*vbuf_asm_) * width_));
480     ASSERT_NE(vbuf_asm_, nullptr);
481     vbuf_c_ = static_cast<int16_t *>(
482         aom_memalign(kDataAlignment, sizeof(*vbuf_c_) * width_));
483     ASSERT_NE(vbuf_c_, nullptr);
484   }
485 
TearDown()486   void TearDown() override {
487     aom_free(source_data_);
488     source_data_ = nullptr;
489     aom_free(vbuf_c_);
490     vbuf_c_ = nullptr;
491     aom_free(vbuf_asm_);
492     vbuf_asm_ = nullptr;
493   }
494 
RunComparison()495   void RunComparison() {
496     int norm_factor_ = 3 + (width_ >> 5);
497     API_REGISTER_STATE_CHECK(
498         c_func_(vbuf_c_, source_data_, width_, width_, height_, norm_factor_));
499     API_REGISTER_STATE_CHECK(asm_func_(vbuf_asm_, source_data_, width_, width_,
500                                        height_, norm_factor_));
501     EXPECT_EQ(0, memcmp(vbuf_c_, vbuf_asm_, sizeof(*vbuf_c_) * height_))
502         << "Output mismatch\n";
503   }
RunSpeedTest()504   void RunSpeedTest() {
505     const int numIter = 5000000;
506     printf("Blk_Size=%dx%d: number of iteration is %d \n", width_, height_,
507            numIter);
508     int norm_factor_ = 3 + (width_ >> 5);
509     aom_usec_timer c_timer_;
510     aom_usec_timer_start(&c_timer_);
511     for (int i = 0; i < numIter; i++) {
512       c_func_(vbuf_c_, source_data_, width_, width_, height_, norm_factor_);
513     }
514     aom_usec_timer_mark(&c_timer_);
515 
516     aom_usec_timer asm_timer_;
517     aom_usec_timer_start(&asm_timer_);
518 
519     for (int i = 0; i < numIter; i++) {
520       asm_func_(vbuf_asm_, source_data_, width_, width_, height_, norm_factor_);
521     }
522     aom_usec_timer_mark(&asm_timer_);
523 
524     const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
525     const int asm_sum_time =
526         static_cast<int>(aom_usec_timer_elapsed(&asm_timer_));
527 
528     printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
529            asm_sum_time,
530            (static_cast<float>(c_sum_time) / static_cast<float>(asm_sum_time)));
531 
532     EXPECT_EQ(0, memcmp(vbuf_c_, vbuf_asm_, sizeof(*vbuf_c_) * height_))
533         << "Output mismatch\n";
534   }
535 
536  private:
537   IntProColFunc asm_func_;
538   IntProColFunc c_func_;
539   int16_t *vbuf_asm_;
540   int16_t *vbuf_c_;
541 };
542 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(IntProColTest);
543 
// Row projection of an all-zero block.
TEST_P(IntProRowTest, MinValue) {
  const uint8_t fill = 0;
  FillConstant(fill);
  RunComparison();
}

// Row projection of a block filled with the largest 8-bit value.
TEST_P(IntProRowTest, MaxValue) {
  const uint8_t fill = 255;
  FillConstant(fill);
  RunComparison();
}

// Row projection of a random block.
TEST_P(IntProRowTest, Random) {
  FillRandom();
  RunComparison();
}

// Benchmark on a random block; disabled by default.
TEST_P(IntProRowTest, DISABLED_Speed) {
  FillRandom();
  RunSpeedTest();
}
563 
// Column projection of an all-zero block.
TEST_P(IntProColTest, MinValue) {
  const uint8_t fill = 0;
  FillConstant(fill);
  RunComparison();
}

// Column projection of a block filled with the largest 8-bit value.
TEST_P(IntProColTest, MaxValue) {
  const uint8_t fill = 255;
  FillConstant(fill);
  RunComparison();
}

// Column projection of a random block.
TEST_P(IntProColTest, Random) {
  FillRandom();
  RunComparison();
}

// Benchmark on a random block; disabled by default.
TEST_P(IntProColTest, DISABLED_Speed) {
  FillRandom();
  RunSpeedTest();
}
583 class VectorVarTestBase : public ::testing::Test {
584  public:
VectorVarTestBase(int bwl)585   explicit VectorVarTestBase(int bwl) { m_bwl = bwl; }
586   VectorVarTestBase() = default;
587   ~VectorVarTestBase() override = default;
588 
589  protected:
590   static const int kDataAlignment = 16;
591 
SetUp()592   void SetUp() override {
593     width = 4 << m_bwl;
594 
595     ref_vector = static_cast<int16_t *>(
596         aom_memalign(kDataAlignment, width * sizeof(ref_vector[0])));
597     ASSERT_NE(ref_vector, nullptr);
598     src_vector = static_cast<int16_t *>(
599         aom_memalign(kDataAlignment, width * sizeof(src_vector[0])));
600     ASSERT_NE(src_vector, nullptr);
601 
602     rnd_.Reset(ACMRandom::DeterministicSeed());
603   }
TearDown()604   void TearDown() override {
605     aom_free(ref_vector);
606     ref_vector = nullptr;
607     aom_free(src_vector);
608     src_vector = nullptr;
609   }
610 
FillConstant(int16_t fill_constant_ref,int16_t fill_constant_src)611   void FillConstant(int16_t fill_constant_ref, int16_t fill_constant_src) {
612     for (int i = 0; i < width; ++i) {
613       ref_vector[i] = fill_constant_ref;
614       src_vector[i] = fill_constant_src;
615     }
616   }
617 
FillRandom()618   void FillRandom() {
619     for (int i = 0; i < width; ++i) {
620       ref_vector[i] =
621           rnd_.Rand16() % max_range;  // acc. aom_vector_var_c brief.
622       src_vector[i] = rnd_.Rand16() % max_range;
623     }
624   }
625 
626   int width;
627   int m_bwl;
628   int16_t *ref_vector;
629   int16_t *src_vector;
630   ACMRandom rnd_;
631 
632   static const int max_range = 510;
633   static const int num_random_cmp = 50;
634 };
635 
// Signature of the vector-variance functions exercised by VectorVarTest.
using VectorVarFunc = int (*)(const int16_t *ref, const int16_t *src,
                              const int bwl);

// Test parameters, in the order VectorVarTest reads them: bwl, C function,
// SIMD function.
using VecVarFunc = std::tuple<int, VectorVarFunc, VectorVarFunc>;
640 
641 class VectorVarTest : public VectorVarTestBase,
642                       public ::testing::WithParamInterface<VecVarFunc> {
643  public:
VectorVarTest()644   VectorVarTest()
645       : VectorVarTestBase(GET_PARAM(0)), c_func(GET_PARAM(1)),
646         simd_func(GET_PARAM(2)) {}
647 
648  protected:
calcVarC()649   int calcVarC() { return c_func(ref_vector, src_vector, m_bwl); }
calcVarSIMD()650   int calcVarSIMD() { return simd_func(ref_vector, src_vector, m_bwl); }
651 
652   VectorVarFunc c_func;
653   VectorVarFunc simd_func;
654 };
655 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(VectorVarTest);
656 
// ref all zero, src all max_range.
TEST_P(VectorVarTest, MaxVar) {
  FillConstant(0, max_range);
  const int expected = calcVarC();
  const int actual = calcVarSIMD();
  ASSERT_EQ(expected, actual);
}

// ref all max_range, src all zero.
TEST_P(VectorVarTest, MaxVarRev) {
  FillConstant(max_range, 0);
  const int expected = calcVarC();
  const int actual = calcVarSIMD();
  ASSERT_EQ(expected, actual);
}

// Both vectors all zero.
TEST_P(VectorVarTest, ZeroDiff) {
  FillConstant(0, 0);
  const int expected = calcVarC();
  const int actual = calcVarSIMD();
  ASSERT_EQ(expected, actual);
}

// Both vectors all max_range.
TEST_P(VectorVarTest, ZeroDiff2) {
  FillConstant(max_range, max_range);
  const int expected = calcVarC();
  const int actual = calcVarSIMD();
  ASSERT_EQ(expected, actual);
}

// Two different constants.
TEST_P(VectorVarTest, Constant) {
  FillConstant(30, 90);
  const int expected = calcVarC();
  const int actual = calcVarSIMD();
  ASSERT_EQ(expected, actual);
}

// num_random_cmp rounds of random vectors; stops at the first mismatch.
TEST_P(VectorVarTest, Random) {
  for (int round = 0; round < num_random_cmp; ++round) {
    FillRandom();
    const int expected = calcVarC();
    const int actual = calcVarSIMD();
    ASSERT_EQ(expected, actual);
  }
}
// Benchmark (disabled by default): times numIter calls of the C and the SIMD
// variance functions on one set of random vectors, prints the timings and
// their ratio, and checks that both the last result and the running sums
// agree.
TEST_P(VectorVarTest, DISABLED_Speed) {
  FillRandom();
  const int numIter = 5000000;
  printf("Width = %d number of iteration is %d \n", width, numIter);

  // The running sums are 64-bit: adding a variance numIter times in an int is
  // signed overflow (undefined behaviour) for random input.
  int64_t sum_c_var = 0;
  int c_var = 0;

  aom_usec_timer c_timer_;
  aom_usec_timer_start(&c_timer_);
  for (int i = 0; i < numIter; i++) {
    c_var = calcVarC();
    sum_c_var += c_var;
  }
  aom_usec_timer_mark(&c_timer_);

  int simd_var = 0;
  int64_t sum_simd_var = 0;
  aom_usec_timer simd_timer_;
  aom_usec_timer_start(&simd_timer_);
  for (int i = 0; i < numIter; i++) {
    simd_var = calcVarSIMD();
    sum_simd_var += simd_var;
  }
  aom_usec_timer_mark(&simd_timer_);

  const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
  const int simd_sum_time =
      static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));

  printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
         simd_sum_time,
         (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));

  EXPECT_EQ(c_var, simd_var) << "Output mismatch \n";
  EXPECT_EQ(sum_c_var, sum_simd_var) << "Output mismatch \n";
}
732 
733 using std::make_tuple;
734 
735 INSTANTIATE_TEST_SUITE_P(
736     C, AverageTest8bpp,
737     ::testing::Values(make_tuple(16, 16, 8, 1, 8, &aom_avg_8x8_c),
738                       make_tuple(16, 16, 8, 1, 4, &aom_avg_4x4_c)));
739 
740 INSTANTIATE_TEST_SUITE_P(
741     C, AvgTest8bpp_avg_8x8_quad,
742     ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_c),
743                       make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_c),
744                       make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_c)));
745 
#if HAVE_SSE2
// SSE2 implementations.
// Average fields: width, height, bit depth, buffer start offset, block size,
// function.
INSTANTIATE_TEST_SUITE_P(
    SSE2, AverageTest8bpp,
    ::testing::Values(AvgFunc(16, 16, 8, 0, 8, &aom_avg_8x8_sse2),
                      AvgFunc(16, 16, 8, 5, 8, &aom_avg_8x8_sse2),
                      AvgFunc(32, 32, 8, 15, 8, &aom_avg_8x8_sse2),
                      AvgFunc(16, 16, 8, 0, 4, &aom_avg_4x4_sse2),
                      AvgFunc(16, 16, 8, 5, 4, &aom_avg_4x4_sse2),
                      AvgFunc(32, 32, 8, 15, 4, &aom_avg_4x4_sse2)));

INSTANTIATE_TEST_SUITE_P(
    SSE2, AvgTest8bpp_avg_8x8_quad,
    ::testing::Values(
        AvgFunc_8x8_quad(16, 16, 8, 0, 16, &aom_avg_8x8_quad_sse2),
        AvgFunc_8x8_quad(32, 32, 8, 16, 16, &aom_avg_8x8_quad_sse2),
        AvgFunc_8x8_quad(32, 32, 8, 8, 16, &aom_avg_8x8_quad_sse2)));

// Projection fields: width, height, optimized function, C function.
INSTANTIATE_TEST_SUITE_P(
    SSE2, IntProRowTest,
    ::testing::Values(
        IntProRowParam(16, 16, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
        IntProRowParam(32, 32, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
        IntProRowParam(64, 64, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
        IntProRowParam(128, 128, &aom_int_pro_row_sse2, &aom_int_pro_row_c)));

INSTANTIATE_TEST_SUITE_P(
    SSE2, IntProColTest,
    ::testing::Values(
        IntProColParam(16, 16, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
        IntProColParam(32, 32, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
        IntProColParam(64, 64, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
        IntProColParam(128, 128, &aom_int_pro_col_sse2, &aom_int_pro_col_c)));
#endif  // HAVE_SSE2
778 
#if HAVE_AVX2
// AVX2 implementations.
// Average fields: width, height, bit depth, buffer start offset, block size,
// function.
INSTANTIATE_TEST_SUITE_P(
    AVX2, AvgTest8bpp_avg_8x8_quad,
    ::testing::Values(
        AvgFunc_8x8_quad(16, 16, 8, 0, 16, &aom_avg_8x8_quad_avx2),
        AvgFunc_8x8_quad(32, 32, 8, 16, 16, &aom_avg_8x8_quad_avx2),
        AvgFunc_8x8_quad(32, 32, 8, 8, 16, &aom_avg_8x8_quad_avx2)));

// Projection fields: width, height, optimized function, C function.
INSTANTIATE_TEST_SUITE_P(
    AVX2, IntProRowTest,
    ::testing::Values(
        IntProRowParam(16, 16, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
        IntProRowParam(32, 32, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
        IntProRowParam(64, 64, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
        IntProRowParam(128, 128, &aom_int_pro_row_avx2, &aom_int_pro_row_c)));

INSTANTIATE_TEST_SUITE_P(
    AVX2, IntProColTest,
    ::testing::Values(
        IntProColParam(16, 16, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
        IntProColParam(32, 32, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
        IntProColParam(64, 64, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
        IntProColParam(128, 128, &aom_int_pro_col_avx2, &aom_int_pro_col_c)));
#endif  // HAVE_AVX2
802 
#if HAVE_NEON
// NEON implementations.
// Average fields: width, height, bit depth, buffer start offset, block size,
// function.
INSTANTIATE_TEST_SUITE_P(
    NEON, AverageTest8bpp,
    ::testing::Values(AvgFunc(16, 16, 8, 0, 8, &aom_avg_8x8_neon),
                      AvgFunc(16, 16, 8, 5, 8, &aom_avg_8x8_neon),
                      AvgFunc(32, 32, 8, 15, 8, &aom_avg_8x8_neon),
                      AvgFunc(16, 16, 8, 0, 4, &aom_avg_4x4_neon),
                      AvgFunc(16, 16, 8, 5, 4, &aom_avg_4x4_neon),
                      AvgFunc(32, 32, 8, 15, 4, &aom_avg_4x4_neon)));

// Projection fields: width, height, optimized function, C function.
INSTANTIATE_TEST_SUITE_P(
    NEON, IntProRowTest,
    ::testing::Values(
        IntProRowParam(16, 16, &aom_int_pro_row_neon, &aom_int_pro_row_c),
        IntProRowParam(32, 32, &aom_int_pro_row_neon, &aom_int_pro_row_c),
        IntProRowParam(64, 64, &aom_int_pro_row_neon, &aom_int_pro_row_c),
        IntProRowParam(128, 128, &aom_int_pro_row_neon, &aom_int_pro_row_c)));

INSTANTIATE_TEST_SUITE_P(
    NEON, IntProColTest,
    ::testing::Values(
        IntProColParam(16, 16, &aom_int_pro_col_neon, &aom_int_pro_col_c),
        IntProColParam(32, 32, &aom_int_pro_col_neon, &aom_int_pro_col_c),
        IntProColParam(64, 64, &aom_int_pro_col_neon, &aom_int_pro_col_c),
        IntProColParam(128, 128, &aom_int_pro_col_neon, &aom_int_pro_col_c)));

INSTANTIATE_TEST_SUITE_P(
    NEON, AvgTest8bpp_avg_8x8_quad,
    ::testing::Values(
        AvgFunc_8x8_quad(16, 16, 8, 0, 16, &aom_avg_8x8_quad_neon),
        AvgFunc_8x8_quad(32, 32, 8, 16, 16, &aom_avg_8x8_quad_neon),
        AvgFunc_8x8_quad(32, 32, 8, 8, 16, &aom_avg_8x8_quad_neon)));
#endif  // HAVE_NEON
834 
#if CONFIG_AV1_HIGHBITDEPTH
// High bit-depth average functions.
// Fields: width, height, bit depth, buffer start offset, block size, function.
INSTANTIATE_TEST_SUITE_P(
    C, AverageTestHbd,
    ::testing::Values(AvgFunc(16, 16, 10, 1, 8, &aom_highbd_avg_8x8_c),
                      AvgFunc(16, 16, 10, 1, 4, &aom_highbd_avg_4x4_c),
                      AvgFunc(16, 16, 12, 1, 8, &aom_highbd_avg_8x8_c),
                      AvgFunc(16, 16, 12, 1, 4, &aom_highbd_avg_4x4_c)));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AverageTestHbd,
    ::testing::Values(AvgFunc(16, 16, 10, 0, 4, &aom_highbd_avg_4x4_neon),
                      AvgFunc(16, 16, 10, 5, 4, &aom_highbd_avg_4x4_neon),
                      AvgFunc(32, 32, 10, 15, 4, &aom_highbd_avg_4x4_neon),
                      AvgFunc(16, 16, 12, 0, 4, &aom_highbd_avg_4x4_neon),
                      AvgFunc(16, 16, 12, 5, 4, &aom_highbd_avg_4x4_neon),
                      AvgFunc(32, 32, 12, 15, 4, &aom_highbd_avg_4x4_neon),
                      AvgFunc(16, 16, 10, 0, 8, &aom_highbd_avg_8x8_neon),
                      AvgFunc(16, 16, 10, 5, 8, &aom_highbd_avg_8x8_neon),
                      AvgFunc(32, 32, 10, 15, 8, &aom_highbd_avg_8x8_neon),
                      AvgFunc(16, 16, 12, 0, 8, &aom_highbd_avg_8x8_neon),
                      AvgFunc(16, 16, 12, 5, 8, &aom_highbd_avg_8x8_neon),
                      AvgFunc(32, 32, 12, 15, 8, &aom_highbd_avg_8x8_neon)));
#endif  // HAVE_NEON
#endif  // CONFIG_AV1_HIGHBITDEPTH
860 
861 typedef int (*SatdFunc)(const tran_low_t *coeffs, int length);
862 typedef int (*SatdLpFunc)(const int16_t *coeffs, int length);
863 
// One parameter set for the SATD tests: the number of coefficients to process
// and the two functions to run over them (a reference and the implementation
// under test).
template <typename SatdFuncType>
struct SatdTestParam {
  SatdTestParam(int s, SatdFuncType f1, SatdFuncType f2)
      : satd_size(s), func_ref(f1), func_simd(f2) {}

  int satd_size;
  SatdFuncType func_ref;
  SatdFuncType func_simd;

  // Streams only the size; the function pointers are left out of the output.
  friend std::ostream &operator<<(std::ostream &os,
                                  const SatdTestParam &param) {
    os << "satd_size: ";
    os << param.satd_size;
    return os;
  }
};
876 
877 template <typename CoeffType, typename SatdFuncType>
878 class SatdTestBase
879     : public ::testing::Test,
880       public ::testing::WithParamInterface<SatdTestParam<SatdFuncType>> {
881  protected:
SatdTestBase(const SatdTestParam<SatdFuncType> & func_param)882   explicit SatdTestBase(const SatdTestParam<SatdFuncType> &func_param) {
883     satd_size_ = func_param.satd_size;
884     satd_func_ref_ = func_param.func_ref;
885     satd_func_simd_ = func_param.func_simd;
886   }
SetUp()887   void SetUp() override {
888     rnd_.Reset(ACMRandom::DeterministicSeed());
889     src_ = reinterpret_cast<CoeffType *>(
890         aom_memalign(32, sizeof(*src_) * satd_size_));
891     ASSERT_NE(src_, nullptr);
892   }
TearDown()893   void TearDown() override { aom_free(src_); }
FillConstant(const CoeffType val)894   void FillConstant(const CoeffType val) {
895     for (int i = 0; i < satd_size_; ++i) src_[i] = val;
896   }
FillRandom()897   void FillRandom() {
898     for (int i = 0; i < satd_size_; ++i) {
899       src_[i] = static_cast<int16_t>(rnd_.Rand16());
900     }
901   }
Check(int expected)902   void Check(int expected) {
903     int total_ref;
904     API_REGISTER_STATE_CHECK(total_ref = satd_func_ref_(src_, satd_size_));
905     EXPECT_EQ(expected, total_ref);
906 
907     int total_simd;
908     API_REGISTER_STATE_CHECK(total_simd = satd_func_simd_(src_, satd_size_));
909     EXPECT_EQ(expected, total_simd);
910   }
RunComparison()911   void RunComparison() {
912     int total_ref;
913     API_REGISTER_STATE_CHECK(total_ref = satd_func_ref_(src_, satd_size_));
914 
915     int total_simd;
916     API_REGISTER_STATE_CHECK(total_simd = satd_func_simd_(src_, satd_size_));
917 
918     EXPECT_EQ(total_ref, total_simd);
919   }
RunSpeedTest()920   void RunSpeedTest() {
921     const int numIter = 500000;
922     printf("size = %d number of iteration is %d \n", satd_size_, numIter);
923 
924     int total_ref;
925     aom_usec_timer c_timer_;
926     aom_usec_timer_start(&c_timer_);
927     for (int i = 0; i < numIter; i++) {
928       total_ref = satd_func_ref_(src_, satd_size_);
929     }
930     aom_usec_timer_mark(&c_timer_);
931 
932     int total_simd;
933     aom_usec_timer simd_timer_;
934     aom_usec_timer_start(&simd_timer_);
935 
936     for (int i = 0; i < numIter; i++) {
937       total_simd = satd_func_simd_(src_, satd_size_);
938     }
939     aom_usec_timer_mark(&simd_timer_);
940 
941     const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
942     const int simd_sum_time =
943         static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));
944 
945     printf(
946         "c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
947         simd_sum_time,
948         (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
949 
950     EXPECT_EQ(total_ref, total_simd) << "Output mismatch \n";
951   }
952   int satd_size_;
953 
954  private:
955   CoeffType *src_;
956   SatdFuncType satd_func_ref_;
957   SatdFuncType satd_func_simd_;
958   ACMRandom rnd_;
959 };
960 
961 class SatdTest : public SatdTestBase<tran_low_t, SatdFunc> {
962  public:
SatdTest()963   SatdTest() : SatdTestBase(GetParam()) {}
964 };
965 
// Fills the buffer with the most negative value this test exercises; the
// expected total is |kMin| for each of the satd_size_ elements.
TEST_P(SatdTest, MinValue) {
  const int kMin = -524287;
  FillConstant(kMin);
  Check(-kMin * satd_size_);
}
// Fills the buffer with the largest value this test exercises; the expected
// total is kMax for each of the satd_size_ elements.
TEST_P(SatdTest, MaxValue) {
  const int kMax = 524287;
  FillConstant(kMax);
  Check(kMax * satd_size_);
}
// Checks both functions against a hard-coded total for the buffer produced by
// FillRandom(). One total is recorded per supported size; any other size
// fails the test before the buffer is filled.
TEST_P(SatdTest, Random) {
  int expected;
  if (satd_size_ == 16) {
    expected = 205298;
  } else if (satd_size_ == 64) {
    expected = 1113950;
  } else if (satd_size_ == 256) {
    expected = 4268415;
  } else if (satd_size_ == 1024) {
    expected = 16954082;
  } else {
    FAIL() << "Invalid satd size (" << satd_size_
           << ") valid: 16/64/256/1024";
  }
  FillRandom();
  Check(expected);
}
// Both functions must return the same total for a randomly filled buffer.
TEST_P(SatdTest, Match) {
  FillRandom();
  RunComparison();
}
// Timing comparison over a randomly filled buffer. The DISABLED_ prefix keeps
// it out of normal runs.
TEST_P(SatdTest, DISABLED_Speed) {
  FillRandom();
  RunSpeedTest();
}
1000 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdTest);
1001 
1002 INSTANTIATE_TEST_SUITE_P(
1003     C, SatdTest,
1004     ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_c),
1005                       SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_c),
1006                       SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_c),
1007                       SatdTestParam<SatdFunc>(1024, &aom_satd_c, &aom_satd_c)));
1008 
1009 #if HAVE_NEON
1010 INSTANTIATE_TEST_SUITE_P(
1011     NEON, SatdTest,
1012     ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_neon),
1013                       SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_neon),
1014                       SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_neon),
1015                       SatdTestParam<SatdFunc>(1024, &aom_satd_c,
1016                                               &aom_satd_neon)));
1017 INSTANTIATE_TEST_SUITE_P(
1018     NEON, VectorVarTest,
1019     ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_neon),
1020                       make_tuple(3, &aom_vector_var_c, &aom_vector_var_neon),
1021                       make_tuple(4, &aom_vector_var_c, &aom_vector_var_neon),
1022                       make_tuple(5, &aom_vector_var_c, &aom_vector_var_neon)));
1023 #endif
1024 
1025 #if HAVE_SVE
1026 INSTANTIATE_TEST_SUITE_P(
1027     SVE, VectorVarTest,
1028     ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_sve),
1029                       make_tuple(3, &aom_vector_var_c, &aom_vector_var_sve),
1030                       make_tuple(4, &aom_vector_var_c, &aom_vector_var_sve),
1031                       make_tuple(5, &aom_vector_var_c, &aom_vector_var_sve)));
1032 #endif  // HAVE_SVE
1033 
1034 #if HAVE_SSE4_1
1035 INSTANTIATE_TEST_SUITE_P(
1036     SSE4_1, VectorVarTest,
1037     ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_sse4_1),
1038                       make_tuple(3, &aom_vector_var_c, &aom_vector_var_sse4_1),
1039                       make_tuple(4, &aom_vector_var_c, &aom_vector_var_sse4_1),
1040                       make_tuple(5, &aom_vector_var_c,
1041                                  &aom_vector_var_sse4_1)));
1042 #endif  // HAVE_SSE4_1
1043 
1044 #if HAVE_AVX2
1045 INSTANTIATE_TEST_SUITE_P(
1046     AVX2, SatdTest,
1047     ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_avx2),
1048                       SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_avx2),
1049                       SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_avx2),
1050                       SatdTestParam<SatdFunc>(1024, &aom_satd_c,
1051                                               &aom_satd_avx2)));
1052 
1053 INSTANTIATE_TEST_SUITE_P(
1054     AVX2, VectorVarTest,
1055     ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_avx2),
1056                       make_tuple(3, &aom_vector_var_c, &aom_vector_var_avx2),
1057                       make_tuple(4, &aom_vector_var_c, &aom_vector_var_avx2),
1058                       make_tuple(5, &aom_vector_var_c, &aom_vector_var_avx2)));
1059 #endif  // HAVE_AVX2
1060 
1061 #if HAVE_SSE2
1062 INSTANTIATE_TEST_SUITE_P(
1063     SSE2, SatdTest,
1064     ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_sse2),
1065                       SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_sse2),
1066                       SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_sse2),
1067                       SatdTestParam<SatdFunc>(1024, &aom_satd_c,
1068                                               &aom_satd_sse2)));
1069 #endif
1070 
1071 class SatdLpTest : public SatdTestBase<int16_t, SatdLpFunc> {
1072  public:
SatdLpTest()1073   SatdLpTest() : SatdTestBase(GetParam()) {}
1074 };
1075 
// Fills the buffer with the most negative value this test exercises; the
// expected total is |kMin| for each of the satd_size_ elements.
TEST_P(SatdLpTest, MinValue) {
  const int kMin = -32640;
  FillConstant(kMin);
  Check(-kMin * satd_size_);
}
// Fills the buffer with the largest value this test exercises; the expected
// total is kMax for each of the satd_size_ elements.
TEST_P(SatdLpTest, MaxValue) {
  const int kMax = 32640;
  FillConstant(kMax);
  Check(kMax * satd_size_);
}
// Checks both functions against a hard-coded total for the buffer produced by
// FillRandom(). One total is recorded per supported size; any other size
// fails the test before the buffer is filled.
TEST_P(SatdLpTest, Random) {
  int expected;
  if (satd_size_ == 16) {
    expected = 205298;
  } else if (satd_size_ == 64) {
    expected = 1113950;
  } else if (satd_size_ == 256) {
    expected = 4268415;
  } else if (satd_size_ == 1024) {
    expected = 16954082;
  } else {
    FAIL() << "Invalid satd size (" << satd_size_
           << ") valid: 16/64/256/1024";
  }
  FillRandom();
  Check(expected);
}
// Both functions must return the same total for a randomly filled buffer.
TEST_P(SatdLpTest, Match) {
  FillRandom();
  RunComparison();
}
// Timing comparison over a randomly filled buffer. The DISABLED_ prefix keeps
// it out of normal runs.
TEST_P(SatdLpTest, DISABLED_Speed) {
  FillRandom();
  RunSpeedTest();
}
1110 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdLpTest);
1111 
1112 // Add the following c test to avoid gtest uninitialized warning.
1113 INSTANTIATE_TEST_SUITE_P(
1114     C, SatdLpTest,
1115     ::testing::Values(
1116         SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_c),
1117         SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_c),
1118         SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_c),
1119         SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_c)));
1120 
1121 #if HAVE_NEON
1122 INSTANTIATE_TEST_SUITE_P(
1123     NEON, SatdLpTest,
1124     ::testing::Values(
1125         SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_neon),
1126         SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_neon),
1127         SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_neon),
1128         SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_neon)));
1129 #endif
1130 
1131 #if HAVE_AVX2
1132 INSTANTIATE_TEST_SUITE_P(
1133     AVX2, SatdLpTest,
1134     ::testing::Values(
1135         SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_avx2),
1136         SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_avx2),
1137         SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_avx2),
1138         SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_avx2)));
1139 #endif
1140 
1141 #if HAVE_SSE2
1142 INSTANTIATE_TEST_SUITE_P(
1143     SSE2, SatdLpTest,
1144     ::testing::Values(
1145         SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_sse2),
1146         SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_sse2),
1147         SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_sse2),
1148         SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_sse2)));
1149 #endif
1150 
1151 }  // namespace
1152