1 /*
2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <stdlib.h>
13 #include <ostream>
14 #include <string>
15 #include <tuple>
16
17 #include "gtest/gtest.h"
18
19 #include "config/aom_config.h"
20 #include "config/aom_dsp_rtcd.h"
21
22 #include "aom_ports/aom_timer.h"
23 #include "aom_ports/mem.h"
24 #include "test/acm_random.h"
25 #include "test/register_state_check.h"
26 #include "test/util.h"
27
28 namespace {
29
30 using libaom_test::ACMRandom;
31
32 template <typename Pixel>
33 class AverageTestBase : public ::testing::Test {
34 public:
AverageTestBase(int width,int height,int bit_depth=8)35 AverageTestBase(int width, int height, int bit_depth = 8)
36 : width_(width), height_(height), source_data_(nullptr),
37 source_stride_(0), bit_depth_(bit_depth) {}
38
TearDown()39 void TearDown() override {
40 aom_free(source_data_);
41 source_data_ = nullptr;
42 }
43
44 protected:
45 // Handle blocks up to 4 blocks 64x64 with stride up to 128
46 static const int kDataAlignment = 16;
47 static const int kDataBlockWidth = 128;
48 static const int kDataBlockHeight = 128;
49 static const int kDataBlockSize = kDataBlockWidth * kDataBlockHeight;
50
SetUp()51 void SetUp() override {
52 const testing::TestInfo *const test_info =
53 testing::UnitTest::GetInstance()->current_test_info();
54 // Skip the speed test for C code as the baseline uses the same function.
55 if (std::string(test_info->test_suite_name()).find("C/") == 0 &&
56 std::string(test_info->name()).find("DISABLED_Speed") !=
57 std::string::npos) {
58 GTEST_SKIP();
59 }
60
61 source_data_ = static_cast<Pixel *>(
62 aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
63 ASSERT_NE(source_data_, nullptr);
64 memset(source_data_, 0, kDataBlockSize * sizeof(source_data_[0]));
65 source_stride_ = (width_ + 31) & ~31;
66 bit_depth_ = 8;
67 rnd_.Reset(ACMRandom::DeterministicSeed());
68 }
69
70 // Sum Pixels
ReferenceAverage8x8(const Pixel * source,int pitch)71 static unsigned int ReferenceAverage8x8(const Pixel *source, int pitch) {
72 unsigned int average = 0;
73 for (int h = 0; h < 8; ++h) {
74 for (int w = 0; w < 8; ++w) average += source[h * pitch + w];
75 }
76 return (average + 32) >> 6;
77 }
78
ReferenceAverage8x8_quad(const uint8_t * source,int pitch,int x16_idx,int y16_idx,int * avg)79 static void ReferenceAverage8x8_quad(const uint8_t *source, int pitch,
80 int x16_idx, int y16_idx, int *avg) {
81 for (int k = 0; k < 4; k++) {
82 int average = 0;
83 int x8_idx = x16_idx + ((k & 1) << 3);
84 int y8_idx = y16_idx + ((k >> 1) << 3);
85 for (int h = 0; h < 8; ++h) {
86 for (int w = 0; w < 8; ++w)
87 average += source[(h + y8_idx) * pitch + w + x8_idx];
88 }
89 avg[k] = (average + 32) >> 6;
90 }
91 }
92
ReferenceAverage4x4(const Pixel * source,int pitch)93 static unsigned int ReferenceAverage4x4(const Pixel *source, int pitch) {
94 unsigned int average = 0;
95 for (int h = 0; h < 4; ++h) {
96 for (int w = 0; w < 4; ++w) average += source[h * pitch + w];
97 }
98 return (average + 8) >> 4;
99 }
100
FillConstant(Pixel fill_constant)101 void FillConstant(Pixel fill_constant) {
102 for (int i = 0; i < width_ * height_; ++i) {
103 source_data_[i] = fill_constant;
104 }
105 }
106
FillRandom()107 void FillRandom() {
108 for (int i = 0; i < width_ * height_; ++i) {
109 source_data_[i] = rnd_.Rand16() & ((1 << bit_depth_) - 1);
110 }
111 }
112
113 int width_, height_;
114 Pixel *source_data_;
115 int source_stride_;
116 int bit_depth_;
117
118 ACMRandom rnd_;
119 };
120 typedef unsigned int (*AverageFunction)(const uint8_t *s, int pitch);
121
122 // Arguments: width, height, bit_depth, buffer start offset, block size, avg
123 // function.
124 typedef std::tuple<int, int, int, int, int, AverageFunction> AvgFunc;
125
126 template <typename Pixel>
127 class AverageTest : public AverageTestBase<Pixel>,
128 public ::testing::WithParamInterface<AvgFunc> {
129 public:
AverageTest()130 AverageTest()
131 : AverageTestBase<Pixel>(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)) {}
132
133 protected:
134 using AverageTestBase<Pixel>::source_data_;
135 using AverageTestBase<Pixel>::source_stride_;
136 using AverageTestBase<Pixel>::ReferenceAverage8x8;
137 using AverageTestBase<Pixel>::ReferenceAverage4x4;
138 using AverageTestBase<Pixel>::FillConstant;
139 using AverageTestBase<Pixel>::FillRandom;
140
CheckAverages()141 void CheckAverages() {
142 const int block_size = GET_PARAM(4);
143 unsigned int expected = 0;
144
145 // The reference frame, but not the source frame, may be unaligned for
146 // certain types of searches.
147 const Pixel *const src = source_data_ + GET_PARAM(3);
148 if (block_size == 8) {
149 expected = ReferenceAverage8x8(src, source_stride_);
150 } else if (block_size == 4) {
151 expected = ReferenceAverage4x4(src, source_stride_);
152 }
153
154 aom_usec_timer timer;
155 unsigned int actual;
156 if (sizeof(Pixel) == 2) {
157 #if CONFIG_AV1_HIGHBITDEPTH
158 AverageFunction avg_c =
159 (block_size == 8) ? aom_highbd_avg_8x8_c : aom_highbd_avg_4x4_c;
160 // To avoid differences in optimization with the local Reference*()
161 // functions the C implementation is used as a baseline.
162 aom_usec_timer_start(&timer);
163 avg_c(CONVERT_TO_BYTEPTR(src), source_stride_);
164 aom_usec_timer_mark(&timer);
165 ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
166
167 AverageFunction avg_opt = GET_PARAM(5);
168 API_REGISTER_STATE_CHECK(
169 aom_usec_timer_start(&timer);
170 actual = avg_opt(CONVERT_TO_BYTEPTR(src), source_stride_);
171 aom_usec_timer_mark(&timer));
172 #endif // CONFIG_AV1_HIGHBITDEPTH
173 } else {
174 ASSERT_EQ(sizeof(Pixel), 1u);
175
176 AverageFunction avg_c = (block_size == 8) ? aom_avg_8x8_c : aom_avg_4x4_c;
177 aom_usec_timer_start(&timer);
178 avg_c(reinterpret_cast<const uint8_t *>(src), source_stride_);
179 aom_usec_timer_mark(&timer);
180 ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
181
182 AverageFunction avg_opt = GET_PARAM(5);
183 API_REGISTER_STATE_CHECK(
184 aom_usec_timer_start(&timer);
185 actual =
186 avg_opt(reinterpret_cast<const uint8_t *>(src), source_stride_);
187 aom_usec_timer_mark(&timer));
188 }
189 opt_elapsed_time_ += aom_usec_timer_elapsed(&timer);
190
191 EXPECT_EQ(expected, actual);
192 }
193
TestConstantValue(Pixel value)194 void TestConstantValue(Pixel value) {
195 FillConstant(value);
196 CheckAverages();
197 }
198
TestRandom(int iterations=1000)199 void TestRandom(int iterations = 1000) {
200 for (int i = 0; i < iterations; i++) {
201 FillRandom();
202 CheckAverages();
203 }
204 }
205
PrintTimingStats() const206 void PrintTimingStats() const {
207 printf(
208 "block_size = %d \t ref_time = %d \t simd_time = %d \t Gain = %4.2f\n",
209 GET_PARAM(4), static_cast<int>(ref_elapsed_time_),
210 static_cast<int>(opt_elapsed_time_),
211 (static_cast<float>(ref_elapsed_time_) /
212 static_cast<float>(opt_elapsed_time_)));
213 }
214
215 int64_t ref_elapsed_time_ = 0;
216 int64_t opt_elapsed_time_ = 0;
217 };
218
219 typedef void (*AverageFunction_8x8_quad)(const uint8_t *s, int pitch, int x_idx,
220 int y_idx, int *avg);
221
222 // Arguments: width, height, bit_depth, buffer start offset, block size, avg
223 // function.
224 typedef std::tuple<int, int, int, int, int, AverageFunction_8x8_quad>
225 AvgFunc_8x8_quad;
226
227 template <typename Pixel>
228 class AverageTest_8x8_quad
229 : public AverageTestBase<Pixel>,
230 public ::testing::WithParamInterface<AvgFunc_8x8_quad> {
231 public:
AverageTest_8x8_quad()232 AverageTest_8x8_quad()
233 : AverageTestBase<Pixel>(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)) {}
234
235 protected:
236 using AverageTestBase<Pixel>::source_data_;
237 using AverageTestBase<Pixel>::source_stride_;
238 using AverageTestBase<Pixel>::ReferenceAverage8x8_quad;
239 using AverageTestBase<Pixel>::FillConstant;
240 using AverageTestBase<Pixel>::FillRandom;
241
CheckAveragesAt(int iterations,int x16_idx,int y16_idx)242 void CheckAveragesAt(int iterations, int x16_idx, int y16_idx) {
243 ASSERT_EQ(sizeof(Pixel), 1u);
244 const int block_size = GET_PARAM(4);
245 (void)block_size;
246 int expected[4] = { 0 };
247
248 // The reference frame, but not the source frame, may be unaligned for
249 // certain types of searches.
250 const Pixel *const src = source_data_ + GET_PARAM(3);
251 ReferenceAverage8x8_quad(src, source_stride_, x16_idx, y16_idx, expected);
252
253 aom_usec_timer timer;
254 int expected_c[4] = { 0 };
255 int actual[4] = { 0 };
256 AverageFunction_8x8_quad avg_c = aom_avg_8x8_quad_c;
257 aom_usec_timer_start(&timer);
258 for (int i = 0; i < iterations; i++) {
259 avg_c(reinterpret_cast<const uint8_t *>(src), source_stride_, x16_idx,
260 y16_idx, expected_c);
261 }
262 aom_usec_timer_mark(&timer);
263 ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
264
265 AverageFunction_8x8_quad avg_opt = GET_PARAM(5);
266 aom_usec_timer_start(&timer);
267 for (int i = 0; i < iterations; i++) {
268 avg_opt(reinterpret_cast<const uint8_t *>(src), source_stride_, x16_idx,
269 y16_idx, actual);
270 }
271 aom_usec_timer_mark(&timer);
272 opt_elapsed_time_ += aom_usec_timer_elapsed(&timer);
273
274 for (int k = 0; k < 4; k++) {
275 EXPECT_EQ(expected[k], actual[k]);
276 EXPECT_EQ(expected_c[k], actual[k]);
277 }
278
279 // Print scaling information only when Speed test is called.
280 if (iterations > 1) {
281 printf("ref_time = %d \t simd_time = %d \t Gain = %4.2f\n",
282 static_cast<int>(ref_elapsed_time_),
283 static_cast<int>(opt_elapsed_time_),
284 (static_cast<float>(ref_elapsed_time_) /
285 static_cast<float>(opt_elapsed_time_)));
286 }
287 }
288
CheckAverages()289 void CheckAverages() {
290 for (int x16_idx = 0; x16_idx < this->kDataBlockWidth / 8; x16_idx += 2)
291 for (int y16_idx = 0; y16_idx < this->kDataBlockHeight / 8; y16_idx += 2)
292 CheckAveragesAt(1, x16_idx, y16_idx);
293 }
294
TestConstantValue(Pixel value)295 void TestConstantValue(Pixel value) {
296 FillConstant(value);
297 CheckAverages();
298 }
299
TestRandom()300 void TestRandom() {
301 FillRandom();
302 CheckAverages();
303 }
304
TestSpeed()305 void TestSpeed() {
306 FillRandom();
307 CheckAveragesAt(1000000, 0, 0);
308 }
309
310 int64_t ref_elapsed_time_ = 0;
311 int64_t opt_elapsed_time_ = 0;
312 };
313
314 using AverageTest8bpp = AverageTest<uint8_t>;
315
TEST_P(AverageTest8bpp,MinValue)316 TEST_P(AverageTest8bpp, MinValue) { TestConstantValue(0); }
317
TEST_P(AverageTest8bpp,MaxValue)318 TEST_P(AverageTest8bpp, MaxValue) { TestConstantValue(255); }
319
TEST_P(AverageTest8bpp,Random)320 TEST_P(AverageTest8bpp, Random) { TestRandom(); }
321
TEST_P(AverageTest8bpp,DISABLED_Speed)322 TEST_P(AverageTest8bpp, DISABLED_Speed) {
323 TestRandom(1000000);
324 PrintTimingStats();
325 }
326
327 using AvgTest8bpp_avg_8x8_quad = AverageTest_8x8_quad<uint8_t>;
328
TEST_P(AvgTest8bpp_avg_8x8_quad,MinValue)329 TEST_P(AvgTest8bpp_avg_8x8_quad, MinValue) { TestConstantValue(0); }
330
TEST_P(AvgTest8bpp_avg_8x8_quad,MaxValue)331 TEST_P(AvgTest8bpp_avg_8x8_quad, MaxValue) { TestConstantValue(255); }
332
TEST_P(AvgTest8bpp_avg_8x8_quad,Random)333 TEST_P(AvgTest8bpp_avg_8x8_quad, Random) { TestRandom(); }
334
TEST_P(AvgTest8bpp_avg_8x8_quad,DISABLED_Speed)335 TEST_P(AvgTest8bpp_avg_8x8_quad, DISABLED_Speed) { TestSpeed(); }
336
#if CONFIG_AV1_HIGHBITDEPTH
// 16-bit-pixel instantiation of the average test fixture.
using AverageTestHbd = AverageTest<uint16_t>;

// Buffer filled with zero.
TEST_P(AverageTestHbd, MinValue) {
  const uint16_t kLowest = 0;
  TestConstantValue(kLowest);
}

// Buffer filled with the largest 10-bit value (1023).
TEST_P(AverageTestHbd, MaxValue10bit) {
  const uint16_t kHighest10 = (1 << 10) - 1;
  TestConstantValue(kHighest10);
}

// Buffer filled with the largest 12-bit value (4095).
TEST_P(AverageTestHbd, MaxValue12bit) {
  const uint16_t kHighest12 = (1 << 12) - 1;
  TestConstantValue(kHighest12);
}

// Random buffers, default iteration count.
TEST_P(AverageTestHbd, Random) {
  TestRandom();
}

// Timing run; disabled by default.
TEST_P(AverageTestHbd, DISABLED_Speed) {
  const int kSpeedIterations = 1000000;
  TestRandom(kSpeedIterations);
  PrintTimingStats();
}
#endif  // CONFIG_AV1_HIGHBITDEPTH
352
353 typedef void (*IntProRowFunc)(int16_t *hbuf, uint8_t const *ref,
354 const int ref_stride, const int width,
355 const int height, int norm_factor);
356
357 // Params: width, height, asm function, c function.
358 typedef std::tuple<int, int, IntProRowFunc, IntProRowFunc> IntProRowParam;
359
360 class IntProRowTest : public AverageTestBase<uint8_t>,
361 public ::testing::WithParamInterface<IntProRowParam> {
362 public:
IntProRowTest()363 IntProRowTest()
364 : AverageTestBase(GET_PARAM(0), GET_PARAM(1)), hbuf_asm_(nullptr),
365 hbuf_c_(nullptr) {
366 asm_func_ = GET_PARAM(2);
367 c_func_ = GET_PARAM(3);
368 }
369
set_norm_factor()370 void set_norm_factor() {
371 if (height_ == 128)
372 norm_factor_ = 6;
373 else if (height_ == 64)
374 norm_factor_ = 5;
375 else if (height_ == 32)
376 norm_factor_ = 4;
377 else if (height_ == 16)
378 norm_factor_ = 3;
379 }
380
381 protected:
SetUp()382 void SetUp() override {
383 source_data_ = static_cast<uint8_t *>(
384 aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
385 ASSERT_NE(source_data_, nullptr);
386
387 hbuf_asm_ = static_cast<int16_t *>(
388 aom_memalign(kDataAlignment, sizeof(*hbuf_asm_) * width_));
389 ASSERT_NE(hbuf_asm_, nullptr);
390 hbuf_c_ = static_cast<int16_t *>(
391 aom_memalign(kDataAlignment, sizeof(*hbuf_c_) * width_));
392 ASSERT_NE(hbuf_c_, nullptr);
393 }
394
TearDown()395 void TearDown() override {
396 aom_free(source_data_);
397 source_data_ = nullptr;
398 aom_free(hbuf_c_);
399 hbuf_c_ = nullptr;
400 aom_free(hbuf_asm_);
401 hbuf_asm_ = nullptr;
402 }
403
RunComparison()404 void RunComparison() {
405 set_norm_factor();
406 API_REGISTER_STATE_CHECK(
407 c_func_(hbuf_c_, source_data_, width_, width_, height_, norm_factor_));
408 API_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, width_, width_,
409 height_, norm_factor_));
410 EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * width_))
411 << "Output mismatch\n";
412 }
413
RunSpeedTest()414 void RunSpeedTest() {
415 const int numIter = 5000000;
416 set_norm_factor();
417 printf("Blk_Size=%dx%d: number of iteration is %d \n", width_, height_,
418 numIter);
419 aom_usec_timer c_timer_;
420 aom_usec_timer_start(&c_timer_);
421 for (int i = 0; i < numIter; i++) {
422 c_func_(hbuf_c_, source_data_, width_, width_, height_, norm_factor_);
423 }
424 aom_usec_timer_mark(&c_timer_);
425
426 aom_usec_timer asm_timer_;
427 aom_usec_timer_start(&asm_timer_);
428
429 for (int i = 0; i < numIter; i++) {
430 asm_func_(hbuf_asm_, source_data_, width_, width_, height_, norm_factor_);
431 }
432 aom_usec_timer_mark(&asm_timer_);
433
434 const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
435 const int asm_sum_time =
436 static_cast<int>(aom_usec_timer_elapsed(&asm_timer_));
437
438 printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
439 asm_sum_time,
440 (static_cast<float>(c_sum_time) / static_cast<float>(asm_sum_time)));
441
442 EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * width_))
443 << "Output mismatch\n";
444 }
445
446 private:
447 IntProRowFunc asm_func_;
448 IntProRowFunc c_func_;
449 int16_t *hbuf_asm_;
450 int16_t *hbuf_c_;
451 int norm_factor_;
452 };
453 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(IntProRowTest);
454
455 typedef void (*IntProColFunc)(int16_t *vbuf, uint8_t const *ref,
456 const int ref_stride, const int width,
457 const int height, int norm_factor);
458
459 // Params: width, height, asm function, c function.
460 typedef std::tuple<int, int, IntProColFunc, IntProColFunc> IntProColParam;
461
462 class IntProColTest : public AverageTestBase<uint8_t>,
463 public ::testing::WithParamInterface<IntProColParam> {
464 public:
IntProColTest()465 IntProColTest()
466 : AverageTestBase(GET_PARAM(0), GET_PARAM(1)), vbuf_asm_(nullptr),
467 vbuf_c_(nullptr) {
468 asm_func_ = GET_PARAM(2);
469 c_func_ = GET_PARAM(3);
470 }
471
472 protected:
SetUp()473 void SetUp() override {
474 source_data_ = static_cast<uint8_t *>(
475 aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
476 ASSERT_NE(source_data_, nullptr);
477
478 vbuf_asm_ = static_cast<int16_t *>(
479 aom_memalign(kDataAlignment, sizeof(*vbuf_asm_) * width_));
480 ASSERT_NE(vbuf_asm_, nullptr);
481 vbuf_c_ = static_cast<int16_t *>(
482 aom_memalign(kDataAlignment, sizeof(*vbuf_c_) * width_));
483 ASSERT_NE(vbuf_c_, nullptr);
484 }
485
TearDown()486 void TearDown() override {
487 aom_free(source_data_);
488 source_data_ = nullptr;
489 aom_free(vbuf_c_);
490 vbuf_c_ = nullptr;
491 aom_free(vbuf_asm_);
492 vbuf_asm_ = nullptr;
493 }
494
RunComparison()495 void RunComparison() {
496 int norm_factor_ = 3 + (width_ >> 5);
497 API_REGISTER_STATE_CHECK(
498 c_func_(vbuf_c_, source_data_, width_, width_, height_, norm_factor_));
499 API_REGISTER_STATE_CHECK(asm_func_(vbuf_asm_, source_data_, width_, width_,
500 height_, norm_factor_));
501 EXPECT_EQ(0, memcmp(vbuf_c_, vbuf_asm_, sizeof(*vbuf_c_) * height_))
502 << "Output mismatch\n";
503 }
RunSpeedTest()504 void RunSpeedTest() {
505 const int numIter = 5000000;
506 printf("Blk_Size=%dx%d: number of iteration is %d \n", width_, height_,
507 numIter);
508 int norm_factor_ = 3 + (width_ >> 5);
509 aom_usec_timer c_timer_;
510 aom_usec_timer_start(&c_timer_);
511 for (int i = 0; i < numIter; i++) {
512 c_func_(vbuf_c_, source_data_, width_, width_, height_, norm_factor_);
513 }
514 aom_usec_timer_mark(&c_timer_);
515
516 aom_usec_timer asm_timer_;
517 aom_usec_timer_start(&asm_timer_);
518
519 for (int i = 0; i < numIter; i++) {
520 asm_func_(vbuf_asm_, source_data_, width_, width_, height_, norm_factor_);
521 }
522 aom_usec_timer_mark(&asm_timer_);
523
524 const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
525 const int asm_sum_time =
526 static_cast<int>(aom_usec_timer_elapsed(&asm_timer_));
527
528 printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
529 asm_sum_time,
530 (static_cast<float>(c_sum_time) / static_cast<float>(asm_sum_time)));
531
532 EXPECT_EQ(0, memcmp(vbuf_c_, vbuf_asm_, sizeof(*vbuf_c_) * height_))
533 << "Output mismatch\n";
534 }
535
536 private:
537 IntProColFunc asm_func_;
538 IntProColFunc c_func_;
539 int16_t *vbuf_asm_;
540 int16_t *vbuf_c_;
541 };
542 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(IntProColTest);
543
// All-zero block.
TEST_P(IntProRowTest, MinValue) {
  const uint8_t kLowest = 0;
  FillConstant(kLowest);
  RunComparison();
}

// Block filled with 255.
TEST_P(IntProRowTest, MaxValue) {
  const uint8_t kHighest = 255;
  FillConstant(kHighest);
  RunComparison();
}

// Randomly filled block.
TEST_P(IntProRowTest, Random) {
  FillRandom();
  RunComparison();
}

// Timing run on a randomly filled block; disabled by default.
TEST_P(IntProRowTest, DISABLED_Speed) {
  FillRandom();
  RunSpeedTest();
}
563
// All-zero block.
TEST_P(IntProColTest, MinValue) {
  const uint8_t kLowest = 0;
  FillConstant(kLowest);
  RunComparison();
}

// Block filled with 255.
TEST_P(IntProColTest, MaxValue) {
  const uint8_t kHighest = 255;
  FillConstant(kHighest);
  RunComparison();
}

// Randomly filled block.
TEST_P(IntProColTest, Random) {
  FillRandom();
  RunComparison();
}

// Timing run on a randomly filled block; disabled by default.
TEST_P(IntProColTest, DISABLED_Speed) {
  FillRandom();
  RunSpeedTest();
}
583 class VectorVarTestBase : public ::testing::Test {
584 public:
VectorVarTestBase(int bwl)585 explicit VectorVarTestBase(int bwl) { m_bwl = bwl; }
586 VectorVarTestBase() = default;
587 ~VectorVarTestBase() override = default;
588
589 protected:
590 static const int kDataAlignment = 16;
591
SetUp()592 void SetUp() override {
593 width = 4 << m_bwl;
594
595 ref_vector = static_cast<int16_t *>(
596 aom_memalign(kDataAlignment, width * sizeof(ref_vector[0])));
597 ASSERT_NE(ref_vector, nullptr);
598 src_vector = static_cast<int16_t *>(
599 aom_memalign(kDataAlignment, width * sizeof(src_vector[0])));
600 ASSERT_NE(src_vector, nullptr);
601
602 rnd_.Reset(ACMRandom::DeterministicSeed());
603 }
TearDown()604 void TearDown() override {
605 aom_free(ref_vector);
606 ref_vector = nullptr;
607 aom_free(src_vector);
608 src_vector = nullptr;
609 }
610
FillConstant(int16_t fill_constant_ref,int16_t fill_constant_src)611 void FillConstant(int16_t fill_constant_ref, int16_t fill_constant_src) {
612 for (int i = 0; i < width; ++i) {
613 ref_vector[i] = fill_constant_ref;
614 src_vector[i] = fill_constant_src;
615 }
616 }
617
FillRandom()618 void FillRandom() {
619 for (int i = 0; i < width; ++i) {
620 ref_vector[i] =
621 rnd_.Rand16() % max_range; // acc. aom_vector_var_c brief.
622 src_vector[i] = rnd_.Rand16() % max_range;
623 }
624 }
625
626 int width;
627 int m_bwl;
628 int16_t *ref_vector;
629 int16_t *src_vector;
630 ACMRandom rnd_;
631
632 static const int max_range = 510;
633 static const int num_random_cmp = 50;
634 };
635
636 typedef int (*VectorVarFunc)(const int16_t *ref, const int16_t *src,
637 const int bwl);
638
639 typedef std::tuple<int, VectorVarFunc, VectorVarFunc> VecVarFunc;
640
641 class VectorVarTest : public VectorVarTestBase,
642 public ::testing::WithParamInterface<VecVarFunc> {
643 public:
VectorVarTest()644 VectorVarTest()
645 : VectorVarTestBase(GET_PARAM(0)), c_func(GET_PARAM(1)),
646 simd_func(GET_PARAM(2)) {}
647
648 protected:
calcVarC()649 int calcVarC() { return c_func(ref_vector, src_vector, m_bwl); }
calcVarSIMD()650 int calcVarSIMD() { return simd_func(ref_vector, src_vector, m_bwl); }
651
652 VectorVarFunc c_func;
653 VectorVarFunc simd_func;
654 };
655 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(VectorVarTest);
656
// Reference all zero, source all max_range.
TEST_P(VectorVarTest, MaxVar) {
  FillConstant(0, max_range);
  const int want = calcVarC();
  const int got = calcVarSIMD();
  ASSERT_EQ(want, got);
}

// Reference all max_range, source all zero.
TEST_P(VectorVarTest, MaxVarRev) {
  FillConstant(max_range, 0);
  const int want = calcVarC();
  const int got = calcVarSIMD();
  ASSERT_EQ(want, got);
}

// Both vectors all zero.
TEST_P(VectorVarTest, ZeroDiff) {
  FillConstant(0, 0);
  const int want = calcVarC();
  const int got = calcVarSIMD();
  ASSERT_EQ(want, got);
}

// Both vectors all max_range.
TEST_P(VectorVarTest, ZeroDiff2) {
  FillConstant(max_range, max_range);
  const int want = calcVarC();
  const int got = calcVarSIMD();
  ASSERT_EQ(want, got);
}

// Two different constants.
TEST_P(VectorVarTest, Constant) {
  FillConstant(30, 90);
  const int want = calcVarC();
  const int got = calcVarSIMD();
  ASSERT_EQ(want, got);
}

// num_random_cmp rounds of random data; stops at the first mismatch.
TEST_P(VectorVarTest, Random) {
  for (int round = 0; round < num_random_cmp; ++round) {
    FillRandom();
    const int want = calcVarC();
    const int got = calcVarSIMD();
    ASSERT_EQ(want, got);
  }
}
// Times numIter calls of the C and SIMD variance functions on one random
// input, prints the ratio and checks that the results agree.
TEST_P(VectorVarTest, DISABLED_Speed) {
  FillRandom();
  const int numIter = 5000000;
  printf("Width = %d number of iteration is %d \n", width, numIter);

  // The sums use 64 bits: with 5000000 iterations any per-call result above
  // 429 would overflow a 32-bit signed accumulator, which is undefined
  // behavior.
  int64_t sum_c_var = 0;
  int c_var = 0;

  aom_usec_timer c_timer_;
  aom_usec_timer_start(&c_timer_);
  for (int i = 0; i < numIter; i++) {
    c_var = calcVarC();
    sum_c_var += c_var;
  }
  aom_usec_timer_mark(&c_timer_);

  int simd_var = 0;
  int64_t sum_simd_var = 0;
  aom_usec_timer simd_timer_;
  aom_usec_timer_start(&simd_timer_);
  for (int i = 0; i < numIter; i++) {
    simd_var = calcVarSIMD();
    sum_simd_var += simd_var;
  }
  aom_usec_timer_mark(&simd_timer_);

  const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
  const int simd_sum_time =
      static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));

  printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
         simd_sum_time,
         (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));

  EXPECT_EQ(c_var, simd_var) << "Output mismatch \n";
  EXPECT_EQ(sum_c_var, sum_simd_var) << "Output mismatch \n";
}
732
using std::make_tuple;

// C instantiations. Tuple layout for both suites: width, height, bit_depth,
// buffer start offset, block size, function under test.
INSTANTIATE_TEST_SUITE_P(
    C, AverageTest8bpp,
    ::testing::Values(make_tuple(16, 16, 8, 1, 8, &aom_avg_8x8_c),
                      make_tuple(16, 16, 8, 1, 4, &aom_avg_4x4_c)));

INSTANTIATE_TEST_SUITE_P(
    C, AvgTest8bpp_avg_8x8_quad,
    ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_c),
                      make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_c),
                      make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_c)));
745
#if HAVE_SSE2
// SSE2 instantiations, compiled only when HAVE_SSE2 is set.
// Average suites: width, height, bit_depth, buffer start offset, block size,
// function under test.
INSTANTIATE_TEST_SUITE_P(
    SSE2, AverageTest8bpp,
    ::testing::Values(make_tuple(16, 16, 8, 0, 8, &aom_avg_8x8_sse2),
                      make_tuple(16, 16, 8, 5, 8, &aom_avg_8x8_sse2),
                      make_tuple(32, 32, 8, 15, 8, &aom_avg_8x8_sse2),
                      make_tuple(16, 16, 8, 0, 4, &aom_avg_4x4_sse2),
                      make_tuple(16, 16, 8, 5, 4, &aom_avg_4x4_sse2),
                      make_tuple(32, 32, 8, 15, 4, &aom_avg_4x4_sse2)));

INSTANTIATE_TEST_SUITE_P(
    SSE2, AvgTest8bpp_avg_8x8_quad,
    ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_sse2),
                      make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_sse2),
                      make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_sse2)));

// Projection suites: width, height, optimized function, C function.
INSTANTIATE_TEST_SUITE_P(
    SSE2, IntProRowTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
        make_tuple(32, 32, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
        make_tuple(64, 64, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
        make_tuple(128, 128, &aom_int_pro_row_sse2, &aom_int_pro_row_c)));

INSTANTIATE_TEST_SUITE_P(
    SSE2, IntProColTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
        make_tuple(32, 32, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
        make_tuple(64, 64, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
        make_tuple(128, 128, &aom_int_pro_col_sse2, &aom_int_pro_col_c)));
#endif
778
#if HAVE_AVX2
// AVX2 instantiations, compiled only when HAVE_AVX2 is set.
// Quad-average suite: width, height, bit_depth, buffer start offset, block
// size, function under test.
INSTANTIATE_TEST_SUITE_P(
    AVX2, AvgTest8bpp_avg_8x8_quad,
    ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_avx2),
                      make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_avx2),
                      make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_avx2)));

// Projection suites: width, height, optimized function, C function.
INSTANTIATE_TEST_SUITE_P(
    AVX2, IntProRowTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
        make_tuple(32, 32, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
        make_tuple(64, 64, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
        make_tuple(128, 128, &aom_int_pro_row_avx2, &aom_int_pro_row_c)));

INSTANTIATE_TEST_SUITE_P(
    AVX2, IntProColTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
        make_tuple(32, 32, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
        make_tuple(64, 64, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
        make_tuple(128, 128, &aom_int_pro_col_avx2, &aom_int_pro_col_c)));
#endif
802
#if HAVE_NEON
// NEON instantiations, compiled only when HAVE_NEON is set.
// Average suite: width, height, bit_depth, buffer start offset, block size,
// function under test.
INSTANTIATE_TEST_SUITE_P(
    NEON, AverageTest8bpp,
    ::testing::Values(make_tuple(16, 16, 8, 0, 8, &aom_avg_8x8_neon),
                      make_tuple(16, 16, 8, 5, 8, &aom_avg_8x8_neon),
                      make_tuple(32, 32, 8, 15, 8, &aom_avg_8x8_neon),
                      make_tuple(16, 16, 8, 0, 4, &aom_avg_4x4_neon),
                      make_tuple(16, 16, 8, 5, 4, &aom_avg_4x4_neon),
                      make_tuple(32, 32, 8, 15, 4, &aom_avg_4x4_neon)));
// Projection suites: width, height, optimized function, C function.
INSTANTIATE_TEST_SUITE_P(
    NEON, IntProRowTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_row_neon, &aom_int_pro_row_c),
        make_tuple(32, 32, &aom_int_pro_row_neon, &aom_int_pro_row_c),
        make_tuple(64, 64, &aom_int_pro_row_neon, &aom_int_pro_row_c),
        make_tuple(128, 128, &aom_int_pro_row_neon, &aom_int_pro_row_c)));

INSTANTIATE_TEST_SUITE_P(
    NEON, IntProColTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_col_neon, &aom_int_pro_col_c),
        make_tuple(32, 32, &aom_int_pro_col_neon, &aom_int_pro_col_c),
        make_tuple(64, 64, &aom_int_pro_col_neon, &aom_int_pro_col_c),
        make_tuple(128, 128, &aom_int_pro_col_neon, &aom_int_pro_col_c)));

// Quad-average suite: same tuple layout as the average suite above.
INSTANTIATE_TEST_SUITE_P(
    NEON, AvgTest8bpp_avg_8x8_quad,
    ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_neon),
                      make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_neon),
                      make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_neon)));
#endif
834
#if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth instantiations. Tuple layout: width, height, bit_depth,
// buffer start offset, block size, function under test. Each function is
// listed for both 10-bit and 12-bit depth.
INSTANTIATE_TEST_SUITE_P(
    C, AverageTestHbd,
    ::testing::Values(make_tuple(16, 16, 10, 1, 8, &aom_highbd_avg_8x8_c),
                      make_tuple(16, 16, 10, 1, 4, &aom_highbd_avg_4x4_c),
                      make_tuple(16, 16, 12, 1, 8, &aom_highbd_avg_8x8_c),
                      make_tuple(16, 16, 12, 1, 4, &aom_highbd_avg_4x4_c)));

#if HAVE_NEON
// NEON high-bitdepth variants at buffer offsets 0, 5 and 15.
INSTANTIATE_TEST_SUITE_P(
    NEON, AverageTestHbd,
    ::testing::Values(make_tuple(16, 16, 10, 0, 4, &aom_highbd_avg_4x4_neon),
                      make_tuple(16, 16, 10, 5, 4, &aom_highbd_avg_4x4_neon),
                      make_tuple(32, 32, 10, 15, 4, &aom_highbd_avg_4x4_neon),
                      make_tuple(16, 16, 12, 0, 4, &aom_highbd_avg_4x4_neon),
                      make_tuple(16, 16, 12, 5, 4, &aom_highbd_avg_4x4_neon),
                      make_tuple(32, 32, 12, 15, 4, &aom_highbd_avg_4x4_neon),
                      make_tuple(16, 16, 10, 0, 8, &aom_highbd_avg_8x8_neon),
                      make_tuple(16, 16, 10, 5, 8, &aom_highbd_avg_8x8_neon),
                      make_tuple(32, 32, 10, 15, 8, &aom_highbd_avg_8x8_neon),
                      make_tuple(16, 16, 12, 0, 8, &aom_highbd_avg_8x8_neon),
                      make_tuple(16, 16, 12, 5, 8, &aom_highbd_avg_8x8_neon),
                      make_tuple(32, 32, 12, 15, 8, &aom_highbd_avg_8x8_neon)));
#endif  // HAVE_NEON
#endif  // CONFIG_AV1_HIGHBITDEPTH
860
861 typedef int (*SatdFunc)(const tran_low_t *coeffs, int length);
862 typedef int (*SatdLpFunc)(const int16_t *coeffs, int length);
863
// Parameter bundle for the SATD tests: the coefficient count plus the
// reference and SIMD functions to compare.
template <typename SatdFuncType>
struct SatdTestParam {
  // s: number of coefficients; f1: reference function; f2: SIMD function.
  SatdTestParam(int s, SatdFuncType f1, SatdFuncType f2)
      : satd_size(s), func_ref(f1), func_simd(f2) {}
  // Streams only the size, as "satd_size: <n>"; the function pointers are not
  // printed.
  friend std::ostream &operator<<(std::ostream &os,
                                  const SatdTestParam<SatdFuncType> &param) {
    return os << "satd_size: " << param.satd_size;
  }
  int satd_size;
  SatdFuncType func_ref;
  SatdFuncType func_simd;
};
876
877 template <typename CoeffType, typename SatdFuncType>
878 class SatdTestBase
879 : public ::testing::Test,
880 public ::testing::WithParamInterface<SatdTestParam<SatdFuncType>> {
881 protected:
SatdTestBase(const SatdTestParam<SatdFuncType> & func_param)882 explicit SatdTestBase(const SatdTestParam<SatdFuncType> &func_param) {
883 satd_size_ = func_param.satd_size;
884 satd_func_ref_ = func_param.func_ref;
885 satd_func_simd_ = func_param.func_simd;
886 }
SetUp()887 void SetUp() override {
888 rnd_.Reset(ACMRandom::DeterministicSeed());
889 src_ = reinterpret_cast<CoeffType *>(
890 aom_memalign(32, sizeof(*src_) * satd_size_));
891 ASSERT_NE(src_, nullptr);
892 }
TearDown()893 void TearDown() override { aom_free(src_); }
FillConstant(const CoeffType val)894 void FillConstant(const CoeffType val) {
895 for (int i = 0; i < satd_size_; ++i) src_[i] = val;
896 }
FillRandom()897 void FillRandom() {
898 for (int i = 0; i < satd_size_; ++i) {
899 src_[i] = static_cast<int16_t>(rnd_.Rand16());
900 }
901 }
Check(int expected)902 void Check(int expected) {
903 int total_ref;
904 API_REGISTER_STATE_CHECK(total_ref = satd_func_ref_(src_, satd_size_));
905 EXPECT_EQ(expected, total_ref);
906
907 int total_simd;
908 API_REGISTER_STATE_CHECK(total_simd = satd_func_simd_(src_, satd_size_));
909 EXPECT_EQ(expected, total_simd);
910 }
RunComparison()911 void RunComparison() {
912 int total_ref;
913 API_REGISTER_STATE_CHECK(total_ref = satd_func_ref_(src_, satd_size_));
914
915 int total_simd;
916 API_REGISTER_STATE_CHECK(total_simd = satd_func_simd_(src_, satd_size_));
917
918 EXPECT_EQ(total_ref, total_simd);
919 }
RunSpeedTest()920 void RunSpeedTest() {
921 const int numIter = 500000;
922 printf("size = %d number of iteration is %d \n", satd_size_, numIter);
923
924 int total_ref;
925 aom_usec_timer c_timer_;
926 aom_usec_timer_start(&c_timer_);
927 for (int i = 0; i < numIter; i++) {
928 total_ref = satd_func_ref_(src_, satd_size_);
929 }
930 aom_usec_timer_mark(&c_timer_);
931
932 int total_simd;
933 aom_usec_timer simd_timer_;
934 aom_usec_timer_start(&simd_timer_);
935
936 for (int i = 0; i < numIter; i++) {
937 total_simd = satd_func_simd_(src_, satd_size_);
938 }
939 aom_usec_timer_mark(&simd_timer_);
940
941 const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
942 const int simd_sum_time =
943 static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));
944
945 printf(
946 "c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
947 simd_sum_time,
948 (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
949
950 EXPECT_EQ(total_ref, total_simd) << "Output mismatch \n";
951 }
952 int satd_size_;
953
954 private:
955 CoeffType *src_;
956 SatdFuncType satd_func_ref_;
957 SatdFuncType satd_func_simd_;
958 ACMRandom rnd_;
959 };
960
961 class SatdTest : public SatdTestBase<tran_low_t, SatdFunc> {
962 public:
SatdTest()963 SatdTest() : SatdTestBase(GetParam()) {}
964 };
965
// Every coefficient is set to the same negative constant; both
// implementations must return its magnitude times the block size.
TEST_P(SatdTest, MinValue) {
  constexpr int kMinCoeff = -524287;
  FillConstant(kMinCoeff);
  Check(-kMinCoeff * satd_size_);
}
// Every coefficient is set to the same positive constant; both
// implementations must return that constant times the block size.
TEST_P(SatdTest, MaxValue) {
  constexpr int kMaxCoeff = 524287;
  FillConstant(kMaxCoeff);
  Check(kMaxCoeff * satd_size_);
}
// Fills the buffer from the deterministically seeded generator and compares
// both implementations against the golden value recorded for the block size.
// Any size other than 16/64/256/1024 fails the test immediately.
TEST_P(SatdTest, Random) {
  int golden = 0;
  if (satd_size_ == 16) {
    golden = 205298;
  } else if (satd_size_ == 64) {
    golden = 1113950;
  } else if (satd_size_ == 256) {
    golden = 4268415;
  } else if (satd_size_ == 1024) {
    golden = 16954082;
  } else {
    FAIL() << "Invalid satd size (" << satd_size_
           << ") valid: 16/64/256/1024";
  }
  FillRandom();
  Check(golden);
}
// On random input the implementation under test must return the same value
// as the reference implementation.
TEST_P(SatdTest, Match) {
  FillRandom();     // Populate the coefficient buffer.
  RunComparison();  // Reference result vs. result under test.
}
// Timing comparison of the two implementations on random input. The
// DISABLED_ prefix keeps it out of normal test runs.
TEST_P(SatdTest, DISABLED_Speed) {
  FillRandom();    // Populate the coefficient buffer.
  RunSpeedTest();  // Time both implementations and print the ratio.
}
1000 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdTest);
1001
1002 INSTANTIATE_TEST_SUITE_P(
1003 C, SatdTest,
1004 ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_c),
1005 SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_c),
1006 SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_c),
1007 SatdTestParam<SatdFunc>(1024, &aom_satd_c, &aom_satd_c)));
1008
#if HAVE_NEON
// NEON SATD checked against the C reference for every supported block size.
INSTANTIATE_TEST_SUITE_P(
    NEON, SatdTest,
    ::testing::Values(SatdTestParam<SatdFunc>(16, aom_satd_c, aom_satd_neon),
                      SatdTestParam<SatdFunc>(64, aom_satd_c, aom_satd_neon),
                      SatdTestParam<SatdFunc>(256, aom_satd_c, aom_satd_neon),
                      SatdTestParam<SatdFunc>(1024, aom_satd_c,
                                              aom_satd_neon)));
// NEON vector variance checked against the C reference.
INSTANTIATE_TEST_SUITE_P(
    NEON, VectorVarTest,
    ::testing::Values(make_tuple(2, aom_vector_var_c, aom_vector_var_neon),
                      make_tuple(3, aom_vector_var_c, aom_vector_var_neon),
                      make_tuple(4, aom_vector_var_c, aom_vector_var_neon),
                      make_tuple(5, aom_vector_var_c, aom_vector_var_neon)));
#endif  // HAVE_NEON
1024
#if HAVE_SVE
// SVE vector variance checked against the C reference.
INSTANTIATE_TEST_SUITE_P(
    SVE, VectorVarTest,
    ::testing::Values(make_tuple(2, aom_vector_var_c, aom_vector_var_sve),
                      make_tuple(3, aom_vector_var_c, aom_vector_var_sve),
                      make_tuple(4, aom_vector_var_c, aom_vector_var_sve),
                      make_tuple(5, aom_vector_var_c, aom_vector_var_sve)));
#endif  // HAVE_SVE
1033
#if HAVE_SSE4_1
// SSE4.1 vector variance checked against the C reference.
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, VectorVarTest,
    ::testing::Values(make_tuple(2, aom_vector_var_c, aom_vector_var_sse4_1),
                      make_tuple(3, aom_vector_var_c, aom_vector_var_sse4_1),
                      make_tuple(4, aom_vector_var_c, aom_vector_var_sse4_1),
                      make_tuple(5, aom_vector_var_c,
                                 aom_vector_var_sse4_1)));
#endif  // HAVE_SSE4_1
1043
#if HAVE_AVX2
// AVX2 SATD checked against the C reference for every supported block size.
INSTANTIATE_TEST_SUITE_P(
    AVX2, SatdTest,
    ::testing::Values(SatdTestParam<SatdFunc>(16, aom_satd_c, aom_satd_avx2),
                      SatdTestParam<SatdFunc>(64, aom_satd_c, aom_satd_avx2),
                      SatdTestParam<SatdFunc>(256, aom_satd_c, aom_satd_avx2),
                      SatdTestParam<SatdFunc>(1024, aom_satd_c,
                                              aom_satd_avx2)));

// AVX2 vector variance checked against the C reference.
INSTANTIATE_TEST_SUITE_P(
    AVX2, VectorVarTest,
    ::testing::Values(make_tuple(2, aom_vector_var_c, aom_vector_var_avx2),
                      make_tuple(3, aom_vector_var_c, aom_vector_var_avx2),
                      make_tuple(4, aom_vector_var_c, aom_vector_var_avx2),
                      make_tuple(5, aom_vector_var_c, aom_vector_var_avx2)));
#endif  // HAVE_AVX2
1060
#if HAVE_SSE2
// SSE2 SATD checked against the C reference for every supported block size.
INSTANTIATE_TEST_SUITE_P(
    SSE2, SatdTest,
    ::testing::Values(SatdTestParam<SatdFunc>(16, aom_satd_c, aom_satd_sse2),
                      SatdTestParam<SatdFunc>(64, aom_satd_c, aom_satd_sse2),
                      SatdTestParam<SatdFunc>(256, aom_satd_c, aom_satd_sse2),
                      SatdTestParam<SatdFunc>(1024, aom_satd_c,
                                              aom_satd_sse2)));
#endif  // HAVE_SSE2
1070
1071 class SatdLpTest : public SatdTestBase<int16_t, SatdLpFunc> {
1072 public:
SatdLpTest()1073 SatdLpTest() : SatdTestBase(GetParam()) {}
1074 };
1075
// Every coefficient is set to the same negative constant; both
// implementations must return its magnitude times the block size.
TEST_P(SatdLpTest, MinValue) {
  constexpr int kMinCoeff = -32640;
  FillConstant(kMinCoeff);
  Check(-kMinCoeff * satd_size_);
}
// Every coefficient is set to the same positive constant; both
// implementations must return that constant times the block size.
TEST_P(SatdLpTest, MaxValue) {
  constexpr int kMaxCoeff = 32640;
  FillConstant(kMaxCoeff);
  Check(kMaxCoeff * satd_size_);
}
// Fills the buffer from the deterministically seeded generator and compares
// both implementations against the golden value recorded for the block size.
// Any size other than 16/64/256/1024 fails the test immediately.
TEST_P(SatdLpTest, Random) {
  int golden = 0;
  if (satd_size_ == 16) {
    golden = 205298;
  } else if (satd_size_ == 64) {
    golden = 1113950;
  } else if (satd_size_ == 256) {
    golden = 4268415;
  } else if (satd_size_ == 1024) {
    golden = 16954082;
  } else {
    FAIL() << "Invalid satd size (" << satd_size_
           << ") valid: 16/64/256/1024";
  }
  FillRandom();
  Check(golden);
}
// On random input the implementation under test must return the same value
// as the reference implementation.
TEST_P(SatdLpTest, Match) {
  FillRandom();     // Populate the coefficient buffer.
  RunComparison();  // Reference result vs. result under test.
}
// Timing comparison of the two implementations on random input. The
// DISABLED_ prefix keeps it out of normal test runs.
TEST_P(SatdLpTest, DISABLED_Speed) {
  FillRandom();    // Populate the coefficient buffer.
  RunSpeedTest();  // Time both implementations and print the ratio.
}
1110 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdLpTest);
1111
1112 // Add the following c test to avoid gtest uninitialized warning.
1113 INSTANTIATE_TEST_SUITE_P(
1114 C, SatdLpTest,
1115 ::testing::Values(
1116 SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_c),
1117 SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_c),
1118 SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_c),
1119 SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_c)));
1120
#if HAVE_NEON
// NEON low-precision SATD checked against the C reference.
INSTANTIATE_TEST_SUITE_P(
    NEON, SatdLpTest,
    ::testing::Values(
        SatdTestParam<SatdLpFunc>(16, aom_satd_lp_c, aom_satd_lp_neon),
        SatdTestParam<SatdLpFunc>(64, aom_satd_lp_c, aom_satd_lp_neon),
        SatdTestParam<SatdLpFunc>(256, aom_satd_lp_c, aom_satd_lp_neon),
        SatdTestParam<SatdLpFunc>(1024, aom_satd_lp_c, aom_satd_lp_neon)));
#endif  // HAVE_NEON
1130
#if HAVE_AVX2
// AVX2 low-precision SATD checked against the C reference.
INSTANTIATE_TEST_SUITE_P(
    AVX2, SatdLpTest,
    ::testing::Values(
        SatdTestParam<SatdLpFunc>(16, aom_satd_lp_c, aom_satd_lp_avx2),
        SatdTestParam<SatdLpFunc>(64, aom_satd_lp_c, aom_satd_lp_avx2),
        SatdTestParam<SatdLpFunc>(256, aom_satd_lp_c, aom_satd_lp_avx2),
        SatdTestParam<SatdLpFunc>(1024, aom_satd_lp_c, aom_satd_lp_avx2)));
#endif  // HAVE_AVX2
1140
#if HAVE_SSE2
// SSE2 low-precision SATD checked against the C reference.
INSTANTIATE_TEST_SUITE_P(
    SSE2, SatdLpTest,
    ::testing::Values(
        SatdTestParam<SatdLpFunc>(16, aom_satd_lp_c, aom_satd_lp_sse2),
        SatdTestParam<SatdLpFunc>(64, aom_satd_lp_c, aom_satd_lp_sse2),
        SatdTestParam<SatdLpFunc>(256, aom_satd_lp_c, aom_satd_lp_sse2),
        SatdTestParam<SatdLpFunc>(1024, aom_satd_lp_c, aom_satd_lp_sse2)));
#endif  // HAVE_SSE2
1150
1151 } // namespace
1152