xref: /aosp_15_r20/external/libaom/test/comp_mask_pred_test.cc (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <cstdlib>
13 #include <new>
14 #include <tuple>
15 
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 
19 #include "aom/aom_codec.h"
20 #include "aom/aom_integer.h"
21 #include "aom_dsp/variance.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25 #include "av1/common/reconinter.h"
26 #include "av1/encoder/reconinter_enc.h"
27 #include "gtest/gtest.h"
28 #include "test/acm_random.h"
29 #include "test/register_state_check.h"
30 #include "test/util.h"
31 
32 namespace {
// Pointer to a masked compound prediction routine; both the C reference
// (aom_comp_mask_pred_c) and the implementations under test have this shape.
using comp_mask_pred_func = void (*)(uint8_t *comp_pred, const uint8_t *pred,
                                     int width, int height,
                                     const uint8_t *ref, int ref_stride,
                                     const uint8_t *mask, int mask_stride,
                                     int invert_mask);

// Pointer to an averaging compound prediction routine; both the C reference
// (aom_comp_avg_pred_c) and the implementations under test have this shape.
using comp_avg_pred_func = void (*)(uint8_t *comp_pred, const uint8_t *pred,
                                    int width, int height,
                                    const uint8_t *ref, int ref_stride);
41 
#if HAVE_SSE2 || HAVE_SSSE3 || HAVE_AVX2 || HAVE_NEON
// Block sizes fed to the masked compound prediction test suites below.
const BLOCK_SIZE kCompMaskPredParams[] = {
  BLOCK_8X8,    // 8 wide
  BLOCK_8X16,   //
  BLOCK_8X32,   //
  BLOCK_16X8,   // 16 wide
  BLOCK_16X16,  //
  BLOCK_16X32,  //
  BLOCK_32X8,   // 32 wide
  BLOCK_32X16,  //
  BLOCK_32X32,  //
};
#endif  // HAVE_SSE2 || HAVE_SSSE3 || HAVE_AVX2 || HAVE_NEON
48 
49 class AV1CompMaskPredBase : public ::testing::Test {
50  public:
51   ~AV1CompMaskPredBase() override;
52   void SetUp() override;
53 
54   void TearDown() override;
55 
56  protected:
CheckResult(int width,int height)57   bool CheckResult(int width, int height) {
58     for (int y = 0; y < height; ++y) {
59       for (int x = 0; x < width; ++x) {
60         const int idx = y * width + x;
61         if (comp_pred1_[idx] != comp_pred2_[idx]) {
62           printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
63           printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
64           return false;
65         }
66       }
67     }
68     return true;
69   }
70 
71   libaom_test::ACMRandom rnd_;
72   uint8_t *comp_pred1_;
73   uint8_t *comp_pred2_;
74   uint8_t *pred_;
75   uint8_t *ref_buffer_;
76   uint8_t *ref_;
77 };
78 
79 AV1CompMaskPredBase::~AV1CompMaskPredBase() = default;
80 
SetUp()81 void AV1CompMaskPredBase::SetUp() {
82   rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
83   av1_init_wedge_masks();
84   comp_pred1_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
85   ASSERT_NE(comp_pred1_, nullptr);
86   comp_pred2_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
87   ASSERT_NE(comp_pred2_, nullptr);
88   pred_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
89   ASSERT_NE(pred_, nullptr);
90   // The biggest block size is MAX_SB_SQUARE(128*128), however for the
91   // convolution we need to access 3 bytes before and 4 bytes after (for an
92   // 8-tap filter), in both directions, so we need to allocate
93   // (128 + 7) * (128 + 7) = MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49
94   ref_buffer_ =
95       (uint8_t *)aom_memalign(16, MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49);
96   ASSERT_NE(ref_buffer_, nullptr);
97   // Start of the actual block where the convolution will be computed
98   ref_ = ref_buffer_ + (3 * MAX_SB_SIZE + 3);
99   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
100     pred_[i] = rnd_.Rand8();
101   }
102   for (int i = 0; i < MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49; ++i) {
103     ref_buffer_[i] = rnd_.Rand8();
104   }
105 }
106 
TearDown()107 void AV1CompMaskPredBase::TearDown() {
108   aom_free(comp_pred1_);
109   aom_free(comp_pred2_);
110   aom_free(pred_);
111   aom_free(ref_buffer_);
112 }
113 
114 typedef std::tuple<comp_mask_pred_func, BLOCK_SIZE> CompMaskPredParam;
115 
116 class AV1CompMaskPredTest
117     : public AV1CompMaskPredBase,
118       public ::testing::WithParamInterface<CompMaskPredParam> {
119  protected:
120   void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
121   void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
122 };
123 
RunCheckOutput(comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int inv)124 void AV1CompMaskPredTest::RunCheckOutput(comp_mask_pred_func test_impl,
125                                          BLOCK_SIZE bsize, int inv) {
126   const int w = block_size_wide[bsize];
127   const int h = block_size_high[bsize];
128   const int wedge_types = get_wedge_types_lookup(bsize);
129   for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
130     const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
131 
132     aom_comp_mask_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w,
133                          inv);
134     test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, inv);
135 
136     ASSERT_EQ(CheckResult(w, h), true)
137         << " wedge " << wedge_index << " inv " << inv;
138   }
139 }
140 
RunSpeedTest(comp_mask_pred_func test_impl,BLOCK_SIZE bsize)141 void AV1CompMaskPredTest::RunSpeedTest(comp_mask_pred_func test_impl,
142                                        BLOCK_SIZE bsize) {
143   const int w = block_size_wide[bsize];
144   const int h = block_size_high[bsize];
145   const int wedge_types = get_wedge_types_lookup(bsize);
146   int wedge_index = wedge_types / 2;
147   const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
148   const int num_loops = 1000000000 / (w + h);
149 
150   comp_mask_pred_func funcs[2] = { aom_comp_mask_pred_c, test_impl };
151   double elapsed_time[2] = { 0 };
152   for (int i = 0; i < 2; ++i) {
153     aom_usec_timer timer;
154     aom_usec_timer_start(&timer);
155     comp_mask_pred_func func = funcs[i];
156     for (int j = 0; j < num_loops; ++j) {
157       func(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, 0);
158     }
159     aom_usec_timer_mark(&timer);
160     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
161     elapsed_time[i] = 1000.0 * time / num_loops;
162   }
163   printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
164          elapsed_time[1]);
165   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
166 }
167 
168 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1CompMaskPredTest);
169 
TEST_P(AV1CompMaskPredTest,CheckOutput)170 TEST_P(AV1CompMaskPredTest, CheckOutput) {
171   // inv = 0, 1
172   RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
173   RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
174 }
175 
TEST_P(AV1CompMaskPredTest,DISABLED_Speed)176 TEST_P(AV1CompMaskPredTest, DISABLED_Speed) {
177   RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
178 }
179 
// One suite per available SIMD flavour, each run over kCompMaskPredParams.
#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1CompMaskPredTest,
                         ::testing::Combine(
                             ::testing::Values(&aom_comp_mask_pred_ssse3),
                             ::testing::ValuesIn(kCompMaskPredParams)));
#endif  // HAVE_SSSE3

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1CompMaskPredTest,
                         ::testing::Combine(
                             ::testing::Values(&aom_comp_mask_pred_avx2),
                             ::testing::ValuesIn(kCompMaskPredParams)));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1CompMaskPredTest,
                         ::testing::Combine(
                             ::testing::Values(&aom_comp_mask_pred_neon),
                             ::testing::ValuesIn(kCompMaskPredParams)));
#endif  // HAVE_NEON
200 
#if HAVE_SSE2 || HAVE_SSSE3 || HAVE_AVX2 || HAVE_NEON
// Block sizes fed to the upsampled and averaging prediction test suites.
const BLOCK_SIZE kValidBlockSize[] = {
  BLOCK_4X4,    BLOCK_8X8,    BLOCK_8X16,    BLOCK_8X32,
  BLOCK_16X8,   BLOCK_16X16,  BLOCK_16X32,   BLOCK_32X8,
  BLOCK_32X16,  BLOCK_32X32,  BLOCK_32X64,   BLOCK_64X32,
  BLOCK_64X64,  BLOCK_64X128, BLOCK_128X64,  BLOCK_128X128,
  BLOCK_16X64,  BLOCK_64X16,
};
#endif  // HAVE_SSE2 || HAVE_SSSE3 || HAVE_AVX2 || HAVE_NEON
209 
210 typedef void (*upsampled_pred_func)(MACROBLOCKD *xd, const AV1_COMMON *const cm,
211                                     int mi_row, int mi_col, const MV *const mv,
212                                     uint8_t *comp_pred, int width, int height,
213                                     int subpel_x_q3, int subpel_y_q3,
214                                     const uint8_t *ref, int ref_stride,
215                                     int subpel_search);
216 
217 typedef std::tuple<upsampled_pred_func, BLOCK_SIZE> UpsampledPredParam;
218 
219 class AV1UpsampledPredTest
220     : public AV1CompMaskPredBase,
221       public ::testing::WithParamInterface<UpsampledPredParam> {
222  protected:
223   void RunCheckOutput(upsampled_pred_func test_impl, BLOCK_SIZE bsize);
224   void RunSpeedTest(upsampled_pred_func test_impl, BLOCK_SIZE bsize,
225                     int havSub);
226 };
227 
RunCheckOutput(upsampled_pred_func test_impl,BLOCK_SIZE bsize)228 void AV1UpsampledPredTest::RunCheckOutput(upsampled_pred_func test_impl,
229                                           BLOCK_SIZE bsize) {
230   const int w = block_size_wide[bsize];
231   const int h = block_size_high[bsize];
232   for (int subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
233        ++subpel_search) {
234     // loop through subx and suby
235     for (int sub = 0; sub < 8 * 8; ++sub) {
236       int subx = sub & 0x7;
237       int suby = (sub >> 3);
238 
239       aom_upsampled_pred_c(nullptr, nullptr, 0, 0, nullptr, comp_pred1_, w, h,
240                            subx, suby, ref_, MAX_SB_SIZE, subpel_search);
241 
242       test_impl(nullptr, nullptr, 0, 0, nullptr, comp_pred2_, w, h, subx, suby,
243                 ref_, MAX_SB_SIZE, subpel_search);
244       ASSERT_EQ(CheckResult(w, h), true)
245           << "sub (" << subx << "," << suby << ")";
246     }
247   }
248 }
249 
RunSpeedTest(upsampled_pred_func test_impl,BLOCK_SIZE bsize,int havSub)250 void AV1UpsampledPredTest::RunSpeedTest(upsampled_pred_func test_impl,
251                                         BLOCK_SIZE bsize, int havSub) {
252   const int w = block_size_wide[bsize];
253   const int h = block_size_high[bsize];
254   const int subx = havSub ? 3 : 0;
255   const int suby = havSub ? 4 : 0;
256 
257   const int num_loops = 1000000000 / (w + h);
258   upsampled_pred_func funcs[2] = { aom_upsampled_pred_c, test_impl };
259   double elapsed_time[2] = { 0 };
260   int subpel_search = USE_8_TAPS;  // set to USE_4_TAPS to test 4-tap filter.
261   for (int i = 0; i < 2; ++i) {
262     aom_usec_timer timer;
263     aom_usec_timer_start(&timer);
264     upsampled_pred_func func = funcs[i];
265     for (int j = 0; j < num_loops; ++j) {
266       func(nullptr, nullptr, 0, 0, nullptr, comp_pred1_, w, h, subx, suby, ref_,
267            MAX_SB_SIZE, subpel_search);
268     }
269     aom_usec_timer_mark(&timer);
270     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
271     elapsed_time[i] = 1000.0 * time / num_loops;
272   }
273   printf("UpsampledPred[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h,
274          elapsed_time[0], elapsed_time[1]);
275   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
276 }
277 
278 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1UpsampledPredTest);
279 
TEST_P(AV1UpsampledPredTest,CheckOutput)280 TEST_P(AV1UpsampledPredTest, CheckOutput) {
281   RunCheckOutput(GET_PARAM(0), GET_PARAM(1));
282 }
283 
TEST_P(AV1UpsampledPredTest,DISABLED_Speed)284 TEST_P(AV1UpsampledPredTest, DISABLED_Speed) {
285   RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 1);
286 }
287 
// One suite per available SIMD flavour, each run over kValidBlockSize.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1UpsampledPredTest,
                         ::testing::Combine(
                             ::testing::Values(&aom_upsampled_pred_sse2),
                             ::testing::ValuesIn(kValidBlockSize)));
#endif  // HAVE_SSE2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1UpsampledPredTest,
                         ::testing::Combine(
                             ::testing::Values(&aom_upsampled_pred_neon),
                             ::testing::ValuesIn(kValidBlockSize)));
#endif  // HAVE_NEON
301 
302 typedef std::tuple<comp_avg_pred_func, BLOCK_SIZE> CompAvgPredParam;
303 
304 class AV1CompAvgPredTest : public ::testing::TestWithParam<CompAvgPredParam> {
305  public:
306   ~AV1CompAvgPredTest() override;
307   void SetUp() override;
308 
309   void TearDown() override;
310 
311  protected:
312   void RunCheckOutput(comp_avg_pred_func test_impl, BLOCK_SIZE bsize);
313   void RunSpeedTest(comp_avg_pred_func test_impl, BLOCK_SIZE bsize);
CheckResult(int width,int height)314   bool CheckResult(int width, int height) {
315     for (int y = 0; y < height; ++y) {
316       for (int x = 0; x < width; ++x) {
317         const int idx = y * width + x;
318         if (comp_pred1_[idx] != comp_pred2_[idx]) {
319           printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, x, y);
320           printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
321           return false;
322         }
323       }
324     }
325     return true;
326   }
327 
328   libaom_test::ACMRandom rnd_;
329   uint8_t *comp_pred1_;
330   uint8_t *comp_pred2_;
331   uint8_t *pred_;
332   uint8_t *ref_;
333 };
334 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1CompAvgPredTest);
335 
336 AV1CompAvgPredTest::~AV1CompAvgPredTest() = default;
337 
SetUp()338 void AV1CompAvgPredTest::SetUp() {
339   rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
340 
341   comp_pred1_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
342   ASSERT_NE(comp_pred1_, nullptr);
343   comp_pred2_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
344   ASSERT_NE(comp_pred2_, nullptr);
345   pred_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
346   ASSERT_NE(pred_, nullptr);
347   ref_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
348   ASSERT_NE(ref_, nullptr);
349   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
350     pred_[i] = rnd_.Rand8();
351   }
352   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
353     ref_[i] = rnd_.Rand8();
354   }
355 }
356 
TearDown()357 void AV1CompAvgPredTest::TearDown() {
358   aom_free(comp_pred1_);
359   aom_free(comp_pred2_);
360   aom_free(pred_);
361   aom_free(ref_);
362 }
363 
RunCheckOutput(comp_avg_pred_func test_impl,BLOCK_SIZE bsize)364 void AV1CompAvgPredTest::RunCheckOutput(comp_avg_pred_func test_impl,
365                                         BLOCK_SIZE bsize) {
366   const int w = block_size_wide[bsize];
367   const int h = block_size_high[bsize];
368   aom_comp_avg_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE);
369   test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE);
370 
371   ASSERT_EQ(CheckResult(w, h), true);
372 }
373 
RunSpeedTest(comp_avg_pred_func test_impl,BLOCK_SIZE bsize)374 void AV1CompAvgPredTest::RunSpeedTest(comp_avg_pred_func test_impl,
375                                       BLOCK_SIZE bsize) {
376   const int w = block_size_wide[bsize];
377   const int h = block_size_high[bsize];
378   const int num_loops = 1000000000 / (w + h);
379 
380   comp_avg_pred_func functions[2] = { aom_comp_avg_pred_c, test_impl };
381   double elapsed_time[2] = { 0.0 };
382   for (int i = 0; i < 2; ++i) {
383     aom_usec_timer timer;
384     aom_usec_timer_start(&timer);
385     comp_avg_pred_func func = functions[i];
386     for (int j = 0; j < num_loops; ++j) {
387       func(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE);
388     }
389     aom_usec_timer_mark(&timer);
390     const double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
391     elapsed_time[i] = 1000.0 * time;
392   }
393   printf("CompAvgPred %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
394          elapsed_time[1]);
395   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
396 }
397 
// Verifies the implementation against the C reference.
TEST_P(AV1CompAvgPredTest, CheckOutput) {
  const comp_avg_pred_func impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  RunCheckOutput(impl, bsize);
}

// Disabled by default; run explicitly to benchmark the implementation.
TEST_P(AV1CompAvgPredTest, DISABLED_Speed) {
  const comp_avg_pred_func impl = GET_PARAM(0);
  const BLOCK_SIZE bsize = GET_PARAM(1);
  RunSpeedTest(impl, bsize);
}
405 
// One suite per available SIMD flavour, each run over kValidBlockSize.
#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1CompAvgPredTest,
                         ::testing::Combine(
                             ::testing::Values(&aom_comp_avg_pred_avx2),
                             ::testing::ValuesIn(kValidBlockSize)));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1CompAvgPredTest,
                         ::testing::Combine(
                             ::testing::Values(&aom_comp_avg_pred_neon),
                             ::testing::ValuesIn(kValidBlockSize)));
#endif  // HAVE_NEON
419 
420 #if CONFIG_AV1_HIGHBITDEPTH
421 class AV1HighbdCompMaskPredTestBase : public ::testing::Test {
422  public:
423   ~AV1HighbdCompMaskPredTestBase() override;
424   void SetUp() override;
425 
426   void TearDown() override;
427 
428  protected:
CheckResult(int width,int height)429   bool CheckResult(int width, int height) {
430     for (int y = 0; y < height; ++y) {
431       for (int x = 0; x < width; ++x) {
432         const int idx = y * width + x;
433         if (comp_pred1_[idx] != comp_pred2_[idx]) {
434           printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
435           printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
436           return false;
437         }
438       }
439     }
440     return true;
441   }
442 
443   libaom_test::ACMRandom rnd_;
444   uint16_t *comp_pred1_;
445   uint16_t *comp_pred2_;
446   uint16_t *pred_;
447   uint16_t *ref_buffer_;
448   uint16_t *ref_;
449 };
450 
451 AV1HighbdCompMaskPredTestBase::~AV1HighbdCompMaskPredTestBase() = default;
452 
SetUp()453 void AV1HighbdCompMaskPredTestBase::SetUp() {
454   rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
455   av1_init_wedge_masks();
456 
457   comp_pred1_ =
458       (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred1_));
459   ASSERT_NE(comp_pred1_, nullptr);
460   comp_pred2_ =
461       (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred2_));
462   ASSERT_NE(comp_pred2_, nullptr);
463   pred_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*pred_));
464   ASSERT_NE(pred_, nullptr);
465   // The biggest block size is MAX_SB_SQUARE(128*128), however for the
466   // convolution we need to access 3 elements before and 4 elements after (for
467   // an 8-tap filter), in both directions, so we need to allocate (128 + 7) *
468   // (128 + 7) = (MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49) *
469   // sizeof(*ref_buffer_)
470   ref_buffer_ = (uint16_t *)aom_memalign(
471       16, (MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49) * sizeof(*ref_buffer_));
472   ASSERT_NE(ref_buffer_, nullptr);
473   // Start of the actual block where the convolution will be computed
474   ref_ = ref_buffer_ + (3 * MAX_SB_SIZE + 3);
475 }
476 
TearDown()477 void AV1HighbdCompMaskPredTestBase::TearDown() {
478   aom_free(comp_pred1_);
479   aom_free(comp_pred2_);
480   aom_free(pred_);
481   aom_free(ref_buffer_);
482 }
483 
484 typedef void (*highbd_comp_mask_pred_func)(uint8_t *comp_pred8,
485                                            const uint8_t *pred8, int width,
486                                            int height, const uint8_t *ref8,
487                                            int ref_stride, const uint8_t *mask,
488                                            int mask_stride, int invert_mask);
489 
490 typedef std::tuple<highbd_comp_mask_pred_func, BLOCK_SIZE, int>
491     HighbdCompMaskPredParam;
492 
493 class AV1HighbdCompMaskPredTest
494     : public AV1HighbdCompMaskPredTestBase,
495       public ::testing::WithParamInterface<HighbdCompMaskPredParam> {
496  public:
497   ~AV1HighbdCompMaskPredTest() override;
498 
499  protected:
500   void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
501   void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
502 };
503 
504 AV1HighbdCompMaskPredTest::~AV1HighbdCompMaskPredTest() = default;
505 
RunCheckOutput(highbd_comp_mask_pred_func test_impl,BLOCK_SIZE bsize,int inv)506 void AV1HighbdCompMaskPredTest::RunCheckOutput(
507     highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv) {
508   int bd_ = GET_PARAM(2);
509   const int w = block_size_wide[bsize];
510   const int h = block_size_high[bsize];
511   const int wedge_types = get_wedge_types_lookup(bsize);
512 
513   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
514     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
515   }
516   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
517     ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
518   }
519 
520   for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
521     const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
522 
523     aom_highbd_comp_mask_pred_c(
524         CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
525         CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv);
526 
527     test_impl(CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h,
528               CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv);
529 
530     ASSERT_EQ(CheckResult(w, h), true)
531         << " wedge " << wedge_index << " inv " << inv;
532   }
533 }
534 
RunSpeedTest(highbd_comp_mask_pred_func test_impl,BLOCK_SIZE bsize)535 void AV1HighbdCompMaskPredTest::RunSpeedTest(
536     highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize) {
537   int bd_ = GET_PARAM(2);
538 
539   const int w = block_size_wide[bsize];
540   const int h = block_size_high[bsize];
541   const int wedge_types = get_wedge_types_lookup(bsize);
542   int wedge_index = wedge_types / 2;
543 
544   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
545     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
546   }
547   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
548     ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
549   }
550 
551   const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
552   const int num_loops = 1000000000 / (w + h);
553 
554   highbd_comp_mask_pred_func funcs[2] = { aom_highbd_comp_mask_pred_c,
555                                           test_impl };
556   double elapsed_time[2] = { 0 };
557   for (int i = 0; i < 2; ++i) {
558     aom_usec_timer timer;
559     aom_usec_timer_start(&timer);
560     highbd_comp_mask_pred_func func = funcs[i];
561     for (int j = 0; j < num_loops; ++j) {
562       func(CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
563            CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, 0);
564     }
565     aom_usec_timer_mark(&timer);
566     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
567     elapsed_time[i] = 1000.0 * time / num_loops;
568   }
569   printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
570          elapsed_time[1]);
571   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
572 }
573 
574 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdCompMaskPredTest);
575 
TEST_P(AV1HighbdCompMaskPredTest,CheckOutput)576 TEST_P(AV1HighbdCompMaskPredTest, CheckOutput) {
577   // inv = 0, 1
578   RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
579   RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
580 }
581 
TEST_P(AV1HighbdCompMaskPredTest,DISABLED_Speed)582 TEST_P(AV1HighbdCompMaskPredTest, DISABLED_Speed) {
583   RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
584 }
585 
// One suite per available SIMD flavour, each run over kCompMaskPredParams
// and the bit depths produced by Range(8, 13, 2), i.e. 8, 10 and 12.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1HighbdCompMaskPredTest,
                         ::testing::Combine(
                             ::testing::Values(&aom_highbd_comp_mask_pred_neon),
                             ::testing::ValuesIn(kCompMaskPredParams),
                             ::testing::Range(8, 13, 2)));
#endif  // HAVE_NEON

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1HighbdCompMaskPredTest,
                         ::testing::Combine(
                             ::testing::Values(&aom_highbd_comp_mask_pred_avx2),
                             ::testing::ValuesIn(kCompMaskPredParams),
                             ::testing::Range(8, 13, 2)));
#endif  // HAVE_AVX2

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1HighbdCompMaskPredTest,
                         ::testing::Combine(
                             ::testing::Values(&aom_highbd_comp_mask_pred_sse2),
                             ::testing::ValuesIn(kCompMaskPredParams),
                             ::testing::Range(8, 13, 2)));
#endif  // HAVE_SSE2
609 
610 typedef void (*highbd_upsampled_pred_func)(
611     MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
612     const MV *const mv, uint8_t *comp_pred8, int width, int height,
613     int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride,
614     int bd, int subpel_search);
615 
616 typedef std::tuple<highbd_upsampled_pred_func, BLOCK_SIZE, int>
617     HighbdUpsampledPredParam;
618 
619 class AV1HighbdUpsampledPredTest
620     : public AV1HighbdCompMaskPredTestBase,
621       public ::testing::WithParamInterface<HighbdUpsampledPredParam> {
622  public:
623   ~AV1HighbdUpsampledPredTest() override;
624 
625  protected:
626   void RunCheckOutput(highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize);
627   void RunSpeedTest(highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize,
628                     int havSub);
629 };
630 
631 AV1HighbdUpsampledPredTest::~AV1HighbdUpsampledPredTest() = default;
632 
RunCheckOutput(highbd_upsampled_pred_func test_impl,BLOCK_SIZE bsize)633 void AV1HighbdUpsampledPredTest::RunCheckOutput(
634     highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize) {
635   int bd_ = GET_PARAM(2);
636   const int w = block_size_wide[bsize];
637   const int h = block_size_high[bsize];
638 
639   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
640     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
641   }
642   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
643     ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
644   }
645 
646   for (int subpel_search = 1; subpel_search <= 2; ++subpel_search) {
647     // loop through subx and suby
648     for (int sub = 0; sub < 8 * 8; ++sub) {
649       int subx = sub & 0x7;
650       int suby = (sub >> 3);
651 
652       aom_highbd_upsampled_pred_c(nullptr, nullptr, 0, 0, nullptr,
653                                   CONVERT_TO_BYTEPTR(comp_pred1_), w, h, subx,
654                                   suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE,
655                                   bd_, subpel_search);
656 
657       test_impl(nullptr, nullptr, 0, 0, nullptr,
658                 CONVERT_TO_BYTEPTR(comp_pred2_), w, h, subx, suby,
659                 CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_, subpel_search);
660 
661       ASSERT_EQ(CheckResult(w, h), true)
662           << "sub (" << subx << "," << suby << ")";
663     }
664   }
665 }
666 
RunSpeedTest(highbd_upsampled_pred_func test_impl,BLOCK_SIZE bsize,int havSub)667 void AV1HighbdUpsampledPredTest::RunSpeedTest(
668     highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize, int havSub) {
669   int bd_ = GET_PARAM(2);
670   const int w = block_size_wide[bsize];
671   const int h = block_size_high[bsize];
672   const int subx = havSub ? 3 : 0;
673   const int suby = havSub ? 4 : 0;
674 
675   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
676     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
677   }
678   for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
679     ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
680   }
681 
682   const int num_loops = 1000000000 / (w + h);
683   highbd_upsampled_pred_func funcs[2] = { &aom_highbd_upsampled_pred_c,
684                                           test_impl };
685   double elapsed_time[2] = { 0 };
686   for (int i = 0; i < 2; ++i) {
687     aom_usec_timer timer;
688     aom_usec_timer_start(&timer);
689     highbd_upsampled_pred_func func = funcs[i];
690     int subpel_search = 2;  // set to 1 to test 4-tap filter.
691     for (int j = 0; j < num_loops; ++j) {
692       func(nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(comp_pred1_), w,
693            h, subx, suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_,
694            subpel_search);
695     }
696     aom_usec_timer_mark(&timer);
697     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
698     elapsed_time[i] = 1000.0 * time / num_loops;
699   }
700   printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0],
701          elapsed_time[1]);
702   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
703 }
704 
705 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdUpsampledPredTest);
706 
TEST_P(AV1HighbdUpsampledPredTest,CheckOutput)707 TEST_P(AV1HighbdUpsampledPredTest, CheckOutput) {
708   RunCheckOutput(GET_PARAM(0), GET_PARAM(1));
709 }
710 
TEST_P(AV1HighbdUpsampledPredTest,DISABLED_Speed)711 TEST_P(AV1HighbdUpsampledPredTest, DISABLED_Speed) {
712   RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 1);
713 }
714 
// One suite per available SIMD flavour, each run over kValidBlockSize and
// the bit depths produced by Range(8, 13, 2), i.e. 8, 10 and 12.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1HighbdUpsampledPredTest,
                         ::testing::Combine(
                             ::testing::Values(&aom_highbd_upsampled_pred_sse2),
                             ::testing::ValuesIn(kValidBlockSize),
                             ::testing::Range(8, 13, 2)));
#endif  // HAVE_SSE2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1HighbdUpsampledPredTest,
                         ::testing::Combine(
                             ::testing::Values(&aom_highbd_upsampled_pred_neon),
                             ::testing::ValuesIn(kValidBlockSize),
                             ::testing::Range(8, 13, 2)));
#endif  // HAVE_NEON
730 
731 typedef void (*highbd_comp_avg_pred_func)(uint8_t *comp_pred,
732                                           const uint8_t *pred, int width,
733                                           int height, const uint8_t *ref,
734                                           int ref_stride);
735 
736 typedef std::tuple<highbd_comp_avg_pred_func, BLOCK_SIZE, int>
737     HighbdCompAvgPredParam;
738 
739 class AV1HighbdCompAvgPredTest
740     : public ::testing::TestWithParam<HighbdCompAvgPredParam> {
741  public:
742   ~AV1HighbdCompAvgPredTest() override;
743   void SetUp() override;
744 
745  protected:
746   void RunCheckOutput(highbd_comp_avg_pred_func test_impl, BLOCK_SIZE bsize);
747   void RunSpeedTest(highbd_comp_avg_pred_func test_impl, BLOCK_SIZE bsize);
CheckResult(int width,int height) const748   bool CheckResult(int width, int height) const {
749     for (int y = 0; y < height; ++y) {
750       for (int x = 0; x < width; ++x) {
751         const int idx = y * width + x;
752         if (comp_pred1_[idx] != comp_pred2_[idx]) {
753           printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, x, y);
754           printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
755           return false;
756         }
757       }
758     }
759     return true;
760   }
761 
762   libaom_test::ACMRandom rnd_;
763   uint16_t *comp_pred1_;
764   uint16_t *comp_pred2_;
765   uint16_t *pred_;
766   uint16_t *ref_;
767 };
768 
~AV1HighbdCompAvgPredTest()769 AV1HighbdCompAvgPredTest::~AV1HighbdCompAvgPredTest() {
770   aom_free(comp_pred1_);
771   aom_free(comp_pred2_);
772   aom_free(pred_);
773   aom_free(ref_);
774 }
775 
SetUp()776 void AV1HighbdCompAvgPredTest::SetUp() {
777   int bd_ = GET_PARAM(2);
778   rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
779 
780   comp_pred1_ =
781       (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred1_));
782   ASSERT_NE(comp_pred1_, nullptr);
783   comp_pred2_ =
784       (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred2_));
785   ASSERT_NE(comp_pred2_, nullptr);
786   pred_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*pred_));
787   ASSERT_NE(pred_, nullptr);
788   ref_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*ref_));
789   ASSERT_NE(ref_, nullptr);
790   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
791     pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
792   }
793   for (int i = 0; i < MAX_SB_SQUARE; ++i) {
794     ref_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
795   }
796 }
797 
RunCheckOutput(highbd_comp_avg_pred_func test_impl,BLOCK_SIZE bsize)798 void AV1HighbdCompAvgPredTest::RunCheckOutput(
799     highbd_comp_avg_pred_func test_impl, BLOCK_SIZE bsize) {
800   const int w = block_size_wide[bsize];
801   const int h = block_size_high[bsize];
802   aom_highbd_comp_avg_pred_c(CONVERT_TO_BYTEPTR(comp_pred1_),
803                              CONVERT_TO_BYTEPTR(pred_), w, h,
804                              CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE);
805   test_impl(CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h,
806             CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE);
807 
808   ASSERT_EQ(CheckResult(w, h), true);
809 }
810 
RunSpeedTest(highbd_comp_avg_pred_func test_impl,BLOCK_SIZE bsize)811 void AV1HighbdCompAvgPredTest::RunSpeedTest(highbd_comp_avg_pred_func test_impl,
812                                             BLOCK_SIZE bsize) {
813   const int w = block_size_wide[bsize];
814   const int h = block_size_high[bsize];
815   const int num_loops = 1000000000 / (w + h);
816 
817   highbd_comp_avg_pred_func functions[2] = { aom_highbd_comp_avg_pred_c,
818                                              test_impl };
819   double elapsed_time[2] = { 0.0 };
820   for (int i = 0; i < 2; ++i) {
821     aom_usec_timer timer;
822     aom_usec_timer_start(&timer);
823     highbd_comp_avg_pred_func func = functions[i];
824     for (int j = 0; j < num_loops; ++j) {
825       func(CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
826            CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE);
827     }
828     aom_usec_timer_mark(&timer);
829     const double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
830     elapsed_time[i] = 1000.0 * time;
831   }
832   printf("HighbdCompAvg %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
833          elapsed_time[1]);
834   printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
835 }
836 
837 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdCompAvgPredTest);
838 
TEST_P(AV1HighbdCompAvgPredTest,CheckOutput)839 TEST_P(AV1HighbdCompAvgPredTest, CheckOutput) {
840   RunCheckOutput(GET_PARAM(0), GET_PARAM(1));
841 }
842 
TEST_P(AV1HighbdCompAvgPredTest,DISABLED_Speed)843 TEST_P(AV1HighbdCompAvgPredTest, DISABLED_Speed) {
844   RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
845 }
846 
// NEON suite, run over kValidBlockSize and the bit depths produced by
// Range(8, 13, 2), i.e. 8, 10 and 12.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1HighbdCompAvgPredTest,
                         ::testing::Combine(
                             ::testing::Values(&aom_highbd_comp_avg_pred_neon),
                             ::testing::ValuesIn(kValidBlockSize),
                             ::testing::Range(8, 13, 2)));
#endif  // HAVE_NEON
854 
855 #endif  // CONFIG_AV1_HIGHBITDEPTH
856 }  // namespace
857