xref: /aosp_15_r20/external/libaom/test/comp_avg_pred_test.cc (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <tuple>
13 
14 #include "config/aom_dsp_rtcd.h"
15 #include "config/av1_rtcd.h"
16 
17 #include "gtest/gtest.h"
18 #include "test/acm_random.h"
19 #include "test/util.h"
20 #include "test/register_state_check.h"
21 #include "av1/common/common_data.h"
22 #include "aom_ports/aom_timer.h"
23 
24 using libaom_test::ACMRandom;
25 using std::make_tuple;
26 using std::tuple;
27 
28 namespace {
29 
30 const int kMaxSize = 128 + 32;  // padding
31 
32 typedef void (*distwtdcompavg_func)(uint8_t *comp_pred, const uint8_t *pred,
33                                     int width, int height, const uint8_t *ref,
34                                     int ref_stride,
35                                     const DIST_WTD_COMP_PARAMS *jcp_param);
36 
37 typedef void (*distwtdcompavgupsampled_func)(
38     MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
39     const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
40     int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
41     int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search);
42 
43 typedef void (*DistWtdCompAvgFunc)(uint8_t *comp_pred, const uint8_t *pred,
44                                    int width, int height, const uint8_t *ref,
45                                    int ref_stride,
46                                    const DIST_WTD_COMP_PARAMS *jcp_param);
47 
48 typedef std::tuple<distwtdcompavg_func, BLOCK_SIZE> AV1DistWtdCompAvgParam;
49 
50 typedef std::tuple<int, int, DistWtdCompAvgFunc, int> DistWtdCompAvgParam;
51 
52 #if CONFIG_AV1_HIGHBITDEPTH
53 typedef void (*highbddistwtdcompavgupsampled_func)(
54     MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
55     const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width,
56     int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
57     int ref_stride, int bd, const DIST_WTD_COMP_PARAMS *jcp_param,
58     int subpel_search);
59 
60 typedef std::tuple<int, highbddistwtdcompavgupsampled_func, BLOCK_SIZE>
61     HighbdDistWtdCompAvgUpsampledParam;
62 
63 typedef std::tuple<int, distwtdcompavg_func, BLOCK_SIZE>
64     HighbdDistWtdCompAvgParam;
65 
66 #if HAVE_SSE2 || HAVE_NEON
BuildParams(distwtdcompavg_func filter,int is_hbd)67 ::testing::internal::ParamGenerator<HighbdDistWtdCompAvgParam> BuildParams(
68     distwtdcompavg_func filter, int is_hbd) {
69   (void)is_hbd;
70   return ::testing::Combine(::testing::Range(8, 13, 2),
71                             ::testing::Values(filter),
72                             ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
73 }
74 
75 ::testing::internal::ParamGenerator<HighbdDistWtdCompAvgUpsampledParam>
BuildParams(highbddistwtdcompavgupsampled_func filter)76 BuildParams(highbddistwtdcompavgupsampled_func filter) {
77   return ::testing::Combine(::testing::Range(8, 13, 2),
78                             ::testing::Values(filter),
79                             ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
80 }
81 #endif  // HAVE_SSE2 || HAVE_NEON
82 #endif  // CONFIG_AV1_HIGHBITDEPTH
83 
84 #if HAVE_SSSE3
BuildParams(distwtdcompavg_func filter)85 ::testing::internal::ParamGenerator<AV1DistWtdCompAvgParam> BuildParams(
86     distwtdcompavg_func filter) {
87   return ::testing::Combine(::testing::Values(filter),
88                             ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
89 }
90 #endif  // HAVE_SSSE3
91 
92 class AV1DistWtdCompAvgTest
93     : public ::testing::TestWithParam<AV1DistWtdCompAvgParam> {
94  public:
95   ~AV1DistWtdCompAvgTest() override = default;
SetUp()96   void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); }
97 
98  protected:
RunCheckOutput(distwtdcompavg_func test_impl)99   void RunCheckOutput(distwtdcompavg_func test_impl) {
100     const int w = kMaxSize, h = kMaxSize;
101     const int block_idx = GET_PARAM(1);
102 
103     uint8_t pred8[kMaxSize * kMaxSize];
104     uint8_t ref8[kMaxSize * kMaxSize];
105     uint8_t output[kMaxSize * kMaxSize];
106     uint8_t output2[kMaxSize * kMaxSize];
107 
108     for (int i = 0; i < h; ++i)
109       for (int j = 0; j < w; ++j) {
110         pred8[i * w + j] = rnd_.Rand8();
111         ref8[i * w + j] = rnd_.Rand8();
112       }
113     const int in_w = block_size_wide[block_idx];
114     const int in_h = block_size_high[block_idx];
115 
116     DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
117     dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
118 
119     for (int ii = 0; ii < 2; ii++) {
120       for (int jj = 0; jj < 4; jj++) {
121         dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[jj][ii];
122         dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[jj][1 - ii];
123 
124         const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
125         const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
126         aom_dist_wtd_comp_avg_pred_c(output, pred8 + offset_r * w + offset_c,
127                                      in_w, in_h, ref8 + offset_r * w + offset_c,
128                                      in_w, &dist_wtd_comp_params);
129         test_impl(output2, pred8 + offset_r * w + offset_c, in_w, in_h,
130                   ref8 + offset_r * w + offset_c, in_w, &dist_wtd_comp_params);
131 
132         for (int i = 0; i < in_h; ++i) {
133           for (int j = 0; j < in_w; ++j) {
134             int idx = i * in_w + j;
135             ASSERT_EQ(output[idx], output2[idx])
136                 << "Mismatch at unit tests for AV1DistWtdCompAvgTest\n"
137                 << in_w << "x" << in_h << " Pixel mismatch at index " << idx
138                 << " = (" << i << ", " << j << ")";
139           }
140         }
141       }
142     }
143   }
RunSpeedTest(distwtdcompavg_func test_impl)144   void RunSpeedTest(distwtdcompavg_func test_impl) {
145     const int w = kMaxSize, h = kMaxSize;
146     const int block_idx = GET_PARAM(1);
147 
148     uint8_t pred8[kMaxSize * kMaxSize];
149     uint8_t ref8[kMaxSize * kMaxSize];
150     uint8_t output[kMaxSize * kMaxSize];
151     uint8_t output2[kMaxSize * kMaxSize];
152 
153     for (int i = 0; i < h; ++i)
154       for (int j = 0; j < w; ++j) {
155         pred8[i * w + j] = rnd_.Rand8();
156         ref8[i * w + j] = rnd_.Rand8();
157       }
158     const int in_w = block_size_wide[block_idx];
159     const int in_h = block_size_high[block_idx];
160 
161     DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
162     dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
163 
164     dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[0][0];
165     dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[0][1];
166 
167     const int num_loops = 1000000000 / (in_w + in_h);
168     aom_usec_timer timer;
169     aom_usec_timer_start(&timer);
170 
171     for (int i = 0; i < num_loops; ++i)
172       aom_dist_wtd_comp_avg_pred_c(output, pred8, in_w, in_h, ref8, in_w,
173                                    &dist_wtd_comp_params);
174 
175     aom_usec_timer_mark(&timer);
176     const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
177     printf("distwtdcompavg c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
178            1000.0 * elapsed_time / num_loops);
179 
180     aom_usec_timer timer1;
181     aom_usec_timer_start(&timer1);
182 
183     for (int i = 0; i < num_loops; ++i)
184       test_impl(output2, pred8, in_w, in_h, ref8, in_w, &dist_wtd_comp_params);
185 
186     aom_usec_timer_mark(&timer1);
187     const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
188     printf("distwtdcompavg test_code %3dx%-3d: %7.2f us\n", in_w, in_h,
189            1000.0 * elapsed_time1 / num_loops);
190   }
191 
192   libaom_test::ACMRandom rnd_;
193 };  // class AV1DistWtdCompAvgTest
194 
// Lets the build pass when this suite has no instantiation. The only
// instantiation visible in this file is guarded by HAVE_SSSE3.
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1DistWtdCompAvgTest);
196 
197 class DistWtdCompAvgTest
198     : public ::testing::WithParamInterface<DistWtdCompAvgParam>,
199       public ::testing::Test {
200  public:
DistWtdCompAvgTest()201   DistWtdCompAvgTest()
202       : width_(GET_PARAM(0)), height_(GET_PARAM(1)), bd_(GET_PARAM(3)) {}
203 
SetUpTestSuite()204   static void SetUpTestSuite() {
205     reference_data8_ = reinterpret_cast<uint8_t *>(
206         aom_memalign(kDataAlignment, kDataBufferSize));
207     ASSERT_NE(reference_data8_, nullptr);
208     second_pred8_ =
209         reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
210     ASSERT_NE(second_pred8_, nullptr);
211     comp_pred8_ =
212         reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
213     ASSERT_NE(comp_pred8_, nullptr);
214     comp_pred8_test_ =
215         reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
216     ASSERT_NE(comp_pred8_test_, nullptr);
217     reference_data16_ = reinterpret_cast<uint16_t *>(
218         aom_memalign(kDataAlignment, kDataBufferSize * sizeof(uint16_t)));
219     ASSERT_NE(reference_data16_, nullptr);
220     second_pred16_ = reinterpret_cast<uint16_t *>(
221         aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
222     ASSERT_NE(second_pred16_, nullptr);
223     comp_pred16_ = reinterpret_cast<uint16_t *>(
224         aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
225     ASSERT_NE(comp_pred16_, nullptr);
226     comp_pred16_test_ = reinterpret_cast<uint16_t *>(
227         aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
228     ASSERT_NE(comp_pred16_test_, nullptr);
229   }
230 
TearDownTestSuite()231   static void TearDownTestSuite() {
232     aom_free(reference_data8_);
233     reference_data8_ = nullptr;
234     aom_free(second_pred8_);
235     second_pred8_ = nullptr;
236     aom_free(comp_pred8_);
237     comp_pred8_ = nullptr;
238     aom_free(comp_pred8_test_);
239     comp_pred8_test_ = nullptr;
240     aom_free(reference_data16_);
241     reference_data16_ = nullptr;
242     aom_free(second_pred16_);
243     second_pred16_ = nullptr;
244     aom_free(comp_pred16_);
245     comp_pred16_ = nullptr;
246     aom_free(comp_pred16_test_);
247     comp_pred16_test_ = nullptr;
248   }
249 
250  protected:
251   // Handle up to 4 128x128 blocks, with stride up to 256
252   static const int kDataAlignment = 16;
253   static const int kDataBlockSize = 128 * 256;
254   static const int kDataBufferSize = 4 * kDataBlockSize;
255 
SetUp()256   void SetUp() override {
257     if (bd_ == -1) {
258       use_high_bit_depth_ = false;
259       bit_depth_ = AOM_BITS_8;
260       reference_data_ = reference_data8_;
261       second_pred_ = second_pred8_;
262       comp_pred_ = comp_pred8_;
263       comp_pred_test_ = comp_pred8_test_;
264     } else {
265       use_high_bit_depth_ = true;
266       bit_depth_ = static_cast<aom_bit_depth_t>(bd_);
267       reference_data_ = CONVERT_TO_BYTEPTR(reference_data16_);
268       second_pred_ = CONVERT_TO_BYTEPTR(second_pred16_);
269       comp_pred_ = CONVERT_TO_BYTEPTR(comp_pred16_);
270       comp_pred_test_ = CONVERT_TO_BYTEPTR(comp_pred16_test_);
271     }
272     mask_ = (1 << bit_depth_) - 1;
273     reference_stride_ = width_ * 2;
274     rnd_.Reset(ACMRandom::DeterministicSeed());
275   }
276 
GetReference(int block_idx)277   virtual uint8_t *GetReference(int block_idx) {
278     if (use_high_bit_depth_)
279       return CONVERT_TO_BYTEPTR(CONVERT_TO_SHORTPTR(reference_data_) +
280                                 block_idx * kDataBlockSize);
281     return reference_data_ + block_idx * kDataBlockSize;
282   }
283 
ReferenceDistWtdCompAvg(int block_idx)284   void ReferenceDistWtdCompAvg(int block_idx) {
285     const uint8_t *const reference8 = GetReference(block_idx);
286     const uint8_t *const second_pred8 = second_pred_;
287     uint8_t *const comp_pred8 = comp_pred_;
288     const uint16_t *const reference16 =
289         CONVERT_TO_SHORTPTR(GetReference(block_idx));
290     const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
291     uint16_t *const comp_pred16 = CONVERT_TO_SHORTPTR(comp_pred_);
292     for (int h = 0; h < height_; ++h) {
293       for (int w = 0; w < width_; ++w) {
294         if (!use_high_bit_depth_) {
295           const int tmp =
296               second_pred8[h * width_ + w] * jcp_param_.bck_offset +
297               reference8[h * reference_stride_ + w] * jcp_param_.fwd_offset;
298           comp_pred8[h * width_ + w] = ROUND_POWER_OF_TWO(tmp, 4);
299         } else {
300           const int tmp =
301               second_pred16[h * width_ + w] * jcp_param_.bck_offset +
302               reference16[h * reference_stride_ + w] * jcp_param_.fwd_offset;
303           comp_pred16[h * width_ + w] = ROUND_POWER_OF_TWO(tmp, 4);
304         }
305       }
306     }
307   }
308 
FillConstant(uint8_t * data,int stride,uint16_t fill_constant)309   void FillConstant(uint8_t *data, int stride, uint16_t fill_constant) {
310     uint8_t *data8 = data;
311     uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
312     for (int h = 0; h < height_; ++h) {
313       for (int w = 0; w < width_; ++w) {
314         if (!use_high_bit_depth_) {
315           data8[h * stride + w] = static_cast<uint8_t>(fill_constant);
316         } else {
317           data16[h * stride + w] = fill_constant;
318         }
319       }
320     }
321   }
322 
FillRandom(uint8_t * data,int stride)323   void FillRandom(uint8_t *data, int stride) {
324     uint8_t *data8 = data;
325     uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
326     for (int h = 0; h < height_; ++h) {
327       for (int w = 0; w < width_; ++w) {
328         if (!use_high_bit_depth_) {
329           data8[h * stride + w] = rnd_.Rand8();
330         } else {
331           data16[h * stride + w] = rnd_.Rand16() & mask_;
332         }
333       }
334     }
335   }
336 
dist_wtd_comp_avg(int block_idx)337   void dist_wtd_comp_avg(int block_idx) {
338     const uint8_t *const reference = GetReference(block_idx);
339 
340     API_REGISTER_STATE_CHECK(GET_PARAM(2)(comp_pred_test_, second_pred_, width_,
341                                           height_, reference, reference_stride_,
342                                           &jcp_param_));
343   }
344 
CheckCompAvg()345   void CheckCompAvg() {
346     for (int j = 0; j < 2; ++j) {
347       for (int i = 0; i < 4; ++i) {
348         jcp_param_.fwd_offset = quant_dist_lookup_table[i][j];
349         jcp_param_.bck_offset = quant_dist_lookup_table[i][1 - j];
350 
351         ReferenceDistWtdCompAvg(0);
352         dist_wtd_comp_avg(0);
353 
354         for (int y = 0; y < height_; ++y)
355           for (int x = 0; x < width_; ++x)
356             ASSERT_EQ(comp_pred_[y * width_ + x],
357                       comp_pred_test_[y * width_ + x]);
358       }
359     }
360   }
361 
362   int width_, height_, mask_, bd_;
363   aom_bit_depth_t bit_depth_;
364   static uint8_t *reference_data_;
365   static uint8_t *second_pred_;
366   bool use_high_bit_depth_;
367   static uint8_t *reference_data8_;
368   static uint8_t *second_pred8_;
369   static uint16_t *reference_data16_;
370   static uint16_t *second_pred16_;
371   int reference_stride_;
372   static uint8_t *comp_pred_;
373   static uint8_t *comp_pred8_;
374   static uint16_t *comp_pred16_;
375   static uint8_t *comp_pred_test_;
376   static uint8_t *comp_pred8_test_;
377   static uint16_t *comp_pred16_test_;
378   DIST_WTD_COMP_PARAMS jcp_param_;
379 
380   ACMRandom rnd_;
381 };
382 
383 uint8_t *DistWtdCompAvgTest::reference_data_ = nullptr;
384 uint8_t *DistWtdCompAvgTest::second_pred_ = nullptr;
385 uint8_t *DistWtdCompAvgTest::comp_pred_ = nullptr;
386 uint8_t *DistWtdCompAvgTest::comp_pred_test_ = nullptr;
387 uint8_t *DistWtdCompAvgTest::reference_data8_ = nullptr;
388 uint8_t *DistWtdCompAvgTest::second_pred8_ = nullptr;
389 uint8_t *DistWtdCompAvgTest::comp_pred8_ = nullptr;
390 uint8_t *DistWtdCompAvgTest::comp_pred8_test_ = nullptr;
391 uint16_t *DistWtdCompAvgTest::reference_data16_ = nullptr;
392 uint16_t *DistWtdCompAvgTest::second_pred16_ = nullptr;
393 uint16_t *DistWtdCompAvgTest::comp_pred16_ = nullptr;
394 uint16_t *DistWtdCompAvgTest::comp_pred16_test_ = nullptr;
395 
// Lets the build pass even if this suite ends up with no instantiation.
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DistWtdCompAvgTest);
397 
398 #if CONFIG_AV1_HIGHBITDEPTH
399 class AV1HighBDDistWtdCompAvgTest
400     : public ::testing::TestWithParam<HighbdDistWtdCompAvgParam> {
401  public:
402   ~AV1HighBDDistWtdCompAvgTest() override = default;
SetUp()403   void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); }
404 
405  protected:
RunCheckOutput(distwtdcompavg_func test_impl)406   void RunCheckOutput(distwtdcompavg_func test_impl) {
407     const int w = kMaxSize, h = kMaxSize;
408     const int block_idx = GET_PARAM(2);
409     const int bd = GET_PARAM(0);
410     uint16_t pred8[kMaxSize * kMaxSize];
411     uint16_t ref8[kMaxSize * kMaxSize];
412     uint16_t output[kMaxSize * kMaxSize];
413     uint16_t output2[kMaxSize * kMaxSize];
414 
415     for (int i = 0; i < h; ++i)
416       for (int j = 0; j < w; ++j) {
417         pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
418         ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
419       }
420     const int in_w = block_size_wide[block_idx];
421     const int in_h = block_size_high[block_idx];
422 
423     DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
424     dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
425 
426     for (int ii = 0; ii < 2; ii++) {
427       for (int jj = 0; jj < 4; jj++) {
428         dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[jj][ii];
429         dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[jj][1 - ii];
430 
431         const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
432         const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
433         aom_highbd_dist_wtd_comp_avg_pred_c(
434             CONVERT_TO_BYTEPTR(output),
435             CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w, in_h,
436             CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w,
437             &dist_wtd_comp_params);
438         test_impl(CONVERT_TO_BYTEPTR(output2),
439                   CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w,
440                   in_h, CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c,
441                   in_w, &dist_wtd_comp_params);
442 
443         for (int i = 0; i < in_h; ++i) {
444           for (int j = 0; j < in_w; ++j) {
445             int idx = i * in_w + j;
446             ASSERT_EQ(output[idx], output2[idx])
447                 << "Mismatch at unit tests for AV1HighBDDistWtdCompAvgTest\n"
448                 << in_w << "x" << in_h << " Pixel mismatch at index " << idx
449                 << " = (" << i << ", " << j << ")";
450           }
451         }
452       }
453     }
454   }
RunSpeedTest(distwtdcompavg_func test_impl)455   void RunSpeedTest(distwtdcompavg_func test_impl) {
456     const int w = kMaxSize, h = kMaxSize;
457     const int block_idx = GET_PARAM(2);
458     const int bd = GET_PARAM(0);
459     uint16_t pred8[kMaxSize * kMaxSize];
460     uint16_t ref8[kMaxSize * kMaxSize];
461     uint16_t output[kMaxSize * kMaxSize];
462     uint16_t output2[kMaxSize * kMaxSize];
463 
464     for (int i = 0; i < h; ++i)
465       for (int j = 0; j < w; ++j) {
466         pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
467         ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
468       }
469     const int in_w = block_size_wide[block_idx];
470     const int in_h = block_size_high[block_idx];
471 
472     DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
473     dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
474 
475     dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[0][0];
476     dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[0][1];
477 
478     const int num_loops = 1000000000 / (in_w + in_h);
479     aom_usec_timer timer;
480     aom_usec_timer_start(&timer);
481 
482     for (int i = 0; i < num_loops; ++i)
483       aom_highbd_dist_wtd_comp_avg_pred_c(
484           CONVERT_TO_BYTEPTR(output), CONVERT_TO_BYTEPTR(pred8), in_w, in_h,
485           CONVERT_TO_BYTEPTR(ref8), in_w, &dist_wtd_comp_params);
486 
487     aom_usec_timer_mark(&timer);
488     const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
489     printf("highbddistwtdcompavg c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
490            1000.0 * elapsed_time / num_loops);
491 
492     aom_usec_timer timer1;
493     aom_usec_timer_start(&timer1);
494 
495     for (int i = 0; i < num_loops; ++i)
496       test_impl(CONVERT_TO_BYTEPTR(output2), CONVERT_TO_BYTEPTR(pred8), in_w,
497                 in_h, CONVERT_TO_BYTEPTR(ref8), in_w, &dist_wtd_comp_params);
498 
499     aom_usec_timer_mark(&timer1);
500     const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
501     printf("highbddistwtdcompavg test_code %3dx%-3d: %7.2f us\n", in_w, in_h,
502            1000.0 * elapsed_time1 / num_loops);
503   }
504 
505   libaom_test::ACMRandom rnd_;
506 };  // class AV1HighBDDistWtdCompAvgTest
507 
// Lets the build pass even if this suite ends up with no instantiation.
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighBDDistWtdCompAvgTest);
509 
510 class AV1HighBDDistWtdCompAvgUpsampledTest
511     : public ::testing::TestWithParam<HighbdDistWtdCompAvgUpsampledParam> {
512  public:
513   ~AV1HighBDDistWtdCompAvgUpsampledTest() override = default;
SetUp()514   void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); }
515 
516  protected:
RunCheckOutput(highbddistwtdcompavgupsampled_func test_impl)517   void RunCheckOutput(highbddistwtdcompavgupsampled_func test_impl) {
518     const int w = kMaxSize, h = kMaxSize;
519     const int block_idx = GET_PARAM(2);
520     const int bd = GET_PARAM(0);
521     uint16_t pred8[kMaxSize * kMaxSize];
522     uint16_t ref8[kMaxSize * kMaxSize];
523     DECLARE_ALIGNED(16, uint16_t, output[kMaxSize * kMaxSize]);
524     DECLARE_ALIGNED(16, uint16_t, output2[kMaxSize * kMaxSize]);
525 
526     for (int i = 0; i < h; ++i)
527       for (int j = 0; j < w; ++j) {
528         pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
529         ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
530       }
531     const int in_w = block_size_wide[block_idx];
532     const int in_h = block_size_high[block_idx];
533 
534     DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
535     dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
536     int sub_x_q3, sub_y_q3;
537     int subpel_search;
538     for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
539          ++subpel_search) {
540       for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) {
541         for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) {
542           for (int ii = 0; ii < 2; ii++) {
543             for (int jj = 0; jj < 4; jj++) {
544               dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[jj][ii];
545               dist_wtd_comp_params.bck_offset =
546                   quant_dist_lookup_table[jj][1 - ii];
547 
548               const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
549               const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
550 
551               aom_highbd_dist_wtd_comp_avg_upsampled_pred_c(
552                   nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(output),
553                   CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w,
554                   in_h, sub_x_q3, sub_y_q3,
555                   CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w, bd,
556                   &dist_wtd_comp_params, subpel_search);
557               test_impl(nullptr, nullptr, 0, 0, nullptr,
558                         CONVERT_TO_BYTEPTR(output2),
559                         CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c,
560                         in_w, in_h, sub_x_q3, sub_y_q3,
561                         CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c,
562                         in_w, bd, &dist_wtd_comp_params, subpel_search);
563 
564               for (int i = 0; i < in_h; ++i) {
565                 for (int j = 0; j < in_w; ++j) {
566                   int idx = i * in_w + j;
567                   ASSERT_EQ(output[idx], output2[idx])
568                       << "Mismatch at unit tests for "
569                          "AV1HighBDDistWtdCompAvgUpsampledTest\n"
570                       << in_w << "x" << in_h << " Pixel mismatch at index "
571                       << idx << " = (" << i << ", " << j
572                       << "), sub pixel offset = (" << sub_y_q3 << ", "
573                       << sub_x_q3 << ")";
574                 }
575               }
576             }
577           }
578         }
579       }
580     }
581   }
RunSpeedTest(highbddistwtdcompavgupsampled_func test_impl)582   void RunSpeedTest(highbddistwtdcompavgupsampled_func test_impl) {
583     const int w = kMaxSize, h = kMaxSize;
584     const int block_idx = GET_PARAM(2);
585     const int bd = GET_PARAM(0);
586     uint16_t pred8[kMaxSize * kMaxSize];
587     uint16_t ref8[kMaxSize * kMaxSize];
588     DECLARE_ALIGNED(16, uint16_t, output[kMaxSize * kMaxSize]);
589     DECLARE_ALIGNED(16, uint16_t, output2[kMaxSize * kMaxSize]);
590 
591     for (int i = 0; i < h; ++i)
592       for (int j = 0; j < w; ++j) {
593         pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
594         ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
595       }
596     const int in_w = block_size_wide[block_idx];
597     const int in_h = block_size_high[block_idx];
598 
599     DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
600     dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
601 
602     dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[0][0];
603     dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[0][1];
604     int sub_x_q3 = 0;
605     int sub_y_q3 = 0;
606     const int num_loops = 1000000000 / (in_w + in_h);
607     aom_usec_timer timer;
608     aom_usec_timer_start(&timer);
609     int subpel_search = USE_8_TAPS;  // set to USE_4_TAPS to test 4-tap filter.
610     for (int i = 0; i < num_loops; ++i)
611       aom_highbd_dist_wtd_comp_avg_upsampled_pred_c(
612           nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(output),
613           CONVERT_TO_BYTEPTR(pred8), in_w, in_h, sub_x_q3, sub_y_q3,
614           CONVERT_TO_BYTEPTR(ref8), in_w, bd, &dist_wtd_comp_params,
615           subpel_search);
616 
617     aom_usec_timer_mark(&timer);
618     const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
619     printf("highbddistwtdcompavgupsampled c_code %3dx%-3d: %7.2f us\n", in_w,
620            in_h, 1000.0 * elapsed_time / num_loops);
621 
622     aom_usec_timer timer1;
623     aom_usec_timer_start(&timer1);
624 
625     for (int i = 0; i < num_loops; ++i)
626       test_impl(nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(output2),
627                 CONVERT_TO_BYTEPTR(pred8), in_w, in_h, sub_x_q3, sub_y_q3,
628                 CONVERT_TO_BYTEPTR(ref8), in_w, bd, &dist_wtd_comp_params,
629                 subpel_search);
630 
631     aom_usec_timer_mark(&timer1);
632     const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
633     printf("highbddistwtdcompavgupsampled test_code %3dx%-3d: %7.2f us\n", in_w,
634            in_h, 1000.0 * elapsed_time1 / num_loops);
635   }
636 
637   libaom_test::ACMRandom rnd_;
638 };  // class AV1HighBDDistWtdCompAvgUpsampledTest
639 
// Lets the build pass even if this suite ends up with no instantiation.
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(
    AV1HighBDDistWtdCompAvgUpsampledTest);
642 #endif  // CONFIG_AV1_HIGHBITDEPTH
643 
// Benchmark of the implementation under test against the C reference. The
// DISABLED_ prefix keeps it out of normal test runs.
TEST_P(AV1DistWtdCompAvgTest, DISABLED_Speed) {
  const distwtdcompavg_func impl = GET_PARAM(0);
  RunSpeedTest(impl);
}
645 
// Requires the implementation under test to match the C reference exactly.
TEST_P(AV1DistWtdCompAvgTest, CheckOutput) {
  const distwtdcompavg_func impl = GET_PARAM(0);
  RunCheckOutput(impl);
}
647 
648 #if HAVE_SSSE3
// Runs AV1DistWtdCompAvgTest on aom_dist_wtd_comp_avg_pred_ssse3 for every
// block size produced by BuildParams().
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1DistWtdCompAvgTest,
                         BuildParams(aom_dist_wtd_comp_avg_pred_ssse3));
651 #endif
652 
// Reference block at the maximum sample value (mask_), second predictor all
// zero.
TEST_P(DistWtdCompAvgTest, MaxRef) {
  FillConstant(second_pred_, width_, 0);
  FillConstant(reference_data_, reference_stride_, mask_);
  CheckCompAvg();
}
658 
// Second predictor at the maximum sample value (mask_), reference block all
// zero.
TEST_P(DistWtdCompAvgTest, MaxSecondPred) {
  FillConstant(second_pred_, width_, mask_);
  FillConstant(reference_data_, reference_stride_, 0);
  CheckCompAvg();
}
664 
// Random data with the reference stride halved. SetUp() sets the stride to
// 2 * width_, so the halved stride equals the block width.
TEST_P(DistWtdCompAvgTest, ShortRef) {
  const int saved_stride = reference_stride_;
  reference_stride_ = reference_stride_ >> 1;
  // The reference is filled before the second predictor to keep the random
  // sequence unchanged.
  FillRandom(reference_data_, reference_stride_);
  FillRandom(second_pred_, width_);
  CheckCompAvg();
  reference_stride_ = saved_stride;
}
673 
// Random data with an odd reference stride (one less than the default).
TEST_P(DistWtdCompAvgTest, UnalignedRef) {
  // Certain types of searches may use an unaligned reference frame, although
  // the source frame is never unaligned.
  const int saved_stride = reference_stride_;
  --reference_stride_;
  // The reference is filled before the second predictor to keep the random
  // sequence unchanged.
  FillRandom(reference_data_, reference_stride_);
  FillRandom(second_pred_, width_);
  CheckCompAvg();
  reference_stride_ = saved_stride;
}
684 
685 // TODO(chengchen): add highbd tests
686 const DistWtdCompAvgParam dist_wtd_comp_avg_c_tests[] = {
687   make_tuple(128, 128, &aom_dist_wtd_comp_avg_pred_c, -1),
688   make_tuple(128, 64, &aom_dist_wtd_comp_avg_pred_c, -1),
689   make_tuple(64, 128, &aom_dist_wtd_comp_avg_pred_c, -1),
690   make_tuple(64, 64, &aom_dist_wtd_comp_avg_pred_c, -1),
691   make_tuple(64, 32, &aom_dist_wtd_comp_avg_pred_c, -1),
692   make_tuple(32, 64, &aom_dist_wtd_comp_avg_pred_c, -1),
693   make_tuple(32, 32, &aom_dist_wtd_comp_avg_pred_c, -1),
694   make_tuple(32, 16, &aom_dist_wtd_comp_avg_pred_c, -1),
695   make_tuple(16, 32, &aom_dist_wtd_comp_avg_pred_c, -1),
696   make_tuple(16, 16, &aom_dist_wtd_comp_avg_pred_c, -1),
697   make_tuple(16, 8, &aom_dist_wtd_comp_avg_pred_c, -1),
698   make_tuple(8, 16, &aom_dist_wtd_comp_avg_pred_c, -1),
699   make_tuple(8, 8, &aom_dist_wtd_comp_avg_pred_c, -1),
700   make_tuple(8, 4, &aom_dist_wtd_comp_avg_pred_c, -1),
701   make_tuple(4, 8, &aom_dist_wtd_comp_avg_pred_c, -1),
702   make_tuple(4, 4, &aom_dist_wtd_comp_avg_pred_c, -1),
703 
704 #if !CONFIG_REALTIME_ONLY
705   make_tuple(64, 16, &aom_dist_wtd_comp_avg_pred_c, -1),
706   make_tuple(16, 64, &aom_dist_wtd_comp_avg_pred_c, -1),
707   make_tuple(32, 8, &aom_dist_wtd_comp_avg_pred_c, -1),
708   make_tuple(8, 32, &aom_dist_wtd_comp_avg_pred_c, -1),
709   make_tuple(16, 4, &aom_dist_wtd_comp_avg_pred_c, -1),
710   make_tuple(4, 16, &aom_dist_wtd_comp_avg_pred_c, -1),
711 #endif
712 };
713 
// Runs DistWtdCompAvgTest once per entry of dist_wtd_comp_avg_c_tests.
INSTANTIATE_TEST_SUITE_P(C, DistWtdCompAvgTest,
                         ::testing::ValuesIn(dist_wtd_comp_avg_c_tests));
716 
#if HAVE_SSSE3
// Parameter rows for the SSSE3 instantiation of DistWtdCompAvgTest. Each row
// is (first dimension, second dimension, function under test, -1); the
// meaning of the trailing -1 is defined by the test fixture, not here.
// The row list matches the C and NEON tables in this file: every size appears
// exactly once, so parameter indices line up across the three suites.
const DistWtdCompAvgParam dist_wtd_comp_avg_ssse3_tests[] = {
  make_tuple(128, 128, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(128, 64, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(64, 128, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(64, 64, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(64, 32, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(32, 64, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(32, 32, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(32, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(16, 32, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(16, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(16, 8, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(8, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(8, 8, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(8, 4, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(4, 8, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(4, 4, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  // 4:1 and 1:4 shapes are only listed when the build is not realtime-only.
#if !CONFIG_REALTIME_ONLY
  make_tuple(64, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(16, 64, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(32, 8, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(8, 32, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(16, 4, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
  make_tuple(4, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
#endif  // !CONFIG_REALTIME_ONLY
};

INSTANTIATE_TEST_SUITE_P(SSSE3, DistWtdCompAvgTest,
                         ::testing::ValuesIn(dist_wtd_comp_avg_ssse3_tests));
#endif  // HAVE_SSSE3
749 
#if HAVE_NEON
// Address of the NEON kernel, named once so every table row below refers to
// the same function under test.
const auto kDistWtdCompAvgPredNeon = &aom_dist_wtd_comp_avg_pred_neon;

// Parameter rows for the NEON instantiation of DistWtdCompAvgTest. Row order
// is significant: it determines the index GoogleTest assigns to each case.
const DistWtdCompAvgParam dist_wtd_comp_avg_neon_tests[] = {
  // 1:1, 2:1 and 1:2 shapes, from 128x128 down to 4x4.
  make_tuple(128, 128, kDistWtdCompAvgPredNeon, -1),
  make_tuple(128, 64, kDistWtdCompAvgPredNeon, -1),
  make_tuple(64, 128, kDistWtdCompAvgPredNeon, -1),
  make_tuple(64, 64, kDistWtdCompAvgPredNeon, -1),
  make_tuple(64, 32, kDistWtdCompAvgPredNeon, -1),
  make_tuple(32, 64, kDistWtdCompAvgPredNeon, -1),
  make_tuple(32, 32, kDistWtdCompAvgPredNeon, -1),
  make_tuple(32, 16, kDistWtdCompAvgPredNeon, -1),
  make_tuple(16, 32, kDistWtdCompAvgPredNeon, -1),
  make_tuple(16, 16, kDistWtdCompAvgPredNeon, -1),
  make_tuple(16, 8, kDistWtdCompAvgPredNeon, -1),
  make_tuple(8, 16, kDistWtdCompAvgPredNeon, -1),
  make_tuple(8, 8, kDistWtdCompAvgPredNeon, -1),
  make_tuple(8, 4, kDistWtdCompAvgPredNeon, -1),
  make_tuple(4, 8, kDistWtdCompAvgPredNeon, -1),
  make_tuple(4, 4, kDistWtdCompAvgPredNeon, -1),
#if !CONFIG_REALTIME_ONLY
  // 4:1 and 1:4 shapes, compiled out of realtime-only builds.
  make_tuple(64, 16, kDistWtdCompAvgPredNeon, -1),
  make_tuple(16, 64, kDistWtdCompAvgPredNeon, -1),
  make_tuple(32, 8, kDistWtdCompAvgPredNeon, -1),
  make_tuple(8, 32, kDistWtdCompAvgPredNeon, -1),
  make_tuple(16, 4, kDistWtdCompAvgPredNeon, -1),
  make_tuple(4, 16, kDistWtdCompAvgPredNeon, -1),
#endif  // !CONFIG_REALTIME_ONLY
};

INSTANTIATE_TEST_SUITE_P(
    NEON, DistWtdCompAvgTest,
    ::testing::ValuesIn(dist_wtd_comp_avg_neon_tests));
#endif  // HAVE_NEON
781 
782 #if CONFIG_AV1_HIGHBITDEPTH
// Speed-test entry point for AV1HighBDDistWtdCompAvgTest; delegates to the
// fixture's RunSpeedTest(). The DISABLED_ name prefix keeps GoogleTest from
// running it unless --gtest_also_run_disabled_tests is passed.
TEST_P(AV1HighBDDistWtdCompAvgTest, DISABLED_Speed) {
  // GET_PARAM(1) selects field 1 of the test parameter; presumably the
  // function under test -- confirm against BuildParams.
  RunSpeedTest(GET_PARAM(1));
}
786 
// Output-check entry point for AV1HighBDDistWtdCompAvgTest; delegates to the
// fixture's RunCheckOutput().
TEST_P(AV1HighBDDistWtdCompAvgTest, CheckOutput) {
  // GET_PARAM(1) selects field 1 of the test parameter; presumably the
  // function under test -- confirm against BuildParams.
  RunCheckOutput(GET_PARAM(1));
}
790 
#if HAVE_SSE2
// SSE2 parameter sets for AV1HighBDDistWtdCompAvgTest, generated by
// BuildParams. The second argument (1) is forwarded to BuildParams as-is; its
// meaning is defined there.
INSTANTIATE_TEST_SUITE_P(
    SSE2, AV1HighBDDistWtdCompAvgTest,
    BuildParams(&aom_highbd_dist_wtd_comp_avg_pred_sse2, 1));
#endif  // HAVE_SSE2
796 
#if HAVE_NEON
// NEON parameter sets for AV1HighBDDistWtdCompAvgTest, generated by
// BuildParams. The second argument (1) is forwarded to BuildParams as-is; its
// meaning is defined there.
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1HighBDDistWtdCompAvgTest,
    BuildParams(&aom_highbd_dist_wtd_comp_avg_pred_neon, 1));
#endif  // HAVE_NEON
802 
// Speed-test entry point for AV1HighBDDistWtdCompAvgUpsampledTest; delegates
// to the fixture's RunSpeedTest(). The DISABLED_ name prefix keeps GoogleTest
// from running it unless --gtest_also_run_disabled_tests is passed.
TEST_P(AV1HighBDDistWtdCompAvgUpsampledTest, DISABLED_Speed) {
  // GET_PARAM(1) selects field 1 of the test parameter; presumably the
  // function under test -- confirm against BuildParams.
  RunSpeedTest(GET_PARAM(1));
}
806 
// Output-check entry point for AV1HighBDDistWtdCompAvgUpsampledTest;
// delegates to the fixture's RunCheckOutput().
TEST_P(AV1HighBDDistWtdCompAvgUpsampledTest, CheckOutput) {
  // GET_PARAM(1) selects field 1 of the test parameter; presumably the
  // function under test -- confirm against BuildParams.
  RunCheckOutput(GET_PARAM(1));
}
810 
#if HAVE_SSE2
// SSE2 parameter sets for AV1HighBDDistWtdCompAvgUpsampledTest, generated by
// BuildParams from the SSE2 upsampled-prediction function.
INSTANTIATE_TEST_SUITE_P(
    SSE2, AV1HighBDDistWtdCompAvgUpsampledTest,
    BuildParams(&aom_highbd_dist_wtd_comp_avg_upsampled_pred_sse2));
#endif  // HAVE_SSE2
816 
#if HAVE_NEON
// NEON parameter sets for AV1HighBDDistWtdCompAvgUpsampledTest, generated by
// BuildParams from the NEON upsampled-prediction function.
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1HighBDDistWtdCompAvgUpsampledTest,
    BuildParams(&aom_highbd_dist_wtd_comp_avg_upsampled_pred_neon));
#endif  // HAVE_NEON
822 
823 #endif  // CONFIG_AV1_HIGHBITDEPTH
824 
825 }  // namespace
826