1 /*
2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <tuple>
13
14 #include "config/aom_dsp_rtcd.h"
15 #include "config/av1_rtcd.h"
16
17 #include "gtest/gtest.h"
18 #include "test/acm_random.h"
19 #include "test/util.h"
20 #include "test/register_state_check.h"
21 #include "av1/common/common_data.h"
22 #include "aom_ports/aom_timer.h"
23
24 using libaom_test::ACMRandom;
25 using std::make_tuple;
26 using std::tuple;
27
28 namespace {
29
30 const int kMaxSize = 128 + 32; // padding
31
32 typedef void (*distwtdcompavg_func)(uint8_t *comp_pred, const uint8_t *pred,
33 int width, int height, const uint8_t *ref,
34 int ref_stride,
35 const DIST_WTD_COMP_PARAMS *jcp_param);
36
37 typedef void (*distwtdcompavgupsampled_func)(
38 MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
39 const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
40 int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
41 int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search);
42
43 typedef void (*DistWtdCompAvgFunc)(uint8_t *comp_pred, const uint8_t *pred,
44 int width, int height, const uint8_t *ref,
45 int ref_stride,
46 const DIST_WTD_COMP_PARAMS *jcp_param);
47
48 typedef std::tuple<distwtdcompavg_func, BLOCK_SIZE> AV1DistWtdCompAvgParam;
49
50 typedef std::tuple<int, int, DistWtdCompAvgFunc, int> DistWtdCompAvgParam;
51
52 #if CONFIG_AV1_HIGHBITDEPTH
53 typedef void (*highbddistwtdcompavgupsampled_func)(
54 MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
55 const MV *const mv, uint8_t *comp_pred8, const uint8_t *pred8, int width,
56 int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
57 int ref_stride, int bd, const DIST_WTD_COMP_PARAMS *jcp_param,
58 int subpel_search);
59
60 typedef std::tuple<int, highbddistwtdcompavgupsampled_func, BLOCK_SIZE>
61 HighbdDistWtdCompAvgUpsampledParam;
62
63 typedef std::tuple<int, distwtdcompavg_func, BLOCK_SIZE>
64 HighbdDistWtdCompAvgParam;
65
66 #if HAVE_SSE2 || HAVE_NEON
BuildParams(distwtdcompavg_func filter,int is_hbd)67 ::testing::internal::ParamGenerator<HighbdDistWtdCompAvgParam> BuildParams(
68 distwtdcompavg_func filter, int is_hbd) {
69 (void)is_hbd;
70 return ::testing::Combine(::testing::Range(8, 13, 2),
71 ::testing::Values(filter),
72 ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
73 }
74
75 ::testing::internal::ParamGenerator<HighbdDistWtdCompAvgUpsampledParam>
BuildParams(highbddistwtdcompavgupsampled_func filter)76 BuildParams(highbddistwtdcompavgupsampled_func filter) {
77 return ::testing::Combine(::testing::Range(8, 13, 2),
78 ::testing::Values(filter),
79 ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
80 }
81 #endif // HAVE_SSE2 || HAVE_NEON
82 #endif // CONFIG_AV1_HIGHBITDEPTH
83
84 #if HAVE_SSSE3
BuildParams(distwtdcompavg_func filter)85 ::testing::internal::ParamGenerator<AV1DistWtdCompAvgParam> BuildParams(
86 distwtdcompavg_func filter) {
87 return ::testing::Combine(::testing::Values(filter),
88 ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
89 }
90 #endif // HAVE_SSSE3
91
92 class AV1DistWtdCompAvgTest
93 : public ::testing::TestWithParam<AV1DistWtdCompAvgParam> {
94 public:
95 ~AV1DistWtdCompAvgTest() override = default;
SetUp()96 void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); }
97
98 protected:
RunCheckOutput(distwtdcompavg_func test_impl)99 void RunCheckOutput(distwtdcompavg_func test_impl) {
100 const int w = kMaxSize, h = kMaxSize;
101 const int block_idx = GET_PARAM(1);
102
103 uint8_t pred8[kMaxSize * kMaxSize];
104 uint8_t ref8[kMaxSize * kMaxSize];
105 uint8_t output[kMaxSize * kMaxSize];
106 uint8_t output2[kMaxSize * kMaxSize];
107
108 for (int i = 0; i < h; ++i)
109 for (int j = 0; j < w; ++j) {
110 pred8[i * w + j] = rnd_.Rand8();
111 ref8[i * w + j] = rnd_.Rand8();
112 }
113 const int in_w = block_size_wide[block_idx];
114 const int in_h = block_size_high[block_idx];
115
116 DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
117 dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
118
119 for (int ii = 0; ii < 2; ii++) {
120 for (int jj = 0; jj < 4; jj++) {
121 dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[jj][ii];
122 dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[jj][1 - ii];
123
124 const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
125 const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
126 aom_dist_wtd_comp_avg_pred_c(output, pred8 + offset_r * w + offset_c,
127 in_w, in_h, ref8 + offset_r * w + offset_c,
128 in_w, &dist_wtd_comp_params);
129 test_impl(output2, pred8 + offset_r * w + offset_c, in_w, in_h,
130 ref8 + offset_r * w + offset_c, in_w, &dist_wtd_comp_params);
131
132 for (int i = 0; i < in_h; ++i) {
133 for (int j = 0; j < in_w; ++j) {
134 int idx = i * in_w + j;
135 ASSERT_EQ(output[idx], output2[idx])
136 << "Mismatch at unit tests for AV1DistWtdCompAvgTest\n"
137 << in_w << "x" << in_h << " Pixel mismatch at index " << idx
138 << " = (" << i << ", " << j << ")";
139 }
140 }
141 }
142 }
143 }
RunSpeedTest(distwtdcompavg_func test_impl)144 void RunSpeedTest(distwtdcompavg_func test_impl) {
145 const int w = kMaxSize, h = kMaxSize;
146 const int block_idx = GET_PARAM(1);
147
148 uint8_t pred8[kMaxSize * kMaxSize];
149 uint8_t ref8[kMaxSize * kMaxSize];
150 uint8_t output[kMaxSize * kMaxSize];
151 uint8_t output2[kMaxSize * kMaxSize];
152
153 for (int i = 0; i < h; ++i)
154 for (int j = 0; j < w; ++j) {
155 pred8[i * w + j] = rnd_.Rand8();
156 ref8[i * w + j] = rnd_.Rand8();
157 }
158 const int in_w = block_size_wide[block_idx];
159 const int in_h = block_size_high[block_idx];
160
161 DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
162 dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
163
164 dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[0][0];
165 dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[0][1];
166
167 const int num_loops = 1000000000 / (in_w + in_h);
168 aom_usec_timer timer;
169 aom_usec_timer_start(&timer);
170
171 for (int i = 0; i < num_loops; ++i)
172 aom_dist_wtd_comp_avg_pred_c(output, pred8, in_w, in_h, ref8, in_w,
173 &dist_wtd_comp_params);
174
175 aom_usec_timer_mark(&timer);
176 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
177 printf("distwtdcompavg c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
178 1000.0 * elapsed_time / num_loops);
179
180 aom_usec_timer timer1;
181 aom_usec_timer_start(&timer1);
182
183 for (int i = 0; i < num_loops; ++i)
184 test_impl(output2, pred8, in_w, in_h, ref8, in_w, &dist_wtd_comp_params);
185
186 aom_usec_timer_mark(&timer1);
187 const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
188 printf("distwtdcompavg test_code %3dx%-3d: %7.2f us\n", in_w, in_h,
189 1000.0 * elapsed_time1 / num_loops);
190 }
191
192 libaom_test::ACMRandom rnd_;
193 }; // class AV1DistWtdCompAvgTest
194
195 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1DistWtdCompAvgTest);
196
197 class DistWtdCompAvgTest
198 : public ::testing::WithParamInterface<DistWtdCompAvgParam>,
199 public ::testing::Test {
200 public:
DistWtdCompAvgTest()201 DistWtdCompAvgTest()
202 : width_(GET_PARAM(0)), height_(GET_PARAM(1)), bd_(GET_PARAM(3)) {}
203
SetUpTestSuite()204 static void SetUpTestSuite() {
205 reference_data8_ = reinterpret_cast<uint8_t *>(
206 aom_memalign(kDataAlignment, kDataBufferSize));
207 ASSERT_NE(reference_data8_, nullptr);
208 second_pred8_ =
209 reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
210 ASSERT_NE(second_pred8_, nullptr);
211 comp_pred8_ =
212 reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
213 ASSERT_NE(comp_pred8_, nullptr);
214 comp_pred8_test_ =
215 reinterpret_cast<uint8_t *>(aom_memalign(kDataAlignment, 128 * 128));
216 ASSERT_NE(comp_pred8_test_, nullptr);
217 reference_data16_ = reinterpret_cast<uint16_t *>(
218 aom_memalign(kDataAlignment, kDataBufferSize * sizeof(uint16_t)));
219 ASSERT_NE(reference_data16_, nullptr);
220 second_pred16_ = reinterpret_cast<uint16_t *>(
221 aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
222 ASSERT_NE(second_pred16_, nullptr);
223 comp_pred16_ = reinterpret_cast<uint16_t *>(
224 aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
225 ASSERT_NE(comp_pred16_, nullptr);
226 comp_pred16_test_ = reinterpret_cast<uint16_t *>(
227 aom_memalign(kDataAlignment, 128 * 128 * sizeof(uint16_t)));
228 ASSERT_NE(comp_pred16_test_, nullptr);
229 }
230
TearDownTestSuite()231 static void TearDownTestSuite() {
232 aom_free(reference_data8_);
233 reference_data8_ = nullptr;
234 aom_free(second_pred8_);
235 second_pred8_ = nullptr;
236 aom_free(comp_pred8_);
237 comp_pred8_ = nullptr;
238 aom_free(comp_pred8_test_);
239 comp_pred8_test_ = nullptr;
240 aom_free(reference_data16_);
241 reference_data16_ = nullptr;
242 aom_free(second_pred16_);
243 second_pred16_ = nullptr;
244 aom_free(comp_pred16_);
245 comp_pred16_ = nullptr;
246 aom_free(comp_pred16_test_);
247 comp_pred16_test_ = nullptr;
248 }
249
250 protected:
251 // Handle up to 4 128x128 blocks, with stride up to 256
252 static const int kDataAlignment = 16;
253 static const int kDataBlockSize = 128 * 256;
254 static const int kDataBufferSize = 4 * kDataBlockSize;
255
SetUp()256 void SetUp() override {
257 if (bd_ == -1) {
258 use_high_bit_depth_ = false;
259 bit_depth_ = AOM_BITS_8;
260 reference_data_ = reference_data8_;
261 second_pred_ = second_pred8_;
262 comp_pred_ = comp_pred8_;
263 comp_pred_test_ = comp_pred8_test_;
264 } else {
265 use_high_bit_depth_ = true;
266 bit_depth_ = static_cast<aom_bit_depth_t>(bd_);
267 reference_data_ = CONVERT_TO_BYTEPTR(reference_data16_);
268 second_pred_ = CONVERT_TO_BYTEPTR(second_pred16_);
269 comp_pred_ = CONVERT_TO_BYTEPTR(comp_pred16_);
270 comp_pred_test_ = CONVERT_TO_BYTEPTR(comp_pred16_test_);
271 }
272 mask_ = (1 << bit_depth_) - 1;
273 reference_stride_ = width_ * 2;
274 rnd_.Reset(ACMRandom::DeterministicSeed());
275 }
276
GetReference(int block_idx)277 virtual uint8_t *GetReference(int block_idx) {
278 if (use_high_bit_depth_)
279 return CONVERT_TO_BYTEPTR(CONVERT_TO_SHORTPTR(reference_data_) +
280 block_idx * kDataBlockSize);
281 return reference_data_ + block_idx * kDataBlockSize;
282 }
283
ReferenceDistWtdCompAvg(int block_idx)284 void ReferenceDistWtdCompAvg(int block_idx) {
285 const uint8_t *const reference8 = GetReference(block_idx);
286 const uint8_t *const second_pred8 = second_pred_;
287 uint8_t *const comp_pred8 = comp_pred_;
288 const uint16_t *const reference16 =
289 CONVERT_TO_SHORTPTR(GetReference(block_idx));
290 const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
291 uint16_t *const comp_pred16 = CONVERT_TO_SHORTPTR(comp_pred_);
292 for (int h = 0; h < height_; ++h) {
293 for (int w = 0; w < width_; ++w) {
294 if (!use_high_bit_depth_) {
295 const int tmp =
296 second_pred8[h * width_ + w] * jcp_param_.bck_offset +
297 reference8[h * reference_stride_ + w] * jcp_param_.fwd_offset;
298 comp_pred8[h * width_ + w] = ROUND_POWER_OF_TWO(tmp, 4);
299 } else {
300 const int tmp =
301 second_pred16[h * width_ + w] * jcp_param_.bck_offset +
302 reference16[h * reference_stride_ + w] * jcp_param_.fwd_offset;
303 comp_pred16[h * width_ + w] = ROUND_POWER_OF_TWO(tmp, 4);
304 }
305 }
306 }
307 }
308
FillConstant(uint8_t * data,int stride,uint16_t fill_constant)309 void FillConstant(uint8_t *data, int stride, uint16_t fill_constant) {
310 uint8_t *data8 = data;
311 uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
312 for (int h = 0; h < height_; ++h) {
313 for (int w = 0; w < width_; ++w) {
314 if (!use_high_bit_depth_) {
315 data8[h * stride + w] = static_cast<uint8_t>(fill_constant);
316 } else {
317 data16[h * stride + w] = fill_constant;
318 }
319 }
320 }
321 }
322
FillRandom(uint8_t * data,int stride)323 void FillRandom(uint8_t *data, int stride) {
324 uint8_t *data8 = data;
325 uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
326 for (int h = 0; h < height_; ++h) {
327 for (int w = 0; w < width_; ++w) {
328 if (!use_high_bit_depth_) {
329 data8[h * stride + w] = rnd_.Rand8();
330 } else {
331 data16[h * stride + w] = rnd_.Rand16() & mask_;
332 }
333 }
334 }
335 }
336
dist_wtd_comp_avg(int block_idx)337 void dist_wtd_comp_avg(int block_idx) {
338 const uint8_t *const reference = GetReference(block_idx);
339
340 API_REGISTER_STATE_CHECK(GET_PARAM(2)(comp_pred_test_, second_pred_, width_,
341 height_, reference, reference_stride_,
342 &jcp_param_));
343 }
344
CheckCompAvg()345 void CheckCompAvg() {
346 for (int j = 0; j < 2; ++j) {
347 for (int i = 0; i < 4; ++i) {
348 jcp_param_.fwd_offset = quant_dist_lookup_table[i][j];
349 jcp_param_.bck_offset = quant_dist_lookup_table[i][1 - j];
350
351 ReferenceDistWtdCompAvg(0);
352 dist_wtd_comp_avg(0);
353
354 for (int y = 0; y < height_; ++y)
355 for (int x = 0; x < width_; ++x)
356 ASSERT_EQ(comp_pred_[y * width_ + x],
357 comp_pred_test_[y * width_ + x]);
358 }
359 }
360 }
361
362 int width_, height_, mask_, bd_;
363 aom_bit_depth_t bit_depth_;
364 static uint8_t *reference_data_;
365 static uint8_t *second_pred_;
366 bool use_high_bit_depth_;
367 static uint8_t *reference_data8_;
368 static uint8_t *second_pred8_;
369 static uint16_t *reference_data16_;
370 static uint16_t *second_pred16_;
371 int reference_stride_;
372 static uint8_t *comp_pred_;
373 static uint8_t *comp_pred8_;
374 static uint16_t *comp_pred16_;
375 static uint8_t *comp_pred_test_;
376 static uint8_t *comp_pred8_test_;
377 static uint16_t *comp_pred16_test_;
378 DIST_WTD_COMP_PARAMS jcp_param_;
379
380 ACMRandom rnd_;
381 };
382
383 uint8_t *DistWtdCompAvgTest::reference_data_ = nullptr;
384 uint8_t *DistWtdCompAvgTest::second_pred_ = nullptr;
385 uint8_t *DistWtdCompAvgTest::comp_pred_ = nullptr;
386 uint8_t *DistWtdCompAvgTest::comp_pred_test_ = nullptr;
387 uint8_t *DistWtdCompAvgTest::reference_data8_ = nullptr;
388 uint8_t *DistWtdCompAvgTest::second_pred8_ = nullptr;
389 uint8_t *DistWtdCompAvgTest::comp_pred8_ = nullptr;
390 uint8_t *DistWtdCompAvgTest::comp_pred8_test_ = nullptr;
391 uint16_t *DistWtdCompAvgTest::reference_data16_ = nullptr;
392 uint16_t *DistWtdCompAvgTest::second_pred16_ = nullptr;
393 uint16_t *DistWtdCompAvgTest::comp_pred16_ = nullptr;
394 uint16_t *DistWtdCompAvgTest::comp_pred16_test_ = nullptr;
395
396 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DistWtdCompAvgTest);
397
398 #if CONFIG_AV1_HIGHBITDEPTH
399 class AV1HighBDDistWtdCompAvgTest
400 : public ::testing::TestWithParam<HighbdDistWtdCompAvgParam> {
401 public:
402 ~AV1HighBDDistWtdCompAvgTest() override = default;
SetUp()403 void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); }
404
405 protected:
RunCheckOutput(distwtdcompavg_func test_impl)406 void RunCheckOutput(distwtdcompavg_func test_impl) {
407 const int w = kMaxSize, h = kMaxSize;
408 const int block_idx = GET_PARAM(2);
409 const int bd = GET_PARAM(0);
410 uint16_t pred8[kMaxSize * kMaxSize];
411 uint16_t ref8[kMaxSize * kMaxSize];
412 uint16_t output[kMaxSize * kMaxSize];
413 uint16_t output2[kMaxSize * kMaxSize];
414
415 for (int i = 0; i < h; ++i)
416 for (int j = 0; j < w; ++j) {
417 pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
418 ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
419 }
420 const int in_w = block_size_wide[block_idx];
421 const int in_h = block_size_high[block_idx];
422
423 DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
424 dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
425
426 for (int ii = 0; ii < 2; ii++) {
427 for (int jj = 0; jj < 4; jj++) {
428 dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[jj][ii];
429 dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[jj][1 - ii];
430
431 const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
432 const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
433 aom_highbd_dist_wtd_comp_avg_pred_c(
434 CONVERT_TO_BYTEPTR(output),
435 CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w, in_h,
436 CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w,
437 &dist_wtd_comp_params);
438 test_impl(CONVERT_TO_BYTEPTR(output2),
439 CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w,
440 in_h, CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c,
441 in_w, &dist_wtd_comp_params);
442
443 for (int i = 0; i < in_h; ++i) {
444 for (int j = 0; j < in_w; ++j) {
445 int idx = i * in_w + j;
446 ASSERT_EQ(output[idx], output2[idx])
447 << "Mismatch at unit tests for AV1HighBDDistWtdCompAvgTest\n"
448 << in_w << "x" << in_h << " Pixel mismatch at index " << idx
449 << " = (" << i << ", " << j << ")";
450 }
451 }
452 }
453 }
454 }
RunSpeedTest(distwtdcompavg_func test_impl)455 void RunSpeedTest(distwtdcompavg_func test_impl) {
456 const int w = kMaxSize, h = kMaxSize;
457 const int block_idx = GET_PARAM(2);
458 const int bd = GET_PARAM(0);
459 uint16_t pred8[kMaxSize * kMaxSize];
460 uint16_t ref8[kMaxSize * kMaxSize];
461 uint16_t output[kMaxSize * kMaxSize];
462 uint16_t output2[kMaxSize * kMaxSize];
463
464 for (int i = 0; i < h; ++i)
465 for (int j = 0; j < w; ++j) {
466 pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
467 ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
468 }
469 const int in_w = block_size_wide[block_idx];
470 const int in_h = block_size_high[block_idx];
471
472 DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
473 dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
474
475 dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[0][0];
476 dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[0][1];
477
478 const int num_loops = 1000000000 / (in_w + in_h);
479 aom_usec_timer timer;
480 aom_usec_timer_start(&timer);
481
482 for (int i = 0; i < num_loops; ++i)
483 aom_highbd_dist_wtd_comp_avg_pred_c(
484 CONVERT_TO_BYTEPTR(output), CONVERT_TO_BYTEPTR(pred8), in_w, in_h,
485 CONVERT_TO_BYTEPTR(ref8), in_w, &dist_wtd_comp_params);
486
487 aom_usec_timer_mark(&timer);
488 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
489 printf("highbddistwtdcompavg c_code %3dx%-3d: %7.2f us\n", in_w, in_h,
490 1000.0 * elapsed_time / num_loops);
491
492 aom_usec_timer timer1;
493 aom_usec_timer_start(&timer1);
494
495 for (int i = 0; i < num_loops; ++i)
496 test_impl(CONVERT_TO_BYTEPTR(output2), CONVERT_TO_BYTEPTR(pred8), in_w,
497 in_h, CONVERT_TO_BYTEPTR(ref8), in_w, &dist_wtd_comp_params);
498
499 aom_usec_timer_mark(&timer1);
500 const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
501 printf("highbddistwtdcompavg test_code %3dx%-3d: %7.2f us\n", in_w, in_h,
502 1000.0 * elapsed_time1 / num_loops);
503 }
504
505 libaom_test::ACMRandom rnd_;
506 }; // class AV1HighBDDistWtdCompAvgTest
507
508 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighBDDistWtdCompAvgTest);
509
510 class AV1HighBDDistWtdCompAvgUpsampledTest
511 : public ::testing::TestWithParam<HighbdDistWtdCompAvgUpsampledParam> {
512 public:
513 ~AV1HighBDDistWtdCompAvgUpsampledTest() override = default;
SetUp()514 void SetUp() override { rnd_.Reset(ACMRandom::DeterministicSeed()); }
515
516 protected:
RunCheckOutput(highbddistwtdcompavgupsampled_func test_impl)517 void RunCheckOutput(highbddistwtdcompavgupsampled_func test_impl) {
518 const int w = kMaxSize, h = kMaxSize;
519 const int block_idx = GET_PARAM(2);
520 const int bd = GET_PARAM(0);
521 uint16_t pred8[kMaxSize * kMaxSize];
522 uint16_t ref8[kMaxSize * kMaxSize];
523 DECLARE_ALIGNED(16, uint16_t, output[kMaxSize * kMaxSize]);
524 DECLARE_ALIGNED(16, uint16_t, output2[kMaxSize * kMaxSize]);
525
526 for (int i = 0; i < h; ++i)
527 for (int j = 0; j < w; ++j) {
528 pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
529 ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
530 }
531 const int in_w = block_size_wide[block_idx];
532 const int in_h = block_size_high[block_idx];
533
534 DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
535 dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
536 int sub_x_q3, sub_y_q3;
537 int subpel_search;
538 for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
539 ++subpel_search) {
540 for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) {
541 for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) {
542 for (int ii = 0; ii < 2; ii++) {
543 for (int jj = 0; jj < 4; jj++) {
544 dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[jj][ii];
545 dist_wtd_comp_params.bck_offset =
546 quant_dist_lookup_table[jj][1 - ii];
547
548 const int offset_r = 3 + rnd_.PseudoUniform(h - in_h - 7);
549 const int offset_c = 3 + rnd_.PseudoUniform(w - in_w - 7);
550
551 aom_highbd_dist_wtd_comp_avg_upsampled_pred_c(
552 nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(output),
553 CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c, in_w,
554 in_h, sub_x_q3, sub_y_q3,
555 CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c, in_w, bd,
556 &dist_wtd_comp_params, subpel_search);
557 test_impl(nullptr, nullptr, 0, 0, nullptr,
558 CONVERT_TO_BYTEPTR(output2),
559 CONVERT_TO_BYTEPTR(pred8) + offset_r * w + offset_c,
560 in_w, in_h, sub_x_q3, sub_y_q3,
561 CONVERT_TO_BYTEPTR(ref8) + offset_r * w + offset_c,
562 in_w, bd, &dist_wtd_comp_params, subpel_search);
563
564 for (int i = 0; i < in_h; ++i) {
565 for (int j = 0; j < in_w; ++j) {
566 int idx = i * in_w + j;
567 ASSERT_EQ(output[idx], output2[idx])
568 << "Mismatch at unit tests for "
569 "AV1HighBDDistWtdCompAvgUpsampledTest\n"
570 << in_w << "x" << in_h << " Pixel mismatch at index "
571 << idx << " = (" << i << ", " << j
572 << "), sub pixel offset = (" << sub_y_q3 << ", "
573 << sub_x_q3 << ")";
574 }
575 }
576 }
577 }
578 }
579 }
580 }
581 }
RunSpeedTest(highbddistwtdcompavgupsampled_func test_impl)582 void RunSpeedTest(highbddistwtdcompavgupsampled_func test_impl) {
583 const int w = kMaxSize, h = kMaxSize;
584 const int block_idx = GET_PARAM(2);
585 const int bd = GET_PARAM(0);
586 uint16_t pred8[kMaxSize * kMaxSize];
587 uint16_t ref8[kMaxSize * kMaxSize];
588 DECLARE_ALIGNED(16, uint16_t, output[kMaxSize * kMaxSize]);
589 DECLARE_ALIGNED(16, uint16_t, output2[kMaxSize * kMaxSize]);
590
591 for (int i = 0; i < h; ++i)
592 for (int j = 0; j < w; ++j) {
593 pred8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
594 ref8[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
595 }
596 const int in_w = block_size_wide[block_idx];
597 const int in_h = block_size_high[block_idx];
598
599 DIST_WTD_COMP_PARAMS dist_wtd_comp_params;
600 dist_wtd_comp_params.use_dist_wtd_comp_avg = 1;
601
602 dist_wtd_comp_params.fwd_offset = quant_dist_lookup_table[0][0];
603 dist_wtd_comp_params.bck_offset = quant_dist_lookup_table[0][1];
604 int sub_x_q3 = 0;
605 int sub_y_q3 = 0;
606 const int num_loops = 1000000000 / (in_w + in_h);
607 aom_usec_timer timer;
608 aom_usec_timer_start(&timer);
609 int subpel_search = USE_8_TAPS; // set to USE_4_TAPS to test 4-tap filter.
610 for (int i = 0; i < num_loops; ++i)
611 aom_highbd_dist_wtd_comp_avg_upsampled_pred_c(
612 nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(output),
613 CONVERT_TO_BYTEPTR(pred8), in_w, in_h, sub_x_q3, sub_y_q3,
614 CONVERT_TO_BYTEPTR(ref8), in_w, bd, &dist_wtd_comp_params,
615 subpel_search);
616
617 aom_usec_timer_mark(&timer);
618 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
619 printf("highbddistwtdcompavgupsampled c_code %3dx%-3d: %7.2f us\n", in_w,
620 in_h, 1000.0 * elapsed_time / num_loops);
621
622 aom_usec_timer timer1;
623 aom_usec_timer_start(&timer1);
624
625 for (int i = 0; i < num_loops; ++i)
626 test_impl(nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(output2),
627 CONVERT_TO_BYTEPTR(pred8), in_w, in_h, sub_x_q3, sub_y_q3,
628 CONVERT_TO_BYTEPTR(ref8), in_w, bd, &dist_wtd_comp_params,
629 subpel_search);
630
631 aom_usec_timer_mark(&timer1);
632 const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));
633 printf("highbddistwtdcompavgupsampled test_code %3dx%-3d: %7.2f us\n", in_w,
634 in_h, 1000.0 * elapsed_time1 / num_loops);
635 }
636
637 libaom_test::ACMRandom rnd_;
638 }; // class AV1HighBDDistWtdCompAvgUpsampledTest
639
640 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(
641 AV1HighBDDistWtdCompAvgUpsampledTest);
642 #endif // CONFIG_AV1_HIGHBITDEPTH
643
// Benchmark (disabled by default): times the function in tuple element 0.
TEST_P(AV1DistWtdCompAvgTest, DISABLED_Speed) {
  const distwtdcompavg_func impl = GET_PARAM(0);
  RunSpeedTest(impl);
}
645
// Checks the function in tuple element 0 against the C implementation.
TEST_P(AV1DistWtdCompAvgTest, CheckOutput) {
  const distwtdcompavg_func impl = GET_PARAM(0);
  RunCheckOutput(impl);
}
647
// Runs the AV1DistWtdCompAvgTest cases on the SSSE3 implementation for every
// block size generated by BuildParams().
#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1DistWtdCompAvgTest,
                         BuildParams(aom_dist_wtd_comp_avg_pred_ssse3));
#endif  // HAVE_SSSE3
652
// Reference at the maximum sample value (mask_), second predictor all zero.
TEST_P(DistWtdCompAvgTest, MaxRef) {
  const uint16_t max_sample = static_cast<uint16_t>(mask_);
  FillConstant(reference_data_, reference_stride_, max_sample);
  FillConstant(second_pred_, width_, 0);
  CheckCompAvg();
}
658
// Reference all zero, second predictor at the maximum sample value (mask_).
TEST_P(DistWtdCompAvgTest, MaxSecondPred) {
  const uint16_t max_sample = static_cast<uint16_t>(mask_);
  FillConstant(reference_data_, reference_stride_, 0);
  FillConstant(second_pred_, width_, max_sample);
  CheckCompAvg();
}
664
// Random data with the reference stride halved relative to the value set in
// SetUp(); the stride is restored afterwards.
TEST_P(DistWtdCompAvgTest, ShortRef) {
  const int saved_stride = reference_stride_;
  reference_stride_ = saved_stride >> 1;
  // The reference is filled before the second predictor so the random
  // sequence is consumed in a fixed order.
  FillRandom(reference_data_, reference_stride_);
  FillRandom(second_pred_, width_);
  CheckCompAvg();
  reference_stride_ = saved_stride;
}
673
// Random data with the reference stride reduced by one, so successive
// reference rows start at unaligned addresses; the stride is restored
// afterwards.
TEST_P(DistWtdCompAvgTest, UnalignedRef) {
  // The reference frame, but not the source frame, may be unaligned for
  // certain types of searches.
  const int saved_stride = reference_stride_;
  reference_stride_ = saved_stride - 1;
  FillRandom(reference_data_, reference_stride_);
  FillRandom(second_pred_, width_);
  CheckCompAvg();
  reference_stride_ = saved_stride;
}
684
// Parameters for DistWtdCompAvgTest using the C implementation. Each entry is
// <width, height, function under test, bit depth>; the bit depth of -1 makes
// the fixture use its 8-bit buffers.
// TODO(chengchen): add highbd tests
const DistWtdCompAvgParam dist_wtd_comp_avg_c_tests[] = {
  make_tuple(128, 128, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(128, 64, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(64, 128, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(64, 64, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(64, 32, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(32, 64, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(32, 32, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(32, 16, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(16, 32, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(16, 16, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(16, 8, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(8, 16, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(8, 8, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(8, 4, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(4, 8, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(4, 4, &aom_dist_wtd_comp_avg_pred_c, -1),

  // The 4:1 and 1:4 shapes are only listed when the build is not
  // realtime-only.
#if !CONFIG_REALTIME_ONLY
  make_tuple(64, 16, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(16, 64, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(32, 8, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(8, 32, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(16, 4, &aom_dist_wtd_comp_avg_pred_c, -1),
  make_tuple(4, 16, &aom_dist_wtd_comp_avg_pred_c, -1),
#endif  // !CONFIG_REALTIME_ONLY
};

INSTANTIATE_TEST_SUITE_P(C, DistWtdCompAvgTest,
                         ::testing::ValuesIn(dist_wtd_comp_avg_c_tests));
716
717 #if HAVE_SSSE3
718 const DistWtdCompAvgParam dist_wtd_comp_avg_ssse3_tests[] = {
719 make_tuple(128, 128, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
720 make_tuple(128, 64, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
721 make_tuple(64, 128, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
722 make_tuple(64, 64, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
723 make_tuple(64, 32, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
724 make_tuple(32, 64, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
725 make_tuple(32, 32, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
726 make_tuple(32, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
727 make_tuple(16, 32, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
728 make_tuple(16, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
729 make_tuple(16, 8, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
730 make_tuple(8, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
731 make_tuple(8, 8, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
732 make_tuple(8, 4, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
733 make_tuple(4, 8, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
734 make_tuple(4, 4, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
735 make_tuple(16, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
736 #if !CONFIG_REALTIME_ONLY
737 make_tuple(64, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
738 make_tuple(16, 64, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
739 make_tuple(32, 8, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
740 make_tuple(8, 32, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
741 make_tuple(16, 4, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
742 make_tuple(4, 16, &aom_dist_wtd_comp_avg_pred_ssse3, -1),
743 #endif
744 };
745
746 INSTANTIATE_TEST_SUITE_P(SSSE3, DistWtdCompAvgTest,
747 ::testing::ValuesIn(dist_wtd_comp_avg_ssse3_tests));
748 #endif // HAVE_SSSE3
749
750 #if HAVE_NEON
// Parameters for DistWtdCompAvgTest using the NEON implementation. Each entry
// is <width, height, function under test, bit depth>; the bit depth of -1
// makes the fixture use its 8-bit buffers.
const DistWtdCompAvgParam dist_wtd_comp_avg_neon_tests[] = {
  make_tuple(128, 128, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(128, 64, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(64, 128, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(64, 64, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(64, 32, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(32, 64, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(32, 32, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(32, 16, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(16, 32, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(16, 16, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(16, 8, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(8, 16, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(8, 8, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(8, 4, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(4, 8, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(4, 4, &aom_dist_wtd_comp_avg_pred_neon, -1),
  // The 4:1 and 1:4 shapes are only listed when the build is not
  // realtime-only.
#if !CONFIG_REALTIME_ONLY
  make_tuple(64, 16, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(16, 64, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(32, 8, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(8, 32, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(16, 4, &aom_dist_wtd_comp_avg_pred_neon, -1),
  make_tuple(4, 16, &aom_dist_wtd_comp_avg_pred_neon, -1),
#endif  // !CONFIG_REALTIME_ONLY
};

INSTANTIATE_TEST_SUITE_P(NEON, DistWtdCompAvgTest,
                         ::testing::ValuesIn(dist_wtd_comp_avg_neon_tests));
780 #endif // HAVE_NEON
781
782 #if CONFIG_AV1_HIGHBITDEPTH
// Benchmark (disabled by default): times the function in tuple element 1.
TEST_P(AV1HighBDDistWtdCompAvgTest, DISABLED_Speed) {
  const distwtdcompavg_func impl = GET_PARAM(1);
  RunSpeedTest(impl);
}
786
// Checks the function in tuple element 1 against the C implementation.
TEST_P(AV1HighBDDistWtdCompAvgTest, CheckOutput) {
  const distwtdcompavg_func impl = GET_PARAM(1);
  RunCheckOutput(impl);
}
790
// Runs AV1HighBDDistWtdCompAvgTest on the SSE2 implementation for bit depths
// 8, 10 and 12 and every block size generated by BuildParams(). The second
// argument (1) only selects the high-bitdepth BuildParams overload.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1HighBDDistWtdCompAvgTest,
                         BuildParams(aom_highbd_dist_wtd_comp_avg_pred_sse2,
                                     1));
#endif  // HAVE_SSE2

// Same parameter space, NEON implementation.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1HighBDDistWtdCompAvgTest,
                         BuildParams(aom_highbd_dist_wtd_comp_avg_pred_neon,
                                     1));
#endif  // HAVE_NEON
802
// Benchmark (disabled by default): times the function in tuple element 1.
TEST_P(AV1HighBDDistWtdCompAvgUpsampledTest, DISABLED_Speed) {
  const highbddistwtdcompavgupsampled_func impl = GET_PARAM(1);
  RunSpeedTest(impl);
}
806
// Checks the function in tuple element 1 against the C implementation.
TEST_P(AV1HighBDDistWtdCompAvgUpsampledTest, CheckOutput) {
  const highbddistwtdcompavgupsampled_func impl = GET_PARAM(1);
  RunCheckOutput(impl);
}
810
// Runs AV1HighBDDistWtdCompAvgUpsampledTest on the SSE2 implementation for
// bit depths 8, 10 and 12 and every block size generated by BuildParams().
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, AV1HighBDDistWtdCompAvgUpsampledTest,
    BuildParams(aom_highbd_dist_wtd_comp_avg_upsampled_pred_sse2));
#endif  // HAVE_SSE2

// Same parameter space, NEON implementation.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1HighBDDistWtdCompAvgUpsampledTest,
    BuildParams(aom_highbd_dist_wtd_comp_avg_upsampled_pred_neon));
#endif  // HAVE_NEON
822
823 #endif // CONFIG_AV1_HIGHBITDEPTH
824
825 } // namespace
826