1 /*
2 * Copyright (c) 2020, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <cstddef>
13 #include <cstdint>
14 #include <ostream>
15 #include <set>
16 #include <vector>
17 #include "config/av1_rtcd.h"
18 #include "config/aom_dsp_rtcd.h"
19 #include "aom_ports/aom_timer.h"
20 #include "gtest/gtest.h"
21 #include "test/acm_random.h"
22
23 namespace {
24
// TODO(any): Remove the following INTERP_FILTERS_ALL override once the 12-tap
// filter SIMD is done, so that the 12-tap filter is tested as well.
27 #undef INTERP_FILTERS_ALL
28 #define INTERP_FILTERS_ALL 4
29
30 // All single reference convolve tests are parameterized on block size,
31 // bit-depth, and function to test.
32 //
33 // Note that parameterizing on these variables (and not other parameters) is
34 // a conscious decision - Jenkins needs some degree of parallelization to run
35 // the tests within the time limit, but if the number of parameters increases
36 // too much, the gtest framework does not handle it well (increased overhead per
37 // test, huge amount of output to stdout, etc.).
38 //
39 // Also note that the test suites must be named with the architecture, e.g.,
40 // C, C_X, AVX2_X, ... The test suite that runs on Jenkins sometimes runs tests
41 // that cannot deal with intrinsics (e.g., the Valgrind tests on 32-bit x86
42 // binaries) and will disable tests using a filter like
43 // --gtest_filter=-:SSE4_1.*. If the test suites are not named this way, the
44 // testing infrastructure will not selectively filter them properly.
// Width/height pair identifying one block dimension under test. Instances are
// ordered by width first and by height second, which allows them to be used
// as keys of an ordered container.
class BlockSize {
 public:
  BlockSize(int w, int h) : width_(w), height_(h) {}

  int Width() const { return width_; }
  int Height() const { return height_; }

  // Orders by width; ties are broken on height.
  bool operator<(const BlockSize &other) const {
    if (width_ != other.width_) return width_ < other.width_;
    return height_ < other.height_;
  }

  // Equal only when both dimensions match.
  bool operator==(const BlockSize &other) const {
    return !(width_ != other.width_ || height_ != other.height_);
  }

 private:
  int width_;
  int height_;
};
67
68 // Block size / bit depth / test function used to parameterize the tests.
69 template <typename T>
70 class TestParam {
71 public:
TestParam(const BlockSize & block,int bd,T test_func)72 TestParam(const BlockSize &block, int bd, T test_func)
73 : block_(block), bd_(bd), test_func_(test_func) {}
74
Block() const75 const BlockSize &Block() const { return block_; }
BitDepth() const76 int BitDepth() const { return bd_; }
TestFunction() const77 T TestFunction() const { return test_func_; }
78
operator ==(const TestParam & other) const79 bool operator==(const TestParam &other) const {
80 return Block() == other.Block() && BitDepth() == other.BitDepth() &&
81 TestFunction() == other.TestFunction();
82 }
83
84 private:
85 BlockSize block_;
86 int bd_;
87 T test_func_;
88 };
89
90 template <typename T>
operator <<(std::ostream & os,const TestParam<T> & test_arg)91 std::ostream &operator<<(std::ostream &os, const TestParam<T> &test_arg) {
92 return os << "TestParam { width:" << test_arg.Block().Width()
93 << " height:" << test_arg.Block().Height()
94 << " bd:" << test_arg.BitDepth() << " }";
95 }
96
97 // Generate the list of all block widths / heights that need to be tested,
98 // includes chroma and luma sizes, for the given bit-depths. The test
99 // function is the same for all generated parameters.
100 template <typename T>
GetTestParams(std::initializer_list<int> bit_depths,T test_func)101 std::vector<TestParam<T>> GetTestParams(std::initializer_list<int> bit_depths,
102 T test_func) {
103 std::set<BlockSize> sizes;
104 for (int b = BLOCK_4X4; b < BLOCK_SIZES_ALL; ++b) {
105 const int w = block_size_wide[b];
106 const int h = block_size_high[b];
107 sizes.insert(BlockSize(w, h));
108 // Add in smaller chroma sizes as well.
109 if (w == 4 || h == 4) {
110 sizes.insert(BlockSize(w / 2, h / 2));
111 }
112 }
113 std::vector<TestParam<T>> result;
114 for (const BlockSize &block : sizes) {
115 for (int bd : bit_depths) {
116 result.push_back(TestParam<T>(block, bd, test_func));
117 }
118 }
119 return result;
120 }
121
122 template <typename T>
GetLowbdTestParams(T test_func)123 std::vector<TestParam<T>> GetLowbdTestParams(T test_func) {
124 return GetTestParams({ 8 }, test_func);
125 }
126
127 template <typename T>
BuildLowbdParams(T test_func)128 ::testing::internal::ParamGenerator<TestParam<T>> BuildLowbdParams(
129 T test_func) {
130 return ::testing::ValuesIn(GetLowbdTestParams(test_func));
131 }
132
133 // Test the test-parameters generators work as expected.
134 class AV1ConvolveParametersTest : public ::testing::Test {};
135
TEST_F(AV1ConvolveParametersTest,GetLowbdTestParams)136 TEST_F(AV1ConvolveParametersTest, GetLowbdTestParams) {
137 auto v = GetLowbdTestParams(av1_convolve_x_sr_c);
138 ASSERT_EQ(27U, v.size());
139 for (const auto &p : v) {
140 ASSERT_EQ(8, p.BitDepth());
141 // Needed (instead of ASSERT_EQ(...) since gtest does not
142 // have built in printing for arbitrary functions, which
143 // causes a compilation error.
144 bool same_fn = av1_convolve_x_sr_c == p.TestFunction();
145 ASSERT_TRUE(same_fn);
146 }
147 }
148
149 #if CONFIG_AV1_HIGHBITDEPTH
150 template <typename T>
GetHighbdTestParams(T test_func)151 std::vector<TestParam<T>> GetHighbdTestParams(T test_func) {
152 return GetTestParams({ 10, 12 }, test_func);
153 }
154
155 template <typename T>
BuildHighbdParams(T test_func)156 ::testing::internal::ParamGenerator<TestParam<T>> BuildHighbdParams(
157 T test_func) {
158 return ::testing::ValuesIn(GetHighbdTestParams(test_func));
159 }
160
// The high bit-depth generator must yield 54 parameters, split evenly between
// 10-bit and 12-bit, all carrying the function that was passed in.
TEST_F(AV1ConvolveParametersTest, GetHighbdTestParams) {
  const auto params = GetHighbdTestParams(av1_highbd_convolve_x_sr_c);
  ASSERT_EQ(54U, params.size());
  int count_bd10 = 0;
  int count_bd12 = 0;
  for (const auto &param : params) {
    const int bd = param.BitDepth();
    ASSERT_TRUE(bd == 10 || bd == 12);
    const bool is_expected_fn =
        param.TestFunction() == av1_highbd_convolve_x_sr_c;
    ASSERT_TRUE(is_expected_fn);
    (bd == 10 ? count_bd10 : count_bd12) += 1;
  }
  ASSERT_EQ(count_bd10, count_bd12);
}
178 #endif // CONFIG_AV1_HIGHBITDEPTH
179
180 // AV1ConvolveTest is the base class that all convolve tests should derive from.
181 // It provides storage/methods for generating randomized buffers for both
182 // low bit-depth and high bit-depth, and setup/teardown methods for clearing
183 // system state. Implementors can get the bit-depth / block-size /
184 // test function by calling GetParam().
185 template <typename T>
186 class AV1ConvolveTest : public ::testing::TestWithParam<TestParam<T>> {
187 public:
188 ~AV1ConvolveTest() override = default;
189
SetUp()190 void SetUp() override {
191 rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
192 }
193
194 // Randomizes the 8-bit input buffer and returns a pointer to it. Note that
195 // the pointer is safe to use with an 8-tap filter. The stride can range
196 // from width to (width + kPadding). Also note that the pointer is to the
197 // same memory location.
198 static constexpr int kInputPadding = 12;
199
200 // Get a pointer to a buffer with stride == width. Note that we must have
201 // the test param passed in explicitly -- the gtest framework does not
202 // support calling GetParam() within a templatized class.
203 // Note that FirstRandomInput8 always returns the same pointer -- if two
204 // inputs are needed, also use SecondRandomInput8.
FirstRandomInput8(const TestParam<T> & param)205 const uint8_t *FirstRandomInput8(const TestParam<T> ¶m) {
206 // Note we can't call GetParam() directly -- gtest does not support
207 // this for parameterized types.
208 return RandomInput8(input8_1_, param);
209 }
210
SecondRandomInput8(const TestParam<T> & param)211 const uint8_t *SecondRandomInput8(const TestParam<T> ¶m) {
212 return RandomInput8(input8_2_, param);
213 }
214
215 // Some of the intrinsics perform writes in 32 byte chunks. Moreover, some
216 // of the instrinsics assume that the stride is also a multiple of 32.
217 // To satisfy these constraints and also remain simple, output buffer strides
218 // are assumed MAX_SB_SIZE.
219 static constexpr int kOutputStride = MAX_SB_SIZE;
220
221 // Check that two 8-bit output buffers are identical.
AssertOutputBufferEq(const uint8_t * p1,const uint8_t * p2,int width,int height)222 void AssertOutputBufferEq(const uint8_t *p1, const uint8_t *p2, int width,
223 int height) {
224 ASSERT_TRUE(p1 != p2) << "Buffers must be at different memory locations";
225 for (int j = 0; j < height; ++j) {
226 if (memcmp(p1, p2, sizeof(*p1) * width) == 0) {
227 p1 += kOutputStride;
228 p2 += kOutputStride;
229 continue;
230 }
231 for (int i = 0; i < width; ++i) {
232 ASSERT_EQ(p1[i], p2[i])
233 << width << "x" << height << " Pixel mismatch at (" << i << ", "
234 << j << ")";
235 }
236 }
237 }
238
239 // Check that two 16-bit output buffers are identical.
AssertOutputBufferEq(const uint16_t * p1,const uint16_t * p2,int width,int height)240 void AssertOutputBufferEq(const uint16_t *p1, const uint16_t *p2, int width,
241 int height) {
242 ASSERT_TRUE(p1 != p2) << "Buffers must be in different memory locations";
243 for (int j = 0; j < height; ++j) {
244 if (memcmp(p1, p2, sizeof(*p1) * width) == 0) {
245 p1 += kOutputStride;
246 p2 += kOutputStride;
247 continue;
248 }
249 for (int i = 0; i < width; ++i) {
250 ASSERT_EQ(p1[i], p2[i])
251 << width << "x" << height << " Pixel mismatch at (" << i << ", "
252 << j << ")";
253 }
254 }
255 }
256
257 #if CONFIG_AV1_HIGHBITDEPTH
258 // Note that the randomized values are capped by bit-depth.
FirstRandomInput16(const TestParam<T> & param)259 const uint16_t *FirstRandomInput16(const TestParam<T> ¶m) {
260 return RandomInput16(input16_1_, param);
261 }
262
SecondRandomInput16(const TestParam<T> & param)263 const uint16_t *SecondRandomInput16(const TestParam<T> ¶m) {
264 return RandomInput16(input16_2_, param);
265 }
266 #endif
267
268 private:
RandomInput8(uint8_t * p,const TestParam<T> & param)269 const uint8_t *RandomInput8(uint8_t *p, const TestParam<T> ¶m) {
270 EXPECT_EQ(8, param.BitDepth());
271 EXPECT_GE(MAX_SB_SIZE, param.Block().Width());
272 EXPECT_GE(MAX_SB_SIZE, param.Block().Height());
273 const int padded_width = param.Block().Width() + kInputPadding;
274 const int padded_height = param.Block().Height() + kInputPadding;
275 Randomize(p, padded_width * padded_height);
276 return p + (kInputPadding / 2) * padded_width + kInputPadding / 2;
277 }
278
Randomize(uint8_t * p,int size)279 void Randomize(uint8_t *p, int size) {
280 for (int i = 0; i < size; ++i) {
281 p[i] = rnd_.Rand8();
282 }
283 }
284
285 #if CONFIG_AV1_HIGHBITDEPTH
RandomInput16(uint16_t * p,const TestParam<T> & param)286 const uint16_t *RandomInput16(uint16_t *p, const TestParam<T> ¶m) {
287 // Check that this is only called with high bit-depths.
288 EXPECT_TRUE(param.BitDepth() == 10 || param.BitDepth() == 12);
289 EXPECT_GE(MAX_SB_SIZE, param.Block().Width());
290 EXPECT_GE(MAX_SB_SIZE, param.Block().Height());
291 const int padded_width = param.Block().Width() + kInputPadding;
292 const int padded_height = param.Block().Height() + kInputPadding;
293 Randomize(p, padded_width * padded_height, param.BitDepth());
294 return p + (kInputPadding / 2) * padded_width + kInputPadding / 2;
295 }
296
Randomize(uint16_t * p,int size,int bit_depth)297 void Randomize(uint16_t *p, int size, int bit_depth) {
298 for (int i = 0; i < size; ++i) {
299 p[i] = rnd_.Rand16() & ((1 << bit_depth) - 1);
300 }
301 }
302 #endif
303
304 static constexpr int kInputStride = MAX_SB_SIZE + kInputPadding;
305
306 libaom_test::ACMRandom rnd_;
307 // Statically allocate all the memory that is needed for the tests. Note
308 // that we cannot allocate output memory here. It must use DECLARE_ALIGNED,
309 // which is a C99 feature and interacts badly with C++ member variables.
310 uint8_t input8_1_[kInputStride * kInputStride];
311 uint8_t input8_2_[kInputStride * kInputStride];
312 #if CONFIG_AV1_HIGHBITDEPTH
313 uint16_t input16_1_[kInputStride * kInputStride];
314 uint16_t input16_2_[kInputStride * kInputStride];
315 #endif
316 };
317
318 ////////////////////////////////////////////////////////
319 // Single reference convolve-x functions (low bit-depth)
320 ////////////////////////////////////////////////////////
321 typedef void (*convolve_x_func)(const uint8_t *src, int src_stride,
322 uint8_t *dst, int dst_stride, int w, int h,
323 const InterpFilterParams *filter_params_x,
324 const int subpel_x_qn,
325 ConvolveParams *conv_params);
326
327 class AV1ConvolveXTest : public AV1ConvolveTest<convolve_x_func> {
328 public:
RunTest()329 void RunTest() {
330 // Do not test the no-op filter.
331 for (int sub_x = 1; sub_x < 16; ++sub_x) {
332 for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
333 ++filter) {
334 InterpFilter f = static_cast<InterpFilter>(filter);
335 TestConvolve(sub_x, f);
336 }
337 }
338 }
339
340 public:
SpeedTest()341 void SpeedTest() {
342 for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
343 ++filter) {
344 InterpFilter f = static_cast<InterpFilter>(filter);
345 TestConvolveSpeed(f, 10000);
346 }
347 }
348
349 private:
TestConvolve(const int sub_x,const InterpFilter filter)350 void TestConvolve(const int sub_x, const InterpFilter filter) {
351 const int width = GetParam().Block().Width();
352 const int height = GetParam().Block().Height();
353
354 const InterpFilterParams *filter_params_x =
355 av1_get_interp_filter_params_with_block_size(filter, width);
356 ConvolveParams conv_params1 =
357 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
358 const uint8_t *input = FirstRandomInput8(GetParam());
359 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
360 av1_convolve_x_sr_c(input, width, reference, kOutputStride, width, height,
361 filter_params_x, sub_x, &conv_params1);
362
363 ConvolveParams conv_params2 =
364 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
365 convolve_x_func test_func = GetParam().TestFunction();
366 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
367 test_func(input, width, test, kOutputStride, width, height, filter_params_x,
368 sub_x, &conv_params2);
369 AssertOutputBufferEq(reference, test, width, height);
370 }
371
372 private:
TestConvolveSpeed(const InterpFilter filter,const int num_iters)373 void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
374 const int width = GetParam().Block().Width();
375 const int height = GetParam().Block().Height();
376
377 const InterpFilterParams *filter_params_x =
378 av1_get_interp_filter_params_with_block_size(filter, width);
379 ConvolveParams conv_params1 =
380 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
381 const uint8_t *input = FirstRandomInput8(GetParam());
382 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
383
384 aom_usec_timer timer;
385 aom_usec_timer_start(&timer);
386 for (int i = 0; i < num_iters; ++i) {
387 av1_convolve_x_sr_c(input, width, reference, kOutputStride, width, height,
388 filter_params_x, 0, &conv_params1);
389 }
390 aom_usec_timer_mark(&timer);
391 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
392 ConvolveParams conv_params2 =
393 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
394 convolve_x_func test_func = GetParam().TestFunction();
395 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
396
397 aom_usec_timer_start(&timer);
398 for (int i = 0; i < num_iters; ++i) {
399 test_func(input, width, test, kOutputStride, width, height,
400 filter_params_x, 0, &conv_params2);
401 }
402 aom_usec_timer_mark(&timer);
403 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
404 printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
405 time2, time1 / time2);
406 }
407 };
408
TEST_P(AV1ConvolveXTest,RunTest)409 TEST_P(AV1ConvolveXTest, RunTest) { RunTest(); }
410
TEST_P(AV1ConvolveXTest,DISABLED_SpeedTest)411 TEST_P(AV1ConvolveXTest, DISABLED_SpeedTest) { SpeedTest(); }
412
413 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXTest,
414 BuildLowbdParams(av1_convolve_x_sr_c));
415
416 #if HAVE_SSE2
417 INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveXTest,
418 BuildLowbdParams(av1_convolve_x_sr_sse2));
419 #endif
420
421 #if HAVE_AVX2
422 INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXTest,
423 BuildLowbdParams(av1_convolve_x_sr_avx2));
424 #endif
425
426 #if HAVE_NEON
427 INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXTest,
428 BuildLowbdParams(av1_convolve_x_sr_neon));
429 #endif
430
431 #if HAVE_NEON_DOTPROD
432 INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1ConvolveXTest,
433 BuildLowbdParams(av1_convolve_x_sr_neon_dotprod));
434 #endif
435
436 #if HAVE_NEON_I8MM
437 INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1ConvolveXTest,
438 BuildLowbdParams(av1_convolve_x_sr_neon_i8mm));
439 #endif
440
441 ////////////////////////////////////////////////////////////////
442 // Single reference convolve-x IntraBC functions (low bit-depth)
443 ////////////////////////////////////////////////////////////////
444
445 class AV1ConvolveXIntraBCTest : public AV1ConvolveTest<convolve_x_func> {
446 public:
RunTest()447 void RunTest() {
448 // IntraBC functions only operate for subpel_x_qn = 8.
449 constexpr int kSubX = 8;
450 const int width = GetParam().Block().Width();
451 const int height = GetParam().Block().Height();
452 const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
453 const uint8_t *input = FirstRandomInput8(GetParam());
454
455 ConvolveParams conv_params1 =
456 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
457 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
458 // Use a stride different from width to avoid potential storing errors that
459 // would go undetected. The input buffer is filled using a padding of 12, so
460 // the stride can be anywhere between width and width + 12.
461 av1_convolve_x_sr_intrabc_c(input, width + 2, reference, kOutputStride,
462 width, height, filter_params_x, kSubX,
463 &conv_params1);
464
465 ConvolveParams conv_params2 =
466 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
467 convolve_x_func test_func = GetParam().TestFunction();
468 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
469 test_func(input, width + 2, test, kOutputStride, width, height,
470 filter_params_x, kSubX, &conv_params2);
471
472 AssertOutputBufferEq(reference, test, width, height);
473 }
474
SpeedTest()475 void SpeedTest() {
476 constexpr int kNumIters = 10000;
477 const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
478 const int width = GetParam().Block().Width();
479 const int height = GetParam().Block().Height();
480 const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
481 const uint8_t *input = FirstRandomInput8(GetParam());
482
483 ConvolveParams conv_params1 =
484 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
485 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
486 aom_usec_timer timer;
487 aom_usec_timer_start(&timer);
488 for (int i = 0; i < kNumIters; ++i) {
489 av1_convolve_x_sr_intrabc_c(input, width, reference, kOutputStride, width,
490 height, filter_params_x, 0, &conv_params1);
491 }
492 aom_usec_timer_mark(&timer);
493 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
494
495 ConvolveParams conv_params2 =
496 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
497 convolve_x_func test_func = GetParam().TestFunction();
498 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
499 aom_usec_timer_start(&timer);
500 for (int i = 0; i < kNumIters; ++i) {
501 test_func(input, width, test, kOutputStride, width, height,
502 filter_params_x, 0, &conv_params2);
503 }
504 aom_usec_timer_mark(&timer);
505 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
506
507 printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
508 time2, time1 / time2);
509 }
510 };
511
TEST_P(AV1ConvolveXIntraBCTest,RunTest)512 TEST_P(AV1ConvolveXIntraBCTest, RunTest) { RunTest(); }
513
TEST_P(AV1ConvolveXIntraBCTest,DISABLED_SpeedTest)514 TEST_P(AV1ConvolveXIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }
515
516 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXIntraBCTest,
517 BuildLowbdParams(av1_convolve_x_sr_intrabc_c));
518
519 #if HAVE_NEON
520 INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXIntraBCTest,
521 BuildLowbdParams(av1_convolve_x_sr_intrabc_neon));
522 #endif
523
524 #if CONFIG_AV1_HIGHBITDEPTH
525 /////////////////////////////////////////////////////////
526 // Single reference convolve-x functions (high bit-depth)
527 /////////////////////////////////////////////////////////
528 typedef void (*highbd_convolve_x_func)(
529 const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
530 int h, const InterpFilterParams *filter_params_x, const int subpel_x_qn,
531 ConvolveParams *conv_params, int bd);
532
533 class AV1ConvolveXHighbdTest : public AV1ConvolveTest<highbd_convolve_x_func> {
534 public:
RunTest()535 void RunTest() {
536 // Do not test the no-op filter.
537 for (int sub_x = 1; sub_x < 16; ++sub_x) {
538 for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
539 ++filter) {
540 InterpFilter f = static_cast<InterpFilter>(filter);
541 TestConvolve(sub_x, f);
542 }
543 }
544 }
545
546 public:
SpeedTest()547 void SpeedTest() {
548 for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
549 ++filter) {
550 InterpFilter f = static_cast<InterpFilter>(filter);
551 TestConvolveSpeed(f, 10000);
552 }
553 }
554
555 private:
TestConvolve(const int sub_x,const InterpFilter filter)556 void TestConvolve(const int sub_x, const InterpFilter filter) {
557 const int width = GetParam().Block().Width();
558 const int height = GetParam().Block().Height();
559 const int bit_depth = GetParam().BitDepth();
560 const InterpFilterParams *filter_params_x =
561 av1_get_interp_filter_params_with_block_size(filter, width);
562 ConvolveParams conv_params1 =
563 get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
564 const uint16_t *input = FirstRandomInput16(GetParam());
565 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
566 av1_highbd_convolve_x_sr_c(input, width, reference, kOutputStride, width,
567 height, filter_params_x, sub_x, &conv_params1,
568 bit_depth);
569
570 ConvolveParams conv_params2 =
571 get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
572 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
573 GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
574 filter_params_x, sub_x, &conv_params2, bit_depth);
575 AssertOutputBufferEq(reference, test, width, height);
576 }
577
578 private:
TestConvolveSpeed(const InterpFilter filter,const int num_iters)579 void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
580 const int width = GetParam().Block().Width();
581 const int height = GetParam().Block().Height();
582 const int bit_depth = GetParam().BitDepth();
583 const InterpFilterParams *filter_params_x =
584 av1_get_interp_filter_params_with_block_size(filter, width);
585 ConvolveParams conv_params1 =
586 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
587 const uint16_t *input = FirstRandomInput16(GetParam());
588 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
589
590 aom_usec_timer timer;
591 aom_usec_timer_start(&timer);
592 for (int i = 0; i < num_iters; ++i) {
593 av1_highbd_convolve_x_sr_c(input, width, reference, kOutputStride, width,
594 height, filter_params_x, 0, &conv_params1,
595 bit_depth);
596 }
597 aom_usec_timer_mark(&timer);
598 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
599 ConvolveParams conv_params2 =
600 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
601 highbd_convolve_x_func test_func = GetParam().TestFunction();
602 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
603
604 aom_usec_timer_start(&timer);
605 for (int i = 0; i < num_iters; ++i) {
606 test_func(input, width, test, kOutputStride, width, height,
607 filter_params_x, 0, &conv_params2, bit_depth);
608 }
609 aom_usec_timer_mark(&timer);
610 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
611 printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
612 time2, time1 / time2);
613 }
614 };
615
TEST_P(AV1ConvolveXHighbdTest,RunTest)616 TEST_P(AV1ConvolveXHighbdTest, RunTest) { RunTest(); }
617
TEST_P(AV1ConvolveXHighbdTest,DISABLED_SpeedTest)618 TEST_P(AV1ConvolveXHighbdTest, DISABLED_SpeedTest) { SpeedTest(); }
619
620 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdTest,
621 BuildHighbdParams(av1_highbd_convolve_x_sr_c));
622
623 #if HAVE_SSSE3
624 INSTANTIATE_TEST_SUITE_P(SSSE3, AV1ConvolveXHighbdTest,
625 BuildHighbdParams(av1_highbd_convolve_x_sr_ssse3));
626 #endif
627
628 #if HAVE_AVX2
629 INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXHighbdTest,
630 BuildHighbdParams(av1_highbd_convolve_x_sr_avx2));
631 #endif
632
633 #if HAVE_NEON
634 INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXHighbdTest,
635 BuildHighbdParams(av1_highbd_convolve_x_sr_neon));
636 #endif
637
638 #if HAVE_SVE2
639 INSTANTIATE_TEST_SUITE_P(SVE2, AV1ConvolveXHighbdTest,
640 BuildHighbdParams(av1_highbd_convolve_x_sr_sve2));
641 #endif
642
643 /////////////////////////////////////////////////////////////////
644 // Single reference convolve-x IntraBC functions (high bit-depth)
645 /////////////////////////////////////////////////////////////////
646
647 class AV1ConvolveXHighbdIntraBCTest
648 : public AV1ConvolveTest<highbd_convolve_x_func> {
649 public:
RunTest()650 void RunTest() {
651 // IntraBC functions only operate for subpel_x_qn = 8.
652 constexpr int kSubX = 8;
653 const int width = GetParam().Block().Width();
654 const int height = GetParam().Block().Height();
655 const int bit_depth = GetParam().BitDepth();
656 const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
657 const uint16_t *input = FirstRandomInput16(GetParam());
658
659 ConvolveParams conv_params1 =
660 get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
661 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
662 // Use a stride different from width to avoid potential storing errors that
663 // would go undetected. The input buffer is filled using a padding of 12, so
664 // the stride can be anywhere between width and width + 12.
665 av1_highbd_convolve_x_sr_intrabc_c(
666 input, width + 2, reference, kOutputStride, width, height,
667 filter_params_x, kSubX, &conv_params1, bit_depth);
668
669 ConvolveParams conv_params2 =
670 get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
671 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
672 GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
673 height, filter_params_x, kSubX, &conv_params2,
674 bit_depth);
675
676 AssertOutputBufferEq(reference, test, width, height);
677 }
678
SpeedTest()679 void SpeedTest() {
680 constexpr int kNumIters = 10000;
681 const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
682 const int width = GetParam().Block().Width();
683 const int height = GetParam().Block().Height();
684 const int bit_depth = GetParam().BitDepth();
685 const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
686 const uint16_t *input = FirstRandomInput16(GetParam());
687
688 ConvolveParams conv_params1 =
689 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
690 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
691 aom_usec_timer timer;
692 aom_usec_timer_start(&timer);
693 for (int i = 0; i < kNumIters; ++i) {
694 av1_highbd_convolve_x_sr_intrabc_c(input, width, reference, kOutputStride,
695 width, height, filter_params_x, 0,
696 &conv_params1, bit_depth);
697 }
698 aom_usec_timer_mark(&timer);
699 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
700
701 ConvolveParams conv_params2 =
702 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
703 highbd_convolve_x_func test_func = GetParam().TestFunction();
704 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
705 aom_usec_timer_start(&timer);
706 for (int i = 0; i < kNumIters; ++i) {
707 test_func(input, width, test, kOutputStride, width, height,
708 filter_params_x, 0, &conv_params2, bit_depth);
709 }
710 aom_usec_timer_mark(&timer);
711 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
712
713 printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
714 time2, time1 / time2);
715 }
716 };
717
TEST_P(AV1ConvolveXHighbdIntraBCTest,RunTest)718 TEST_P(AV1ConvolveXHighbdIntraBCTest, RunTest) { RunTest(); }
719
TEST_P(AV1ConvolveXHighbdIntraBCTest,DISABLED_SpeedTest)720 TEST_P(AV1ConvolveXHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }
721
722 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdIntraBCTest,
723 BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_c));
724
725 #if HAVE_NEON
726 INSTANTIATE_TEST_SUITE_P(
727 NEON, AV1ConvolveXHighbdIntraBCTest,
728 BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_neon));
729 #endif
730
731 #endif // CONFIG_AV1_HIGHBITDEPTH
732
733 ////////////////////////////////////////////////////////
734 // Single reference convolve-y functions (low bit-depth)
735 ////////////////////////////////////////////////////////
736 typedef void (*convolve_y_func)(const uint8_t *src, int src_stride,
737 uint8_t *dst, int dst_stride, int w, int h,
738 const InterpFilterParams *filter_params_y,
739 const int subpel_y_qn);
740
741 class AV1ConvolveYTest : public AV1ConvolveTest<convolve_y_func> {
742 public:
RunTest()743 void RunTest() {
744 // Do not test the no-op filter.
745 for (int sub_y = 1; sub_y < 16; ++sub_y) {
746 for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
747 ++filter) {
748 InterpFilter f = static_cast<InterpFilter>(filter);
749 TestConvolve(sub_y, f);
750 }
751 }
752 }
753
754 public:
SpeedTest()755 void SpeedTest() {
756 for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
757 ++filter) {
758 InterpFilter f = static_cast<InterpFilter>(filter);
759 TestConvolveSpeed(f, 10000);
760 }
761 }
762
763 private:
TestConvolve(const int sub_y,const InterpFilter filter)764 void TestConvolve(const int sub_y, const InterpFilter filter) {
765 const int width = GetParam().Block().Width();
766 const int height = GetParam().Block().Height();
767
768 const InterpFilterParams *filter_params_y =
769 av1_get_interp_filter_params_with_block_size(filter, height);
770 const uint8_t *input = FirstRandomInput8(GetParam());
771 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
772 av1_convolve_y_sr_c(input, width, reference, kOutputStride, width, height,
773 filter_params_y, sub_y);
774 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
775 GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
776 filter_params_y, sub_y);
777 AssertOutputBufferEq(reference, test, width, height);
778 }
779
780 private:
TestConvolveSpeed(const InterpFilter filter,const int num_iters)781 void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
782 const int width = GetParam().Block().Width();
783 const int height = GetParam().Block().Height();
784
785 const InterpFilterParams *filter_params_y =
786 av1_get_interp_filter_params_with_block_size(filter, height);
787 const uint8_t *input = FirstRandomInput8(GetParam());
788 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
789
790 aom_usec_timer timer;
791 aom_usec_timer_start(&timer);
792 for (int i = 0; i < num_iters; ++i) {
793 av1_convolve_y_sr_c(input, width, reference, kOutputStride, width, height,
794 filter_params_y, 0);
795 }
796 aom_usec_timer_mark(&timer);
797 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
798
799 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
800
801 aom_usec_timer_start(&timer);
802 for (int i = 0; i < num_iters; ++i) {
803 GetParam().TestFunction()(input, width, test, kOutputStride, width,
804 height, filter_params_y, 0);
805 }
806 aom_usec_timer_mark(&timer);
807 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
808 printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
809 time2, time1 / time2);
810 }
811 };
812
// Correctness test, run for every registered parameter combination.
TEST_P(AV1ConvolveYTest, RunTest) {
  RunTest();
}

// Benchmark; the DISABLED_ prefix keeps it out of default test runs.
TEST_P(AV1ConvolveYTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// One instantiation per implementation; each suite is named after its
// architecture so that it can be filtered by name.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_neon_i8mm));
#endif
844
845 ////////////////////////////////////////////////////////////////
846 // Single reference convolve-y IntraBC functions (low bit-depth)
847 ////////////////////////////////////////////////////////////////
848
849 class AV1ConvolveYIntraBCTest : public AV1ConvolveTest<convolve_y_func> {
850 public:
RunTest()851 void RunTest() {
852 // IntraBC functions only operate for subpel_y_qn = 8.
853 constexpr int kSubY = 8;
854 const int width = GetParam().Block().Width();
855 const int height = GetParam().Block().Height();
856 const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
857 const uint8_t *input = FirstRandomInput8(GetParam());
858
859 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
860 // Use a stride different from width to avoid potential storing errors that
861 // would go undetected. The input buffer is filled using a padding of 12, so
862 // the stride can be anywhere between width and width + 12.
863 av1_convolve_y_sr_intrabc_c(input, width + 2, reference, kOutputStride,
864 width, height, filter_params_y, kSubY);
865
866 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
867 GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
868 height, filter_params_y, kSubY);
869
870 AssertOutputBufferEq(reference, test, width, height);
871 }
872
SpeedTest()873 void SpeedTest() {
874 constexpr int kNumIters = 10000;
875 const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
876 const int width = GetParam().Block().Width();
877 const int height = GetParam().Block().Height();
878
879 const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
880 const uint8_t *input = FirstRandomInput8(GetParam());
881 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
882
883 aom_usec_timer timer;
884 aom_usec_timer_start(&timer);
885 for (int i = 0; i < kNumIters; ++i) {
886 av1_convolve_y_sr_intrabc_c(input, width, reference, kOutputStride, width,
887 height, filter_params_y, 0);
888 }
889 aom_usec_timer_mark(&timer);
890 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
891
892 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
893 convolve_y_func test_func = GetParam().TestFunction();
894 aom_usec_timer_start(&timer);
895 for (int i = 0; i < kNumIters; ++i) {
896 test_func(input, width, test, kOutputStride, width, height,
897 filter_params_y, 0);
898 }
899 aom_usec_timer_mark(&timer);
900 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
901
902 printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
903 time2, time1 / time2);
904 }
905 };
906
// Correctness test, run for every registered parameter combination.
TEST_P(AV1ConvolveYIntraBCTest, RunTest) {
  RunTest();
}

// Benchmark; the DISABLED_ prefix keeps it out of default test runs.
TEST_P(AV1ConvolveYIntraBCTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// One instantiation per implementation, named after its architecture.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYIntraBCTest,
                         BuildLowbdParams(av1_convolve_y_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYIntraBCTest,
                         BuildLowbdParams(av1_convolve_y_sr_intrabc_neon));
#endif
918
919 #if CONFIG_AV1_HIGHBITDEPTH
920 /////////////////////////////////////////////////////////
921 // Single reference convolve-y functions (high bit-depth)
922 /////////////////////////////////////////////////////////
923 typedef void (*highbd_convolve_y_func)(
924 const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
925 int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn,
926 int bd);
927
928 class AV1ConvolveYHighbdTest : public AV1ConvolveTest<highbd_convolve_y_func> {
929 public:
RunTest()930 void RunTest() {
931 // Do not test the no-op filter.
932 for (int sub_y = 1; sub_y < 16; ++sub_y) {
933 for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
934 ++filter) {
935 InterpFilter f = static_cast<InterpFilter>(filter);
936 TestConvolve(sub_y, f);
937 }
938 }
939 }
940
941 public:
SpeedTest()942 void SpeedTest() {
943 for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
944 ++filter) {
945 InterpFilter f = static_cast<InterpFilter>(filter);
946 TestConvolveSpeed(f, 10000);
947 }
948 }
949
950 private:
TestConvolve(const int sub_y,const InterpFilter filter)951 void TestConvolve(const int sub_y, const InterpFilter filter) {
952 const int width = GetParam().Block().Width();
953 const int height = GetParam().Block().Height();
954 const int bit_depth = GetParam().BitDepth();
955 const InterpFilterParams *filter_params_y =
956 av1_get_interp_filter_params_with_block_size(filter, height);
957 const uint16_t *input = FirstRandomInput16(GetParam());
958 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
959 av1_highbd_convolve_y_sr_c(input, width, reference, kOutputStride, width,
960 height, filter_params_y, sub_y, bit_depth);
961 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
962 GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
963 filter_params_y, sub_y, bit_depth);
964 AssertOutputBufferEq(reference, test, width, height);
965 }
966
967 private:
TestConvolveSpeed(const InterpFilter filter,const int num_iters)968 void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
969 const int width = GetParam().Block().Width();
970 const int height = GetParam().Block().Height();
971 const int bit_depth = GetParam().BitDepth();
972 const InterpFilterParams *filter_params_y =
973 av1_get_interp_filter_params_with_block_size(filter, width);
974 const uint16_t *input = FirstRandomInput16(GetParam());
975 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
976
977 aom_usec_timer timer;
978 aom_usec_timer_start(&timer);
979 for (int i = 0; i < num_iters; ++i) {
980 av1_highbd_convolve_y_sr_c(input, width, reference, kOutputStride, width,
981 height, filter_params_y, 0, bit_depth);
982 }
983 aom_usec_timer_mark(&timer);
984 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
985 highbd_convolve_y_func test_func = GetParam().TestFunction();
986 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
987
988 aom_usec_timer_start(&timer);
989 for (int i = 0; i < num_iters; ++i) {
990 test_func(input, width, test, kOutputStride, width, height,
991 filter_params_y, 0, bit_depth);
992 }
993 aom_usec_timer_mark(&timer);
994 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
995 printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
996 time2, time1 / time2);
997 }
998 };
999
// Correctness test, run for every registered parameter combination.
TEST_P(AV1ConvolveYHighbdTest, RunTest) {
  RunTest();
}

// Benchmark; the DISABLED_ prefix keeps it out of default test runs.
TEST_P(AV1ConvolveYHighbdTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// One instantiation per implementation, named after its architecture.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_c));

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_ssse3));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_sve2));
#endif
1026
1027 /////////////////////////////////////////////////////////////////
1028 // Single reference convolve-y IntraBC functions (high bit-depth)
1029 /////////////////////////////////////////////////////////////////
1030
1031 class AV1ConvolveYHighbdIntraBCTest
1032 : public AV1ConvolveTest<highbd_convolve_y_func> {
1033 public:
RunTest()1034 void RunTest() {
1035 // IntraBC functions only operate for subpel_y_qn = 8.
1036 constexpr int kSubY = 8;
1037 const int width = GetParam().Block().Width();
1038 const int height = GetParam().Block().Height();
1039 const int bit_depth = GetParam().BitDepth();
1040 const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
1041 const uint16_t *input = FirstRandomInput16(GetParam());
1042
1043 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1044 // Use a stride different from width to avoid potential storing errors that
1045 // would go undetected. The input buffer is filled using a padding of 12, so
1046 // the stride can be anywhere between width and width + 12.
1047 av1_highbd_convolve_y_sr_intrabc_c(input, width + 2, reference,
1048 kOutputStride, width, height,
1049 filter_params_y, kSubY, bit_depth);
1050
1051 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1052 GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
1053 height, filter_params_y, kSubY, bit_depth);
1054
1055 AssertOutputBufferEq(reference, test, width, height);
1056 }
1057
SpeedTest()1058 void SpeedTest() {
1059 constexpr int kNumIters = 10000;
1060 const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
1061 const int width = GetParam().Block().Width();
1062 const int height = GetParam().Block().Height();
1063 const int bit_depth = GetParam().BitDepth();
1064 const InterpFilterParams *filter_params_y =
1065 av1_get_interp_filter_params_with_block_size(filter, width);
1066 const uint16_t *input = FirstRandomInput16(GetParam());
1067
1068 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1069 aom_usec_timer timer;
1070 aom_usec_timer_start(&timer);
1071 for (int i = 0; i < kNumIters; ++i) {
1072 av1_highbd_convolve_y_sr_intrabc_c(input, width, reference, kOutputStride,
1073 width, height, filter_params_y, 0,
1074 bit_depth);
1075 }
1076 aom_usec_timer_mark(&timer);
1077 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1078
1079 highbd_convolve_y_func test_func = GetParam().TestFunction();
1080 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1081 aom_usec_timer_start(&timer);
1082 for (int i = 0; i < kNumIters; ++i) {
1083 test_func(input, width, test, kOutputStride, width, height,
1084 filter_params_y, 0, bit_depth);
1085 }
1086 aom_usec_timer_mark(&timer);
1087 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1088
1089 printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
1090 time2, time1 / time2);
1091 }
1092 };
1093
// Correctness test, run for every registered parameter combination.
TEST_P(AV1ConvolveYHighbdIntraBCTest, RunTest) {
  RunTest();
}

// Benchmark; the DISABLED_ prefix keeps it out of default test runs.
TEST_P(AV1ConvolveYHighbdIntraBCTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// One instantiation per implementation, named after its architecture.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdIntraBCTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveYHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_neon));
#endif
1106
1107 #endif // CONFIG_AV1_HIGHBITDEPTH
1108
1109 //////////////////////////////////////////////////////////////
1110 // Single reference convolve-copy functions (low bit-depth)
1111 //////////////////////////////////////////////////////////////
// Pointer to a low bit-depth block copy function.
using convolve_copy_func = void (*)(const uint8_t *src, ptrdiff_t src_stride,
                                    uint8_t *dst, ptrdiff_t dst_stride, int w,
                                    int h);
1115
1116 class AV1ConvolveCopyTest : public AV1ConvolveTest<convolve_copy_func> {
1117 public:
RunTest()1118 void RunTest() {
1119 const int width = GetParam().Block().Width();
1120 const int height = GetParam().Block().Height();
1121 const uint8_t *input = FirstRandomInput8(GetParam());
1122 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1123 aom_convolve_copy_c(input, width, reference, kOutputStride, width, height);
1124 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1125 GetParam().TestFunction()(input, width, test, kOutputStride, width, height);
1126 AssertOutputBufferEq(reference, test, width, height);
1127 }
1128 };
1129
1130 // Note that even though these are AOM convolve functions, we are using the
1131 // newer AV1 test framework.
TEST_P(AV1ConvolveCopyTest,RunTest)1132 TEST_P(AV1ConvolveCopyTest, RunTest) { RunTest(); }
1133
1134 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveCopyTest,
1135 BuildLowbdParams(aom_convolve_copy_c));
1136
1137 #if HAVE_SSE2
1138 INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveCopyTest,
1139 BuildLowbdParams(aom_convolve_copy_sse2));
1140 #endif
1141
1142 #if HAVE_AVX2
1143 INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveCopyTest,
1144 BuildLowbdParams(aom_convolve_copy_avx2));
1145 #endif
1146
1147 #if HAVE_NEON
1148 INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveCopyTest,
1149 BuildLowbdParams(aom_convolve_copy_neon));
1150 #endif
1151
1152 #if CONFIG_AV1_HIGHBITDEPTH
1153 ///////////////////////////////////////////////////////////////
1154 // Single reference convolve-copy functions (high bit-depth)
1155 ///////////////////////////////////////////////////////////////
// Pointer to a high bit-depth block copy function.
using highbd_convolve_copy_func = void (*)(const uint16_t *src,
                                           ptrdiff_t src_stride, uint16_t *dst,
                                           ptrdiff_t dst_stride, int w, int h);
1159
1160 class AV1ConvolveCopyHighbdTest
1161 : public AV1ConvolveTest<highbd_convolve_copy_func> {
1162 public:
RunTest()1163 void RunTest() {
1164 const BlockSize &block = GetParam().Block();
1165 const int width = block.Width();
1166 const int height = block.Height();
1167 const uint16_t *input = FirstRandomInput16(GetParam());
1168 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1169 aom_highbd_convolve_copy_c(input, width, reference, kOutputStride, width,
1170 height);
1171 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1172 GetParam().TestFunction()(input, width, test, kOutputStride, width, height);
1173 AssertOutputBufferEq(reference, test, width, height);
1174 }
1175 };
1176
// Correctness test, run for every registered parameter combination.
TEST_P(AV1ConvolveCopyHighbdTest, RunTest) {
  RunTest();
}

// One instantiation per implementation, named after its architecture.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_neon));
#endif
1196
1197 #endif // CONFIG_AV1_HIGHBITDEPTH
1198
1199 /////////////////////////////////////////////////////////
1200 // Single reference convolve-2D functions (low bit-depth)
1201 /////////////////////////////////////////////////////////
1202 typedef void (*convolve_2d_func)(const uint8_t *src, int src_stride,
1203 uint8_t *dst, int dst_stride, int w, int h,
1204 const InterpFilterParams *filter_params_x,
1205 const InterpFilterParams *filter_params_y,
1206 const int subpel_x_qn, const int subpel_y_qn,
1207 ConvolveParams *conv_params);
1208
1209 class AV1Convolve2DTest : public AV1ConvolveTest<convolve_2d_func> {
1210 public:
RunTest()1211 void RunTest() {
1212 // Do not test the no-op filter.
1213 for (int sub_x = 1; sub_x < 16; ++sub_x) {
1214 for (int sub_y = 1; sub_y < 16; ++sub_y) {
1215 for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
1216 for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
1217 if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
1218 ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
1219 continue;
1220 TestConvolve(static_cast<InterpFilter>(h_f),
1221 static_cast<InterpFilter>(v_f), sub_x, sub_y);
1222 }
1223 }
1224 }
1225 }
1226 }
1227
1228 public:
SpeedTest()1229 void SpeedTest() {
1230 for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
1231 for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
1232 if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
1233 ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
1234 continue;
1235 TestConvolveSpeed(static_cast<InterpFilter>(h_f),
1236 static_cast<InterpFilter>(v_f), 10000);
1237 }
1238 }
1239 }
1240
1241 private:
TestConvolve(const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y)1242 void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
1243 const int sub_x, const int sub_y) {
1244 const int width = GetParam().Block().Width();
1245 const int height = GetParam().Block().Height();
1246 const InterpFilterParams *filter_params_x =
1247 av1_get_interp_filter_params_with_block_size(h_f, width);
1248 const InterpFilterParams *filter_params_y =
1249 av1_get_interp_filter_params_with_block_size(v_f, height);
1250 const uint8_t *input = FirstRandomInput8(GetParam());
1251 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1252 ConvolveParams conv_params1 =
1253 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1254 av1_convolve_2d_sr_c(input, width, reference, kOutputStride, width, height,
1255 filter_params_x, filter_params_y, sub_x, sub_y,
1256 &conv_params1);
1257 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1258 ConvolveParams conv_params2 =
1259 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1260 GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
1261 filter_params_x, filter_params_y, sub_x, sub_y,
1262 &conv_params2);
1263 AssertOutputBufferEq(reference, test, width, height);
1264 }
1265
1266 private:
TestConvolveSpeed(const InterpFilter h_f,const InterpFilter v_f,int num_iters)1267 void TestConvolveSpeed(const InterpFilter h_f, const InterpFilter v_f,
1268 int num_iters) {
1269 const int width = GetParam().Block().Width();
1270 const int height = GetParam().Block().Height();
1271 const InterpFilterParams *filter_params_x =
1272 av1_get_interp_filter_params_with_block_size(h_f, width);
1273 const InterpFilterParams *filter_params_y =
1274 av1_get_interp_filter_params_with_block_size(v_f, height);
1275 const uint8_t *input = FirstRandomInput8(GetParam());
1276 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1277 ConvolveParams conv_params1 =
1278 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1279 aom_usec_timer timer;
1280 aom_usec_timer_start(&timer);
1281 for (int i = 0; i < num_iters; ++i) {
1282 av1_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
1283 height, filter_params_x, filter_params_y, 0, 0,
1284 &conv_params1);
1285 }
1286 aom_usec_timer_mark(&timer);
1287 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1288 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1289 ConvolveParams conv_params2 =
1290 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1291 aom_usec_timer_start(&timer);
1292 for (int i = 0; i < num_iters; ++i) {
1293 GetParam().TestFunction()(input, width, test, kOutputStride, width,
1294 height, filter_params_x, filter_params_y, 0, 0,
1295 &conv_params2);
1296 }
1297 aom_usec_timer_mark(&timer);
1298 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1299 printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
1300 time1, time2, time1 / time2);
1301 }
1302 };
1303
// Correctness test, run for every registered parameter combination.
TEST_P(AV1Convolve2DTest, RunTest) {
  RunTest();
}

// Benchmark; the DISABLED_ prefix keeps it out of default test runs.
TEST_P(AV1Convolve2DTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// One instantiation per implementation, named after its architecture.
INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_neon_i8mm));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_sve2));
#endif
1340
1341 /////////////////////////////////////////////////////////////////
1342 // Single reference convolve-2D IntraBC functions (low bit-depth)
1343 /////////////////////////////////////////////////////////////////
1344
1345 class AV1Convolve2DIntraBCTest : public AV1ConvolveTest<convolve_2d_func> {
1346 public:
RunTest()1347 void RunTest() {
1348 // IntraBC functions only operate for subpel_x_qn = 8 and subpel_y_qn = 8.
1349 constexpr int kSubX = 8;
1350 constexpr int kSubY = 8;
1351 const int width = GetParam().Block().Width();
1352 const int height = GetParam().Block().Height();
1353 const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
1354 const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
1355 const uint8_t *input = FirstRandomInput8(GetParam());
1356
1357 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1358 ConvolveParams conv_params1 =
1359 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1360 // Use a stride different from width to avoid potential storing errors that
1361 // would go undetected. The input buffer is filled using a padding of 12, so
1362 // the stride can be anywhere between width and width + 12.
1363 av1_convolve_2d_sr_intrabc_c(input, width + 2, reference, kOutputStride,
1364 width, height, filter_params_x,
1365 filter_params_y, kSubX, kSubY, &conv_params1);
1366
1367 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1368 ConvolveParams conv_params2 =
1369 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1370 GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
1371 height, filter_params_x, filter_params_y, kSubX,
1372 kSubY, &conv_params2);
1373
1374 AssertOutputBufferEq(reference, test, width, height);
1375 }
1376
SpeedTest()1377 void SpeedTest() {
1378 constexpr int kNumIters = 10000;
1379 const InterpFilter h_f = static_cast<InterpFilter>(BILINEAR);
1380 const InterpFilter v_f = static_cast<InterpFilter>(BILINEAR);
1381 const int width = GetParam().Block().Width();
1382 const int height = GetParam().Block().Height();
1383 const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
1384 const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
1385 const uint8_t *input = FirstRandomInput8(GetParam());
1386
1387 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1388 ConvolveParams conv_params1 =
1389 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1390 aom_usec_timer timer;
1391 aom_usec_timer_start(&timer);
1392 for (int i = 0; i < kNumIters; ++i) {
1393 av1_convolve_2d_sr_intrabc_c(input, width, reference, kOutputStride,
1394 width, height, filter_params_x,
1395 filter_params_y, 8, 8, &conv_params1);
1396 }
1397 aom_usec_timer_mark(&timer);
1398 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1399
1400 convolve_2d_func test_func = GetParam().TestFunction();
1401 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1402 ConvolveParams conv_params2 =
1403 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1404 aom_usec_timer_start(&timer);
1405 for (int i = 0; i < kNumIters; ++i) {
1406 test_func(input, width, test, kOutputStride, width, height,
1407 filter_params_x, filter_params_y, 8, 8, &conv_params2);
1408 }
1409 aom_usec_timer_mark(&timer);
1410 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1411
1412 printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
1413 time1, time2, time1 / time2);
1414 }
1415 };
1416
// Correctness test, run for every registered parameter combination.
TEST_P(AV1Convolve2DIntraBCTest, RunTest) {
  RunTest();
}

// Benchmark; the DISABLED_ prefix keeps it out of default test runs.
TEST_P(AV1Convolve2DIntraBCTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// One instantiation per implementation, named after its architecture.
INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DIntraBCTest,
                         BuildLowbdParams(av1_convolve_2d_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DIntraBCTest,
                         BuildLowbdParams(av1_convolve_2d_sr_intrabc_neon));
#endif
1428
1429 #if CONFIG_AV1_HIGHBITDEPTH
1430 //////////////////////////////////////////////////////////
1431 // Single reference convolve-2d functions (high bit-depth)
1432 //////////////////////////////////////////////////////////
1433
1434 typedef void (*highbd_convolve_2d_func)(
1435 const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
1436 int h, const InterpFilterParams *filter_params_x,
1437 const InterpFilterParams *filter_params_y, const int subpel_x_qn,
1438 const int subpel_y_qn, ConvolveParams *conv_params, int bd);
1439
1440 class AV1Convolve2DHighbdTest
1441 : public AV1ConvolveTest<highbd_convolve_2d_func> {
1442 public:
RunTest()1443 void RunTest() {
1444 // Do not test the no-op filter.
1445 for (int sub_x = 1; sub_x < 16; ++sub_x) {
1446 for (int sub_y = 1; sub_y < 16; ++sub_y) {
1447 for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
1448 for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
1449 if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
1450 ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
1451 continue;
1452 TestConvolve(static_cast<InterpFilter>(h_f),
1453 static_cast<InterpFilter>(v_f), sub_x, sub_y);
1454 }
1455 }
1456 }
1457 }
1458 }
1459
1460 public:
SpeedTest()1461 void SpeedTest() {
1462 for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
1463 for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
1464 if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
1465 ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
1466 continue;
1467 TestConvolveSpeed(static_cast<InterpFilter>(h_f),
1468 static_cast<InterpFilter>(v_f), 10000);
1469 }
1470 }
1471 }
1472
1473 private:
TestConvolve(const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y)1474 void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
1475 const int sub_x, const int sub_y) {
1476 const int width = GetParam().Block().Width();
1477 const int height = GetParam().Block().Height();
1478 const int bit_depth = GetParam().BitDepth();
1479 const InterpFilterParams *filter_params_x =
1480 av1_get_interp_filter_params_with_block_size(h_f, width);
1481 const InterpFilterParams *filter_params_y =
1482 av1_get_interp_filter_params_with_block_size(v_f, height);
1483 const uint16_t *input = FirstRandomInput16(GetParam());
1484 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1485 ConvolveParams conv_params1 =
1486 get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
1487 av1_highbd_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
1488 height, filter_params_x, filter_params_y, sub_x,
1489 sub_y, &conv_params1, bit_depth);
1490 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1491 ConvolveParams conv_params2 =
1492 get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
1493 GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
1494 filter_params_x, filter_params_y, sub_x, sub_y,
1495 &conv_params2, bit_depth);
1496 AssertOutputBufferEq(reference, test, width, height);
1497 }
1498
TestConvolveSpeed(const InterpFilter h_f,const InterpFilter v_f,int num_iters)1499 void TestConvolveSpeed(const InterpFilter h_f, const InterpFilter v_f,
1500 int num_iters) {
1501 const int width = GetParam().Block().Width();
1502 const int height = GetParam().Block().Height();
1503 const int bit_depth = GetParam().BitDepth();
1504 const InterpFilterParams *filter_params_x =
1505 av1_get_interp_filter_params_with_block_size(h_f, width);
1506 const InterpFilterParams *filter_params_y =
1507 av1_get_interp_filter_params_with_block_size(v_f, height);
1508 const uint16_t *input = FirstRandomInput16(GetParam());
1509 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1510 ConvolveParams conv_params1 =
1511 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1512 aom_usec_timer timer;
1513 aom_usec_timer_start(&timer);
1514 for (int i = 0; i < num_iters; ++i) {
1515 av1_highbd_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
1516 height, filter_params_x, filter_params_y, 0,
1517 0, &conv_params1, bit_depth);
1518 }
1519 aom_usec_timer_mark(&timer);
1520 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1521 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1522 ConvolveParams conv_params2 =
1523 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1524 aom_usec_timer_start(&timer);
1525 for (int i = 0; i < num_iters; ++i) {
1526 GetParam().TestFunction()(input, width, test, kOutputStride, width,
1527 height, filter_params_x, filter_params_y, 0, 0,
1528 &conv_params2, bit_depth);
1529 }
1530 aom_usec_timer_mark(&timer);
1531 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1532 printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
1533 time1, time2, time1 / time2);
1534 }
1535 };
1536
// Correctness test, run for every registered parameter combination.
TEST_P(AV1Convolve2DHighbdTest, RunTest) {
  RunTest();
}

// Benchmark; the DISABLED_ prefix keeps it out of default test runs.
TEST_P(AV1Convolve2DHighbdTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// One instantiation per implementation, named after its architecture.
INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_c));

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_ssse3));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_sve2));
#endif
1563
1564 //////////////////////////////////////////////////////////////////
1565 // Single reference convolve-2d IntraBC functions (high bit-depth)
1566 //////////////////////////////////////////////////////////////////
1567
1568 class AV1Convolve2DHighbdIntraBCTest
1569 : public AV1ConvolveTest<highbd_convolve_2d_func> {
1570 public:
RunTest()1571 void RunTest() {
1572 // IntraBC functions only operate for subpel_x_qn = 8 and subpel_y_qn = 8.
1573 constexpr int kSubX = 8;
1574 constexpr int kSubY = 8;
1575 const int width = GetParam().Block().Width();
1576 const int height = GetParam().Block().Height();
1577 const int bit_depth = GetParam().BitDepth();
1578 const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
1579 const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
1580 const uint16_t *input = FirstRandomInput16(GetParam());
1581
1582 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1583 ConvolveParams conv_params1 =
1584 get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
1585 // Use a stride different from width to avoid potential storing errors that
1586 // would go undetected. The input buffer is filled using a padding of 12, so
1587 // the stride can be anywhere between width and width + 12.
1588 av1_highbd_convolve_2d_sr_intrabc_c(input, width + 2, reference,
1589 kOutputStride, width, height,
1590 filter_params_x, filter_params_y, kSubX,
1591 kSubY, &conv_params1, bit_depth);
1592
1593 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1594 ConvolveParams conv_params2 =
1595 get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
1596 GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
1597 height, filter_params_x, filter_params_y, kSubX,
1598 kSubY, &conv_params2, bit_depth);
1599
1600 AssertOutputBufferEq(reference, test, width, height);
1601 }
1602
SpeedTest()1603 void SpeedTest() {
1604 constexpr int kNumIters = 10000;
1605 const InterpFilter h_f = static_cast<InterpFilter>(BILINEAR);
1606 const InterpFilter v_f = static_cast<InterpFilter>(BILINEAR);
1607 const int width = GetParam().Block().Width();
1608 const int height = GetParam().Block().Height();
1609 const int bit_depth = GetParam().BitDepth();
1610 const InterpFilterParams *filter_params_x =
1611 av1_get_interp_filter_params_with_block_size(h_f, width);
1612 const InterpFilterParams *filter_params_y =
1613 av1_get_interp_filter_params_with_block_size(v_f, height);
1614 const uint16_t *input = FirstRandomInput16(GetParam());
1615
1616 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1617 ConvolveParams conv_params1 =
1618 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1619 aom_usec_timer timer;
1620 aom_usec_timer_start(&timer);
1621 for (int i = 0; i < kNumIters; ++i) {
1622 av1_highbd_convolve_2d_sr_intrabc_c(
1623 input, width, reference, kOutputStride, width, height,
1624 filter_params_x, filter_params_y, 0, 0, &conv_params1, bit_depth);
1625 }
1626 aom_usec_timer_mark(&timer);
1627 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1628
1629 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1630 highbd_convolve_2d_func test_func = GetParam().TestFunction();
1631 ConvolveParams conv_params2 =
1632 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1633 aom_usec_timer_start(&timer);
1634 for (int i = 0; i < kNumIters; ++i) {
1635 test_func(input, width, test, kOutputStride, width, height,
1636 filter_params_x, filter_params_y, 0, 0, &conv_params2,
1637 bit_depth);
1638 }
1639 aom_usec_timer_mark(&timer);
1640 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1641
1642 printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
1643 time1, time2, time1 / time2);
1644 }
1645 };
1646
// Runs the fixture's RunTest() once for every instantiated parameter.
TEST_P(AV1Convolve2DHighbdIntraBCTest, RunTest) { RunTest(); }

// Runs the fixture's SpeedTest(). The DISABLED_ prefix keeps gtest from
// running it unless --gtest_also_run_disabled_tests is passed.
TEST_P(AV1Convolve2DHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }

// One instantiation per implementation; the suite prefix is the architecture
// name so that the test infrastructure can filter suites by instruction set.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_neon));
#endif
1660
1661 #endif // CONFIG_AV1_HIGHBITDEPTH
1662
1663 //////////////////////////
1664 // Compound Convolve Tests
1665 //////////////////////////
1666
1667 // The compound functions do not work for chroma block sizes. Provide
1668 // a function to generate test parameters for just luma block sizes.
1669 template <typename T>
GetLumaTestParams(std::initializer_list<int> bit_depths,T test_func)1670 std::vector<TestParam<T>> GetLumaTestParams(
1671 std::initializer_list<int> bit_depths, T test_func) {
1672 std::set<BlockSize> sizes;
1673 for (int b = BLOCK_4X4; b < BLOCK_SIZES_ALL; ++b) {
1674 const int w = block_size_wide[b];
1675 const int h = block_size_high[b];
1676 sizes.insert(BlockSize(w, h));
1677 }
1678 std::vector<TestParam<T>> result;
1679 for (int bit_depth : bit_depths) {
1680 for (const auto &block : sizes) {
1681 result.push_back(TestParam<T>(block, bit_depth, test_func));
1682 }
1683 }
1684 return result;
1685 }
1686
1687 template <typename T>
GetLowbdLumaTestParams(T test_func)1688 std::vector<TestParam<T>> GetLowbdLumaTestParams(T test_func) {
1689 return GetLumaTestParams({ 8 }, test_func);
1690 }
1691
1692 template <typename T>
BuildLowbdLumaParams(T test_func)1693 ::testing::internal::ParamGenerator<TestParam<T>> BuildLowbdLumaParams(
1694 T test_func) {
1695 return ::testing::ValuesIn(GetLowbdLumaTestParams(test_func));
1696 }
1697
// Sanity check of the parameter generator itself: 22 entries are expected,
// each with bit depth 8 and carrying the function that was passed in.
TEST_F(AV1ConvolveParametersTest, GetLowbdLumaTestParams) {
  auto v = GetLowbdLumaTestParams(av1_dist_wtd_convolve_x_c);
  ASSERT_EQ(22U, v.size());
  for (size_t idx = 0; idx < v.size(); ++idx) {
    const auto &e = v[idx];
    ASSERT_EQ(8, e.BitDepth());
    // Compare into a bool first so that the assertion only deals with a
    // plain boolean rather than with function pointers.
    const bool same_fn = (av1_dist_wtd_convolve_x_c == e.TestFunction());
    ASSERT_TRUE(same_fn);
  }
}
1707
1708 #if CONFIG_AV1_HIGHBITDEPTH
1709 template <typename T>
GetHighbdLumaTestParams(T test_func)1710 std::vector<TestParam<T>> GetHighbdLumaTestParams(T test_func) {
1711 return GetLumaTestParams({ 10, 12 }, test_func);
1712 }
1713
// Sanity check of the high bit-depth parameter generator: 44 entries are
// expected, every one with bit depth 10 or 12 and the function that was
// passed in, and both bit depths must occur equally often.
TEST_F(AV1ConvolveParametersTest, GetHighbdLumaTestParams) {
  auto v = GetHighbdLumaTestParams(av1_highbd_dist_wtd_convolve_x_c);
  ASSERT_EQ(44U, v.size());
  int num_10 = 0;
  int num_12 = 0;
  for (size_t idx = 0; idx < v.size(); ++idx) {
    const auto &e = v[idx];
    ASSERT_TRUE(10 == e.BitDepth() || 12 == e.BitDepth());
    const bool same_fn =
        (av1_highbd_dist_wtd_convolve_x_c == e.TestFunction());
    ASSERT_TRUE(same_fn);
    // The assertion above guarantees that anything that is not 10 is 12.
    int &counter = (e.BitDepth() == 10) ? num_10 : num_12;
    ++counter;
  }
  ASSERT_EQ(num_10, num_12);
}
1731
1732 template <typename T>
BuildHighbdLumaParams(T test_func)1733 ::testing::internal::ParamGenerator<TestParam<T>> BuildHighbdLumaParams(
1734 T test_func) {
1735 return ::testing::ValuesIn(GetHighbdLumaTestParams(test_func));
1736 }
1737
1738 #endif // CONFIG_AV1_HIGHBITDEPTH
1739
// Compound cases also need to test different frame offsets and weightings.
// A CompoundParam is one such configuration: whether distance-weighted
// averaging is used, plus the forward and backward offsets that go with it.
// The values are fixed at construction and exposed through getters only.
class CompoundParam {
 public:
  CompoundParam(bool use_dist_wtd_comp_avg, int fwd_offset, int bck_offset)
      : use_dist_wtd_comp_avg_{ use_dist_wtd_comp_avg },
        fwd_offset_{ fwd_offset },
        bck_offset_{ bck_offset } {}

  // Offset given as the constructor's `bck_offset` argument.
  int BckOffset() const { return bck_offset_; }

  // Offset given as the constructor's `fwd_offset` argument.
  int FwdOffset() const { return fwd_offset_; }

  // True when distance-weighted compound averaging was requested.
  bool UseDistWtdCompAvg() const { return use_dist_wtd_comp_avg_; }

 private:
  bool use_dist_wtd_comp_avg_;
  int fwd_offset_;
  int bck_offset_;
};
1756
GetCompoundParams()1757 std::vector<CompoundParam> GetCompoundParams() {
1758 std::vector<CompoundParam> result;
1759 result.push_back(CompoundParam(false, 0, 0));
1760 for (int k = 0; k < 2; ++k) {
1761 for (int l = 0; l < 4; ++l) {
1762 result.push_back(CompoundParam(true, quant_dist_lookup_table[l][k],
1763 quant_dist_lookup_table[l][1 - k]));
1764 }
1765 }
1766 return result;
1767 }
1768
// Sanity check of GetCompoundParams(): nine entries are expected, of which
// only the first one has distance weighting turned off.
TEST_F(AV1ConvolveParametersTest, GetCompoundParams) {
  auto v = GetCompoundParams();
  ASSERT_EQ(9U, v.size());
  ASSERT_FALSE(v[0].UseDistWtdCompAvg());
  size_t i = 1;
  while (i < v.size()) {
    ASSERT_TRUE(v[i].UseDistWtdCompAvg());
    ++i;
  }
}
1777
1778 ////////////////////////////////////////////////
1779 // Compound convolve-x functions (low bit-depth)
1780 ////////////////////////////////////////////////
1781
GetConvolveParams(int do_average,CONV_BUF_TYPE * conv_buf,int width,int bit_depth,const CompoundParam & compound)1782 ConvolveParams GetConvolveParams(int do_average, CONV_BUF_TYPE *conv_buf,
1783 int width, int bit_depth,
1784 const CompoundParam &compound) {
1785 ConvolveParams conv_params =
1786 get_conv_params_no_round(do_average, 0, conv_buf, width, 1, bit_depth);
1787 conv_params.use_dist_wtd_comp_avg = compound.UseDistWtdCompAvg();
1788 conv_params.fwd_offset = compound.FwdOffset();
1789 conv_params.bck_offset = compound.BckOffset();
1790 return conv_params;
1791 }
1792
1793 class AV1ConvolveXCompoundTest : public AV1ConvolveTest<convolve_x_func> {
1794 public:
RunTest()1795 void RunTest() {
1796 auto compound_params = GetCompoundParams();
1797 // Do not test the no-op filter.
1798 for (int sub_pix = 1; sub_pix < 16; ++sub_pix) {
1799 for (int f = EIGHTTAP_REGULAR; f < INTERP_FILTERS_ALL; ++f) {
1800 for (const auto &c : compound_params) {
1801 TestConvolve(sub_pix, static_cast<InterpFilter>(f), c);
1802 }
1803 }
1804 }
1805 }
1806
1807 protected:
FilterParams(InterpFilter f,const BlockSize & block) const1808 virtual const InterpFilterParams *FilterParams(InterpFilter f,
1809 const BlockSize &block) const {
1810 return av1_get_interp_filter_params_with_block_size(f, block.Width());
1811 }
1812
ReferenceFunc() const1813 virtual convolve_x_func ReferenceFunc() const {
1814 return av1_dist_wtd_convolve_x_c;
1815 }
1816
1817 private:
TestConvolve(const int sub_pix,const InterpFilter filter,const CompoundParam & compound)1818 void TestConvolve(const int sub_pix, const InterpFilter filter,
1819 const CompoundParam &compound) {
1820 const int width = GetParam().Block().Width();
1821 const int height = GetParam().Block().Height();
1822 const uint8_t *input1 = FirstRandomInput8(GetParam());
1823 const uint8_t *input2 = SecondRandomInput8(GetParam());
1824 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1825 DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
1826 Convolve(ReferenceFunc(), input1, input2, reference, reference_conv_buf,
1827 compound, sub_pix, filter);
1828
1829 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1830 DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
1831 Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
1832 compound, sub_pix, filter);
1833
1834 AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
1835 AssertOutputBufferEq(reference, test, width, height);
1836 }
1837
1838 private:
Convolve(convolve_x_func test_func,const uint8_t * src1,const uint8_t * src2,uint8_t * dst,CONV_BUF_TYPE * conv_buf,const CompoundParam & compound,const int sub_pix,const InterpFilter filter)1839 void Convolve(convolve_x_func test_func, const uint8_t *src1,
1840 const uint8_t *src2, uint8_t *dst, CONV_BUF_TYPE *conv_buf,
1841 const CompoundParam &compound, const int sub_pix,
1842 const InterpFilter filter) {
1843 const int width = GetParam().Block().Width();
1844 const int height = GetParam().Block().Height();
1845 const InterpFilterParams *filter_params =
1846 FilterParams(filter, GetParam().Block());
1847
1848 ConvolveParams conv_params =
1849 GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
1850 test_func(src1, width, dst, kOutputStride, width, height, filter_params,
1851 sub_pix, &conv_params);
1852
1853 conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
1854 test_func(src2, width, dst, kOutputStride, width, height, filter_params,
1855 sub_pix, &conv_params);
1856 }
1857 };
1858
// Runs the fixture's RunTest() once for every instantiated parameter.
TEST_P(AV1ConvolveXCompoundTest, RunTest) { RunTest(); }

// One instantiation per implementation, restricted to luma block sizes. The
// suite prefix is the architecture name so that the test infrastructure can
// filter suites by instruction set.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveXCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(
    NEON_DOTPROD, AV1ConvolveXCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(
    NEON_I8MM, AV1ConvolveXCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon_i8mm));
#endif
1890
1891 #if CONFIG_AV1_HIGHBITDEPTH
1892 /////////////////////////////////////////////////
1893 // Compound convolve-x functions (high bit-depth)
1894 /////////////////////////////////////////////////
1895 class AV1ConvolveXHighbdCompoundTest
1896 : public AV1ConvolveTest<highbd_convolve_x_func> {
1897 public:
RunTest()1898 void RunTest() {
1899 auto compound_params = GetCompoundParams();
1900 // Do not test the no-op filter.
1901 for (int sub_pix = 1; sub_pix < 16; ++sub_pix) {
1902 for (int f = EIGHTTAP_REGULAR; f < INTERP_FILTERS_ALL; ++f) {
1903 for (const auto &c : compound_params) {
1904 TestConvolve(sub_pix, static_cast<InterpFilter>(f), c);
1905 }
1906 }
1907 }
1908 }
1909
1910 protected:
FilterParams(InterpFilter f,const BlockSize & block) const1911 virtual const InterpFilterParams *FilterParams(InterpFilter f,
1912 const BlockSize &block) const {
1913 return av1_get_interp_filter_params_with_block_size(f, block.Width());
1914 }
1915
ReferenceFunc() const1916 virtual highbd_convolve_x_func ReferenceFunc() const {
1917 return av1_highbd_dist_wtd_convolve_x_c;
1918 }
1919
1920 private:
TestConvolve(const int sub_pix,const InterpFilter filter,const CompoundParam & compound)1921 void TestConvolve(const int sub_pix, const InterpFilter filter,
1922 const CompoundParam &compound) {
1923 const int width = GetParam().Block().Width();
1924 const int height = GetParam().Block().Height();
1925
1926 const uint16_t *input1 = FirstRandomInput16(GetParam());
1927 const uint16_t *input2 = SecondRandomInput16(GetParam());
1928 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1929 DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
1930 Convolve(ReferenceFunc(), input1, input2, reference, reference_conv_buf,
1931 compound, sub_pix, filter);
1932
1933 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1934 DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
1935 Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
1936 compound, sub_pix, filter);
1937
1938 AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
1939 AssertOutputBufferEq(reference, test, width, height);
1940 }
1941
Convolve(highbd_convolve_x_func test_func,const uint16_t * src1,const uint16_t * src2,uint16_t * dst,CONV_BUF_TYPE * conv_buf,const CompoundParam & compound,const int sub_pix,const InterpFilter filter)1942 void Convolve(highbd_convolve_x_func test_func, const uint16_t *src1,
1943 const uint16_t *src2, uint16_t *dst, CONV_BUF_TYPE *conv_buf,
1944 const CompoundParam &compound, const int sub_pix,
1945 const InterpFilter filter) {
1946 const int width = GetParam().Block().Width();
1947 const int height = GetParam().Block().Height();
1948 const int bit_depth = GetParam().BitDepth();
1949 const InterpFilterParams *filter_params =
1950 FilterParams(filter, GetParam().Block());
1951 ConvolveParams conv_params =
1952 GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound);
1953 test_func(src1, width, dst, kOutputStride, width, height, filter_params,
1954 sub_pix, &conv_params, bit_depth);
1955 conv_params =
1956 GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound);
1957 test_func(src2, width, dst, kOutputStride, width, height, filter_params,
1958 sub_pix, &conv_params, bit_depth);
1959 }
1960 };
1961
// Runs the fixture's RunTest() once for every instantiated parameter.
TEST_P(AV1ConvolveXHighbdCompoundTest, RunTest) { RunTest(); }

// One instantiation per implementation, restricted to luma block sizes. The
// suite prefix is the architecture name so that the test infrastructure can
// filter suites by instruction set.
INSTANTIATE_TEST_SUITE_P(
    C, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_c));

#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_sse4_1));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(
    SVE2, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_sve2));
#endif
1991
1992 #endif // CONFIG_AV1_HIGHBITDEPTH
1993
1994 ////////////////////////////////////////////////
1995 // Compound convolve-y functions (low bit-depth)
1996 ////////////////////////////////////////////////
1997
1998 // Note that the X and Y convolve functions have the same type signature and
1999 // logic; they only differentiate the filter parameters and reference function.
2000 class AV1ConvolveYCompoundTest : public AV1ConvolveXCompoundTest {
2001 protected:
FilterParams(InterpFilter f,const BlockSize & block) const2002 const InterpFilterParams *FilterParams(
2003 InterpFilter f, const BlockSize &block) const override {
2004 return av1_get_interp_filter_params_with_block_size(f, block.Height());
2005 }
2006
ReferenceFunc() const2007 convolve_x_func ReferenceFunc() const override {
2008 return av1_dist_wtd_convolve_y_c;
2009 }
2010 };
2011
// Runs the inherited RunTest() once for every instantiated parameter.
TEST_P(AV1ConvolveYCompoundTest, RunTest) { RunTest(); }

// One instantiation per implementation, restricted to luma block sizes. The
// suite prefix is the architecture name so that the test infrastructure can
// filter suites by instruction set.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveYCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_neon));
#endif
2031
2032 #if CONFIG_AV1_HIGHBITDEPTH
2033 /////////////////////////////////////////////////
2034 // Compound convolve-y functions (high bit-depth)
2035 /////////////////////////////////////////////////
2036
2037 // Again, the X and Y convolve functions have the same type signature and logic.
2038 class AV1ConvolveYHighbdCompoundTest : public AV1ConvolveXHighbdCompoundTest {
ReferenceFunc() const2039 highbd_convolve_x_func ReferenceFunc() const override {
2040 return av1_highbd_dist_wtd_convolve_y_c;
2041 }
FilterParams(InterpFilter f,const BlockSize & block) const2042 const InterpFilterParams *FilterParams(
2043 InterpFilter f, const BlockSize &block) const override {
2044 return av1_get_interp_filter_params_with_block_size(f, block.Height());
2045 }
2046 };
2047
// Runs the inherited RunTest() once for every instantiated parameter.
TEST_P(AV1ConvolveYHighbdCompoundTest, RunTest) { RunTest(); }

// One instantiation per implementation, restricted to luma block sizes. The
// suite prefix is the architecture name so that the test infrastructure can
// filter suites by instruction set.
INSTANTIATE_TEST_SUITE_P(
    C, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_c));

#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_sse4_1));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(
    SVE2, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_sve2));
#endif
2077
2078 #endif // CONFIG_AV1_HIGHBITDEPTH
2079
2080 //////////////////////////////////////////////////////
2081 // Compound convolve-2d-copy functions (low bit-depth)
2082 //////////////////////////////////////////////////////
2083 typedef void (*compound_conv_2d_copy_func)(const uint8_t *src, int src_stride,
2084 uint8_t *dst, int dst_stride, int w,
2085 int h, ConvolveParams *conv_params);
2086
2087 class AV1Convolve2DCopyCompoundTest
2088 : public AV1ConvolveTest<compound_conv_2d_copy_func> {
2089 public:
RunTest()2090 void RunTest() {
2091 auto compound_params = GetCompoundParams();
2092 for (const auto &compound : compound_params) {
2093 TestConvolve(compound);
2094 }
2095 }
SpeedTest()2096 void SpeedTest() {
2097 for (const auto &compound : GetCompoundParams()) {
2098 TestConvolveSpeed(compound, 100000);
2099 }
2100 }
2101
2102 private:
TestConvolve(const CompoundParam & compound)2103 void TestConvolve(const CompoundParam &compound) {
2104 const BlockSize &block = GetParam().Block();
2105 const int width = block.Width();
2106 const int height = block.Height();
2107
2108 const uint8_t *input1 = FirstRandomInput8(GetParam());
2109 const uint8_t *input2 = SecondRandomInput8(GetParam());
2110 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
2111 DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
2112 Convolve(av1_dist_wtd_convolve_2d_copy_c, input1, input2, reference,
2113 reference_conv_buf, compound);
2114
2115 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
2116 DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
2117 Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
2118 compound);
2119
2120 AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
2121 AssertOutputBufferEq(reference, test, width, height);
2122 }
2123
TestConvolveSpeed(const CompoundParam & compound,const int num_iters)2124 void TestConvolveSpeed(const CompoundParam &compound, const int num_iters) {
2125 const int width = GetParam().Block().Width();
2126 const int height = GetParam().Block().Height();
2127
2128 const uint8_t *src0 = FirstRandomInput8(GetParam());
2129 const uint8_t *src1 = SecondRandomInput8(GetParam());
2130 DECLARE_ALIGNED(32, uint8_t, dst[MAX_SB_SQUARE]);
2131 DECLARE_ALIGNED(32, CONV_BUF_TYPE, conv_buf[MAX_SB_SQUARE]);
2132
2133 const auto test_func = GetParam().TestFunction();
2134
2135 ConvolveParams conv_params_0 =
2136 GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
2137 ConvolveParams conv_params_1 =
2138 GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
2139
2140 aom_usec_timer timer;
2141 aom_usec_timer_start(&timer);
2142 for (int i = 0; i < num_iters; ++i) {
2143 av1_dist_wtd_convolve_2d_copy_c(src0, width, dst, kOutputStride, width,
2144 height, &conv_params_0);
2145 av1_dist_wtd_convolve_2d_copy_c(src1, width, dst, kOutputStride, width,
2146 height, &conv_params_1);
2147 }
2148 aom_usec_timer_mark(&timer);
2149 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
2150
2151 aom_usec_timer_start(&timer);
2152 for (int i = 0; i < num_iters; ++i) {
2153 test_func(src0, width, dst, kOutputStride, width, height, &conv_params_0);
2154 test_func(src1, width, dst, kOutputStride, width, height, &conv_params_1);
2155 }
2156 aom_usec_timer_mark(&timer);
2157 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
2158 printf("Dist Weighted: %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n",
2159 compound.UseDistWtdCompAvg(), width, height, time1, time2,
2160 time1 / time2);
2161 }
2162
Convolve(compound_conv_2d_copy_func test_func,const uint8_t * src1,const uint8_t * src2,uint8_t * dst,uint16_t * conv_buf,const CompoundParam & compound)2163 void Convolve(compound_conv_2d_copy_func test_func, const uint8_t *src1,
2164 const uint8_t *src2, uint8_t *dst, uint16_t *conv_buf,
2165 const CompoundParam &compound) {
2166 const BlockSize &block = GetParam().Block();
2167 const int width = block.Width();
2168 const int height = block.Height();
2169 ConvolveParams conv_params =
2170 GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
2171 test_func(src1, width, dst, kOutputStride, width, height, &conv_params);
2172
2173 conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
2174 test_func(src2, width, dst, kOutputStride, width, height, &conv_params);
2175 }
2176 };
2177
// Runs the fixture's RunTest() once for every instantiated parameter.
TEST_P(AV1Convolve2DCopyCompoundTest, RunTest) { RunTest(); }
// Runs the fixture's SpeedTest(). The DISABLED_ prefix keeps gtest from
// running it unless --gtest_also_run_disabled_tests is passed.
TEST_P(AV1Convolve2DCopyCompoundTest, DISABLED_SpeedTest) { SpeedTest(); }

// One instantiation per implementation, restricted to luma block sizes. The
// suite prefix is the architecture name so that the test infrastructure can
// filter suites by instruction set.
INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DCopyCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, AV1Convolve2DCopyCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1Convolve2DCopyCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DCopyCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_neon));
#endif
2201
2202 #if CONFIG_AV1_HIGHBITDEPTH
2203 ///////////////////////////////////////////////////////
2204 // Compound convolve-2d-copy functions (high bit-depth)
2205 ///////////////////////////////////////////////////////
2206 typedef void (*highbd_compound_conv_2d_copy_func)(const uint16_t *src,
2207 int src_stride, uint16_t *dst,
2208 int dst_stride, int w, int h,
2209 ConvolveParams *conv_params,
2210 int bd);
2211
2212 class AV1Convolve2DCopyHighbdCompoundTest
2213 : public AV1ConvolveTest<highbd_compound_conv_2d_copy_func> {
2214 public:
RunTest()2215 void RunTest() {
2216 auto compound_params = GetCompoundParams();
2217 for (const auto &compound : compound_params) {
2218 TestConvolve(compound);
2219 }
2220 }
2221
2222 private:
TestConvolve(const CompoundParam & compound)2223 void TestConvolve(const CompoundParam &compound) {
2224 const BlockSize &block = GetParam().Block();
2225 const int width = block.Width();
2226 const int height = block.Height();
2227
2228 const uint16_t *input1 = FirstRandomInput16(GetParam());
2229 const uint16_t *input2 = SecondRandomInput16(GetParam());
2230 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
2231 DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
2232 Convolve(av1_highbd_dist_wtd_convolve_2d_copy_c, input1, input2, reference,
2233 reference_conv_buf, compound);
2234
2235 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
2236 DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
2237 Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
2238 compound);
2239
2240 AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
2241 AssertOutputBufferEq(reference, test, width, height);
2242 }
2243
Convolve(highbd_compound_conv_2d_copy_func test_func,const uint16_t * src1,const uint16_t * src2,uint16_t * dst,uint16_t * conv_buf,const CompoundParam & compound)2244 void Convolve(highbd_compound_conv_2d_copy_func test_func,
2245 const uint16_t *src1, const uint16_t *src2, uint16_t *dst,
2246 uint16_t *conv_buf, const CompoundParam &compound) {
2247 const BlockSize &block = GetParam().Block();
2248 const int width = block.Width();
2249 const int height = block.Height();
2250 const int bit_depth = GetParam().BitDepth();
2251
2252 ConvolveParams conv_params =
2253 GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound);
2254 test_func(src1, width, dst, kOutputStride, width, height, &conv_params,
2255 bit_depth);
2256
2257 conv_params =
2258 GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound);
2259 test_func(src2, width, dst, kOutputStride, width, height, &conv_params,
2260 bit_depth);
2261 }
2262 };
2263
// Runs the fixture's RunTest() once for every instantiated parameter.
TEST_P(AV1Convolve2DCopyHighbdCompoundTest, RunTest) { RunTest(); }

// One instantiation per implementation, restricted to luma block sizes. The
// suite prefix is the architecture name so that the test infrastructure can
// filter suites by instruction set.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DCopyHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_c));

#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1Convolve2DCopyHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_sse4_1));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1Convolve2DCopyHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DCopyHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_neon));
#endif
2287
2288 #endif // CONFIG_AV1_HIGHBITDEPTH
2289
2290 /////////////////////////////////////////////////
2291 // Compound convolve-2d functions (low bit-depth)
2292 /////////////////////////////////////////////////
2293
2294 class AV1Convolve2DCompoundTest : public AV1ConvolveTest<convolve_2d_func> {
2295 public:
RunTest()2296 void RunTest() {
2297 auto compound_params = GetCompoundParams();
2298 for (int h_f = EIGHTTAP_REGULAR; h_f < INTERP_FILTERS_ALL; ++h_f) {
2299 for (int v_f = EIGHTTAP_REGULAR; v_f < INTERP_FILTERS_ALL; ++v_f) {
2300 // Do not test the no-op filter.
2301 for (int sub_x = 1; sub_x < 16; ++sub_x) {
2302 for (int sub_y = 1; sub_y < 16; ++sub_y) {
2303 for (const auto &compound : compound_params) {
2304 TestConvolve(static_cast<InterpFilter>(h_f),
2305 static_cast<InterpFilter>(v_f), sub_x, sub_y,
2306 compound);
2307 }
2308 }
2309 }
2310 }
2311 }
2312 }
2313
2314 private:
TestConvolve(const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y,const CompoundParam & compound)2315 void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
2316 const int sub_x, const int sub_y,
2317 const CompoundParam &compound) {
2318 const BlockSize &block = GetParam().Block();
2319 const int width = block.Width();
2320 const int height = block.Height();
2321
2322 const uint8_t *input1 = FirstRandomInput8(GetParam());
2323 const uint8_t *input2 = SecondRandomInput8(GetParam());
2324 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
2325 DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
2326 Convolve(av1_dist_wtd_convolve_2d_c, input1, input2, reference,
2327 reference_conv_buf, compound, h_f, v_f, sub_x, sub_y);
2328
2329 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
2330 DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
2331 Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
2332 compound, h_f, v_f, sub_x, sub_y);
2333
2334 AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
2335 AssertOutputBufferEq(reference, test, width, height);
2336 }
2337
2338 private:
Convolve(convolve_2d_func test_func,const uint8_t * src1,const uint8_t * src2,uint8_t * dst,uint16_t * conv_buf,const CompoundParam & compound,const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y)2339 void Convolve(convolve_2d_func test_func, const uint8_t *src1,
2340 const uint8_t *src2, uint8_t *dst, uint16_t *conv_buf,
2341 const CompoundParam &compound, const InterpFilter h_f,
2342 const InterpFilter v_f, const int sub_x, const int sub_y) {
2343 const BlockSize &block = GetParam().Block();
2344 const int width = block.Width();
2345 const int height = block.Height();
2346
2347 const InterpFilterParams *filter_params_x =
2348 av1_get_interp_filter_params_with_block_size(h_f, width);
2349 const InterpFilterParams *filter_params_y =
2350 av1_get_interp_filter_params_with_block_size(v_f, height);
2351 ConvolveParams conv_params =
2352 GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
2353
2354 test_func(src1, width, dst, kOutputStride, width, height, filter_params_x,
2355 filter_params_y, sub_x, sub_y, &conv_params);
2356
2357 conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
2358 test_func(src2, width, dst, kOutputStride, width, height, filter_params_x,
2359 filter_params_y, sub_x, sub_y, &conv_params);
2360 }
2361 };
2362
// Value-parameterized test body: forwards to the fixture's RunTest() for each
// parameter set supplied by the instantiations of AV1Convolve2DCompoundTest.
TEST_P(AV1Convolve2DCompoundTest, RunTest) { RunTest(); }
2364
// Instantiations of AV1Convolve2DCompoundTest. Each instance is named after
// the architecture it exercises so that architecture-based --gtest_filter
// patterns keep working. The C instance runs av1_dist_wtd_convolve_2d_c
// against itself; every other instance is compiled only when the
// corresponding HAVE_* macro is enabled.
INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_c));

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1Convolve2DCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_ssse3));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(
    NEON_DOTPROD, AV1Convolve2DCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(
    NEON_I8MM, AV1Convolve2DCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon_i8mm));
#endif
2394
2395 #if CONFIG_AV1_HIGHBITDEPTH
2396 //////////////////////////////////////////////////
2397 // Compound convolve-2d functions (high bit-depth)
2398 //////////////////////////////////////////////////
2399
2400 class AV1Convolve2DHighbdCompoundTest
2401 : public AV1ConvolveTest<highbd_convolve_2d_func> {
2402 public:
RunTest()2403 void RunTest() {
2404 auto compound_params = GetCompoundParams();
2405 for (int h_f = EIGHTTAP_REGULAR; h_f < INTERP_FILTERS_ALL; ++h_f) {
2406 for (int v_f = EIGHTTAP_REGULAR; v_f < INTERP_FILTERS_ALL; ++v_f) {
2407 // Do not test the no-op filter.
2408 for (int sub_x = 1; sub_x < 16; ++sub_x) {
2409 for (int sub_y = 1; sub_y < 16; ++sub_y) {
2410 for (const auto &compound : compound_params) {
2411 TestConvolve(static_cast<InterpFilter>(h_f),
2412 static_cast<InterpFilter>(v_f), sub_x, sub_y,
2413 compound);
2414 }
2415 }
2416 }
2417 }
2418 }
2419 }
2420
2421 private:
TestConvolve(const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y,const CompoundParam & compound)2422 void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
2423 const int sub_x, const int sub_y,
2424 const CompoundParam &compound) {
2425 const BlockSize &block = GetParam().Block();
2426 const int width = block.Width();
2427 const int height = block.Height();
2428 const uint16_t *input1 = FirstRandomInput16(GetParam());
2429 const uint16_t *input2 = SecondRandomInput16(GetParam());
2430 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
2431 DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
2432 Convolve(av1_highbd_dist_wtd_convolve_2d_c, input1, input2, reference,
2433 reference_conv_buf, compound, h_f, v_f, sub_x, sub_y);
2434
2435 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
2436 DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
2437 Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
2438 compound, h_f, v_f, sub_x, sub_y);
2439
2440 AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
2441 AssertOutputBufferEq(reference, test, width, height);
2442 }
2443
2444 private:
Convolve(highbd_convolve_2d_func test_func,const uint16_t * src1,const uint16_t * src2,uint16_t * dst,uint16_t * conv_buf,const CompoundParam & compound,const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y)2445 void Convolve(highbd_convolve_2d_func test_func, const uint16_t *src1,
2446 const uint16_t *src2, uint16_t *dst, uint16_t *conv_buf,
2447 const CompoundParam &compound, const InterpFilter h_f,
2448 const InterpFilter v_f, const int sub_x, const int sub_y) {
2449 const BlockSize &block = GetParam().Block();
2450 const int width = block.Width();
2451 const int height = block.Height();
2452
2453 const InterpFilterParams *filter_params_x =
2454 av1_get_interp_filter_params_with_block_size(h_f, width);
2455 const InterpFilterParams *filter_params_y =
2456 av1_get_interp_filter_params_with_block_size(v_f, height);
2457 const int bit_depth = GetParam().BitDepth();
2458 ConvolveParams conv_params =
2459 GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound);
2460 test_func(src1, width, dst, kOutputStride, width, height, filter_params_x,
2461 filter_params_y, sub_x, sub_y, &conv_params, bit_depth);
2462
2463 conv_params =
2464 GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound);
2465 test_func(src2, width, dst, kOutputStride, width, height, filter_params_x,
2466 filter_params_y, sub_x, sub_y, &conv_params, bit_depth);
2467 }
2468 };
2469
// Value-parameterized test body: forwards to the fixture's RunTest() for each
// parameter set supplied by the instantiations of
// AV1Convolve2DHighbdCompoundTest.
TEST_P(AV1Convolve2DHighbdCompoundTest, RunTest) { RunTest(); }
2471
// Instantiations of AV1Convolve2DHighbdCompoundTest. Each instance is named
// after the architecture it exercises so that architecture-based
// --gtest_filter patterns keep working. The C instance runs
// av1_highbd_dist_wtd_convolve_2d_c against itself; every other instance is
// compiled only when the corresponding HAVE_* macro is enabled.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_c));

#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_sse4_1));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(
    SVE2, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_sve2));
#endif
2499
2500 #endif // CONFIG_AV1_HIGHBITDEPTH
2501
2502 } // namespace
2503