xref: /aosp_15_r20/external/libaom/test/av1_convolve_test.cc (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2020, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <cstddef>
13 #include <cstdint>
14 #include <ostream>
15 #include <set>
16 #include <vector>
17 #include "config/av1_rtcd.h"
18 #include "config/aom_dsp_rtcd.h"
19 #include "aom_ports/aom_timer.h"
20 #include "gtest/gtest.h"
21 #include "test/acm_random.h"
22 
23 namespace {
24 
25 // TODO(any): Remove following INTERP_FILTERS_ALL define, so that 12-tap filter
26 // is tested once 12-tap filter SIMD is done.
27 #undef INTERP_FILTERS_ALL
28 #define INTERP_FILTERS_ALL 4
29 
30 // All single reference convolve tests are parameterized on block size,
31 // bit-depth, and function to test.
32 //
33 // Note that parameterizing on these variables (and not other parameters) is
34 // a conscious decision - Jenkins needs some degree of parallelization to run
35 // the tests within the time limit, but if the number of parameters increases
36 // too much, the gtest framework does not handle it well (increased overhead per
37 // test, huge amount of output to stdout, etc.).
38 //
39 // Also note that the test suites must be named with the architecture, e.g.,
40 // C, C_X, AVX2_X, ... The test suite that runs on Jenkins sometimes runs tests
41 // that cannot deal with intrinsics (e.g., the Valgrind tests on 32-bit x86
42 // binaries) and will disable tests using a filter like
43 // --gtest_filter=-:SSE4_1.*. If the test suites are not named this way, the
44 // testing infrastructure will not selectively filter them properly.
// Width / height pair describing one block shape under test. Instances are
// ordered by width first and by height second, and compare equal only when
// both dimensions match.
class BlockSize {
 public:
  BlockSize(int w, int h) : width_(w), height_(h) {}

  int Width() const { return width_; }
  int Height() const { return height_; }

  // Orders by width; ties are broken by height.
  bool operator<(const BlockSize &other) const {
    if (width_ != other.width_) {
      return width_ < other.width_;
    }
    return height_ < other.height_;
  }

  bool operator==(const BlockSize &other) const {
    const bool same_width = width_ == other.width_;
    const bool same_height = height_ == other.height_;
    return same_width && same_height;
  }

 private:
  int width_;
  int height_;
};
67 
68 // Block size / bit depth / test function used to parameterize the tests.
69 template <typename T>
70 class TestParam {
71  public:
TestParam(const BlockSize & block,int bd,T test_func)72   TestParam(const BlockSize &block, int bd, T test_func)
73       : block_(block), bd_(bd), test_func_(test_func) {}
74 
Block() const75   const BlockSize &Block() const { return block_; }
BitDepth() const76   int BitDepth() const { return bd_; }
TestFunction() const77   T TestFunction() const { return test_func_; }
78 
operator ==(const TestParam & other) const79   bool operator==(const TestParam &other) const {
80     return Block() == other.Block() && BitDepth() == other.BitDepth() &&
81            TestFunction() == other.TestFunction();
82   }
83 
84  private:
85   BlockSize block_;
86   int bd_;
87   T test_func_;
88 };
89 
90 template <typename T>
operator <<(std::ostream & os,const TestParam<T> & test_arg)91 std::ostream &operator<<(std::ostream &os, const TestParam<T> &test_arg) {
92   return os << "TestParam { width:" << test_arg.Block().Width()
93             << " height:" << test_arg.Block().Height()
94             << " bd:" << test_arg.BitDepth() << " }";
95 }
96 
97 // Generate the list of all block widths / heights that need to be tested,
98 // includes chroma and luma sizes, for the given bit-depths. The test
99 // function is the same for all generated parameters.
100 template <typename T>
GetTestParams(std::initializer_list<int> bit_depths,T test_func)101 std::vector<TestParam<T>> GetTestParams(std::initializer_list<int> bit_depths,
102                                         T test_func) {
103   std::set<BlockSize> sizes;
104   for (int b = BLOCK_4X4; b < BLOCK_SIZES_ALL; ++b) {
105     const int w = block_size_wide[b];
106     const int h = block_size_high[b];
107     sizes.insert(BlockSize(w, h));
108     // Add in smaller chroma sizes as well.
109     if (w == 4 || h == 4) {
110       sizes.insert(BlockSize(w / 2, h / 2));
111     }
112   }
113   std::vector<TestParam<T>> result;
114   for (const BlockSize &block : sizes) {
115     for (int bd : bit_depths) {
116       result.push_back(TestParam<T>(block, bd, test_func));
117     }
118   }
119   return result;
120 }
121 
122 template <typename T>
GetLowbdTestParams(T test_func)123 std::vector<TestParam<T>> GetLowbdTestParams(T test_func) {
124   return GetTestParams({ 8 }, test_func);
125 }
126 
127 template <typename T>
BuildLowbdParams(T test_func)128 ::testing::internal::ParamGenerator<TestParam<T>> BuildLowbdParams(
129     T test_func) {
130   return ::testing::ValuesIn(GetLowbdTestParams(test_func));
131 }
132 
133 // Test the test-parameters generators work as expected.
134 class AV1ConvolveParametersTest : public ::testing::Test {};
135 
TEST_F(AV1ConvolveParametersTest,GetLowbdTestParams)136 TEST_F(AV1ConvolveParametersTest, GetLowbdTestParams) {
137   auto v = GetLowbdTestParams(av1_convolve_x_sr_c);
138   ASSERT_EQ(27U, v.size());
139   for (const auto &p : v) {
140     ASSERT_EQ(8, p.BitDepth());
141     // Needed (instead of ASSERT_EQ(...) since gtest does not
142     // have built in printing for arbitrary functions, which
143     // causes a compilation error.
144     bool same_fn = av1_convolve_x_sr_c == p.TestFunction();
145     ASSERT_TRUE(same_fn);
146   }
147 }
148 
149 #if CONFIG_AV1_HIGHBITDEPTH
150 template <typename T>
GetHighbdTestParams(T test_func)151 std::vector<TestParam<T>> GetHighbdTestParams(T test_func) {
152   return GetTestParams({ 10, 12 }, test_func);
153 }
154 
155 template <typename T>
BuildHighbdParams(T test_func)156 ::testing::internal::ParamGenerator<TestParam<T>> BuildHighbdParams(
157     T test_func) {
158   return ::testing::ValuesIn(GetHighbdTestParams(test_func));
159 }
160 
// The high bit-depth generator must yield 54 parameters, split evenly between
// 10 and 12 bits, all carrying the function that was passed in.
TEST_F(AV1ConvolveParametersTest, GetHighbdTestParams) {
  const auto v = GetHighbdTestParams(av1_highbd_convolve_x_sr_c);
  ASSERT_EQ(54U, v.size());
  int num_10 = 0;
  int num_12 = 0;
  for (size_t idx = 0; idx < v.size(); ++idx) {
    const auto &p = v[idx];
    ASSERT_TRUE(p.BitDepth() == 10 || p.BitDepth() == 12);
    const bool same_fn = (av1_highbd_convolve_x_sr_c == p.TestFunction());
    ASSERT_TRUE(same_fn);
    // Tally how many parameters were generated per bit depth.
    (p.BitDepth() == 10 ? num_10 : num_12) += 1;
  }
  ASSERT_EQ(num_10, num_12);
}
178 #endif  // CONFIG_AV1_HIGHBITDEPTH
179 
180 // AV1ConvolveTest is the base class that all convolve tests should derive from.
181 // It provides storage/methods for generating randomized buffers for both
182 // low bit-depth and high bit-depth, and setup/teardown methods for clearing
183 // system state. Implementors can get the bit-depth / block-size /
184 // test function by calling GetParam().
185 template <typename T>
186 class AV1ConvolveTest : public ::testing::TestWithParam<TestParam<T>> {
187  public:
188   ~AV1ConvolveTest() override = default;
189 
SetUp()190   void SetUp() override {
191     rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
192   }
193 
194   // Randomizes the 8-bit input buffer and returns a pointer to it. Note that
195   // the pointer is safe to use with an 8-tap filter. The stride can range
196   // from width to (width + kPadding). Also note that the pointer is to the
197   // same memory location.
198   static constexpr int kInputPadding = 12;
199 
200   // Get a pointer to a buffer with stride == width. Note that we must have
201   // the test param passed in explicitly -- the gtest framework does not
202   // support calling GetParam() within a templatized class.
203   // Note that FirstRandomInput8 always returns the same pointer -- if two
204   // inputs are needed, also use SecondRandomInput8.
FirstRandomInput8(const TestParam<T> & param)205   const uint8_t *FirstRandomInput8(const TestParam<T> &param) {
206     // Note we can't call GetParam() directly -- gtest does not support
207     // this for parameterized types.
208     return RandomInput8(input8_1_, param);
209   }
210 
SecondRandomInput8(const TestParam<T> & param)211   const uint8_t *SecondRandomInput8(const TestParam<T> &param) {
212     return RandomInput8(input8_2_, param);
213   }
214 
215   // Some of the intrinsics perform writes in 32 byte chunks. Moreover, some
216   // of the instrinsics assume that the stride is also a multiple of 32.
217   // To satisfy these constraints and also remain simple, output buffer strides
218   // are assumed MAX_SB_SIZE.
219   static constexpr int kOutputStride = MAX_SB_SIZE;
220 
221   // Check that two 8-bit output buffers are identical.
AssertOutputBufferEq(const uint8_t * p1,const uint8_t * p2,int width,int height)222   void AssertOutputBufferEq(const uint8_t *p1, const uint8_t *p2, int width,
223                             int height) {
224     ASSERT_TRUE(p1 != p2) << "Buffers must be at different memory locations";
225     for (int j = 0; j < height; ++j) {
226       if (memcmp(p1, p2, sizeof(*p1) * width) == 0) {
227         p1 += kOutputStride;
228         p2 += kOutputStride;
229         continue;
230       }
231       for (int i = 0; i < width; ++i) {
232         ASSERT_EQ(p1[i], p2[i])
233             << width << "x" << height << " Pixel mismatch at (" << i << ", "
234             << j << ")";
235       }
236     }
237   }
238 
239   // Check that two 16-bit output buffers are identical.
AssertOutputBufferEq(const uint16_t * p1,const uint16_t * p2,int width,int height)240   void AssertOutputBufferEq(const uint16_t *p1, const uint16_t *p2, int width,
241                             int height) {
242     ASSERT_TRUE(p1 != p2) << "Buffers must be in different memory locations";
243     for (int j = 0; j < height; ++j) {
244       if (memcmp(p1, p2, sizeof(*p1) * width) == 0) {
245         p1 += kOutputStride;
246         p2 += kOutputStride;
247         continue;
248       }
249       for (int i = 0; i < width; ++i) {
250         ASSERT_EQ(p1[i], p2[i])
251             << width << "x" << height << " Pixel mismatch at (" << i << ", "
252             << j << ")";
253       }
254     }
255   }
256 
257 #if CONFIG_AV1_HIGHBITDEPTH
258   // Note that the randomized values are capped by bit-depth.
FirstRandomInput16(const TestParam<T> & param)259   const uint16_t *FirstRandomInput16(const TestParam<T> &param) {
260     return RandomInput16(input16_1_, param);
261   }
262 
SecondRandomInput16(const TestParam<T> & param)263   const uint16_t *SecondRandomInput16(const TestParam<T> &param) {
264     return RandomInput16(input16_2_, param);
265   }
266 #endif
267 
268  private:
RandomInput8(uint8_t * p,const TestParam<T> & param)269   const uint8_t *RandomInput8(uint8_t *p, const TestParam<T> &param) {
270     EXPECT_EQ(8, param.BitDepth());
271     EXPECT_GE(MAX_SB_SIZE, param.Block().Width());
272     EXPECT_GE(MAX_SB_SIZE, param.Block().Height());
273     const int padded_width = param.Block().Width() + kInputPadding;
274     const int padded_height = param.Block().Height() + kInputPadding;
275     Randomize(p, padded_width * padded_height);
276     return p + (kInputPadding / 2) * padded_width + kInputPadding / 2;
277   }
278 
Randomize(uint8_t * p,int size)279   void Randomize(uint8_t *p, int size) {
280     for (int i = 0; i < size; ++i) {
281       p[i] = rnd_.Rand8();
282     }
283   }
284 
285 #if CONFIG_AV1_HIGHBITDEPTH
RandomInput16(uint16_t * p,const TestParam<T> & param)286   const uint16_t *RandomInput16(uint16_t *p, const TestParam<T> &param) {
287     // Check that this is only called with high bit-depths.
288     EXPECT_TRUE(param.BitDepth() == 10 || param.BitDepth() == 12);
289     EXPECT_GE(MAX_SB_SIZE, param.Block().Width());
290     EXPECT_GE(MAX_SB_SIZE, param.Block().Height());
291     const int padded_width = param.Block().Width() + kInputPadding;
292     const int padded_height = param.Block().Height() + kInputPadding;
293     Randomize(p, padded_width * padded_height, param.BitDepth());
294     return p + (kInputPadding / 2) * padded_width + kInputPadding / 2;
295   }
296 
Randomize(uint16_t * p,int size,int bit_depth)297   void Randomize(uint16_t *p, int size, int bit_depth) {
298     for (int i = 0; i < size; ++i) {
299       p[i] = rnd_.Rand16() & ((1 << bit_depth) - 1);
300     }
301   }
302 #endif
303 
304   static constexpr int kInputStride = MAX_SB_SIZE + kInputPadding;
305 
306   libaom_test::ACMRandom rnd_;
307   // Statically allocate all the memory that is needed for the tests. Note
308   // that we cannot allocate output memory here. It must use DECLARE_ALIGNED,
309   // which is a C99 feature and interacts badly with C++ member variables.
310   uint8_t input8_1_[kInputStride * kInputStride];
311   uint8_t input8_2_[kInputStride * kInputStride];
312 #if CONFIG_AV1_HIGHBITDEPTH
313   uint16_t input16_1_[kInputStride * kInputStride];
314   uint16_t input16_2_[kInputStride * kInputStride];
315 #endif
316 };
317 
318 ////////////////////////////////////////////////////////
319 // Single reference convolve-x functions (low bit-depth)
320 ////////////////////////////////////////////////////////
321 typedef void (*convolve_x_func)(const uint8_t *src, int src_stride,
322                                 uint8_t *dst, int dst_stride, int w, int h,
323                                 const InterpFilterParams *filter_params_x,
324                                 const int subpel_x_qn,
325                                 ConvolveParams *conv_params);
326 
327 class AV1ConvolveXTest : public AV1ConvolveTest<convolve_x_func> {
328  public:
RunTest()329   void RunTest() {
330     // Do not test the no-op filter.
331     for (int sub_x = 1; sub_x < 16; ++sub_x) {
332       for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
333            ++filter) {
334         InterpFilter f = static_cast<InterpFilter>(filter);
335         TestConvolve(sub_x, f);
336       }
337     }
338   }
339 
340  public:
SpeedTest()341   void SpeedTest() {
342     for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
343          ++filter) {
344       InterpFilter f = static_cast<InterpFilter>(filter);
345       TestConvolveSpeed(f, 10000);
346     }
347   }
348 
349  private:
TestConvolve(const int sub_x,const InterpFilter filter)350   void TestConvolve(const int sub_x, const InterpFilter filter) {
351     const int width = GetParam().Block().Width();
352     const int height = GetParam().Block().Height();
353 
354     const InterpFilterParams *filter_params_x =
355         av1_get_interp_filter_params_with_block_size(filter, width);
356     ConvolveParams conv_params1 =
357         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
358     const uint8_t *input = FirstRandomInput8(GetParam());
359     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
360     av1_convolve_x_sr_c(input, width, reference, kOutputStride, width, height,
361                         filter_params_x, sub_x, &conv_params1);
362 
363     ConvolveParams conv_params2 =
364         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
365     convolve_x_func test_func = GetParam().TestFunction();
366     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
367     test_func(input, width, test, kOutputStride, width, height, filter_params_x,
368               sub_x, &conv_params2);
369     AssertOutputBufferEq(reference, test, width, height);
370   }
371 
372  private:
TestConvolveSpeed(const InterpFilter filter,const int num_iters)373   void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
374     const int width = GetParam().Block().Width();
375     const int height = GetParam().Block().Height();
376 
377     const InterpFilterParams *filter_params_x =
378         av1_get_interp_filter_params_with_block_size(filter, width);
379     ConvolveParams conv_params1 =
380         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
381     const uint8_t *input = FirstRandomInput8(GetParam());
382     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
383 
384     aom_usec_timer timer;
385     aom_usec_timer_start(&timer);
386     for (int i = 0; i < num_iters; ++i) {
387       av1_convolve_x_sr_c(input, width, reference, kOutputStride, width, height,
388                           filter_params_x, 0, &conv_params1);
389     }
390     aom_usec_timer_mark(&timer);
391     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
392     ConvolveParams conv_params2 =
393         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
394     convolve_x_func test_func = GetParam().TestFunction();
395     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
396 
397     aom_usec_timer_start(&timer);
398     for (int i = 0; i < num_iters; ++i) {
399       test_func(input, width, test, kOutputStride, width, height,
400                 filter_params_x, 0, &conv_params2);
401     }
402     aom_usec_timer_mark(&timer);
403     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
404     printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
405            time2, time1 / time2);
406   }
407 };
408 
// Correctness test: compares the parameter's function against
// av1_convolve_x_sr_c.
TEST_P(AV1ConvolveXTest, RunTest) { RunTest(); }

// Timing comparison. The DISABLED_ prefix makes gtest skip it unless disabled
// tests are explicitly requested.
TEST_P(AV1ConvolveXTest, DISABLED_SpeedTest) { SpeedTest(); }

// One instantiation per implementation. Each suite is named after its
// architecture so the testing infrastructure can filter it (see the naming
// note near the top of this file). The C instantiation runs the reference
// against itself.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXTest,
                         BuildLowbdParams(av1_convolve_x_sr_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveXTest,
                         BuildLowbdParams(av1_convolve_x_sr_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXTest,
                         BuildLowbdParams(av1_convolve_x_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXTest,
                         BuildLowbdParams(av1_convolve_x_sr_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1ConvolveXTest,
                         BuildLowbdParams(av1_convolve_x_sr_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1ConvolveXTest,
                         BuildLowbdParams(av1_convolve_x_sr_neon_i8mm));
#endif
440 
441 ////////////////////////////////////////////////////////////////
442 // Single reference convolve-x IntraBC functions (low bit-depth)
443 ////////////////////////////////////////////////////////////////
444 
445 class AV1ConvolveXIntraBCTest : public AV1ConvolveTest<convolve_x_func> {
446  public:
RunTest()447   void RunTest() {
448     // IntraBC functions only operate for subpel_x_qn = 8.
449     constexpr int kSubX = 8;
450     const int width = GetParam().Block().Width();
451     const int height = GetParam().Block().Height();
452     const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
453     const uint8_t *input = FirstRandomInput8(GetParam());
454 
455     ConvolveParams conv_params1 =
456         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
457     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
458     // Use a stride different from width to avoid potential storing errors that
459     // would go undetected. The input buffer is filled using a padding of 12, so
460     // the stride can be anywhere between width and width + 12.
461     av1_convolve_x_sr_intrabc_c(input, width + 2, reference, kOutputStride,
462                                 width, height, filter_params_x, kSubX,
463                                 &conv_params1);
464 
465     ConvolveParams conv_params2 =
466         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
467     convolve_x_func test_func = GetParam().TestFunction();
468     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
469     test_func(input, width + 2, test, kOutputStride, width, height,
470               filter_params_x, kSubX, &conv_params2);
471 
472     AssertOutputBufferEq(reference, test, width, height);
473   }
474 
SpeedTest()475   void SpeedTest() {
476     constexpr int kNumIters = 10000;
477     const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
478     const int width = GetParam().Block().Width();
479     const int height = GetParam().Block().Height();
480     const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
481     const uint8_t *input = FirstRandomInput8(GetParam());
482 
483     ConvolveParams conv_params1 =
484         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
485     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
486     aom_usec_timer timer;
487     aom_usec_timer_start(&timer);
488     for (int i = 0; i < kNumIters; ++i) {
489       av1_convolve_x_sr_intrabc_c(input, width, reference, kOutputStride, width,
490                                   height, filter_params_x, 0, &conv_params1);
491     }
492     aom_usec_timer_mark(&timer);
493     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
494 
495     ConvolveParams conv_params2 =
496         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
497     convolve_x_func test_func = GetParam().TestFunction();
498     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
499     aom_usec_timer_start(&timer);
500     for (int i = 0; i < kNumIters; ++i) {
501       test_func(input, width, test, kOutputStride, width, height,
502                 filter_params_x, 0, &conv_params2);
503     }
504     aom_usec_timer_mark(&timer);
505     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
506 
507     printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
508            time2, time1 / time2);
509   }
510 };
511 
// Correctness test: compares the parameter's function against
// av1_convolve_x_sr_intrabc_c.
TEST_P(AV1ConvolveXIntraBCTest, RunTest) { RunTest(); }

// Timing comparison. The DISABLED_ prefix makes gtest skip it unless disabled
// tests are explicitly requested.
TEST_P(AV1ConvolveXIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }

// One instantiation per implementation, named after its architecture. The C
// instantiation runs the reference against itself.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXIntraBCTest,
                         BuildLowbdParams(av1_convolve_x_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXIntraBCTest,
                         BuildLowbdParams(av1_convolve_x_sr_intrabc_neon));
#endif
523 
524 #if CONFIG_AV1_HIGHBITDEPTH
525 /////////////////////////////////////////////////////////
526 // Single reference convolve-x functions (high bit-depth)
527 /////////////////////////////////////////////////////////
528 typedef void (*highbd_convolve_x_func)(
529     const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
530     int h, const InterpFilterParams *filter_params_x, const int subpel_x_qn,
531     ConvolveParams *conv_params, int bd);
532 
533 class AV1ConvolveXHighbdTest : public AV1ConvolveTest<highbd_convolve_x_func> {
534  public:
RunTest()535   void RunTest() {
536     // Do not test the no-op filter.
537     for (int sub_x = 1; sub_x < 16; ++sub_x) {
538       for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
539            ++filter) {
540         InterpFilter f = static_cast<InterpFilter>(filter);
541         TestConvolve(sub_x, f);
542       }
543     }
544   }
545 
546  public:
SpeedTest()547   void SpeedTest() {
548     for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
549          ++filter) {
550       InterpFilter f = static_cast<InterpFilter>(filter);
551       TestConvolveSpeed(f, 10000);
552     }
553   }
554 
555  private:
TestConvolve(const int sub_x,const InterpFilter filter)556   void TestConvolve(const int sub_x, const InterpFilter filter) {
557     const int width = GetParam().Block().Width();
558     const int height = GetParam().Block().Height();
559     const int bit_depth = GetParam().BitDepth();
560     const InterpFilterParams *filter_params_x =
561         av1_get_interp_filter_params_with_block_size(filter, width);
562     ConvolveParams conv_params1 =
563         get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
564     const uint16_t *input = FirstRandomInput16(GetParam());
565     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
566     av1_highbd_convolve_x_sr_c(input, width, reference, kOutputStride, width,
567                                height, filter_params_x, sub_x, &conv_params1,
568                                bit_depth);
569 
570     ConvolveParams conv_params2 =
571         get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
572     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
573     GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
574                               filter_params_x, sub_x, &conv_params2, bit_depth);
575     AssertOutputBufferEq(reference, test, width, height);
576   }
577 
578  private:
TestConvolveSpeed(const InterpFilter filter,const int num_iters)579   void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
580     const int width = GetParam().Block().Width();
581     const int height = GetParam().Block().Height();
582     const int bit_depth = GetParam().BitDepth();
583     const InterpFilterParams *filter_params_x =
584         av1_get_interp_filter_params_with_block_size(filter, width);
585     ConvolveParams conv_params1 =
586         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
587     const uint16_t *input = FirstRandomInput16(GetParam());
588     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
589 
590     aom_usec_timer timer;
591     aom_usec_timer_start(&timer);
592     for (int i = 0; i < num_iters; ++i) {
593       av1_highbd_convolve_x_sr_c(input, width, reference, kOutputStride, width,
594                                  height, filter_params_x, 0, &conv_params1,
595                                  bit_depth);
596     }
597     aom_usec_timer_mark(&timer);
598     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
599     ConvolveParams conv_params2 =
600         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
601     highbd_convolve_x_func test_func = GetParam().TestFunction();
602     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
603 
604     aom_usec_timer_start(&timer);
605     for (int i = 0; i < num_iters; ++i) {
606       test_func(input, width, test, kOutputStride, width, height,
607                 filter_params_x, 0, &conv_params2, bit_depth);
608     }
609     aom_usec_timer_mark(&timer);
610     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
611     printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
612            time2, time1 / time2);
613   }
614 };
615 
// Correctness test: compares the parameter's function against
// av1_highbd_convolve_x_sr_c.
TEST_P(AV1ConvolveXHighbdTest, RunTest) { RunTest(); }

// Timing comparison. The DISABLED_ prefix makes gtest skip it unless disabled
// tests are explicitly requested.
TEST_P(AV1ConvolveXHighbdTest, DISABLED_SpeedTest) { SpeedTest(); }

// One instantiation per implementation, named after its architecture. The C
// instantiation runs the reference against itself.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_c));

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1ConvolveXHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_ssse3));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1ConvolveXHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_sve2));
#endif
642 
643 /////////////////////////////////////////////////////////////////
644 // Single reference convolve-x IntraBC functions (high bit-depth)
645 /////////////////////////////////////////////////////////////////
646 
647 class AV1ConvolveXHighbdIntraBCTest
648     : public AV1ConvolveTest<highbd_convolve_x_func> {
649  public:
RunTest()650   void RunTest() {
651     // IntraBC functions only operate for subpel_x_qn = 8.
652     constexpr int kSubX = 8;
653     const int width = GetParam().Block().Width();
654     const int height = GetParam().Block().Height();
655     const int bit_depth = GetParam().BitDepth();
656     const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
657     const uint16_t *input = FirstRandomInput16(GetParam());
658 
659     ConvolveParams conv_params1 =
660         get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
661     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
662     // Use a stride different from width to avoid potential storing errors that
663     // would go undetected. The input buffer is filled using a padding of 12, so
664     // the stride can be anywhere between width and width + 12.
665     av1_highbd_convolve_x_sr_intrabc_c(
666         input, width + 2, reference, kOutputStride, width, height,
667         filter_params_x, kSubX, &conv_params1, bit_depth);
668 
669     ConvolveParams conv_params2 =
670         get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
671     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
672     GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
673                               height, filter_params_x, kSubX, &conv_params2,
674                               bit_depth);
675 
676     AssertOutputBufferEq(reference, test, width, height);
677   }
678 
SpeedTest()679   void SpeedTest() {
680     constexpr int kNumIters = 10000;
681     const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
682     const int width = GetParam().Block().Width();
683     const int height = GetParam().Block().Height();
684     const int bit_depth = GetParam().BitDepth();
685     const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
686     const uint16_t *input = FirstRandomInput16(GetParam());
687 
688     ConvolveParams conv_params1 =
689         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
690     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
691     aom_usec_timer timer;
692     aom_usec_timer_start(&timer);
693     for (int i = 0; i < kNumIters; ++i) {
694       av1_highbd_convolve_x_sr_intrabc_c(input, width, reference, kOutputStride,
695                                          width, height, filter_params_x, 0,
696                                          &conv_params1, bit_depth);
697     }
698     aom_usec_timer_mark(&timer);
699     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
700 
701     ConvolveParams conv_params2 =
702         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
703     highbd_convolve_x_func test_func = GetParam().TestFunction();
704     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
705     aom_usec_timer_start(&timer);
706     for (int i = 0; i < kNumIters; ++i) {
707       test_func(input, width, test, kOutputStride, width, height,
708                 filter_params_x, 0, &conv_params2, bit_depth);
709     }
710     aom_usec_timer_mark(&timer);
711     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
712 
713     printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
714            time2, time1 / time2);
715   }
716 };
717 
// Correctness test: output of the function under test must equal the C
// reference output.
TEST_P(AV1ConvolveXHighbdIntraBCTest, RunTest) {
  RunTest();
}

// Benchmark. The DISABLED_ prefix keeps it out of normal runs; pass
// --gtest_also_run_disabled_tests to execute it.
TEST_P(AV1ConvolveXHighbdIntraBCTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// C reference implementation.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdIntraBCTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_c));

// Neon implementation, only when the build enables it.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveXHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_neon));
#endif
730 
731 #endif  // CONFIG_AV1_HIGHBITDEPTH
732 
733 ////////////////////////////////////////////////////////
734 // Single reference convolve-y functions (low bit-depth)
735 ////////////////////////////////////////////////////////
736 typedef void (*convolve_y_func)(const uint8_t *src, int src_stride,
737                                 uint8_t *dst, int dst_stride, int w, int h,
738                                 const InterpFilterParams *filter_params_y,
739                                 const int subpel_y_qn);
740 
741 class AV1ConvolveYTest : public AV1ConvolveTest<convolve_y_func> {
742  public:
RunTest()743   void RunTest() {
744     // Do not test the no-op filter.
745     for (int sub_y = 1; sub_y < 16; ++sub_y) {
746       for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
747            ++filter) {
748         InterpFilter f = static_cast<InterpFilter>(filter);
749         TestConvolve(sub_y, f);
750       }
751     }
752   }
753 
754  public:
SpeedTest()755   void SpeedTest() {
756     for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
757          ++filter) {
758       InterpFilter f = static_cast<InterpFilter>(filter);
759       TestConvolveSpeed(f, 10000);
760     }
761   }
762 
763  private:
TestConvolve(const int sub_y,const InterpFilter filter)764   void TestConvolve(const int sub_y, const InterpFilter filter) {
765     const int width = GetParam().Block().Width();
766     const int height = GetParam().Block().Height();
767 
768     const InterpFilterParams *filter_params_y =
769         av1_get_interp_filter_params_with_block_size(filter, height);
770     const uint8_t *input = FirstRandomInput8(GetParam());
771     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
772     av1_convolve_y_sr_c(input, width, reference, kOutputStride, width, height,
773                         filter_params_y, sub_y);
774     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
775     GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
776                               filter_params_y, sub_y);
777     AssertOutputBufferEq(reference, test, width, height);
778   }
779 
780  private:
TestConvolveSpeed(const InterpFilter filter,const int num_iters)781   void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
782     const int width = GetParam().Block().Width();
783     const int height = GetParam().Block().Height();
784 
785     const InterpFilterParams *filter_params_y =
786         av1_get_interp_filter_params_with_block_size(filter, height);
787     const uint8_t *input = FirstRandomInput8(GetParam());
788     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
789 
790     aom_usec_timer timer;
791     aom_usec_timer_start(&timer);
792     for (int i = 0; i < num_iters; ++i) {
793       av1_convolve_y_sr_c(input, width, reference, kOutputStride, width, height,
794                           filter_params_y, 0);
795     }
796     aom_usec_timer_mark(&timer);
797     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
798 
799     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
800 
801     aom_usec_timer_start(&timer);
802     for (int i = 0; i < num_iters; ++i) {
803       GetParam().TestFunction()(input, width, test, kOutputStride, width,
804                                 height, filter_params_y, 0);
805     }
806     aom_usec_timer_mark(&timer);
807     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
808     printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
809            time2, time1 / time2);
810   }
811 };
812 
// Correctness test: output of the function under test must equal the C
// reference output.
TEST_P(AV1ConvolveYTest, RunTest) {
  RunTest();
}

// Benchmark. The DISABLED_ prefix keeps it out of normal runs; pass
// --gtest_also_run_disabled_tests to execute it.
TEST_P(AV1ConvolveYTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// One instantiation per implementation; each SIMD variant is registered only
// when the matching HAVE_* flag is enabled for the build.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_sse2));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_neon));
#endif  // HAVE_NEON

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_neon_dotprod));
#endif  // HAVE_NEON_DOTPROD

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_neon_i8mm));
#endif  // HAVE_NEON_I8MM
844 
845 ////////////////////////////////////////////////////////////////
846 // Single reference convolve-y IntraBC functions (low bit-depth)
847 ////////////////////////////////////////////////////////////////
848 
849 class AV1ConvolveYIntraBCTest : public AV1ConvolveTest<convolve_y_func> {
850  public:
RunTest()851   void RunTest() {
852     // IntraBC functions only operate for subpel_y_qn = 8.
853     constexpr int kSubY = 8;
854     const int width = GetParam().Block().Width();
855     const int height = GetParam().Block().Height();
856     const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
857     const uint8_t *input = FirstRandomInput8(GetParam());
858 
859     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
860     // Use a stride different from width to avoid potential storing errors that
861     // would go undetected. The input buffer is filled using a padding of 12, so
862     // the stride can be anywhere between width and width + 12.
863     av1_convolve_y_sr_intrabc_c(input, width + 2, reference, kOutputStride,
864                                 width, height, filter_params_y, kSubY);
865 
866     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
867     GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
868                               height, filter_params_y, kSubY);
869 
870     AssertOutputBufferEq(reference, test, width, height);
871   }
872 
SpeedTest()873   void SpeedTest() {
874     constexpr int kNumIters = 10000;
875     const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
876     const int width = GetParam().Block().Width();
877     const int height = GetParam().Block().Height();
878 
879     const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
880     const uint8_t *input = FirstRandomInput8(GetParam());
881     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
882 
883     aom_usec_timer timer;
884     aom_usec_timer_start(&timer);
885     for (int i = 0; i < kNumIters; ++i) {
886       av1_convolve_y_sr_intrabc_c(input, width, reference, kOutputStride, width,
887                                   height, filter_params_y, 0);
888     }
889     aom_usec_timer_mark(&timer);
890     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
891 
892     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
893     convolve_y_func test_func = GetParam().TestFunction();
894     aom_usec_timer_start(&timer);
895     for (int i = 0; i < kNumIters; ++i) {
896       test_func(input, width, test, kOutputStride, width, height,
897                 filter_params_y, 0);
898     }
899     aom_usec_timer_mark(&timer);
900     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
901 
902     printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
903            time2, time1 / time2);
904   }
905 };
906 
// Correctness test: output of the function under test must equal the C
// reference output.
TEST_P(AV1ConvolveYIntraBCTest, RunTest) {
  RunTest();
}

// Benchmark. The DISABLED_ prefix keeps it out of normal runs; pass
// --gtest_also_run_disabled_tests to execute it.
TEST_P(AV1ConvolveYIntraBCTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// C reference implementation.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYIntraBCTest,
                         BuildLowbdParams(av1_convolve_y_sr_intrabc_c));

// Neon implementation, only when the build enables it.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYIntraBCTest,
                         BuildLowbdParams(av1_convolve_y_sr_intrabc_neon));
#endif  // HAVE_NEON
918 
919 #if CONFIG_AV1_HIGHBITDEPTH
920 /////////////////////////////////////////////////////////
921 // Single reference convolve-y functions (high bit-depth)
922 /////////////////////////////////////////////////////////
923 typedef void (*highbd_convolve_y_func)(
924     const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
925     int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn,
926     int bd);
927 
928 class AV1ConvolveYHighbdTest : public AV1ConvolveTest<highbd_convolve_y_func> {
929  public:
RunTest()930   void RunTest() {
931     // Do not test the no-op filter.
932     for (int sub_y = 1; sub_y < 16; ++sub_y) {
933       for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
934            ++filter) {
935         InterpFilter f = static_cast<InterpFilter>(filter);
936         TestConvolve(sub_y, f);
937       }
938     }
939   }
940 
941  public:
SpeedTest()942   void SpeedTest() {
943     for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
944          ++filter) {
945       InterpFilter f = static_cast<InterpFilter>(filter);
946       TestConvolveSpeed(f, 10000);
947     }
948   }
949 
950  private:
TestConvolve(const int sub_y,const InterpFilter filter)951   void TestConvolve(const int sub_y, const InterpFilter filter) {
952     const int width = GetParam().Block().Width();
953     const int height = GetParam().Block().Height();
954     const int bit_depth = GetParam().BitDepth();
955     const InterpFilterParams *filter_params_y =
956         av1_get_interp_filter_params_with_block_size(filter, height);
957     const uint16_t *input = FirstRandomInput16(GetParam());
958     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
959     av1_highbd_convolve_y_sr_c(input, width, reference, kOutputStride, width,
960                                height, filter_params_y, sub_y, bit_depth);
961     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
962     GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
963                               filter_params_y, sub_y, bit_depth);
964     AssertOutputBufferEq(reference, test, width, height);
965   }
966 
967  private:
TestConvolveSpeed(const InterpFilter filter,const int num_iters)968   void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
969     const int width = GetParam().Block().Width();
970     const int height = GetParam().Block().Height();
971     const int bit_depth = GetParam().BitDepth();
972     const InterpFilterParams *filter_params_y =
973         av1_get_interp_filter_params_with_block_size(filter, width);
974     const uint16_t *input = FirstRandomInput16(GetParam());
975     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
976 
977     aom_usec_timer timer;
978     aom_usec_timer_start(&timer);
979     for (int i = 0; i < num_iters; ++i) {
980       av1_highbd_convolve_y_sr_c(input, width, reference, kOutputStride, width,
981                                  height, filter_params_y, 0, bit_depth);
982     }
983     aom_usec_timer_mark(&timer);
984     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
985     highbd_convolve_y_func test_func = GetParam().TestFunction();
986     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
987 
988     aom_usec_timer_start(&timer);
989     for (int i = 0; i < num_iters; ++i) {
990       test_func(input, width, test, kOutputStride, width, height,
991                 filter_params_y, 0, bit_depth);
992     }
993     aom_usec_timer_mark(&timer);
994     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
995     printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
996            time2, time1 / time2);
997   }
998 };
999 
// Correctness test: output of the function under test must equal the C
// reference output.
TEST_P(AV1ConvolveYHighbdTest, RunTest) {
  RunTest();
}

// Benchmark. The DISABLED_ prefix keeps it out of normal runs; pass
// --gtest_also_run_disabled_tests to execute it.
TEST_P(AV1ConvolveYHighbdTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// One instantiation per implementation; each SIMD variant is registered only
// when the matching HAVE_* flag is enabled for the build.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_c));

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_ssse3));
#endif  // HAVE_SSSE3

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_neon));
#endif  // HAVE_NEON

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_sve2));
#endif  // HAVE_SVE2
1026 
1027 /////////////////////////////////////////////////////////////////
1028 // Single reference convolve-y IntraBC functions (high bit-depth)
1029 /////////////////////////////////////////////////////////////////
1030 
1031 class AV1ConvolveYHighbdIntraBCTest
1032     : public AV1ConvolveTest<highbd_convolve_y_func> {
1033  public:
RunTest()1034   void RunTest() {
1035     // IntraBC functions only operate for subpel_y_qn = 8.
1036     constexpr int kSubY = 8;
1037     const int width = GetParam().Block().Width();
1038     const int height = GetParam().Block().Height();
1039     const int bit_depth = GetParam().BitDepth();
1040     const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
1041     const uint16_t *input = FirstRandomInput16(GetParam());
1042 
1043     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1044     // Use a stride different from width to avoid potential storing errors that
1045     // would go undetected. The input buffer is filled using a padding of 12, so
1046     // the stride can be anywhere between width and width + 12.
1047     av1_highbd_convolve_y_sr_intrabc_c(input, width + 2, reference,
1048                                        kOutputStride, width, height,
1049                                        filter_params_y, kSubY, bit_depth);
1050 
1051     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1052     GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
1053                               height, filter_params_y, kSubY, bit_depth);
1054 
1055     AssertOutputBufferEq(reference, test, width, height);
1056   }
1057 
SpeedTest()1058   void SpeedTest() {
1059     constexpr int kNumIters = 10000;
1060     const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
1061     const int width = GetParam().Block().Width();
1062     const int height = GetParam().Block().Height();
1063     const int bit_depth = GetParam().BitDepth();
1064     const InterpFilterParams *filter_params_y =
1065         av1_get_interp_filter_params_with_block_size(filter, width);
1066     const uint16_t *input = FirstRandomInput16(GetParam());
1067 
1068     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1069     aom_usec_timer timer;
1070     aom_usec_timer_start(&timer);
1071     for (int i = 0; i < kNumIters; ++i) {
1072       av1_highbd_convolve_y_sr_intrabc_c(input, width, reference, kOutputStride,
1073                                          width, height, filter_params_y, 0,
1074                                          bit_depth);
1075     }
1076     aom_usec_timer_mark(&timer);
1077     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1078 
1079     highbd_convolve_y_func test_func = GetParam().TestFunction();
1080     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1081     aom_usec_timer_start(&timer);
1082     for (int i = 0; i < kNumIters; ++i) {
1083       test_func(input, width, test, kOutputStride, width, height,
1084                 filter_params_y, 0, bit_depth);
1085     }
1086     aom_usec_timer_mark(&timer);
1087     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1088 
1089     printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
1090            time2, time1 / time2);
1091   }
1092 };
1093 
// Correctness test: output of the function under test must equal the C
// reference output.
TEST_P(AV1ConvolveYHighbdIntraBCTest, RunTest) {
  RunTest();
}

// Benchmark. The DISABLED_ prefix keeps it out of normal runs; pass
// --gtest_also_run_disabled_tests to execute it.
TEST_P(AV1ConvolveYHighbdIntraBCTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// C reference implementation.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdIntraBCTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_c));

// Neon implementation, only when the build enables it.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveYHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_neon));
#endif  // HAVE_NEON
1106 
1107 #endif  // CONFIG_AV1_HIGHBITDEPTH
1108 
1109 //////////////////////////////////////////////////////////////
1110 // Single reference convolve-copy functions (low bit-depth)
1111 //////////////////////////////////////////////////////////////
// Signature shared by the low bit-depth convolve-copy functions exercised by
// the tests below.
using convolve_copy_func = void (*)(const uint8_t *src, ptrdiff_t src_stride,
                                    uint8_t *dst, ptrdiff_t dst_stride, int w,
                                    int h);
1115 
1116 class AV1ConvolveCopyTest : public AV1ConvolveTest<convolve_copy_func> {
1117  public:
RunTest()1118   void RunTest() {
1119     const int width = GetParam().Block().Width();
1120     const int height = GetParam().Block().Height();
1121     const uint8_t *input = FirstRandomInput8(GetParam());
1122     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1123     aom_convolve_copy_c(input, width, reference, kOutputStride, width, height);
1124     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1125     GetParam().TestFunction()(input, width, test, kOutputStride, width, height);
1126     AssertOutputBufferEq(reference, test, width, height);
1127   }
1128 };
1129 
1130 // Note that even though these are AOM convolve functions, we are using the
1131 // newer AV1 test framework.
TEST_P(AV1ConvolveCopyTest,RunTest)1132 TEST_P(AV1ConvolveCopyTest, RunTest) { RunTest(); }
1133 
1134 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveCopyTest,
1135                          BuildLowbdParams(aom_convolve_copy_c));
1136 
1137 #if HAVE_SSE2
1138 INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveCopyTest,
1139                          BuildLowbdParams(aom_convolve_copy_sse2));
1140 #endif
1141 
1142 #if HAVE_AVX2
1143 INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveCopyTest,
1144                          BuildLowbdParams(aom_convolve_copy_avx2));
1145 #endif
1146 
1147 #if HAVE_NEON
1148 INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveCopyTest,
1149                          BuildLowbdParams(aom_convolve_copy_neon));
1150 #endif
1151 
1152 #if CONFIG_AV1_HIGHBITDEPTH
1153 ///////////////////////////////////////////////////////////////
1154 // Single reference convolve-copy functions (high bit-depth)
1155 ///////////////////////////////////////////////////////////////
// Signature shared by the high bit-depth convolve-copy functions exercised by
// the tests below.
using highbd_convolve_copy_func = void (*)(const uint16_t *src,
                                           ptrdiff_t src_stride, uint16_t *dst,
                                           ptrdiff_t dst_stride, int w, int h);
1159 
1160 class AV1ConvolveCopyHighbdTest
1161     : public AV1ConvolveTest<highbd_convolve_copy_func> {
1162  public:
RunTest()1163   void RunTest() {
1164     const BlockSize &block = GetParam().Block();
1165     const int width = block.Width();
1166     const int height = block.Height();
1167     const uint16_t *input = FirstRandomInput16(GetParam());
1168     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1169     aom_highbd_convolve_copy_c(input, width, reference, kOutputStride, width,
1170                                height);
1171     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1172     GetParam().TestFunction()(input, width, test, kOutputStride, width, height);
1173     AssertOutputBufferEq(reference, test, width, height);
1174   }
1175 };
1176 
// Correctness test: output of the function under test must equal the C
// reference output.
TEST_P(AV1ConvolveCopyHighbdTest, RunTest) {
  RunTest();
}

// One instantiation per implementation; each SIMD variant is registered only
// when the matching HAVE_* flag is enabled for the build.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_sse2));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_neon));
#endif  // HAVE_NEON
1196 
1197 #endif  // CONFIG_AV1_HIGHBITDEPTH
1198 
1199 /////////////////////////////////////////////////////////
1200 // Single reference convolve-2D functions (low bit-depth)
1201 /////////////////////////////////////////////////////////
1202 typedef void (*convolve_2d_func)(const uint8_t *src, int src_stride,
1203                                  uint8_t *dst, int dst_stride, int w, int h,
1204                                  const InterpFilterParams *filter_params_x,
1205                                  const InterpFilterParams *filter_params_y,
1206                                  const int subpel_x_qn, const int subpel_y_qn,
1207                                  ConvolveParams *conv_params);
1208 
1209 class AV1Convolve2DTest : public AV1ConvolveTest<convolve_2d_func> {
1210  public:
RunTest()1211   void RunTest() {
1212     // Do not test the no-op filter.
1213     for (int sub_x = 1; sub_x < 16; ++sub_x) {
1214       for (int sub_y = 1; sub_y < 16; ++sub_y) {
1215         for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
1216           for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
1217             if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
1218                 ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
1219               continue;
1220             TestConvolve(static_cast<InterpFilter>(h_f),
1221                          static_cast<InterpFilter>(v_f), sub_x, sub_y);
1222           }
1223         }
1224       }
1225     }
1226   }
1227 
1228  public:
SpeedTest()1229   void SpeedTest() {
1230     for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
1231       for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
1232         if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
1233             ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
1234           continue;
1235         TestConvolveSpeed(static_cast<InterpFilter>(h_f),
1236                           static_cast<InterpFilter>(v_f), 10000);
1237       }
1238     }
1239   }
1240 
1241  private:
TestConvolve(const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y)1242   void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
1243                     const int sub_x, const int sub_y) {
1244     const int width = GetParam().Block().Width();
1245     const int height = GetParam().Block().Height();
1246     const InterpFilterParams *filter_params_x =
1247         av1_get_interp_filter_params_with_block_size(h_f, width);
1248     const InterpFilterParams *filter_params_y =
1249         av1_get_interp_filter_params_with_block_size(v_f, height);
1250     const uint8_t *input = FirstRandomInput8(GetParam());
1251     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1252     ConvolveParams conv_params1 =
1253         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1254     av1_convolve_2d_sr_c(input, width, reference, kOutputStride, width, height,
1255                          filter_params_x, filter_params_y, sub_x, sub_y,
1256                          &conv_params1);
1257     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1258     ConvolveParams conv_params2 =
1259         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1260     GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
1261                               filter_params_x, filter_params_y, sub_x, sub_y,
1262                               &conv_params2);
1263     AssertOutputBufferEq(reference, test, width, height);
1264   }
1265 
1266  private:
TestConvolveSpeed(const InterpFilter h_f,const InterpFilter v_f,int num_iters)1267   void TestConvolveSpeed(const InterpFilter h_f, const InterpFilter v_f,
1268                          int num_iters) {
1269     const int width = GetParam().Block().Width();
1270     const int height = GetParam().Block().Height();
1271     const InterpFilterParams *filter_params_x =
1272         av1_get_interp_filter_params_with_block_size(h_f, width);
1273     const InterpFilterParams *filter_params_y =
1274         av1_get_interp_filter_params_with_block_size(v_f, height);
1275     const uint8_t *input = FirstRandomInput8(GetParam());
1276     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1277     ConvolveParams conv_params1 =
1278         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1279     aom_usec_timer timer;
1280     aom_usec_timer_start(&timer);
1281     for (int i = 0; i < num_iters; ++i) {
1282       av1_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
1283                            height, filter_params_x, filter_params_y, 0, 0,
1284                            &conv_params1);
1285     }
1286     aom_usec_timer_mark(&timer);
1287     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1288     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1289     ConvolveParams conv_params2 =
1290         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1291     aom_usec_timer_start(&timer);
1292     for (int i = 0; i < num_iters; ++i) {
1293       GetParam().TestFunction()(input, width, test, kOutputStride, width,
1294                                 height, filter_params_x, filter_params_y, 0, 0,
1295                                 &conv_params2);
1296     }
1297     aom_usec_timer_mark(&timer);
1298     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1299     printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
1300            time1, time2, time1 / time2);
1301   }
1302 };
1303 
// Correctness test: output of the function under test must equal the C
// reference output.
TEST_P(AV1Convolve2DTest, RunTest) {
  RunTest();
}

// Benchmark. The DISABLED_ prefix keeps it out of normal runs; pass
// --gtest_also_run_disabled_tests to execute it.
TEST_P(AV1Convolve2DTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// One instantiation per implementation; each SIMD variant is registered only
// when the matching HAVE_* flag is enabled for the build.
INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_sse2));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_neon));
#endif  // HAVE_NEON

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_neon_dotprod));
#endif  // HAVE_NEON_DOTPROD

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_neon_i8mm));
#endif  // HAVE_NEON_I8MM

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_sve2));
#endif  // HAVE_SVE2
1340 
1341 /////////////////////////////////////////////////////////////////
1342 // Single reference convolve-2D IntraBC functions (low bit-depth)
1343 /////////////////////////////////////////////////////////////////
1344 
1345 class AV1Convolve2DIntraBCTest : public AV1ConvolveTest<convolve_2d_func> {
1346  public:
RunTest()1347   void RunTest() {
1348     // IntraBC functions only operate for subpel_x_qn = 8 and subpel_y_qn = 8.
1349     constexpr int kSubX = 8;
1350     constexpr int kSubY = 8;
1351     const int width = GetParam().Block().Width();
1352     const int height = GetParam().Block().Height();
1353     const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
1354     const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
1355     const uint8_t *input = FirstRandomInput8(GetParam());
1356 
1357     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1358     ConvolveParams conv_params1 =
1359         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1360     // Use a stride different from width to avoid potential storing errors that
1361     // would go undetected. The input buffer is filled using a padding of 12, so
1362     // the stride can be anywhere between width and width + 12.
1363     av1_convolve_2d_sr_intrabc_c(input, width + 2, reference, kOutputStride,
1364                                  width, height, filter_params_x,
1365                                  filter_params_y, kSubX, kSubY, &conv_params1);
1366 
1367     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1368     ConvolveParams conv_params2 =
1369         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1370     GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
1371                               height, filter_params_x, filter_params_y, kSubX,
1372                               kSubY, &conv_params2);
1373 
1374     AssertOutputBufferEq(reference, test, width, height);
1375   }
1376 
SpeedTest()1377   void SpeedTest() {
1378     constexpr int kNumIters = 10000;
1379     const InterpFilter h_f = static_cast<InterpFilter>(BILINEAR);
1380     const InterpFilter v_f = static_cast<InterpFilter>(BILINEAR);
1381     const int width = GetParam().Block().Width();
1382     const int height = GetParam().Block().Height();
1383     const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
1384     const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
1385     const uint8_t *input = FirstRandomInput8(GetParam());
1386 
1387     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1388     ConvolveParams conv_params1 =
1389         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1390     aom_usec_timer timer;
1391     aom_usec_timer_start(&timer);
1392     for (int i = 0; i < kNumIters; ++i) {
1393       av1_convolve_2d_sr_intrabc_c(input, width, reference, kOutputStride,
1394                                    width, height, filter_params_x,
1395                                    filter_params_y, 8, 8, &conv_params1);
1396     }
1397     aom_usec_timer_mark(&timer);
1398     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1399 
1400     convolve_2d_func test_func = GetParam().TestFunction();
1401     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1402     ConvolveParams conv_params2 =
1403         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1404     aom_usec_timer_start(&timer);
1405     for (int i = 0; i < kNumIters; ++i) {
1406       test_func(input, width, test, kOutputStride, width, height,
1407                 filter_params_x, filter_params_y, 8, 8, &conv_params2);
1408     }
1409     aom_usec_timer_mark(&timer);
1410     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1411 
1412     printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
1413            time1, time2, time1 / time2);
1414   }
1415 };
1416 
// Correctness test: compares the registered function with the C reference.
TEST_P(AV1Convolve2DIntraBCTest, RunTest) {
  RunTest();
}

// Benchmark. The DISABLED_ prefix keeps it out of normal test runs.
TEST_P(AV1Convolve2DIntraBCTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// Instantiations are prefixed with the architecture name so that they can be
// selected or excluded with --gtest_filter.
INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DIntraBCTest,
                         BuildLowbdParams(av1_convolve_2d_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DIntraBCTest,
                         BuildLowbdParams(av1_convolve_2d_sr_intrabc_neon));
#endif  // HAVE_NEON
1428 
1429 #if CONFIG_AV1_HIGHBITDEPTH
1430 //////////////////////////////////////////////////////////
1431 // Single reference convolve-2d functions (high bit-depth)
1432 //////////////////////////////////////////////////////////
1433 
1434 typedef void (*highbd_convolve_2d_func)(
1435     const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
1436     int h, const InterpFilterParams *filter_params_x,
1437     const InterpFilterParams *filter_params_y, const int subpel_x_qn,
1438     const int subpel_y_qn, ConvolveParams *conv_params, int bd);
1439 
1440 class AV1Convolve2DHighbdTest
1441     : public AV1ConvolveTest<highbd_convolve_2d_func> {
1442  public:
RunTest()1443   void RunTest() {
1444     // Do not test the no-op filter.
1445     for (int sub_x = 1; sub_x < 16; ++sub_x) {
1446       for (int sub_y = 1; sub_y < 16; ++sub_y) {
1447         for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
1448           for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
1449             if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
1450                 ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
1451               continue;
1452             TestConvolve(static_cast<InterpFilter>(h_f),
1453                          static_cast<InterpFilter>(v_f), sub_x, sub_y);
1454           }
1455         }
1456       }
1457     }
1458   }
1459 
1460  public:
SpeedTest()1461   void SpeedTest() {
1462     for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
1463       for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
1464         if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
1465             ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
1466           continue;
1467         TestConvolveSpeed(static_cast<InterpFilter>(h_f),
1468                           static_cast<InterpFilter>(v_f), 10000);
1469       }
1470     }
1471   }
1472 
1473  private:
TestConvolve(const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y)1474   void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
1475                     const int sub_x, const int sub_y) {
1476     const int width = GetParam().Block().Width();
1477     const int height = GetParam().Block().Height();
1478     const int bit_depth = GetParam().BitDepth();
1479     const InterpFilterParams *filter_params_x =
1480         av1_get_interp_filter_params_with_block_size(h_f, width);
1481     const InterpFilterParams *filter_params_y =
1482         av1_get_interp_filter_params_with_block_size(v_f, height);
1483     const uint16_t *input = FirstRandomInput16(GetParam());
1484     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1485     ConvolveParams conv_params1 =
1486         get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
1487     av1_highbd_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
1488                                 height, filter_params_x, filter_params_y, sub_x,
1489                                 sub_y, &conv_params1, bit_depth);
1490     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1491     ConvolveParams conv_params2 =
1492         get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
1493     GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
1494                               filter_params_x, filter_params_y, sub_x, sub_y,
1495                               &conv_params2, bit_depth);
1496     AssertOutputBufferEq(reference, test, width, height);
1497   }
1498 
TestConvolveSpeed(const InterpFilter h_f,const InterpFilter v_f,int num_iters)1499   void TestConvolveSpeed(const InterpFilter h_f, const InterpFilter v_f,
1500                          int num_iters) {
1501     const int width = GetParam().Block().Width();
1502     const int height = GetParam().Block().Height();
1503     const int bit_depth = GetParam().BitDepth();
1504     const InterpFilterParams *filter_params_x =
1505         av1_get_interp_filter_params_with_block_size(h_f, width);
1506     const InterpFilterParams *filter_params_y =
1507         av1_get_interp_filter_params_with_block_size(v_f, height);
1508     const uint16_t *input = FirstRandomInput16(GetParam());
1509     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1510     ConvolveParams conv_params1 =
1511         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1512     aom_usec_timer timer;
1513     aom_usec_timer_start(&timer);
1514     for (int i = 0; i < num_iters; ++i) {
1515       av1_highbd_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
1516                                   height, filter_params_x, filter_params_y, 0,
1517                                   0, &conv_params1, bit_depth);
1518     }
1519     aom_usec_timer_mark(&timer);
1520     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1521     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1522     ConvolveParams conv_params2 =
1523         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1524     aom_usec_timer_start(&timer);
1525     for (int i = 0; i < num_iters; ++i) {
1526       GetParam().TestFunction()(input, width, test, kOutputStride, width,
1527                                 height, filter_params_x, filter_params_y, 0, 0,
1528                                 &conv_params2, bit_depth);
1529     }
1530     aom_usec_timer_mark(&timer);
1531     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1532     printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
1533            time1, time2, time1 / time2);
1534   }
1535 };
1536 
// Correctness test: compares the registered function with the C reference.
TEST_P(AV1Convolve2DHighbdTest, RunTest) {
  RunTest();
}

// Benchmark. The DISABLED_ prefix keeps it out of normal test runs.
TEST_P(AV1Convolve2DHighbdTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// Instantiations are prefixed with the architecture name so that they can be
// selected or excluded with --gtest_filter.
INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_c));

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_ssse3));
#endif  // HAVE_SSSE3

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_neon));
#endif  // HAVE_NEON

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_sve2));
#endif  // HAVE_SVE2
1563 
1564 //////////////////////////////////////////////////////////////////
1565 // Single reference convolve-2d IntraBC functions (high bit-depth)
1566 //////////////////////////////////////////////////////////////////
1567 
1568 class AV1Convolve2DHighbdIntraBCTest
1569     : public AV1ConvolveTest<highbd_convolve_2d_func> {
1570  public:
RunTest()1571   void RunTest() {
1572     // IntraBC functions only operate for subpel_x_qn = 8 and subpel_y_qn = 8.
1573     constexpr int kSubX = 8;
1574     constexpr int kSubY = 8;
1575     const int width = GetParam().Block().Width();
1576     const int height = GetParam().Block().Height();
1577     const int bit_depth = GetParam().BitDepth();
1578     const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
1579     const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
1580     const uint16_t *input = FirstRandomInput16(GetParam());
1581 
1582     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1583     ConvolveParams conv_params1 =
1584         get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
1585     // Use a stride different from width to avoid potential storing errors that
1586     // would go undetected. The input buffer is filled using a padding of 12, so
1587     // the stride can be anywhere between width and width + 12.
1588     av1_highbd_convolve_2d_sr_intrabc_c(input, width + 2, reference,
1589                                         kOutputStride, width, height,
1590                                         filter_params_x, filter_params_y, kSubX,
1591                                         kSubY, &conv_params1, bit_depth);
1592 
1593     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1594     ConvolveParams conv_params2 =
1595         get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
1596     GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
1597                               height, filter_params_x, filter_params_y, kSubX,
1598                               kSubY, &conv_params2, bit_depth);
1599 
1600     AssertOutputBufferEq(reference, test, width, height);
1601   }
1602 
SpeedTest()1603   void SpeedTest() {
1604     constexpr int kNumIters = 10000;
1605     const InterpFilter h_f = static_cast<InterpFilter>(BILINEAR);
1606     const InterpFilter v_f = static_cast<InterpFilter>(BILINEAR);
1607     const int width = GetParam().Block().Width();
1608     const int height = GetParam().Block().Height();
1609     const int bit_depth = GetParam().BitDepth();
1610     const InterpFilterParams *filter_params_x =
1611         av1_get_interp_filter_params_with_block_size(h_f, width);
1612     const InterpFilterParams *filter_params_y =
1613         av1_get_interp_filter_params_with_block_size(v_f, height);
1614     const uint16_t *input = FirstRandomInput16(GetParam());
1615 
1616     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1617     ConvolveParams conv_params1 =
1618         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1619     aom_usec_timer timer;
1620     aom_usec_timer_start(&timer);
1621     for (int i = 0; i < kNumIters; ++i) {
1622       av1_highbd_convolve_2d_sr_intrabc_c(
1623           input, width, reference, kOutputStride, width, height,
1624           filter_params_x, filter_params_y, 0, 0, &conv_params1, bit_depth);
1625     }
1626     aom_usec_timer_mark(&timer);
1627     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1628 
1629     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1630     highbd_convolve_2d_func test_func = GetParam().TestFunction();
1631     ConvolveParams conv_params2 =
1632         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1633     aom_usec_timer_start(&timer);
1634     for (int i = 0; i < kNumIters; ++i) {
1635       test_func(input, width, test, kOutputStride, width, height,
1636                 filter_params_x, filter_params_y, 0, 0, &conv_params2,
1637                 bit_depth);
1638     }
1639     aom_usec_timer_mark(&timer);
1640     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1641 
1642     printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
1643            time1, time2, time1 / time2);
1644   }
1645 };
1646 
// Correctness test: compares the registered function with the C reference.
TEST_P(AV1Convolve2DHighbdIntraBCTest, RunTest) {
  RunTest();
}

// Benchmark. The DISABLED_ prefix keeps it out of normal test runs.
TEST_P(AV1Convolve2DHighbdIntraBCTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// Instantiations are prefixed with the architecture name so that they can be
// selected or excluded with --gtest_filter.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_neon));
#endif  // HAVE_NEON
1660 
1661 #endif  // CONFIG_AV1_HIGHBITDEPTH
1662 
1663 //////////////////////////
1664 // Compound Convolve Tests
1665 //////////////////////////
1666 
1667 // The compound functions do not work for chroma block sizes. Provide
1668 // a function to generate test parameters for just luma block sizes.
1669 template <typename T>
GetLumaTestParams(std::initializer_list<int> bit_depths,T test_func)1670 std::vector<TestParam<T>> GetLumaTestParams(
1671     std::initializer_list<int> bit_depths, T test_func) {
1672   std::set<BlockSize> sizes;
1673   for (int b = BLOCK_4X4; b < BLOCK_SIZES_ALL; ++b) {
1674     const int w = block_size_wide[b];
1675     const int h = block_size_high[b];
1676     sizes.insert(BlockSize(w, h));
1677   }
1678   std::vector<TestParam<T>> result;
1679   for (int bit_depth : bit_depths) {
1680     for (const auto &block : sizes) {
1681       result.push_back(TestParam<T>(block, bit_depth, test_func));
1682     }
1683   }
1684   return result;
1685 }
1686 
1687 template <typename T>
GetLowbdLumaTestParams(T test_func)1688 std::vector<TestParam<T>> GetLowbdLumaTestParams(T test_func) {
1689   return GetLumaTestParams({ 8 }, test_func);
1690 }
1691 
1692 template <typename T>
BuildLowbdLumaParams(T test_func)1693 ::testing::internal::ParamGenerator<TestParam<T>> BuildLowbdLumaParams(
1694     T test_func) {
1695   return ::testing::ValuesIn(GetLowbdLumaTestParams(test_func));
1696 }
1697 
// Sanity-checks the low bit-depth luma parameter list: 22 entries, all with
// bit depth 8 and all bound to the function that was passed in.
TEST_F(AV1ConvolveParametersTest, GetLowbdLumaTestParams) {
  const auto params = GetLowbdLumaTestParams(av1_dist_wtd_convolve_x_c);
  ASSERT_EQ(22U, params.size());
  for (const auto &param : params) {
    ASSERT_EQ(8, param.BitDepth());
    // Compare the function pointers outside the assertion macro and assert on
    // the resulting bool.
    const bool same_fn = av1_dist_wtd_convolve_x_c == param.TestFunction();
    ASSERT_TRUE(same_fn);
  }
}
1707 
1708 #if CONFIG_AV1_HIGHBITDEPTH
1709 template <typename T>
GetHighbdLumaTestParams(T test_func)1710 std::vector<TestParam<T>> GetHighbdLumaTestParams(T test_func) {
1711   return GetLumaTestParams({ 10, 12 }, test_func);
1712 }
1713 
// Sanity-checks the high bit-depth luma parameter list: 44 entries, each with
// bit depth 10 or 12, split evenly between the two, and all bound to the
// function that was passed in.
TEST_F(AV1ConvolveParametersTest, GetHighbdLumaTestParams) {
  const auto params =
      GetHighbdLumaTestParams(av1_highbd_dist_wtd_convolve_x_c);
  ASSERT_EQ(44U, params.size());

  int count_10 = 0;
  int count_12 = 0;
  for (const auto &param : params) {
    const int bit_depth = param.BitDepth();
    ASSERT_TRUE(10 == bit_depth || 12 == bit_depth);
    // Compare the function pointers outside the assertion macro and assert on
    // the resulting bool.
    const bool same_fn =
        av1_highbd_dist_wtd_convolve_x_c == param.TestFunction();
    ASSERT_TRUE(same_fn);
    if (bit_depth == 10) {
      ++count_10;
    } else {
      ++count_12;
    }
  }
  ASSERT_EQ(count_10, count_12);
}
1731 
1732 template <typename T>
BuildHighbdLumaParams(T test_func)1733 ::testing::internal::ParamGenerator<TestParam<T>> BuildHighbdLumaParams(
1734     T test_func) {
1735   return ::testing::ValuesIn(GetHighbdLumaTestParams(test_func));
1736 }
1737 
1738 #endif  // CONFIG_AV1_HIGHBITDEPTH
1739 
// Compound cases also need to test different frame offsets and weightings.
// This immutable value type bundles the three compound settings that the
// tests copy into ConvolveParams.
class CompoundParam {
  // Whether distance-weighted compound averaging is enabled.
  bool use_dist_wtd_comp_avg_;
  // Forward and backward offsets, stored exactly as given.
  int fwd_offset_;
  int bck_offset_;

 public:
  CompoundParam(bool use_dist_wtd_comp_avg, int fwd_offset, int bck_offset)
      : use_dist_wtd_comp_avg_{ use_dist_wtd_comp_avg },
        fwd_offset_{ fwd_offset }, bck_offset_{ bck_offset } {}

  // Read-only accessors for the values passed to the constructor.
  bool UseDistWtdCompAvg() const { return use_dist_wtd_comp_avg_; }
  int FwdOffset() const { return fwd_offset_; }
  int BckOffset() const { return bck_offset_; }
};
1756 
GetCompoundParams()1757 std::vector<CompoundParam> GetCompoundParams() {
1758   std::vector<CompoundParam> result;
1759   result.push_back(CompoundParam(false, 0, 0));
1760   for (int k = 0; k < 2; ++k) {
1761     for (int l = 0; l < 4; ++l) {
1762       result.push_back(CompoundParam(true, quant_dist_lookup_table[l][k],
1763                                      quant_dist_lookup_table[l][1 - k]));
1764     }
1765   }
1766   return result;
1767 }
1768 
// Sanity-checks the compound parameter list: nine entries, of which only the
// first has distance-weighted averaging disabled.
TEST_F(AV1ConvolveParametersTest, GetCompoundParams) {
  const std::vector<CompoundParam> params = GetCompoundParams();
  ASSERT_EQ(9U, params.size());

  auto it = params.begin();
  ASSERT_FALSE(it->UseDistWtdCompAvg());
  for (++it; it != params.end(); ++it) {
    ASSERT_TRUE(it->UseDistWtdCompAvg());
  }
}
1777 
1778 ////////////////////////////////////////////////
1779 // Compound convolve-x functions (low bit-depth)
1780 ////////////////////////////////////////////////
1781 
GetConvolveParams(int do_average,CONV_BUF_TYPE * conv_buf,int width,int bit_depth,const CompoundParam & compound)1782 ConvolveParams GetConvolveParams(int do_average, CONV_BUF_TYPE *conv_buf,
1783                                  int width, int bit_depth,
1784                                  const CompoundParam &compound) {
1785   ConvolveParams conv_params =
1786       get_conv_params_no_round(do_average, 0, conv_buf, width, 1, bit_depth);
1787   conv_params.use_dist_wtd_comp_avg = compound.UseDistWtdCompAvg();
1788   conv_params.fwd_offset = compound.FwdOffset();
1789   conv_params.bck_offset = compound.BckOffset();
1790   return conv_params;
1791 }
1792 
1793 class AV1ConvolveXCompoundTest : public AV1ConvolveTest<convolve_x_func> {
1794  public:
RunTest()1795   void RunTest() {
1796     auto compound_params = GetCompoundParams();
1797     // Do not test the no-op filter.
1798     for (int sub_pix = 1; sub_pix < 16; ++sub_pix) {
1799       for (int f = EIGHTTAP_REGULAR; f < INTERP_FILTERS_ALL; ++f) {
1800         for (const auto &c : compound_params) {
1801           TestConvolve(sub_pix, static_cast<InterpFilter>(f), c);
1802         }
1803       }
1804     }
1805   }
1806 
1807  protected:
FilterParams(InterpFilter f,const BlockSize & block) const1808   virtual const InterpFilterParams *FilterParams(InterpFilter f,
1809                                                  const BlockSize &block) const {
1810     return av1_get_interp_filter_params_with_block_size(f, block.Width());
1811   }
1812 
ReferenceFunc() const1813   virtual convolve_x_func ReferenceFunc() const {
1814     return av1_dist_wtd_convolve_x_c;
1815   }
1816 
1817  private:
TestConvolve(const int sub_pix,const InterpFilter filter,const CompoundParam & compound)1818   void TestConvolve(const int sub_pix, const InterpFilter filter,
1819                     const CompoundParam &compound) {
1820     const int width = GetParam().Block().Width();
1821     const int height = GetParam().Block().Height();
1822     const uint8_t *input1 = FirstRandomInput8(GetParam());
1823     const uint8_t *input2 = SecondRandomInput8(GetParam());
1824     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1825     DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
1826     Convolve(ReferenceFunc(), input1, input2, reference, reference_conv_buf,
1827              compound, sub_pix, filter);
1828 
1829     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1830     DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
1831     Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
1832              compound, sub_pix, filter);
1833 
1834     AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
1835     AssertOutputBufferEq(reference, test, width, height);
1836   }
1837 
1838  private:
Convolve(convolve_x_func test_func,const uint8_t * src1,const uint8_t * src2,uint8_t * dst,CONV_BUF_TYPE * conv_buf,const CompoundParam & compound,const int sub_pix,const InterpFilter filter)1839   void Convolve(convolve_x_func test_func, const uint8_t *src1,
1840                 const uint8_t *src2, uint8_t *dst, CONV_BUF_TYPE *conv_buf,
1841                 const CompoundParam &compound, const int sub_pix,
1842                 const InterpFilter filter) {
1843     const int width = GetParam().Block().Width();
1844     const int height = GetParam().Block().Height();
1845     const InterpFilterParams *filter_params =
1846         FilterParams(filter, GetParam().Block());
1847 
1848     ConvolveParams conv_params =
1849         GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
1850     test_func(src1, width, dst, kOutputStride, width, height, filter_params,
1851               sub_pix, &conv_params);
1852 
1853     conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
1854     test_func(src2, width, dst, kOutputStride, width, height, filter_params,
1855               sub_pix, &conv_params);
1856   }
1857 };
1858 
// Correctness test: compares the registered function with the C reference.
TEST_P(AV1ConvolveXCompoundTest, RunTest) {
  RunTest();
}

// Instantiations are prefixed with the architecture name so that they can be
// selected or excluded with --gtest_filter.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveXCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_sse2));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon));
#endif  // HAVE_NEON

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(
    NEON_DOTPROD, AV1ConvolveXCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon_dotprod));
#endif  // HAVE_NEON_DOTPROD

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(
    NEON_I8MM, AV1ConvolveXCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon_i8mm));
#endif  // HAVE_NEON_I8MM
1890 
1891 #if CONFIG_AV1_HIGHBITDEPTH
1892 /////////////////////////////////////////////////
1893 // Compound convolve-x functions (high bit-depth)
1894 /////////////////////////////////////////////////
1895 class AV1ConvolveXHighbdCompoundTest
1896     : public AV1ConvolveTest<highbd_convolve_x_func> {
1897  public:
RunTest()1898   void RunTest() {
1899     auto compound_params = GetCompoundParams();
1900     // Do not test the no-op filter.
1901     for (int sub_pix = 1; sub_pix < 16; ++sub_pix) {
1902       for (int f = EIGHTTAP_REGULAR; f < INTERP_FILTERS_ALL; ++f) {
1903         for (const auto &c : compound_params) {
1904           TestConvolve(sub_pix, static_cast<InterpFilter>(f), c);
1905         }
1906       }
1907     }
1908   }
1909 
1910  protected:
FilterParams(InterpFilter f,const BlockSize & block) const1911   virtual const InterpFilterParams *FilterParams(InterpFilter f,
1912                                                  const BlockSize &block) const {
1913     return av1_get_interp_filter_params_with_block_size(f, block.Width());
1914   }
1915 
ReferenceFunc() const1916   virtual highbd_convolve_x_func ReferenceFunc() const {
1917     return av1_highbd_dist_wtd_convolve_x_c;
1918   }
1919 
1920  private:
TestConvolve(const int sub_pix,const InterpFilter filter,const CompoundParam & compound)1921   void TestConvolve(const int sub_pix, const InterpFilter filter,
1922                     const CompoundParam &compound) {
1923     const int width = GetParam().Block().Width();
1924     const int height = GetParam().Block().Height();
1925 
1926     const uint16_t *input1 = FirstRandomInput16(GetParam());
1927     const uint16_t *input2 = SecondRandomInput16(GetParam());
1928     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1929     DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
1930     Convolve(ReferenceFunc(), input1, input2, reference, reference_conv_buf,
1931              compound, sub_pix, filter);
1932 
1933     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1934     DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
1935     Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
1936              compound, sub_pix, filter);
1937 
1938     AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
1939     AssertOutputBufferEq(reference, test, width, height);
1940   }
1941 
Convolve(highbd_convolve_x_func test_func,const uint16_t * src1,const uint16_t * src2,uint16_t * dst,CONV_BUF_TYPE * conv_buf,const CompoundParam & compound,const int sub_pix,const InterpFilter filter)1942   void Convolve(highbd_convolve_x_func test_func, const uint16_t *src1,
1943                 const uint16_t *src2, uint16_t *dst, CONV_BUF_TYPE *conv_buf,
1944                 const CompoundParam &compound, const int sub_pix,
1945                 const InterpFilter filter) {
1946     const int width = GetParam().Block().Width();
1947     const int height = GetParam().Block().Height();
1948     const int bit_depth = GetParam().BitDepth();
1949     const InterpFilterParams *filter_params =
1950         FilterParams(filter, GetParam().Block());
1951     ConvolveParams conv_params =
1952         GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound);
1953     test_func(src1, width, dst, kOutputStride, width, height, filter_params,
1954               sub_pix, &conv_params, bit_depth);
1955     conv_params =
1956         GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound);
1957     test_func(src2, width, dst, kOutputStride, width, height, filter_params,
1958               sub_pix, &conv_params, bit_depth);
1959   }
1960 };
1961 
// Correctness test: compares the registered function with the C reference.
TEST_P(AV1ConvolveXHighbdCompoundTest, RunTest) {
  RunTest();
}

// Instantiations are prefixed with the architecture name so that they can be
// selected or excluded with --gtest_filter.
INSTANTIATE_TEST_SUITE_P(
    C, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_c));

#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_sse4_1));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_neon));
#endif  // HAVE_NEON

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(
    SVE2, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_sve2));
#endif  // HAVE_SVE2
1991 
1992 #endif  // CONFIG_AV1_HIGHBITDEPTH
1993 
1994 ////////////////////////////////////////////////
1995 // Compound convolve-y functions (low bit-depth)
1996 ////////////////////////////////////////////////
1997 
1998 // Note that the X and Y convolve functions have the same type signature and
1999 // logic; they only differentiate the filter parameters and reference function.
2000 class AV1ConvolveYCompoundTest : public AV1ConvolveXCompoundTest {
2001  protected:
FilterParams(InterpFilter f,const BlockSize & block) const2002   const InterpFilterParams *FilterParams(
2003       InterpFilter f, const BlockSize &block) const override {
2004     return av1_get_interp_filter_params_with_block_size(f, block.Height());
2005   }
2006 
ReferenceFunc() const2007   convolve_x_func ReferenceFunc() const override {
2008     return av1_dist_wtd_convolve_y_c;
2009   }
2010 };
2011 
// Correctness test: compares the registered function with the C reference.
TEST_P(AV1ConvolveYCompoundTest, RunTest) {
  RunTest();
}

// Instantiations are prefixed with the architecture name so that they can be
// selected or excluded with --gtest_filter.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveYCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_sse2));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_neon));
#endif  // HAVE_NEON
2031 
2032 #if CONFIG_AV1_HIGHBITDEPTH
2033 /////////////////////////////////////////////////
2034 // Compound convolve-y functions (high bit-depth)
2035 /////////////////////////////////////////////////
2036 
2037 // Again, the X and Y convolve functions have the same type signature and logic.
2038 class AV1ConvolveYHighbdCompoundTest : public AV1ConvolveXHighbdCompoundTest {
ReferenceFunc() const2039   highbd_convolve_x_func ReferenceFunc() const override {
2040     return av1_highbd_dist_wtd_convolve_y_c;
2041   }
FilterParams(InterpFilter f,const BlockSize & block) const2042   const InterpFilterParams *FilterParams(
2043       InterpFilter f, const BlockSize &block) const override {
2044     return av1_get_interp_filter_params_with_block_size(f, block.Height());
2045   }
2046 };
2047 
// Correctness test: compares the registered function with the C reference.
TEST_P(AV1ConvolveYHighbdCompoundTest, RunTest) {
  RunTest();
}

// Instantiations are prefixed with the architecture name so that they can be
// selected or excluded with --gtest_filter.
INSTANTIATE_TEST_SUITE_P(
    C, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_c));

#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_sse4_1));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_neon));
#endif  // HAVE_NEON

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(
    SVE2, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_sve2));
#endif  // HAVE_SVE2
2077 
2078 #endif  // CONFIG_AV1_HIGHBITDEPTH
2079 
2080 //////////////////////////////////////////////////////
2081 // Compound convolve-2d-copy functions (low bit-depth)
2082 //////////////////////////////////////////////////////
2083 typedef void (*compound_conv_2d_copy_func)(const uint8_t *src, int src_stride,
2084                                            uint8_t *dst, int dst_stride, int w,
2085                                            int h, ConvolveParams *conv_params);
2086 
2087 class AV1Convolve2DCopyCompoundTest
2088     : public AV1ConvolveTest<compound_conv_2d_copy_func> {
2089  public:
RunTest()2090   void RunTest() {
2091     auto compound_params = GetCompoundParams();
2092     for (const auto &compound : compound_params) {
2093       TestConvolve(compound);
2094     }
2095   }
SpeedTest()2096   void SpeedTest() {
2097     for (const auto &compound : GetCompoundParams()) {
2098       TestConvolveSpeed(compound, 100000);
2099     }
2100   }
2101 
2102  private:
TestConvolve(const CompoundParam & compound)2103   void TestConvolve(const CompoundParam &compound) {
2104     const BlockSize &block = GetParam().Block();
2105     const int width = block.Width();
2106     const int height = block.Height();
2107 
2108     const uint8_t *input1 = FirstRandomInput8(GetParam());
2109     const uint8_t *input2 = SecondRandomInput8(GetParam());
2110     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
2111     DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
2112     Convolve(av1_dist_wtd_convolve_2d_copy_c, input1, input2, reference,
2113              reference_conv_buf, compound);
2114 
2115     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
2116     DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
2117     Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
2118              compound);
2119 
2120     AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
2121     AssertOutputBufferEq(reference, test, width, height);
2122   }
2123 
TestConvolveSpeed(const CompoundParam & compound,const int num_iters)2124   void TestConvolveSpeed(const CompoundParam &compound, const int num_iters) {
2125     const int width = GetParam().Block().Width();
2126     const int height = GetParam().Block().Height();
2127 
2128     const uint8_t *src0 = FirstRandomInput8(GetParam());
2129     const uint8_t *src1 = SecondRandomInput8(GetParam());
2130     DECLARE_ALIGNED(32, uint8_t, dst[MAX_SB_SQUARE]);
2131     DECLARE_ALIGNED(32, CONV_BUF_TYPE, conv_buf[MAX_SB_SQUARE]);
2132 
2133     const auto test_func = GetParam().TestFunction();
2134 
2135     ConvolveParams conv_params_0 =
2136         GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
2137     ConvolveParams conv_params_1 =
2138         GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
2139 
2140     aom_usec_timer timer;
2141     aom_usec_timer_start(&timer);
2142     for (int i = 0; i < num_iters; ++i) {
2143       av1_dist_wtd_convolve_2d_copy_c(src0, width, dst, kOutputStride, width,
2144                                       height, &conv_params_0);
2145       av1_dist_wtd_convolve_2d_copy_c(src1, width, dst, kOutputStride, width,
2146                                       height, &conv_params_1);
2147     }
2148     aom_usec_timer_mark(&timer);
2149     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
2150 
2151     aom_usec_timer_start(&timer);
2152     for (int i = 0; i < num_iters; ++i) {
2153       test_func(src0, width, dst, kOutputStride, width, height, &conv_params_0);
2154       test_func(src1, width, dst, kOutputStride, width, height, &conv_params_1);
2155     }
2156     aom_usec_timer_mark(&timer);
2157     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
2158     printf("Dist Weighted: %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n",
2159            compound.UseDistWtdCompAvg(), width, height, time1, time2,
2160            time1 / time2);
2161   }
2162 
Convolve(compound_conv_2d_copy_func test_func,const uint8_t * src1,const uint8_t * src2,uint8_t * dst,uint16_t * conv_buf,const CompoundParam & compound)2163   void Convolve(compound_conv_2d_copy_func test_func, const uint8_t *src1,
2164                 const uint8_t *src2, uint8_t *dst, uint16_t *conv_buf,
2165                 const CompoundParam &compound) {
2166     const BlockSize &block = GetParam().Block();
2167     const int width = block.Width();
2168     const int height = block.Height();
2169     ConvolveParams conv_params =
2170         GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
2171     test_func(src1, width, dst, kOutputStride, width, height, &conv_params);
2172 
2173     conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
2174     test_func(src2, width, dst, kOutputStride, width, height, &conv_params);
2175   }
2176 };
2177 
// Correctness test, run for every instantiated parameter combination.
TEST_P(AV1Convolve2DCopyCompoundTest, RunTest) {
  RunTest();
}

// Timing test; the DISABLED_ prefix keeps it out of default runs.
TEST_P(AV1Convolve2DCopyCompoundTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// One instantiation per implementation, each named after its architecture.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DCopyCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, AV1Convolve2DCopyCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_sse2));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1Convolve2DCopyCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DCopyCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_neon));
#endif  // HAVE_NEON
2201 
2202 #if CONFIG_AV1_HIGHBITDEPTH
2203 ///////////////////////////////////////////////////////
2204 // Compound convolve-2d-copy functions (high bit-depth)
2205 ///////////////////////////////////////////////////////
2206 typedef void (*highbd_compound_conv_2d_copy_func)(const uint16_t *src,
2207                                                   int src_stride, uint16_t *dst,
2208                                                   int dst_stride, int w, int h,
2209                                                   ConvolveParams *conv_params,
2210                                                   int bd);
2211 
2212 class AV1Convolve2DCopyHighbdCompoundTest
2213     : public AV1ConvolveTest<highbd_compound_conv_2d_copy_func> {
2214  public:
RunTest()2215   void RunTest() {
2216     auto compound_params = GetCompoundParams();
2217     for (const auto &compound : compound_params) {
2218       TestConvolve(compound);
2219     }
2220   }
2221 
2222  private:
TestConvolve(const CompoundParam & compound)2223   void TestConvolve(const CompoundParam &compound) {
2224     const BlockSize &block = GetParam().Block();
2225     const int width = block.Width();
2226     const int height = block.Height();
2227 
2228     const uint16_t *input1 = FirstRandomInput16(GetParam());
2229     const uint16_t *input2 = SecondRandomInput16(GetParam());
2230     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
2231     DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
2232     Convolve(av1_highbd_dist_wtd_convolve_2d_copy_c, input1, input2, reference,
2233              reference_conv_buf, compound);
2234 
2235     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
2236     DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
2237     Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
2238              compound);
2239 
2240     AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
2241     AssertOutputBufferEq(reference, test, width, height);
2242   }
2243 
Convolve(highbd_compound_conv_2d_copy_func test_func,const uint16_t * src1,const uint16_t * src2,uint16_t * dst,uint16_t * conv_buf,const CompoundParam & compound)2244   void Convolve(highbd_compound_conv_2d_copy_func test_func,
2245                 const uint16_t *src1, const uint16_t *src2, uint16_t *dst,
2246                 uint16_t *conv_buf, const CompoundParam &compound) {
2247     const BlockSize &block = GetParam().Block();
2248     const int width = block.Width();
2249     const int height = block.Height();
2250     const int bit_depth = GetParam().BitDepth();
2251 
2252     ConvolveParams conv_params =
2253         GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound);
2254     test_func(src1, width, dst, kOutputStride, width, height, &conv_params,
2255               bit_depth);
2256 
2257     conv_params =
2258         GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound);
2259     test_func(src2, width, dst, kOutputStride, width, height, &conv_params,
2260               bit_depth);
2261   }
2262 };
2263 
// Correctness test, run for every instantiated parameter combination.
TEST_P(AV1Convolve2DCopyHighbdCompoundTest, RunTest) {
  RunTest();
}

// One instantiation per implementation, each named after its architecture.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DCopyHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_c));

#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1Convolve2DCopyHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_sse4_1));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1Convolve2DCopyHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DCopyHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_neon));
#endif  // HAVE_NEON
2287 
2288 #endif  // CONFIG_AV1_HIGHBITDEPTH
2289 
2290 /////////////////////////////////////////////////
2291 // Compound convolve-2d functions (low bit-depth)
2292 /////////////////////////////////////////////////
2293 
2294 class AV1Convolve2DCompoundTest : public AV1ConvolveTest<convolve_2d_func> {
2295  public:
RunTest()2296   void RunTest() {
2297     auto compound_params = GetCompoundParams();
2298     for (int h_f = EIGHTTAP_REGULAR; h_f < INTERP_FILTERS_ALL; ++h_f) {
2299       for (int v_f = EIGHTTAP_REGULAR; v_f < INTERP_FILTERS_ALL; ++v_f) {
2300         // Do not test the no-op filter.
2301         for (int sub_x = 1; sub_x < 16; ++sub_x) {
2302           for (int sub_y = 1; sub_y < 16; ++sub_y) {
2303             for (const auto &compound : compound_params) {
2304               TestConvolve(static_cast<InterpFilter>(h_f),
2305                            static_cast<InterpFilter>(v_f), sub_x, sub_y,
2306                            compound);
2307             }
2308           }
2309         }
2310       }
2311     }
2312   }
2313 
2314  private:
TestConvolve(const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y,const CompoundParam & compound)2315   void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
2316                     const int sub_x, const int sub_y,
2317                     const CompoundParam &compound) {
2318     const BlockSize &block = GetParam().Block();
2319     const int width = block.Width();
2320     const int height = block.Height();
2321 
2322     const uint8_t *input1 = FirstRandomInput8(GetParam());
2323     const uint8_t *input2 = SecondRandomInput8(GetParam());
2324     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
2325     DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
2326     Convolve(av1_dist_wtd_convolve_2d_c, input1, input2, reference,
2327              reference_conv_buf, compound, h_f, v_f, sub_x, sub_y);
2328 
2329     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
2330     DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
2331     Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
2332              compound, h_f, v_f, sub_x, sub_y);
2333 
2334     AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
2335     AssertOutputBufferEq(reference, test, width, height);
2336   }
2337 
2338  private:
Convolve(convolve_2d_func test_func,const uint8_t * src1,const uint8_t * src2,uint8_t * dst,uint16_t * conv_buf,const CompoundParam & compound,const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y)2339   void Convolve(convolve_2d_func test_func, const uint8_t *src1,
2340                 const uint8_t *src2, uint8_t *dst, uint16_t *conv_buf,
2341                 const CompoundParam &compound, const InterpFilter h_f,
2342                 const InterpFilter v_f, const int sub_x, const int sub_y) {
2343     const BlockSize &block = GetParam().Block();
2344     const int width = block.Width();
2345     const int height = block.Height();
2346 
2347     const InterpFilterParams *filter_params_x =
2348         av1_get_interp_filter_params_with_block_size(h_f, width);
2349     const InterpFilterParams *filter_params_y =
2350         av1_get_interp_filter_params_with_block_size(v_f, height);
2351     ConvolveParams conv_params =
2352         GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
2353 
2354     test_func(src1, width, dst, kOutputStride, width, height, filter_params_x,
2355               filter_params_y, sub_x, sub_y, &conv_params);
2356 
2357     conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
2358     test_func(src2, width, dst, kOutputStride, width, height, filter_params_x,
2359               filter_params_y, sub_x, sub_y, &conv_params);
2360   }
2361 };
2362 
// Correctness test, run for every instantiated parameter combination.
TEST_P(AV1Convolve2DCompoundTest, RunTest) {
  RunTest();
}

// One instantiation per implementation, each named after its architecture.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_c));

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(
    SSSE3, AV1Convolve2DCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_ssse3));
#endif  // HAVE_SSSE3

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1Convolve2DCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon));
#endif  // HAVE_NEON

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(
    NEON_DOTPROD, AV1Convolve2DCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon_dotprod));
#endif  // HAVE_NEON_DOTPROD

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(
    NEON_I8MM, AV1Convolve2DCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon_i8mm));
#endif  // HAVE_NEON_I8MM
2394 
2395 #if CONFIG_AV1_HIGHBITDEPTH
2396 //////////////////////////////////////////////////
2397 // Compound convolve-2d functions (high bit-depth)
2398 //////////////////////////////////////////////////
2399 
2400 class AV1Convolve2DHighbdCompoundTest
2401     : public AV1ConvolveTest<highbd_convolve_2d_func> {
2402  public:
RunTest()2403   void RunTest() {
2404     auto compound_params = GetCompoundParams();
2405     for (int h_f = EIGHTTAP_REGULAR; h_f < INTERP_FILTERS_ALL; ++h_f) {
2406       for (int v_f = EIGHTTAP_REGULAR; v_f < INTERP_FILTERS_ALL; ++v_f) {
2407         // Do not test the no-op filter.
2408         for (int sub_x = 1; sub_x < 16; ++sub_x) {
2409           for (int sub_y = 1; sub_y < 16; ++sub_y) {
2410             for (const auto &compound : compound_params) {
2411               TestConvolve(static_cast<InterpFilter>(h_f),
2412                            static_cast<InterpFilter>(v_f), sub_x, sub_y,
2413                            compound);
2414             }
2415           }
2416         }
2417       }
2418     }
2419   }
2420 
2421  private:
TestConvolve(const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y,const CompoundParam & compound)2422   void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
2423                     const int sub_x, const int sub_y,
2424                     const CompoundParam &compound) {
2425     const BlockSize &block = GetParam().Block();
2426     const int width = block.Width();
2427     const int height = block.Height();
2428     const uint16_t *input1 = FirstRandomInput16(GetParam());
2429     const uint16_t *input2 = SecondRandomInput16(GetParam());
2430     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
2431     DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
2432     Convolve(av1_highbd_dist_wtd_convolve_2d_c, input1, input2, reference,
2433              reference_conv_buf, compound, h_f, v_f, sub_x, sub_y);
2434 
2435     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
2436     DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
2437     Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
2438              compound, h_f, v_f, sub_x, sub_y);
2439 
2440     AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
2441     AssertOutputBufferEq(reference, test, width, height);
2442   }
2443 
2444  private:
Convolve(highbd_convolve_2d_func test_func,const uint16_t * src1,const uint16_t * src2,uint16_t * dst,uint16_t * conv_buf,const CompoundParam & compound,const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y)2445   void Convolve(highbd_convolve_2d_func test_func, const uint16_t *src1,
2446                 const uint16_t *src2, uint16_t *dst, uint16_t *conv_buf,
2447                 const CompoundParam &compound, const InterpFilter h_f,
2448                 const InterpFilter v_f, const int sub_x, const int sub_y) {
2449     const BlockSize &block = GetParam().Block();
2450     const int width = block.Width();
2451     const int height = block.Height();
2452 
2453     const InterpFilterParams *filter_params_x =
2454         av1_get_interp_filter_params_with_block_size(h_f, width);
2455     const InterpFilterParams *filter_params_y =
2456         av1_get_interp_filter_params_with_block_size(v_f, height);
2457     const int bit_depth = GetParam().BitDepth();
2458     ConvolveParams conv_params =
2459         GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound);
2460     test_func(src1, width, dst, kOutputStride, width, height, filter_params_x,
2461               filter_params_y, sub_x, sub_y, &conv_params, bit_depth);
2462 
2463     conv_params =
2464         GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound);
2465     test_func(src2, width, dst, kOutputStride, width, height, filter_params_x,
2466               filter_params_y, sub_x, sub_y, &conv_params, bit_depth);
2467   }
2468 };
2469 
// Correctness test, run for every instantiated parameter combination.
TEST_P(AV1Convolve2DHighbdCompoundTest, RunTest) {
  RunTest();
}

// One instantiation per implementation, each named after its architecture.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_c));

#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_sse4_1));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_neon));
#endif  // HAVE_NEON

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(
    SVE2, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_sve2));
#endif  // HAVE_SVE2
2499 
2500 #endif  // CONFIG_AV1_HIGHBITDEPTH
2501 
2502 }  // namespace
2503