xref: /aosp_15_r20/external/libaom/test/hiprec_convolve_test_util.cc (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include "test/hiprec_convolve_test_util.h"
13 
14 #include <memory>
15 #include <new>
16 
17 #include "av1/common/restoration.h"
18 
19 using std::make_tuple;
20 using std::tuple;
21 
22 namespace libaom_test {
23 
24 // Generate a random pair of filter kernels, using the ranges
25 // of possible values from the loop-restoration experiment
generate_kernels(ACMRandom * rnd,InterpKernel hkernel,InterpKernel vkernel,int kernel_type=2)26 static void generate_kernels(ACMRandom *rnd, InterpKernel hkernel,
27                              InterpKernel vkernel, int kernel_type = 2) {
28   if (kernel_type == 0) {
29     // Low possible values for filter coefficients, 7-tap kernel
30     hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = WIENER_FILT_TAP0_MINV;
31     hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MINV;
32     hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MINV;
33     hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
34     hkernel[7] = vkernel[7] = 0;
35   } else if (kernel_type == 1) {
36     // Max possible values for filter coefficients, 7-tap kernel
37     hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = WIENER_FILT_TAP0_MAXV;
38     hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MAXV;
39     hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MAXV;
40     hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
41     hkernel[7] = vkernel[7] = 0;
42   } else if (kernel_type == 2) {
43     // Randomly generated values for filter coefficients, 7-tap kernel
44     hkernel[0] = hkernel[6] =
45         WIENER_FILT_TAP0_MINV +
46         rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
47     hkernel[1] = hkernel[5] =
48         WIENER_FILT_TAP1_MINV +
49         rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
50     hkernel[2] = hkernel[4] =
51         WIENER_FILT_TAP2_MINV +
52         rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
53     hkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
54     hkernel[7] = 0;
55 
56     vkernel[0] = vkernel[6] =
57         WIENER_FILT_TAP0_MINV +
58         rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 2 - WIENER_FILT_TAP0_MINV);
59     vkernel[1] = vkernel[5] =
60         WIENER_FILT_TAP1_MINV +
61         rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 2 - WIENER_FILT_TAP1_MINV);
62     vkernel[2] = vkernel[4] =
63         WIENER_FILT_TAP2_MINV +
64         rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 2 - WIENER_FILT_TAP2_MINV);
65     vkernel[3] = -2 * (vkernel[0] + vkernel[1] + vkernel[2]);
66     vkernel[7] = 0;
67   } else if (kernel_type == 3) {
68     // Low possible values for filter coefficients, 5-tap kernel
69     hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = 0;
70     hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MINV;
71     hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MINV;
72     hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
73     hkernel[7] = vkernel[7] = 0;
74   } else if (kernel_type == 4) {
75     // Max possible values for filter coefficients, 5-tap kernel
76     hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = 0;
77     hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MAXV;
78     hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MAXV;
79     hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
80     hkernel[7] = vkernel[7] = 0;
81   } else {
82     // Randomly generated values for filter coefficients, 5-tap kernel
83     hkernel[0] = hkernel[6] = 0;
84     hkernel[1] = hkernel[5] =
85         WIENER_FILT_TAP1_MINV +
86         rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
87     hkernel[2] = hkernel[4] =
88         WIENER_FILT_TAP2_MINV +
89         rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
90     hkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
91     hkernel[7] = 0;
92 
93     vkernel[0] = vkernel[6] = 0;
94     vkernel[1] = vkernel[5] =
95         WIENER_FILT_TAP1_MINV +
96         rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 2 - WIENER_FILT_TAP1_MINV);
97     vkernel[2] = vkernel[4] =
98         WIENER_FILT_TAP2_MINV +
99         rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 2 - WIENER_FILT_TAP2_MINV);
100     vkernel[3] = -2 * (vkernel[0] + vkernel[1] + vkernel[2]);
101     vkernel[7] = 0;
102   }
103 }
104 
105 namespace AV1HiprecConvolve {
106 
BuildParams(hiprec_convolve_func filter)107 ::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
108     hiprec_convolve_func filter) {
109   const HiprecConvolveParam params[] = {
110     make_tuple(8, 8, 50000, filter),   make_tuple(8, 4, 50000, filter),
111     make_tuple(64, 24, 1000, filter),  make_tuple(64, 64, 1000, filter),
112     make_tuple(64, 56, 1000, filter),  make_tuple(32, 8, 10000, filter),
113     make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter),
114     make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter),
115     make_tuple(64, 34, 1000, filter),  make_tuple(8, 17, 10000, filter),
116     make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter)
117   };
118   return ::testing::ValuesIn(params);
119 }
120 
121 AV1HiprecConvolveTest::~AV1HiprecConvolveTest() = default;
SetUp()122 void AV1HiprecConvolveTest::SetUp() {
123   rnd_.Reset(ACMRandom::DeterministicSeed());
124 }
125 
RunCheckOutput(hiprec_convolve_func test_impl)126 void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
127   const int w = 128, h = 128;
128   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
129   const int num_iters = GET_PARAM(2);
130   int i, j, k, m;
131   const WienerConvolveParams conv_params = get_conv_params_wiener(8);
132 
133   std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]);
134   ASSERT_NE(input_, nullptr);
135   uint8_t *input = input_.get();
136 
137   // The AVX2 convolve functions always write rows with widths that are
138   // multiples of 16. So to avoid a buffer overflow, we may need to pad
139   // rows to a multiple of 16.
140   int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
141   std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
142   ASSERT_NE(output, nullptr);
143   std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
144   ASSERT_NE(output2, nullptr);
145 
146   // Generate random filter kernels
147   DECLARE_ALIGNED(16, InterpKernel, hkernel);
148   DECLARE_ALIGNED(16, InterpKernel, vkernel);
149 
150   for (int kernel_type = 0; kernel_type < 6; kernel_type++) {
151     generate_kernels(&rnd_, hkernel, vkernel, kernel_type);
152     for (i = 0; i < num_iters; ++i) {
153       for (k = 0; k < h; ++k)
154         for (m = 0; m < w; ++m) input[k * w + m] = rnd_.Rand8();
155       // Choose random locations within the source block
156       int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
157       int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
158       av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w,
159                                     output.get(), out_w, hkernel, 16, vkernel,
160                                     16, out_w, out_h, &conv_params);
161       test_impl(input + offset_r * w + offset_c, w, output2.get(), out_w,
162                 hkernel, 16, vkernel, 16, out_w, out_h, &conv_params);
163 
164       for (j = 0; j < out_w * out_h; ++j)
165         ASSERT_EQ(output[j], output2[j])
166             << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
167             << (j / out_w) << ") on iteration " << i;
168     }
169   }
170 }
171 
RunSpeedTest(hiprec_convolve_func test_impl)172 void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) {
173   const int w = 128, h = 128;
174   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
175   const int num_iters = GET_PARAM(2) / 500;
176   int i, j, k;
177   const WienerConvolveParams conv_params = get_conv_params_wiener(8);
178 
179   std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]);
180   ASSERT_NE(input_, nullptr);
181   uint8_t *input = input_.get();
182 
183   // The AVX2 convolve functions always write rows with widths that are
184   // multiples of 16. So to avoid a buffer overflow, we may need to pad
185   // rows to a multiple of 16.
186   int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
187   std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
188   ASSERT_NE(output, nullptr);
189   std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
190   ASSERT_NE(output2, nullptr);
191 
192   // Generate random filter kernels
193   DECLARE_ALIGNED(16, InterpKernel, hkernel);
194   DECLARE_ALIGNED(16, InterpKernel, vkernel);
195 
196   generate_kernels(&rnd_, hkernel, vkernel);
197 
198   for (i = 0; i < h; ++i)
199     for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
200 
201   aom_usec_timer ref_timer;
202   aom_usec_timer_start(&ref_timer);
203   for (i = 0; i < num_iters; ++i) {
204     for (j = 3; j < h - out_h - 4; j++) {
205       for (k = 3; k < w - out_w - 4; k++) {
206         av1_wiener_convolve_add_src_c(input + j * w + k, w, output.get(), out_w,
207                                       hkernel, 16, vkernel, 16, out_w, out_h,
208                                       &conv_params);
209       }
210     }
211   }
212   aom_usec_timer_mark(&ref_timer);
213   const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
214 
215   aom_usec_timer tst_timer;
216   aom_usec_timer_start(&tst_timer);
217   for (i = 0; i < num_iters; ++i) {
218     for (j = 3; j < h - out_h - 4; j++) {
219       for (k = 3; k < w - out_w - 4; k++) {
220         test_impl(input + j * w + k, w, output2.get(), out_w, hkernel, 16,
221                   vkernel, 16, out_w, out_h, &conv_params);
222       }
223     }
224   }
225   aom_usec_timer_mark(&tst_timer);
226   const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
227 
228   std::cout << "[          ] C time = " << ref_time / 1000
229             << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
230 
231   EXPECT_GT(ref_time, tst_time)
232       << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
233       << "C time: " << ref_time << " us\n"
234       << "SIMD time: " << tst_time << " us\n";
235 }
236 }  // namespace AV1HiprecConvolve
237 
238 #if CONFIG_AV1_HIGHBITDEPTH
239 namespace AV1HighbdHiprecConvolve {
240 
BuildParams(highbd_hiprec_convolve_func filter)241 ::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
242     highbd_hiprec_convolve_func filter) {
243   const HighbdHiprecConvolveParam params[] = {
244     make_tuple(8, 8, 50000, 8, filter),   make_tuple(64, 64, 1000, 8, filter),
245     make_tuple(32, 8, 10000, 8, filter),  make_tuple(8, 8, 50000, 10, filter),
246     make_tuple(64, 64, 1000, 10, filter), make_tuple(32, 8, 10000, 10, filter),
247     make_tuple(8, 8, 50000, 12, filter),  make_tuple(64, 64, 1000, 12, filter),
248     make_tuple(32, 8, 10000, 12, filter),
249   };
250   return ::testing::ValuesIn(params);
251 }
252 
253 AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() = default;
SetUp()254 void AV1HighbdHiprecConvolveTest::SetUp() {
255   rnd_.Reset(ACMRandom::DeterministicSeed());
256 }
257 
RunCheckOutput(highbd_hiprec_convolve_func test_impl)258 void AV1HighbdHiprecConvolveTest::RunCheckOutput(
259     highbd_hiprec_convolve_func test_impl) {
260   const int w = 128, h = 128;
261   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
262   const int num_iters = GET_PARAM(2);
263   const int bd = GET_PARAM(3);
264   int i, j;
265   const WienerConvolveParams conv_params = get_conv_params_wiener(bd);
266 
267   std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]);
268   ASSERT_NE(input, nullptr);
269 
270   // The AVX2 convolve functions always write rows with widths that are
271   // multiples of 16. So to avoid a buffer overflow, we may need to pad
272   // rows to a multiple of 16.
273   int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
274   std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
275   ASSERT_NE(output, nullptr);
276   std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
277   ASSERT_NE(output2, nullptr);
278 
279   // Generate random filter kernels
280   DECLARE_ALIGNED(16, InterpKernel, hkernel);
281   DECLARE_ALIGNED(16, InterpKernel, vkernel);
282 
283   for (i = 0; i < h; ++i)
284     for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
285 
286   uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input.get());
287   uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output.get());
288   uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2.get());
289   for (int kernel_type = 0; kernel_type < 6; kernel_type++) {
290     generate_kernels(&rnd_, hkernel, vkernel, kernel_type);
291     for (i = 0; i < num_iters; ++i) {
292       // Choose random locations within the source block
293       int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
294       int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
295       av1_highbd_wiener_convolve_add_src_c(
296           input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel,
297           16, vkernel, 16, out_w, out_h, &conv_params, bd);
298       test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w,
299                 hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd);
300 
301       for (j = 0; j < out_w * out_h; ++j)
302         ASSERT_EQ(output[j], output2[j])
303             << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
304             << (j / out_w) << ") on iteration " << i;
305     }
306   }
307 }
308 
RunSpeedTest(highbd_hiprec_convolve_func test_impl)309 void AV1HighbdHiprecConvolveTest::RunSpeedTest(
310     highbd_hiprec_convolve_func test_impl) {
311   const int w = 128, h = 128;
312   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
313   const int num_iters = GET_PARAM(2) / 500;
314   const int bd = GET_PARAM(3);
315   int i, j, k;
316   const WienerConvolveParams conv_params = get_conv_params_wiener(bd);
317 
318   std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]);
319   ASSERT_NE(input, nullptr);
320 
321   // The AVX2 convolve functions always write rows with widths that are
322   // multiples of 16. So to avoid a buffer overflow, we may need to pad
323   // rows to a multiple of 16.
324   int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
325   std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
326   ASSERT_NE(output, nullptr);
327   std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
328   ASSERT_NE(output2, nullptr);
329 
330   // Generate random filter kernels
331   DECLARE_ALIGNED(16, InterpKernel, hkernel);
332   DECLARE_ALIGNED(16, InterpKernel, vkernel);
333 
334   generate_kernels(&rnd_, hkernel, vkernel);
335 
336   for (i = 0; i < h; ++i)
337     for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
338 
339   uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input.get());
340   uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output.get());
341   uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2.get());
342 
343   aom_usec_timer ref_timer;
344   aom_usec_timer_start(&ref_timer);
345   for (i = 0; i < num_iters; ++i) {
346     for (j = 3; j < h - out_h - 4; j++) {
347       for (k = 3; k < w - out_w - 4; k++) {
348         av1_highbd_wiener_convolve_add_src_c(
349             input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel,
350             16, out_w, out_h, &conv_params, bd);
351       }
352     }
353   }
354   aom_usec_timer_mark(&ref_timer);
355   const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
356 
357   aom_usec_timer tst_timer;
358   aom_usec_timer_start(&tst_timer);
359   for (i = 0; i < num_iters; ++i) {
360     for (j = 3; j < h - out_h - 4; j++) {
361       for (k = 3; k < w - out_w - 4; k++) {
362         test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16,
363                   vkernel, 16, out_w, out_h, &conv_params, bd);
364       }
365     }
366   }
367   aom_usec_timer_mark(&tst_timer);
368   const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
369 
370   std::cout << "[          ] C time = " << ref_time / 1000
371             << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
372 
373   EXPECT_GT(ref_time, tst_time)
374       << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
375       << "C time: " << ref_time << " us\n"
376       << "SIMD time: " << tst_time << " us\n";
377 }
378 }  // namespace AV1HighbdHiprecConvolve
379 #endif  // CONFIG_AV1_HIGHBITDEPTH
380 }  // namespace libaom_test
381