/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11*77c1e3ccSAndroid Build Coastguard Worker
12*77c1e3ccSAndroid Build Coastguard Worker #include "test/hiprec_convolve_test_util.h"
13*77c1e3ccSAndroid Build Coastguard Worker
14*77c1e3ccSAndroid Build Coastguard Worker #include <memory>
15*77c1e3ccSAndroid Build Coastguard Worker #include <new>
16*77c1e3ccSAndroid Build Coastguard Worker
17*77c1e3ccSAndroid Build Coastguard Worker #include "av1/common/restoration.h"
18*77c1e3ccSAndroid Build Coastguard Worker
19*77c1e3ccSAndroid Build Coastguard Worker using std::make_tuple;
20*77c1e3ccSAndroid Build Coastguard Worker using std::tuple;
21*77c1e3ccSAndroid Build Coastguard Worker
22*77c1e3ccSAndroid Build Coastguard Worker namespace libaom_test {
23*77c1e3ccSAndroid Build Coastguard Worker
24*77c1e3ccSAndroid Build Coastguard Worker // Generate a random pair of filter kernels, using the ranges
25*77c1e3ccSAndroid Build Coastguard Worker // of possible values from the loop-restoration experiment
generate_kernels(ACMRandom * rnd,InterpKernel hkernel,InterpKernel vkernel,int kernel_type=2)26*77c1e3ccSAndroid Build Coastguard Worker static void generate_kernels(ACMRandom *rnd, InterpKernel hkernel,
27*77c1e3ccSAndroid Build Coastguard Worker InterpKernel vkernel, int kernel_type = 2) {
28*77c1e3ccSAndroid Build Coastguard Worker if (kernel_type == 0) {
29*77c1e3ccSAndroid Build Coastguard Worker // Low possible values for filter coefficients, 7-tap kernel
30*77c1e3ccSAndroid Build Coastguard Worker hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = WIENER_FILT_TAP0_MINV;
31*77c1e3ccSAndroid Build Coastguard Worker hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MINV;
32*77c1e3ccSAndroid Build Coastguard Worker hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MINV;
33*77c1e3ccSAndroid Build Coastguard Worker hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
34*77c1e3ccSAndroid Build Coastguard Worker hkernel[7] = vkernel[7] = 0;
35*77c1e3ccSAndroid Build Coastguard Worker } else if (kernel_type == 1) {
36*77c1e3ccSAndroid Build Coastguard Worker // Max possible values for filter coefficients, 7-tap kernel
37*77c1e3ccSAndroid Build Coastguard Worker hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = WIENER_FILT_TAP0_MAXV;
38*77c1e3ccSAndroid Build Coastguard Worker hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MAXV;
39*77c1e3ccSAndroid Build Coastguard Worker hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MAXV;
40*77c1e3ccSAndroid Build Coastguard Worker hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
41*77c1e3ccSAndroid Build Coastguard Worker hkernel[7] = vkernel[7] = 0;
42*77c1e3ccSAndroid Build Coastguard Worker } else if (kernel_type == 2) {
43*77c1e3ccSAndroid Build Coastguard Worker // Randomly generated values for filter coefficients, 7-tap kernel
44*77c1e3ccSAndroid Build Coastguard Worker hkernel[0] = hkernel[6] =
45*77c1e3ccSAndroid Build Coastguard Worker WIENER_FILT_TAP0_MINV +
46*77c1e3ccSAndroid Build Coastguard Worker rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
47*77c1e3ccSAndroid Build Coastguard Worker hkernel[1] = hkernel[5] =
48*77c1e3ccSAndroid Build Coastguard Worker WIENER_FILT_TAP1_MINV +
49*77c1e3ccSAndroid Build Coastguard Worker rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
50*77c1e3ccSAndroid Build Coastguard Worker hkernel[2] = hkernel[4] =
51*77c1e3ccSAndroid Build Coastguard Worker WIENER_FILT_TAP2_MINV +
52*77c1e3ccSAndroid Build Coastguard Worker rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
53*77c1e3ccSAndroid Build Coastguard Worker hkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
54*77c1e3ccSAndroid Build Coastguard Worker hkernel[7] = 0;
55*77c1e3ccSAndroid Build Coastguard Worker
56*77c1e3ccSAndroid Build Coastguard Worker vkernel[0] = vkernel[6] =
57*77c1e3ccSAndroid Build Coastguard Worker WIENER_FILT_TAP0_MINV +
58*77c1e3ccSAndroid Build Coastguard Worker rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 2 - WIENER_FILT_TAP0_MINV);
59*77c1e3ccSAndroid Build Coastguard Worker vkernel[1] = vkernel[5] =
60*77c1e3ccSAndroid Build Coastguard Worker WIENER_FILT_TAP1_MINV +
61*77c1e3ccSAndroid Build Coastguard Worker rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 2 - WIENER_FILT_TAP1_MINV);
62*77c1e3ccSAndroid Build Coastguard Worker vkernel[2] = vkernel[4] =
63*77c1e3ccSAndroid Build Coastguard Worker WIENER_FILT_TAP2_MINV +
64*77c1e3ccSAndroid Build Coastguard Worker rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 2 - WIENER_FILT_TAP2_MINV);
65*77c1e3ccSAndroid Build Coastguard Worker vkernel[3] = -2 * (vkernel[0] + vkernel[1] + vkernel[2]);
66*77c1e3ccSAndroid Build Coastguard Worker vkernel[7] = 0;
67*77c1e3ccSAndroid Build Coastguard Worker } else if (kernel_type == 3) {
68*77c1e3ccSAndroid Build Coastguard Worker // Low possible values for filter coefficients, 5-tap kernel
69*77c1e3ccSAndroid Build Coastguard Worker hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = 0;
70*77c1e3ccSAndroid Build Coastguard Worker hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MINV;
71*77c1e3ccSAndroid Build Coastguard Worker hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MINV;
72*77c1e3ccSAndroid Build Coastguard Worker hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
73*77c1e3ccSAndroid Build Coastguard Worker hkernel[7] = vkernel[7] = 0;
74*77c1e3ccSAndroid Build Coastguard Worker } else if (kernel_type == 4) {
75*77c1e3ccSAndroid Build Coastguard Worker // Max possible values for filter coefficients, 5-tap kernel
76*77c1e3ccSAndroid Build Coastguard Worker hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = 0;
77*77c1e3ccSAndroid Build Coastguard Worker hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MAXV;
78*77c1e3ccSAndroid Build Coastguard Worker hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MAXV;
79*77c1e3ccSAndroid Build Coastguard Worker hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
80*77c1e3ccSAndroid Build Coastguard Worker hkernel[7] = vkernel[7] = 0;
81*77c1e3ccSAndroid Build Coastguard Worker } else {
82*77c1e3ccSAndroid Build Coastguard Worker // Randomly generated values for filter coefficients, 5-tap kernel
83*77c1e3ccSAndroid Build Coastguard Worker hkernel[0] = hkernel[6] = 0;
84*77c1e3ccSAndroid Build Coastguard Worker hkernel[1] = hkernel[5] =
85*77c1e3ccSAndroid Build Coastguard Worker WIENER_FILT_TAP1_MINV +
86*77c1e3ccSAndroid Build Coastguard Worker rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
87*77c1e3ccSAndroid Build Coastguard Worker hkernel[2] = hkernel[4] =
88*77c1e3ccSAndroid Build Coastguard Worker WIENER_FILT_TAP2_MINV +
89*77c1e3ccSAndroid Build Coastguard Worker rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
90*77c1e3ccSAndroid Build Coastguard Worker hkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
91*77c1e3ccSAndroid Build Coastguard Worker hkernel[7] = 0;
92*77c1e3ccSAndroid Build Coastguard Worker
93*77c1e3ccSAndroid Build Coastguard Worker vkernel[0] = vkernel[6] = 0;
94*77c1e3ccSAndroid Build Coastguard Worker vkernel[1] = vkernel[5] =
95*77c1e3ccSAndroid Build Coastguard Worker WIENER_FILT_TAP1_MINV +
96*77c1e3ccSAndroid Build Coastguard Worker rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 2 - WIENER_FILT_TAP1_MINV);
97*77c1e3ccSAndroid Build Coastguard Worker vkernel[2] = vkernel[4] =
98*77c1e3ccSAndroid Build Coastguard Worker WIENER_FILT_TAP2_MINV +
99*77c1e3ccSAndroid Build Coastguard Worker rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 2 - WIENER_FILT_TAP2_MINV);
100*77c1e3ccSAndroid Build Coastguard Worker vkernel[3] = -2 * (vkernel[0] + vkernel[1] + vkernel[2]);
101*77c1e3ccSAndroid Build Coastguard Worker vkernel[7] = 0;
102*77c1e3ccSAndroid Build Coastguard Worker }
103*77c1e3ccSAndroid Build Coastguard Worker }
104*77c1e3ccSAndroid Build Coastguard Worker
105*77c1e3ccSAndroid Build Coastguard Worker namespace AV1HiprecConvolve {
106*77c1e3ccSAndroid Build Coastguard Worker
BuildParams(hiprec_convolve_func filter)107*77c1e3ccSAndroid Build Coastguard Worker ::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
108*77c1e3ccSAndroid Build Coastguard Worker hiprec_convolve_func filter) {
109*77c1e3ccSAndroid Build Coastguard Worker const HiprecConvolveParam params[] = {
110*77c1e3ccSAndroid Build Coastguard Worker make_tuple(8, 8, 50000, filter), make_tuple(8, 4, 50000, filter),
111*77c1e3ccSAndroid Build Coastguard Worker make_tuple(64, 24, 1000, filter), make_tuple(64, 64, 1000, filter),
112*77c1e3ccSAndroid Build Coastguard Worker make_tuple(64, 56, 1000, filter), make_tuple(32, 8, 10000, filter),
113*77c1e3ccSAndroid Build Coastguard Worker make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter),
114*77c1e3ccSAndroid Build Coastguard Worker make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter),
115*77c1e3ccSAndroid Build Coastguard Worker make_tuple(64, 34, 1000, filter), make_tuple(8, 17, 10000, filter),
116*77c1e3ccSAndroid Build Coastguard Worker make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter)
117*77c1e3ccSAndroid Build Coastguard Worker };
118*77c1e3ccSAndroid Build Coastguard Worker return ::testing::ValuesIn(params);
119*77c1e3ccSAndroid Build Coastguard Worker }
120*77c1e3ccSAndroid Build Coastguard Worker
121*77c1e3ccSAndroid Build Coastguard Worker AV1HiprecConvolveTest::~AV1HiprecConvolveTest() = default;
SetUp()122*77c1e3ccSAndroid Build Coastguard Worker void AV1HiprecConvolveTest::SetUp() {
123*77c1e3ccSAndroid Build Coastguard Worker rnd_.Reset(ACMRandom::DeterministicSeed());
124*77c1e3ccSAndroid Build Coastguard Worker }
125*77c1e3ccSAndroid Build Coastguard Worker
RunCheckOutput(hiprec_convolve_func test_impl)126*77c1e3ccSAndroid Build Coastguard Worker void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
127*77c1e3ccSAndroid Build Coastguard Worker const int w = 128, h = 128;
128*77c1e3ccSAndroid Build Coastguard Worker const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
129*77c1e3ccSAndroid Build Coastguard Worker const int num_iters = GET_PARAM(2);
130*77c1e3ccSAndroid Build Coastguard Worker int i, j, k, m;
131*77c1e3ccSAndroid Build Coastguard Worker const WienerConvolveParams conv_params = get_conv_params_wiener(8);
132*77c1e3ccSAndroid Build Coastguard Worker
133*77c1e3ccSAndroid Build Coastguard Worker std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]);
134*77c1e3ccSAndroid Build Coastguard Worker ASSERT_NE(input_, nullptr);
135*77c1e3ccSAndroid Build Coastguard Worker uint8_t *input = input_.get();
136*77c1e3ccSAndroid Build Coastguard Worker
137*77c1e3ccSAndroid Build Coastguard Worker // The AVX2 convolve functions always write rows with widths that are
138*77c1e3ccSAndroid Build Coastguard Worker // multiples of 16. So to avoid a buffer overflow, we may need to pad
139*77c1e3ccSAndroid Build Coastguard Worker // rows to a multiple of 16.
140*77c1e3ccSAndroid Build Coastguard Worker int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
141*77c1e3ccSAndroid Build Coastguard Worker std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
142*77c1e3ccSAndroid Build Coastguard Worker ASSERT_NE(output, nullptr);
143*77c1e3ccSAndroid Build Coastguard Worker std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
144*77c1e3ccSAndroid Build Coastguard Worker ASSERT_NE(output2, nullptr);
145*77c1e3ccSAndroid Build Coastguard Worker
146*77c1e3ccSAndroid Build Coastguard Worker // Generate random filter kernels
147*77c1e3ccSAndroid Build Coastguard Worker DECLARE_ALIGNED(16, InterpKernel, hkernel);
148*77c1e3ccSAndroid Build Coastguard Worker DECLARE_ALIGNED(16, InterpKernel, vkernel);
149*77c1e3ccSAndroid Build Coastguard Worker
150*77c1e3ccSAndroid Build Coastguard Worker for (int kernel_type = 0; kernel_type < 6; kernel_type++) {
151*77c1e3ccSAndroid Build Coastguard Worker generate_kernels(&rnd_, hkernel, vkernel, kernel_type);
152*77c1e3ccSAndroid Build Coastguard Worker for (i = 0; i < num_iters; ++i) {
153*77c1e3ccSAndroid Build Coastguard Worker for (k = 0; k < h; ++k)
154*77c1e3ccSAndroid Build Coastguard Worker for (m = 0; m < w; ++m) input[k * w + m] = rnd_.Rand8();
155*77c1e3ccSAndroid Build Coastguard Worker // Choose random locations within the source block
156*77c1e3ccSAndroid Build Coastguard Worker int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
157*77c1e3ccSAndroid Build Coastguard Worker int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
158*77c1e3ccSAndroid Build Coastguard Worker av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w,
159*77c1e3ccSAndroid Build Coastguard Worker output.get(), out_w, hkernel, 16, vkernel,
160*77c1e3ccSAndroid Build Coastguard Worker 16, out_w, out_h, &conv_params);
161*77c1e3ccSAndroid Build Coastguard Worker test_impl(input + offset_r * w + offset_c, w, output2.get(), out_w,
162*77c1e3ccSAndroid Build Coastguard Worker hkernel, 16, vkernel, 16, out_w, out_h, &conv_params);
163*77c1e3ccSAndroid Build Coastguard Worker
164*77c1e3ccSAndroid Build Coastguard Worker for (j = 0; j < out_w * out_h; ++j)
165*77c1e3ccSAndroid Build Coastguard Worker ASSERT_EQ(output[j], output2[j])
166*77c1e3ccSAndroid Build Coastguard Worker << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
167*77c1e3ccSAndroid Build Coastguard Worker << (j / out_w) << ") on iteration " << i;
168*77c1e3ccSAndroid Build Coastguard Worker }
169*77c1e3ccSAndroid Build Coastguard Worker }
170*77c1e3ccSAndroid Build Coastguard Worker }
171*77c1e3ccSAndroid Build Coastguard Worker
RunSpeedTest(hiprec_convolve_func test_impl)172*77c1e3ccSAndroid Build Coastguard Worker void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) {
173*77c1e3ccSAndroid Build Coastguard Worker const int w = 128, h = 128;
174*77c1e3ccSAndroid Build Coastguard Worker const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
175*77c1e3ccSAndroid Build Coastguard Worker const int num_iters = GET_PARAM(2) / 500;
176*77c1e3ccSAndroid Build Coastguard Worker int i, j, k;
177*77c1e3ccSAndroid Build Coastguard Worker const WienerConvolveParams conv_params = get_conv_params_wiener(8);
178*77c1e3ccSAndroid Build Coastguard Worker
179*77c1e3ccSAndroid Build Coastguard Worker std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]);
180*77c1e3ccSAndroid Build Coastguard Worker ASSERT_NE(input_, nullptr);
181*77c1e3ccSAndroid Build Coastguard Worker uint8_t *input = input_.get();
182*77c1e3ccSAndroid Build Coastguard Worker
183*77c1e3ccSAndroid Build Coastguard Worker // The AVX2 convolve functions always write rows with widths that are
184*77c1e3ccSAndroid Build Coastguard Worker // multiples of 16. So to avoid a buffer overflow, we may need to pad
185*77c1e3ccSAndroid Build Coastguard Worker // rows to a multiple of 16.
186*77c1e3ccSAndroid Build Coastguard Worker int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
187*77c1e3ccSAndroid Build Coastguard Worker std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
188*77c1e3ccSAndroid Build Coastguard Worker ASSERT_NE(output, nullptr);
189*77c1e3ccSAndroid Build Coastguard Worker std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
190*77c1e3ccSAndroid Build Coastguard Worker ASSERT_NE(output2, nullptr);
191*77c1e3ccSAndroid Build Coastguard Worker
192*77c1e3ccSAndroid Build Coastguard Worker // Generate random filter kernels
193*77c1e3ccSAndroid Build Coastguard Worker DECLARE_ALIGNED(16, InterpKernel, hkernel);
194*77c1e3ccSAndroid Build Coastguard Worker DECLARE_ALIGNED(16, InterpKernel, vkernel);
195*77c1e3ccSAndroid Build Coastguard Worker
196*77c1e3ccSAndroid Build Coastguard Worker generate_kernels(&rnd_, hkernel, vkernel);
197*77c1e3ccSAndroid Build Coastguard Worker
198*77c1e3ccSAndroid Build Coastguard Worker for (i = 0; i < h; ++i)
199*77c1e3ccSAndroid Build Coastguard Worker for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
200*77c1e3ccSAndroid Build Coastguard Worker
201*77c1e3ccSAndroid Build Coastguard Worker aom_usec_timer ref_timer;
202*77c1e3ccSAndroid Build Coastguard Worker aom_usec_timer_start(&ref_timer);
203*77c1e3ccSAndroid Build Coastguard Worker for (i = 0; i < num_iters; ++i) {
204*77c1e3ccSAndroid Build Coastguard Worker for (j = 3; j < h - out_h - 4; j++) {
205*77c1e3ccSAndroid Build Coastguard Worker for (k = 3; k < w - out_w - 4; k++) {
206*77c1e3ccSAndroid Build Coastguard Worker av1_wiener_convolve_add_src_c(input + j * w + k, w, output.get(), out_w,
207*77c1e3ccSAndroid Build Coastguard Worker hkernel, 16, vkernel, 16, out_w, out_h,
208*77c1e3ccSAndroid Build Coastguard Worker &conv_params);
209*77c1e3ccSAndroid Build Coastguard Worker }
210*77c1e3ccSAndroid Build Coastguard Worker }
211*77c1e3ccSAndroid Build Coastguard Worker }
212*77c1e3ccSAndroid Build Coastguard Worker aom_usec_timer_mark(&ref_timer);
213*77c1e3ccSAndroid Build Coastguard Worker const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
214*77c1e3ccSAndroid Build Coastguard Worker
215*77c1e3ccSAndroid Build Coastguard Worker aom_usec_timer tst_timer;
216*77c1e3ccSAndroid Build Coastguard Worker aom_usec_timer_start(&tst_timer);
217*77c1e3ccSAndroid Build Coastguard Worker for (i = 0; i < num_iters; ++i) {
218*77c1e3ccSAndroid Build Coastguard Worker for (j = 3; j < h - out_h - 4; j++) {
219*77c1e3ccSAndroid Build Coastguard Worker for (k = 3; k < w - out_w - 4; k++) {
220*77c1e3ccSAndroid Build Coastguard Worker test_impl(input + j * w + k, w, output2.get(), out_w, hkernel, 16,
221*77c1e3ccSAndroid Build Coastguard Worker vkernel, 16, out_w, out_h, &conv_params);
222*77c1e3ccSAndroid Build Coastguard Worker }
223*77c1e3ccSAndroid Build Coastguard Worker }
224*77c1e3ccSAndroid Build Coastguard Worker }
225*77c1e3ccSAndroid Build Coastguard Worker aom_usec_timer_mark(&tst_timer);
226*77c1e3ccSAndroid Build Coastguard Worker const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
227*77c1e3ccSAndroid Build Coastguard Worker
228*77c1e3ccSAndroid Build Coastguard Worker std::cout << "[ ] C time = " << ref_time / 1000
229*77c1e3ccSAndroid Build Coastguard Worker << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
230*77c1e3ccSAndroid Build Coastguard Worker
231*77c1e3ccSAndroid Build Coastguard Worker EXPECT_GT(ref_time, tst_time)
232*77c1e3ccSAndroid Build Coastguard Worker << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
233*77c1e3ccSAndroid Build Coastguard Worker << "C time: " << ref_time << " us\n"
234*77c1e3ccSAndroid Build Coastguard Worker << "SIMD time: " << tst_time << " us\n";
235*77c1e3ccSAndroid Build Coastguard Worker }
236*77c1e3ccSAndroid Build Coastguard Worker } // namespace AV1HiprecConvolve
237*77c1e3ccSAndroid Build Coastguard Worker
238*77c1e3ccSAndroid Build Coastguard Worker #if CONFIG_AV1_HIGHBITDEPTH
239*77c1e3ccSAndroid Build Coastguard Worker namespace AV1HighbdHiprecConvolve {
240*77c1e3ccSAndroid Build Coastguard Worker
BuildParams(highbd_hiprec_convolve_func filter)241*77c1e3ccSAndroid Build Coastguard Worker ::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
242*77c1e3ccSAndroid Build Coastguard Worker highbd_hiprec_convolve_func filter) {
243*77c1e3ccSAndroid Build Coastguard Worker const HighbdHiprecConvolveParam params[] = {
244*77c1e3ccSAndroid Build Coastguard Worker make_tuple(8, 8, 50000, 8, filter), make_tuple(64, 64, 1000, 8, filter),
245*77c1e3ccSAndroid Build Coastguard Worker make_tuple(32, 8, 10000, 8, filter), make_tuple(8, 8, 50000, 10, filter),
246*77c1e3ccSAndroid Build Coastguard Worker make_tuple(64, 64, 1000, 10, filter), make_tuple(32, 8, 10000, 10, filter),
247*77c1e3ccSAndroid Build Coastguard Worker make_tuple(8, 8, 50000, 12, filter), make_tuple(64, 64, 1000, 12, filter),
248*77c1e3ccSAndroid Build Coastguard Worker make_tuple(32, 8, 10000, 12, filter),
249*77c1e3ccSAndroid Build Coastguard Worker };
250*77c1e3ccSAndroid Build Coastguard Worker return ::testing::ValuesIn(params);
251*77c1e3ccSAndroid Build Coastguard Worker }
252*77c1e3ccSAndroid Build Coastguard Worker
253*77c1e3ccSAndroid Build Coastguard Worker AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() = default;
SetUp()254*77c1e3ccSAndroid Build Coastguard Worker void AV1HighbdHiprecConvolveTest::SetUp() {
255*77c1e3ccSAndroid Build Coastguard Worker rnd_.Reset(ACMRandom::DeterministicSeed());
256*77c1e3ccSAndroid Build Coastguard Worker }
257*77c1e3ccSAndroid Build Coastguard Worker
RunCheckOutput(highbd_hiprec_convolve_func test_impl)258*77c1e3ccSAndroid Build Coastguard Worker void AV1HighbdHiprecConvolveTest::RunCheckOutput(
259*77c1e3ccSAndroid Build Coastguard Worker highbd_hiprec_convolve_func test_impl) {
260*77c1e3ccSAndroid Build Coastguard Worker const int w = 128, h = 128;
261*77c1e3ccSAndroid Build Coastguard Worker const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
262*77c1e3ccSAndroid Build Coastguard Worker const int num_iters = GET_PARAM(2);
263*77c1e3ccSAndroid Build Coastguard Worker const int bd = GET_PARAM(3);
264*77c1e3ccSAndroid Build Coastguard Worker int i, j;
265*77c1e3ccSAndroid Build Coastguard Worker const WienerConvolveParams conv_params = get_conv_params_wiener(bd);
266*77c1e3ccSAndroid Build Coastguard Worker
267*77c1e3ccSAndroid Build Coastguard Worker std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]);
268*77c1e3ccSAndroid Build Coastguard Worker ASSERT_NE(input, nullptr);
269*77c1e3ccSAndroid Build Coastguard Worker
270*77c1e3ccSAndroid Build Coastguard Worker // The AVX2 convolve functions always write rows with widths that are
271*77c1e3ccSAndroid Build Coastguard Worker // multiples of 16. So to avoid a buffer overflow, we may need to pad
272*77c1e3ccSAndroid Build Coastguard Worker // rows to a multiple of 16.
273*77c1e3ccSAndroid Build Coastguard Worker int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
274*77c1e3ccSAndroid Build Coastguard Worker std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
275*77c1e3ccSAndroid Build Coastguard Worker ASSERT_NE(output, nullptr);
276*77c1e3ccSAndroid Build Coastguard Worker std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
277*77c1e3ccSAndroid Build Coastguard Worker ASSERT_NE(output2, nullptr);
278*77c1e3ccSAndroid Build Coastguard Worker
279*77c1e3ccSAndroid Build Coastguard Worker // Generate random filter kernels
280*77c1e3ccSAndroid Build Coastguard Worker DECLARE_ALIGNED(16, InterpKernel, hkernel);
281*77c1e3ccSAndroid Build Coastguard Worker DECLARE_ALIGNED(16, InterpKernel, vkernel);
282*77c1e3ccSAndroid Build Coastguard Worker
283*77c1e3ccSAndroid Build Coastguard Worker for (i = 0; i < h; ++i)
284*77c1e3ccSAndroid Build Coastguard Worker for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
285*77c1e3ccSAndroid Build Coastguard Worker
286*77c1e3ccSAndroid Build Coastguard Worker uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input.get());
287*77c1e3ccSAndroid Build Coastguard Worker uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output.get());
288*77c1e3ccSAndroid Build Coastguard Worker uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2.get());
289*77c1e3ccSAndroid Build Coastguard Worker for (int kernel_type = 0; kernel_type < 6; kernel_type++) {
290*77c1e3ccSAndroid Build Coastguard Worker generate_kernels(&rnd_, hkernel, vkernel, kernel_type);
291*77c1e3ccSAndroid Build Coastguard Worker for (i = 0; i < num_iters; ++i) {
292*77c1e3ccSAndroid Build Coastguard Worker // Choose random locations within the source block
293*77c1e3ccSAndroid Build Coastguard Worker int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
294*77c1e3ccSAndroid Build Coastguard Worker int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
295*77c1e3ccSAndroid Build Coastguard Worker av1_highbd_wiener_convolve_add_src_c(
296*77c1e3ccSAndroid Build Coastguard Worker input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel,
297*77c1e3ccSAndroid Build Coastguard Worker 16, vkernel, 16, out_w, out_h, &conv_params, bd);
298*77c1e3ccSAndroid Build Coastguard Worker test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w,
299*77c1e3ccSAndroid Build Coastguard Worker hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd);
300*77c1e3ccSAndroid Build Coastguard Worker
301*77c1e3ccSAndroid Build Coastguard Worker for (j = 0; j < out_w * out_h; ++j)
302*77c1e3ccSAndroid Build Coastguard Worker ASSERT_EQ(output[j], output2[j])
303*77c1e3ccSAndroid Build Coastguard Worker << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
304*77c1e3ccSAndroid Build Coastguard Worker << (j / out_w) << ") on iteration " << i;
305*77c1e3ccSAndroid Build Coastguard Worker }
306*77c1e3ccSAndroid Build Coastguard Worker }
307*77c1e3ccSAndroid Build Coastguard Worker }
308*77c1e3ccSAndroid Build Coastguard Worker
RunSpeedTest(highbd_hiprec_convolve_func test_impl)309*77c1e3ccSAndroid Build Coastguard Worker void AV1HighbdHiprecConvolveTest::RunSpeedTest(
310*77c1e3ccSAndroid Build Coastguard Worker highbd_hiprec_convolve_func test_impl) {
311*77c1e3ccSAndroid Build Coastguard Worker const int w = 128, h = 128;
312*77c1e3ccSAndroid Build Coastguard Worker const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
313*77c1e3ccSAndroid Build Coastguard Worker const int num_iters = GET_PARAM(2) / 500;
314*77c1e3ccSAndroid Build Coastguard Worker const int bd = GET_PARAM(3);
315*77c1e3ccSAndroid Build Coastguard Worker int i, j, k;
316*77c1e3ccSAndroid Build Coastguard Worker const WienerConvolveParams conv_params = get_conv_params_wiener(bd);
317*77c1e3ccSAndroid Build Coastguard Worker
318*77c1e3ccSAndroid Build Coastguard Worker std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]);
319*77c1e3ccSAndroid Build Coastguard Worker ASSERT_NE(input, nullptr);
320*77c1e3ccSAndroid Build Coastguard Worker
321*77c1e3ccSAndroid Build Coastguard Worker // The AVX2 convolve functions always write rows with widths that are
322*77c1e3ccSAndroid Build Coastguard Worker // multiples of 16. So to avoid a buffer overflow, we may need to pad
323*77c1e3ccSAndroid Build Coastguard Worker // rows to a multiple of 16.
324*77c1e3ccSAndroid Build Coastguard Worker int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
325*77c1e3ccSAndroid Build Coastguard Worker std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
326*77c1e3ccSAndroid Build Coastguard Worker ASSERT_NE(output, nullptr);
327*77c1e3ccSAndroid Build Coastguard Worker std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
328*77c1e3ccSAndroid Build Coastguard Worker ASSERT_NE(output2, nullptr);
329*77c1e3ccSAndroid Build Coastguard Worker
330*77c1e3ccSAndroid Build Coastguard Worker // Generate random filter kernels
331*77c1e3ccSAndroid Build Coastguard Worker DECLARE_ALIGNED(16, InterpKernel, hkernel);
332*77c1e3ccSAndroid Build Coastguard Worker DECLARE_ALIGNED(16, InterpKernel, vkernel);
333*77c1e3ccSAndroid Build Coastguard Worker
334*77c1e3ccSAndroid Build Coastguard Worker generate_kernels(&rnd_, hkernel, vkernel);
335*77c1e3ccSAndroid Build Coastguard Worker
336*77c1e3ccSAndroid Build Coastguard Worker for (i = 0; i < h; ++i)
337*77c1e3ccSAndroid Build Coastguard Worker for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
338*77c1e3ccSAndroid Build Coastguard Worker
339*77c1e3ccSAndroid Build Coastguard Worker uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input.get());
340*77c1e3ccSAndroid Build Coastguard Worker uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output.get());
341*77c1e3ccSAndroid Build Coastguard Worker uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2.get());
342*77c1e3ccSAndroid Build Coastguard Worker
343*77c1e3ccSAndroid Build Coastguard Worker aom_usec_timer ref_timer;
344*77c1e3ccSAndroid Build Coastguard Worker aom_usec_timer_start(&ref_timer);
345*77c1e3ccSAndroid Build Coastguard Worker for (i = 0; i < num_iters; ++i) {
346*77c1e3ccSAndroid Build Coastguard Worker for (j = 3; j < h - out_h - 4; j++) {
347*77c1e3ccSAndroid Build Coastguard Worker for (k = 3; k < w - out_w - 4; k++) {
348*77c1e3ccSAndroid Build Coastguard Worker av1_highbd_wiener_convolve_add_src_c(
349*77c1e3ccSAndroid Build Coastguard Worker input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel,
350*77c1e3ccSAndroid Build Coastguard Worker 16, out_w, out_h, &conv_params, bd);
351*77c1e3ccSAndroid Build Coastguard Worker }
352*77c1e3ccSAndroid Build Coastguard Worker }
353*77c1e3ccSAndroid Build Coastguard Worker }
354*77c1e3ccSAndroid Build Coastguard Worker aom_usec_timer_mark(&ref_timer);
355*77c1e3ccSAndroid Build Coastguard Worker const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
356*77c1e3ccSAndroid Build Coastguard Worker
357*77c1e3ccSAndroid Build Coastguard Worker aom_usec_timer tst_timer;
358*77c1e3ccSAndroid Build Coastguard Worker aom_usec_timer_start(&tst_timer);
359*77c1e3ccSAndroid Build Coastguard Worker for (i = 0; i < num_iters; ++i) {
360*77c1e3ccSAndroid Build Coastguard Worker for (j = 3; j < h - out_h - 4; j++) {
361*77c1e3ccSAndroid Build Coastguard Worker for (k = 3; k < w - out_w - 4; k++) {
362*77c1e3ccSAndroid Build Coastguard Worker test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16,
363*77c1e3ccSAndroid Build Coastguard Worker vkernel, 16, out_w, out_h, &conv_params, bd);
364*77c1e3ccSAndroid Build Coastguard Worker }
365*77c1e3ccSAndroid Build Coastguard Worker }
366*77c1e3ccSAndroid Build Coastguard Worker }
367*77c1e3ccSAndroid Build Coastguard Worker aom_usec_timer_mark(&tst_timer);
368*77c1e3ccSAndroid Build Coastguard Worker const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
369*77c1e3ccSAndroid Build Coastguard Worker
370*77c1e3ccSAndroid Build Coastguard Worker std::cout << "[ ] C time = " << ref_time / 1000
371*77c1e3ccSAndroid Build Coastguard Worker << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
372*77c1e3ccSAndroid Build Coastguard Worker
373*77c1e3ccSAndroid Build Coastguard Worker EXPECT_GT(ref_time, tst_time)
374*77c1e3ccSAndroid Build Coastguard Worker << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
375*77c1e3ccSAndroid Build Coastguard Worker << "C time: " << ref_time << " us\n"
376*77c1e3ccSAndroid Build Coastguard Worker << "SIMD time: " << tst_time << " us\n";
377*77c1e3ccSAndroid Build Coastguard Worker }
378*77c1e3ccSAndroid Build Coastguard Worker } // namespace AV1HighbdHiprecConvolve
379*77c1e3ccSAndroid Build Coastguard Worker #endif // CONFIG_AV1_HIGHBITDEPTH
380*77c1e3ccSAndroid Build Coastguard Worker } // namespace libaom_test
381