1*4e366538SXin Li /*
2*4e366538SXin Li * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3*4e366538SXin Li *
4*4e366538SXin Li * Use of this source code is governed by a BSD-style license
5*4e366538SXin Li * that can be found in the LICENSE file in the root of the source
6*4e366538SXin Li * tree. An additional intellectual property rights grant can be found
7*4e366538SXin Li * in the file PATENTS. All contributing project authors may
8*4e366538SXin Li * be found in the AUTHORS file in the root of the source tree.
9*4e366538SXin Li */
10*4e366538SXin Li
11*4e366538SXin Li #include <stdlib.h>
12*4e366538SXin Li #include <time.h>
13*4e366538SXin Li
14*4e366538SXin Li #include "../unit_test/unit_test.h"
15*4e366538SXin Li #include "libyuv/cpu_id.h"
16*4e366538SXin Li #include "libyuv/scale.h"
17*4e366538SXin Li
18*4e366538SXin Li #ifdef ENABLE_ROW_TESTS
19*4e366538SXin Li #include "libyuv/scale_row.h" // For ScaleRowDown2Box_Odd_C
20*4e366538SXin Li #endif
21*4e366538SXin Li
// STRINGIZE turns its argument into a string literal. FILELINESTR builds a
// single "file:line" literal by pasting the file string, ":" and the
// stringized line together. Because the line argument is substituted into
// FILELINESTR before it reaches the # operator inside STRINGIZE, a macro
// argument such as __LINE__ is expanded to its number first.
22*4e366538SXin Li #define STRINGIZE(line) #line
23*4e366538SXin Li #define FILELINESTR(file, line) file ":" STRINGIZE(line)
24*4e366538SXin Li
// On RISC-V targets built with a non-clang compiler: mark slow tests as
// disabled and drop any ENABLE_FULL_TESTS definition made before this point.
25*4e366538SXin Li #if defined(__riscv) && !defined(__clang__)
26*4e366538SXin Li #define DISABLE_SLOW_TESTS
27*4e366538SXin Li #undef ENABLE_FULL_TESTS
28*4e366538SXin Li #endif
29*4e366538SXin Li
// ENABLE_FULL_TESTS is defined whenever slow tests are not disabled, and
// unconditionally when targeting x86 / x86-64.
30*4e366538SXin Li #if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
31*4e366538SXin Li // SLOW TESTS are those that are unoptimized C code.
32*4e366538SXin Li // FULL TESTS are optimized but test many variations of the same code.
33*4e366538SXin Li #define ENABLE_FULL_TESTS
34*4e366538SXin Li #endif
35*4e366538SXin Li
36*4e366538SXin Li namespace libyuv {
37*4e366538SXin Li
38*4e366538SXin Li // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
I420TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)39*4e366538SXin Li static int I420TestFilter(int src_width,
40*4e366538SXin Li int src_height,
41*4e366538SXin Li int dst_width,
42*4e366538SXin Li int dst_height,
43*4e366538SXin Li FilterMode f,
44*4e366538SXin Li int benchmark_iterations,
45*4e366538SXin Li int disable_cpu_flags,
46*4e366538SXin Li int benchmark_cpu_info) {
47*4e366538SXin Li if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
48*4e366538SXin Li return 0;
49*4e366538SXin Li }
50*4e366538SXin Li
51*4e366538SXin Li int i, j;
52*4e366538SXin Li int src_width_uv = (Abs(src_width) + 1) >> 1;
53*4e366538SXin Li int src_height_uv = (Abs(src_height) + 1) >> 1;
54*4e366538SXin Li
55*4e366538SXin Li int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
56*4e366538SXin Li int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
57*4e366538SXin Li
58*4e366538SXin Li int src_stride_y = Abs(src_width);
59*4e366538SXin Li int src_stride_uv = src_width_uv;
60*4e366538SXin Li
61*4e366538SXin Li align_buffer_page_end(src_y, src_y_plane_size);
62*4e366538SXin Li align_buffer_page_end(src_u, src_uv_plane_size);
63*4e366538SXin Li align_buffer_page_end(src_v, src_uv_plane_size);
64*4e366538SXin Li if (!src_y || !src_u || !src_v) {
65*4e366538SXin Li printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
66*4e366538SXin Li return 0;
67*4e366538SXin Li }
68*4e366538SXin Li MemRandomize(src_y, src_y_plane_size);
69*4e366538SXin Li MemRandomize(src_u, src_uv_plane_size);
70*4e366538SXin Li MemRandomize(src_v, src_uv_plane_size);
71*4e366538SXin Li
72*4e366538SXin Li int dst_width_uv = (dst_width + 1) >> 1;
73*4e366538SXin Li int dst_height_uv = (dst_height + 1) >> 1;
74*4e366538SXin Li
75*4e366538SXin Li int64_t dst_y_plane_size = (dst_width) * (dst_height);
76*4e366538SXin Li int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
77*4e366538SXin Li
78*4e366538SXin Li int dst_stride_y = dst_width;
79*4e366538SXin Li int dst_stride_uv = dst_width_uv;
80*4e366538SXin Li
81*4e366538SXin Li align_buffer_page_end(dst_y_c, dst_y_plane_size);
82*4e366538SXin Li align_buffer_page_end(dst_u_c, dst_uv_plane_size);
83*4e366538SXin Li align_buffer_page_end(dst_v_c, dst_uv_plane_size);
84*4e366538SXin Li align_buffer_page_end(dst_y_opt, dst_y_plane_size);
85*4e366538SXin Li align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
86*4e366538SXin Li align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
87*4e366538SXin Li if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
88*4e366538SXin Li !dst_v_opt) {
89*4e366538SXin Li printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
90*4e366538SXin Li return 0;
91*4e366538SXin Li }
92*4e366538SXin Li
93*4e366538SXin Li MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
94*4e366538SXin Li double c_time = get_time();
95*4e366538SXin Li I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
96*4e366538SXin Li src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
97*4e366538SXin Li dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
98*4e366538SXin Li c_time = (get_time() - c_time);
99*4e366538SXin Li
100*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
101*4e366538SXin Li double opt_time = get_time();
102*4e366538SXin Li for (i = 0; i < benchmark_iterations; ++i) {
103*4e366538SXin Li I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
104*4e366538SXin Li src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
105*4e366538SXin Li dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
106*4e366538SXin Li f);
107*4e366538SXin Li }
108*4e366538SXin Li opt_time = (get_time() - opt_time) / benchmark_iterations;
109*4e366538SXin Li // Report performance of C vs OPT.
110*4e366538SXin Li printf("filter %d - %8d us C - %8d us OPT\n", f,
111*4e366538SXin Li static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
112*4e366538SXin Li
113*4e366538SXin Li // C version may be a little off from the optimized. Order of
114*4e366538SXin Li // operations may introduce rounding somewhere. So do a difference
115*4e366538SXin Li // of the buffers and look to see that the max difference is not
116*4e366538SXin Li // over 3.
117*4e366538SXin Li int max_diff = 0;
118*4e366538SXin Li for (i = 0; i < (dst_height); ++i) {
119*4e366538SXin Li for (j = 0; j < (dst_width); ++j) {
120*4e366538SXin Li int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
121*4e366538SXin Li dst_y_opt[(i * dst_stride_y) + j]);
122*4e366538SXin Li if (abs_diff > max_diff) {
123*4e366538SXin Li max_diff = abs_diff;
124*4e366538SXin Li }
125*4e366538SXin Li }
126*4e366538SXin Li }
127*4e366538SXin Li
128*4e366538SXin Li for (i = 0; i < (dst_height_uv); ++i) {
129*4e366538SXin Li for (j = 0; j < (dst_width_uv); ++j) {
130*4e366538SXin Li int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
131*4e366538SXin Li dst_u_opt[(i * dst_stride_uv) + j]);
132*4e366538SXin Li if (abs_diff > max_diff) {
133*4e366538SXin Li max_diff = abs_diff;
134*4e366538SXin Li }
135*4e366538SXin Li abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
136*4e366538SXin Li dst_v_opt[(i * dst_stride_uv) + j]);
137*4e366538SXin Li if (abs_diff > max_diff) {
138*4e366538SXin Li max_diff = abs_diff;
139*4e366538SXin Li }
140*4e366538SXin Li }
141*4e366538SXin Li }
142*4e366538SXin Li
143*4e366538SXin Li free_aligned_buffer_page_end(dst_y_c);
144*4e366538SXin Li free_aligned_buffer_page_end(dst_u_c);
145*4e366538SXin Li free_aligned_buffer_page_end(dst_v_c);
146*4e366538SXin Li free_aligned_buffer_page_end(dst_y_opt);
147*4e366538SXin Li free_aligned_buffer_page_end(dst_u_opt);
148*4e366538SXin Li free_aligned_buffer_page_end(dst_v_opt);
149*4e366538SXin Li free_aligned_buffer_page_end(src_y);
150*4e366538SXin Li free_aligned_buffer_page_end(src_u);
151*4e366538SXin Li free_aligned_buffer_page_end(src_v);
152*4e366538SXin Li
153*4e366538SXin Li return max_diff;
154*4e366538SXin Li }
155*4e366538SXin Li
156*4e366538SXin Li // Test scaling with 8 bit C vs 12 bit C and return maximum pixel difference.
157*4e366538SXin Li // 0 = exact.
I420TestFilter_12(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)158*4e366538SXin Li static int I420TestFilter_12(int src_width,
159*4e366538SXin Li int src_height,
160*4e366538SXin Li int dst_width,
161*4e366538SXin Li int dst_height,
162*4e366538SXin Li FilterMode f,
163*4e366538SXin Li int benchmark_iterations,
164*4e366538SXin Li int disable_cpu_flags,
165*4e366538SXin Li int benchmark_cpu_info) {
166*4e366538SXin Li if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
167*4e366538SXin Li return 0;
168*4e366538SXin Li }
169*4e366538SXin Li
170*4e366538SXin Li int i;
171*4e366538SXin Li int src_width_uv = (Abs(src_width) + 1) >> 1;
172*4e366538SXin Li int src_height_uv = (Abs(src_height) + 1) >> 1;
173*4e366538SXin Li
174*4e366538SXin Li int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
175*4e366538SXin Li int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
176*4e366538SXin Li
177*4e366538SXin Li int src_stride_y = Abs(src_width);
178*4e366538SXin Li int src_stride_uv = src_width_uv;
179*4e366538SXin Li
180*4e366538SXin Li align_buffer_page_end(src_y, src_y_plane_size);
181*4e366538SXin Li align_buffer_page_end(src_u, src_uv_plane_size);
182*4e366538SXin Li align_buffer_page_end(src_v, src_uv_plane_size);
183*4e366538SXin Li align_buffer_page_end(src_y_12, src_y_plane_size * 2);
184*4e366538SXin Li align_buffer_page_end(src_u_12, src_uv_plane_size * 2);
185*4e366538SXin Li align_buffer_page_end(src_v_12, src_uv_plane_size * 2);
186*4e366538SXin Li if (!src_y || !src_u || !src_v || !src_y_12 || !src_u_12 || !src_v_12) {
187*4e366538SXin Li printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
188*4e366538SXin Li return 0;
189*4e366538SXin Li }
190*4e366538SXin Li uint16_t* p_src_y_12 = reinterpret_cast<uint16_t*>(src_y_12);
191*4e366538SXin Li uint16_t* p_src_u_12 = reinterpret_cast<uint16_t*>(src_u_12);
192*4e366538SXin Li uint16_t* p_src_v_12 = reinterpret_cast<uint16_t*>(src_v_12);
193*4e366538SXin Li
194*4e366538SXin Li MemRandomize(src_y, src_y_plane_size);
195*4e366538SXin Li MemRandomize(src_u, src_uv_plane_size);
196*4e366538SXin Li MemRandomize(src_v, src_uv_plane_size);
197*4e366538SXin Li
198*4e366538SXin Li for (i = 0; i < src_y_plane_size; ++i) {
199*4e366538SXin Li p_src_y_12[i] = src_y[i];
200*4e366538SXin Li }
201*4e366538SXin Li for (i = 0; i < src_uv_plane_size; ++i) {
202*4e366538SXin Li p_src_u_12[i] = src_u[i];
203*4e366538SXin Li p_src_v_12[i] = src_v[i];
204*4e366538SXin Li }
205*4e366538SXin Li
206*4e366538SXin Li int dst_width_uv = (dst_width + 1) >> 1;
207*4e366538SXin Li int dst_height_uv = (dst_height + 1) >> 1;
208*4e366538SXin Li
209*4e366538SXin Li int dst_y_plane_size = (dst_width) * (dst_height);
210*4e366538SXin Li int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
211*4e366538SXin Li
212*4e366538SXin Li int dst_stride_y = dst_width;
213*4e366538SXin Li int dst_stride_uv = dst_width_uv;
214*4e366538SXin Li
215*4e366538SXin Li align_buffer_page_end(dst_y_8, dst_y_plane_size);
216*4e366538SXin Li align_buffer_page_end(dst_u_8, dst_uv_plane_size);
217*4e366538SXin Li align_buffer_page_end(dst_v_8, dst_uv_plane_size);
218*4e366538SXin Li align_buffer_page_end(dst_y_12, dst_y_plane_size * 2);
219*4e366538SXin Li align_buffer_page_end(dst_u_12, dst_uv_plane_size * 2);
220*4e366538SXin Li align_buffer_page_end(dst_v_12, dst_uv_plane_size * 2);
221*4e366538SXin Li
222*4e366538SXin Li uint16_t* p_dst_y_12 = reinterpret_cast<uint16_t*>(dst_y_12);
223*4e366538SXin Li uint16_t* p_dst_u_12 = reinterpret_cast<uint16_t*>(dst_u_12);
224*4e366538SXin Li uint16_t* p_dst_v_12 = reinterpret_cast<uint16_t*>(dst_v_12);
225*4e366538SXin Li
226*4e366538SXin Li MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
227*4e366538SXin Li I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
228*4e366538SXin Li src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
229*4e366538SXin Li dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
230*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
231*4e366538SXin Li for (i = 0; i < benchmark_iterations; ++i) {
232*4e366538SXin Li I420Scale_12(p_src_y_12, src_stride_y, p_src_u_12, src_stride_uv,
233*4e366538SXin Li p_src_v_12, src_stride_uv, src_width, src_height, p_dst_y_12,
234*4e366538SXin Li dst_stride_y, p_dst_u_12, dst_stride_uv, p_dst_v_12,
235*4e366538SXin Li dst_stride_uv, dst_width, dst_height, f);
236*4e366538SXin Li }
237*4e366538SXin Li
238*4e366538SXin Li // Expect an exact match.
239*4e366538SXin Li int max_diff = 0;
240*4e366538SXin Li for (i = 0; i < dst_y_plane_size; ++i) {
241*4e366538SXin Li int abs_diff = Abs(dst_y_8[i] - p_dst_y_12[i]);
242*4e366538SXin Li if (abs_diff > max_diff) {
243*4e366538SXin Li max_diff = abs_diff;
244*4e366538SXin Li }
245*4e366538SXin Li }
246*4e366538SXin Li for (i = 0; i < dst_uv_plane_size; ++i) {
247*4e366538SXin Li int abs_diff = Abs(dst_u_8[i] - p_dst_u_12[i]);
248*4e366538SXin Li if (abs_diff > max_diff) {
249*4e366538SXin Li max_diff = abs_diff;
250*4e366538SXin Li }
251*4e366538SXin Li abs_diff = Abs(dst_v_8[i] - p_dst_v_12[i]);
252*4e366538SXin Li if (abs_diff > max_diff) {
253*4e366538SXin Li max_diff = abs_diff;
254*4e366538SXin Li }
255*4e366538SXin Li }
256*4e366538SXin Li
257*4e366538SXin Li free_aligned_buffer_page_end(dst_y_8);
258*4e366538SXin Li free_aligned_buffer_page_end(dst_u_8);
259*4e366538SXin Li free_aligned_buffer_page_end(dst_v_8);
260*4e366538SXin Li free_aligned_buffer_page_end(dst_y_12);
261*4e366538SXin Li free_aligned_buffer_page_end(dst_u_12);
262*4e366538SXin Li free_aligned_buffer_page_end(dst_v_12);
263*4e366538SXin Li free_aligned_buffer_page_end(src_y);
264*4e366538SXin Li free_aligned_buffer_page_end(src_u);
265*4e366538SXin Li free_aligned_buffer_page_end(src_v);
266*4e366538SXin Li free_aligned_buffer_page_end(src_y_12);
267*4e366538SXin Li free_aligned_buffer_page_end(src_u_12);
268*4e366538SXin Li free_aligned_buffer_page_end(src_v_12);
269*4e366538SXin Li
270*4e366538SXin Li return max_diff;
271*4e366538SXin Li }
272*4e366538SXin Li
273*4e366538SXin Li // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
274*4e366538SXin Li // 0 = exact.
I420TestFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)275*4e366538SXin Li static int I420TestFilter_16(int src_width,
276*4e366538SXin Li int src_height,
277*4e366538SXin Li int dst_width,
278*4e366538SXin Li int dst_height,
279*4e366538SXin Li FilterMode f,
280*4e366538SXin Li int benchmark_iterations,
281*4e366538SXin Li int disable_cpu_flags,
282*4e366538SXin Li int benchmark_cpu_info) {
283*4e366538SXin Li if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
284*4e366538SXin Li return 0;
285*4e366538SXin Li }
286*4e366538SXin Li
287*4e366538SXin Li int i;
288*4e366538SXin Li int src_width_uv = (Abs(src_width) + 1) >> 1;
289*4e366538SXin Li int src_height_uv = (Abs(src_height) + 1) >> 1;
290*4e366538SXin Li
291*4e366538SXin Li int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
292*4e366538SXin Li int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
293*4e366538SXin Li
294*4e366538SXin Li int src_stride_y = Abs(src_width);
295*4e366538SXin Li int src_stride_uv = src_width_uv;
296*4e366538SXin Li
297*4e366538SXin Li align_buffer_page_end(src_y, src_y_plane_size);
298*4e366538SXin Li align_buffer_page_end(src_u, src_uv_plane_size);
299*4e366538SXin Li align_buffer_page_end(src_v, src_uv_plane_size);
300*4e366538SXin Li align_buffer_page_end(src_y_16, src_y_plane_size * 2);
301*4e366538SXin Li align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
302*4e366538SXin Li align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
303*4e366538SXin Li if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
304*4e366538SXin Li printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
305*4e366538SXin Li return 0;
306*4e366538SXin Li }
307*4e366538SXin Li uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
308*4e366538SXin Li uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
309*4e366538SXin Li uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
310*4e366538SXin Li
311*4e366538SXin Li MemRandomize(src_y, src_y_plane_size);
312*4e366538SXin Li MemRandomize(src_u, src_uv_plane_size);
313*4e366538SXin Li MemRandomize(src_v, src_uv_plane_size);
314*4e366538SXin Li
315*4e366538SXin Li for (i = 0; i < src_y_plane_size; ++i) {
316*4e366538SXin Li p_src_y_16[i] = src_y[i];
317*4e366538SXin Li }
318*4e366538SXin Li for (i = 0; i < src_uv_plane_size; ++i) {
319*4e366538SXin Li p_src_u_16[i] = src_u[i];
320*4e366538SXin Li p_src_v_16[i] = src_v[i];
321*4e366538SXin Li }
322*4e366538SXin Li
323*4e366538SXin Li int dst_width_uv = (dst_width + 1) >> 1;
324*4e366538SXin Li int dst_height_uv = (dst_height + 1) >> 1;
325*4e366538SXin Li
326*4e366538SXin Li int dst_y_plane_size = (dst_width) * (dst_height);
327*4e366538SXin Li int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
328*4e366538SXin Li
329*4e366538SXin Li int dst_stride_y = dst_width;
330*4e366538SXin Li int dst_stride_uv = dst_width_uv;
331*4e366538SXin Li
332*4e366538SXin Li align_buffer_page_end(dst_y_8, dst_y_plane_size);
333*4e366538SXin Li align_buffer_page_end(dst_u_8, dst_uv_plane_size);
334*4e366538SXin Li align_buffer_page_end(dst_v_8, dst_uv_plane_size);
335*4e366538SXin Li align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
336*4e366538SXin Li align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
337*4e366538SXin Li align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
338*4e366538SXin Li
339*4e366538SXin Li uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
340*4e366538SXin Li uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
341*4e366538SXin Li uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
342*4e366538SXin Li
343*4e366538SXin Li MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
344*4e366538SXin Li I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
345*4e366538SXin Li src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
346*4e366538SXin Li dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
347*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
348*4e366538SXin Li for (i = 0; i < benchmark_iterations; ++i) {
349*4e366538SXin Li I420Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
350*4e366538SXin Li p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
351*4e366538SXin Li dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
352*4e366538SXin Li dst_stride_uv, dst_width, dst_height, f);
353*4e366538SXin Li }
354*4e366538SXin Li
355*4e366538SXin Li // Expect an exact match.
356*4e366538SXin Li int max_diff = 0;
357*4e366538SXin Li for (i = 0; i < dst_y_plane_size; ++i) {
358*4e366538SXin Li int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
359*4e366538SXin Li if (abs_diff > max_diff) {
360*4e366538SXin Li max_diff = abs_diff;
361*4e366538SXin Li }
362*4e366538SXin Li }
363*4e366538SXin Li for (i = 0; i < dst_uv_plane_size; ++i) {
364*4e366538SXin Li int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
365*4e366538SXin Li if (abs_diff > max_diff) {
366*4e366538SXin Li max_diff = abs_diff;
367*4e366538SXin Li }
368*4e366538SXin Li abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
369*4e366538SXin Li if (abs_diff > max_diff) {
370*4e366538SXin Li max_diff = abs_diff;
371*4e366538SXin Li }
372*4e366538SXin Li }
373*4e366538SXin Li
374*4e366538SXin Li free_aligned_buffer_page_end(dst_y_8);
375*4e366538SXin Li free_aligned_buffer_page_end(dst_u_8);
376*4e366538SXin Li free_aligned_buffer_page_end(dst_v_8);
377*4e366538SXin Li free_aligned_buffer_page_end(dst_y_16);
378*4e366538SXin Li free_aligned_buffer_page_end(dst_u_16);
379*4e366538SXin Li free_aligned_buffer_page_end(dst_v_16);
380*4e366538SXin Li free_aligned_buffer_page_end(src_y);
381*4e366538SXin Li free_aligned_buffer_page_end(src_u);
382*4e366538SXin Li free_aligned_buffer_page_end(src_v);
383*4e366538SXin Li free_aligned_buffer_page_end(src_y_16);
384*4e366538SXin Li free_aligned_buffer_page_end(src_u_16);
385*4e366538SXin Li free_aligned_buffer_page_end(src_v_16);
386*4e366538SXin Li
387*4e366538SXin Li return max_diff;
388*4e366538SXin Li }
389*4e366538SXin Li
390*4e366538SXin Li // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
I444TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)391*4e366538SXin Li static int I444TestFilter(int src_width,
392*4e366538SXin Li int src_height,
393*4e366538SXin Li int dst_width,
394*4e366538SXin Li int dst_height,
395*4e366538SXin Li FilterMode f,
396*4e366538SXin Li int benchmark_iterations,
397*4e366538SXin Li int disable_cpu_flags,
398*4e366538SXin Li int benchmark_cpu_info) {
399*4e366538SXin Li if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
400*4e366538SXin Li return 0;
401*4e366538SXin Li }
402*4e366538SXin Li
403*4e366538SXin Li int i, j;
404*4e366538SXin Li int src_width_uv = Abs(src_width);
405*4e366538SXin Li int src_height_uv = Abs(src_height);
406*4e366538SXin Li
407*4e366538SXin Li int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
408*4e366538SXin Li int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
409*4e366538SXin Li
410*4e366538SXin Li int src_stride_y = Abs(src_width);
411*4e366538SXin Li int src_stride_uv = src_width_uv;
412*4e366538SXin Li
413*4e366538SXin Li align_buffer_page_end(src_y, src_y_plane_size);
414*4e366538SXin Li align_buffer_page_end(src_u, src_uv_plane_size);
415*4e366538SXin Li align_buffer_page_end(src_v, src_uv_plane_size);
416*4e366538SXin Li if (!src_y || !src_u || !src_v) {
417*4e366538SXin Li printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
418*4e366538SXin Li return 0;
419*4e366538SXin Li }
420*4e366538SXin Li MemRandomize(src_y, src_y_plane_size);
421*4e366538SXin Li MemRandomize(src_u, src_uv_plane_size);
422*4e366538SXin Li MemRandomize(src_v, src_uv_plane_size);
423*4e366538SXin Li
424*4e366538SXin Li int dst_width_uv = dst_width;
425*4e366538SXin Li int dst_height_uv = dst_height;
426*4e366538SXin Li
427*4e366538SXin Li int64_t dst_y_plane_size = (dst_width) * (dst_height);
428*4e366538SXin Li int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
429*4e366538SXin Li
430*4e366538SXin Li int dst_stride_y = dst_width;
431*4e366538SXin Li int dst_stride_uv = dst_width_uv;
432*4e366538SXin Li
433*4e366538SXin Li align_buffer_page_end(dst_y_c, dst_y_plane_size);
434*4e366538SXin Li align_buffer_page_end(dst_u_c, dst_uv_plane_size);
435*4e366538SXin Li align_buffer_page_end(dst_v_c, dst_uv_plane_size);
436*4e366538SXin Li align_buffer_page_end(dst_y_opt, dst_y_plane_size);
437*4e366538SXin Li align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
438*4e366538SXin Li align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
439*4e366538SXin Li if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
440*4e366538SXin Li !dst_v_opt) {
441*4e366538SXin Li printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
442*4e366538SXin Li return 0;
443*4e366538SXin Li }
444*4e366538SXin Li
445*4e366538SXin Li MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
446*4e366538SXin Li double c_time = get_time();
447*4e366538SXin Li I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
448*4e366538SXin Li src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
449*4e366538SXin Li dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
450*4e366538SXin Li c_time = (get_time() - c_time);
451*4e366538SXin Li
452*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
453*4e366538SXin Li double opt_time = get_time();
454*4e366538SXin Li for (i = 0; i < benchmark_iterations; ++i) {
455*4e366538SXin Li I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
456*4e366538SXin Li src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
457*4e366538SXin Li dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
458*4e366538SXin Li f);
459*4e366538SXin Li }
460*4e366538SXin Li opt_time = (get_time() - opt_time) / benchmark_iterations;
461*4e366538SXin Li // Report performance of C vs OPT.
462*4e366538SXin Li printf("filter %d - %8d us C - %8d us OPT\n", f,
463*4e366538SXin Li static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
464*4e366538SXin Li
465*4e366538SXin Li // C version may be a little off from the optimized. Order of
466*4e366538SXin Li // operations may introduce rounding somewhere. So do a difference
467*4e366538SXin Li // of the buffers and look to see that the max difference is not
468*4e366538SXin Li // over 3.
469*4e366538SXin Li int max_diff = 0;
470*4e366538SXin Li for (i = 0; i < (dst_height); ++i) {
471*4e366538SXin Li for (j = 0; j < (dst_width); ++j) {
472*4e366538SXin Li int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
473*4e366538SXin Li dst_y_opt[(i * dst_stride_y) + j]);
474*4e366538SXin Li if (abs_diff > max_diff) {
475*4e366538SXin Li max_diff = abs_diff;
476*4e366538SXin Li }
477*4e366538SXin Li }
478*4e366538SXin Li }
479*4e366538SXin Li
480*4e366538SXin Li for (i = 0; i < (dst_height_uv); ++i) {
481*4e366538SXin Li for (j = 0; j < (dst_width_uv); ++j) {
482*4e366538SXin Li int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
483*4e366538SXin Li dst_u_opt[(i * dst_stride_uv) + j]);
484*4e366538SXin Li if (abs_diff > max_diff) {
485*4e366538SXin Li max_diff = abs_diff;
486*4e366538SXin Li }
487*4e366538SXin Li abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
488*4e366538SXin Li dst_v_opt[(i * dst_stride_uv) + j]);
489*4e366538SXin Li if (abs_diff > max_diff) {
490*4e366538SXin Li max_diff = abs_diff;
491*4e366538SXin Li }
492*4e366538SXin Li }
493*4e366538SXin Li }
494*4e366538SXin Li
495*4e366538SXin Li free_aligned_buffer_page_end(dst_y_c);
496*4e366538SXin Li free_aligned_buffer_page_end(dst_u_c);
497*4e366538SXin Li free_aligned_buffer_page_end(dst_v_c);
498*4e366538SXin Li free_aligned_buffer_page_end(dst_y_opt);
499*4e366538SXin Li free_aligned_buffer_page_end(dst_u_opt);
500*4e366538SXin Li free_aligned_buffer_page_end(dst_v_opt);
501*4e366538SXin Li free_aligned_buffer_page_end(src_y);
502*4e366538SXin Li free_aligned_buffer_page_end(src_u);
503*4e366538SXin Li free_aligned_buffer_page_end(src_v);
504*4e366538SXin Li
505*4e366538SXin Li return max_diff;
506*4e366538SXin Li }
507*4e366538SXin Li
508*4e366538SXin Li // Test scaling with 8 bit C vs 12 bit C and return maximum pixel difference.
509*4e366538SXin Li // 0 = exact.
I444TestFilter_12(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)510*4e366538SXin Li static int I444TestFilter_12(int src_width,
511*4e366538SXin Li int src_height,
512*4e366538SXin Li int dst_width,
513*4e366538SXin Li int dst_height,
514*4e366538SXin Li FilterMode f,
515*4e366538SXin Li int benchmark_iterations,
516*4e366538SXin Li int disable_cpu_flags,
517*4e366538SXin Li int benchmark_cpu_info) {
518*4e366538SXin Li if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
519*4e366538SXin Li return 0;
520*4e366538SXin Li }
521*4e366538SXin Li
522*4e366538SXin Li int i;
523*4e366538SXin Li int src_width_uv = Abs(src_width);
524*4e366538SXin Li int src_height_uv = Abs(src_height);
525*4e366538SXin Li
526*4e366538SXin Li int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
527*4e366538SXin Li int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
528*4e366538SXin Li
529*4e366538SXin Li int src_stride_y = Abs(src_width);
530*4e366538SXin Li int src_stride_uv = src_width_uv;
531*4e366538SXin Li
532*4e366538SXin Li align_buffer_page_end(src_y, src_y_plane_size);
533*4e366538SXin Li align_buffer_page_end(src_u, src_uv_plane_size);
534*4e366538SXin Li align_buffer_page_end(src_v, src_uv_plane_size);
535*4e366538SXin Li align_buffer_page_end(src_y_12, src_y_plane_size * 2);
536*4e366538SXin Li align_buffer_page_end(src_u_12, src_uv_plane_size * 2);
537*4e366538SXin Li align_buffer_page_end(src_v_12, src_uv_plane_size * 2);
538*4e366538SXin Li if (!src_y || !src_u || !src_v || !src_y_12 || !src_u_12 || !src_v_12) {
539*4e366538SXin Li printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
540*4e366538SXin Li return 0;
541*4e366538SXin Li }
542*4e366538SXin Li uint16_t* p_src_y_12 = reinterpret_cast<uint16_t*>(src_y_12);
543*4e366538SXin Li uint16_t* p_src_u_12 = reinterpret_cast<uint16_t*>(src_u_12);
544*4e366538SXin Li uint16_t* p_src_v_12 = reinterpret_cast<uint16_t*>(src_v_12);
545*4e366538SXin Li
546*4e366538SXin Li MemRandomize(src_y, src_y_plane_size);
547*4e366538SXin Li MemRandomize(src_u, src_uv_plane_size);
548*4e366538SXin Li MemRandomize(src_v, src_uv_plane_size);
549*4e366538SXin Li
550*4e366538SXin Li for (i = 0; i < src_y_plane_size; ++i) {
551*4e366538SXin Li p_src_y_12[i] = src_y[i];
552*4e366538SXin Li }
553*4e366538SXin Li for (i = 0; i < src_uv_plane_size; ++i) {
554*4e366538SXin Li p_src_u_12[i] = src_u[i];
555*4e366538SXin Li p_src_v_12[i] = src_v[i];
556*4e366538SXin Li }
557*4e366538SXin Li
558*4e366538SXin Li int dst_width_uv = dst_width;
559*4e366538SXin Li int dst_height_uv = dst_height;
560*4e366538SXin Li
561*4e366538SXin Li int dst_y_plane_size = (dst_width) * (dst_height);
562*4e366538SXin Li int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
563*4e366538SXin Li
564*4e366538SXin Li int dst_stride_y = dst_width;
565*4e366538SXin Li int dst_stride_uv = dst_width_uv;
566*4e366538SXin Li
567*4e366538SXin Li align_buffer_page_end(dst_y_8, dst_y_plane_size);
568*4e366538SXin Li align_buffer_page_end(dst_u_8, dst_uv_plane_size);
569*4e366538SXin Li align_buffer_page_end(dst_v_8, dst_uv_plane_size);
570*4e366538SXin Li align_buffer_page_end(dst_y_12, dst_y_plane_size * 2);
571*4e366538SXin Li align_buffer_page_end(dst_u_12, dst_uv_plane_size * 2);
572*4e366538SXin Li align_buffer_page_end(dst_v_12, dst_uv_plane_size * 2);
573*4e366538SXin Li
574*4e366538SXin Li uint16_t* p_dst_y_12 = reinterpret_cast<uint16_t*>(dst_y_12);
575*4e366538SXin Li uint16_t* p_dst_u_12 = reinterpret_cast<uint16_t*>(dst_u_12);
576*4e366538SXin Li uint16_t* p_dst_v_12 = reinterpret_cast<uint16_t*>(dst_v_12);
577*4e366538SXin Li
578*4e366538SXin Li MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
579*4e366538SXin Li I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
580*4e366538SXin Li src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
581*4e366538SXin Li dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
582*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
583*4e366538SXin Li for (i = 0; i < benchmark_iterations; ++i) {
584*4e366538SXin Li I444Scale_12(p_src_y_12, src_stride_y, p_src_u_12, src_stride_uv,
585*4e366538SXin Li p_src_v_12, src_stride_uv, src_width, src_height, p_dst_y_12,
586*4e366538SXin Li dst_stride_y, p_dst_u_12, dst_stride_uv, p_dst_v_12,
587*4e366538SXin Li dst_stride_uv, dst_width, dst_height, f);
588*4e366538SXin Li }
589*4e366538SXin Li
590*4e366538SXin Li // Expect an exact match.
591*4e366538SXin Li int max_diff = 0;
592*4e366538SXin Li for (i = 0; i < dst_y_plane_size; ++i) {
593*4e366538SXin Li int abs_diff = Abs(dst_y_8[i] - p_dst_y_12[i]);
594*4e366538SXin Li if (abs_diff > max_diff) {
595*4e366538SXin Li max_diff = abs_diff;
596*4e366538SXin Li }
597*4e366538SXin Li }
598*4e366538SXin Li for (i = 0; i < dst_uv_plane_size; ++i) {
599*4e366538SXin Li int abs_diff = Abs(dst_u_8[i] - p_dst_u_12[i]);
600*4e366538SXin Li if (abs_diff > max_diff) {
601*4e366538SXin Li max_diff = abs_diff;
602*4e366538SXin Li }
603*4e366538SXin Li abs_diff = Abs(dst_v_8[i] - p_dst_v_12[i]);
604*4e366538SXin Li if (abs_diff > max_diff) {
605*4e366538SXin Li max_diff = abs_diff;
606*4e366538SXin Li }
607*4e366538SXin Li }
608*4e366538SXin Li
609*4e366538SXin Li free_aligned_buffer_page_end(dst_y_8);
610*4e366538SXin Li free_aligned_buffer_page_end(dst_u_8);
611*4e366538SXin Li free_aligned_buffer_page_end(dst_v_8);
612*4e366538SXin Li free_aligned_buffer_page_end(dst_y_12);
613*4e366538SXin Li free_aligned_buffer_page_end(dst_u_12);
614*4e366538SXin Li free_aligned_buffer_page_end(dst_v_12);
615*4e366538SXin Li free_aligned_buffer_page_end(src_y);
616*4e366538SXin Li free_aligned_buffer_page_end(src_u);
617*4e366538SXin Li free_aligned_buffer_page_end(src_v);
618*4e366538SXin Li free_aligned_buffer_page_end(src_y_12);
619*4e366538SXin Li free_aligned_buffer_page_end(src_u_12);
620*4e366538SXin Li free_aligned_buffer_page_end(src_v_12);
621*4e366538SXin Li
622*4e366538SXin Li return max_diff;
623*4e366538SXin Li }
624*4e366538SXin Li
625*4e366538SXin Li // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
626*4e366538SXin Li // 0 = exact.
I444TestFilter_16(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)627*4e366538SXin Li static int I444TestFilter_16(int src_width,
628*4e366538SXin Li int src_height,
629*4e366538SXin Li int dst_width,
630*4e366538SXin Li int dst_height,
631*4e366538SXin Li FilterMode f,
632*4e366538SXin Li int benchmark_iterations,
633*4e366538SXin Li int disable_cpu_flags,
634*4e366538SXin Li int benchmark_cpu_info) {
635*4e366538SXin Li if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
636*4e366538SXin Li return 0;
637*4e366538SXin Li }
638*4e366538SXin Li
639*4e366538SXin Li int i;
640*4e366538SXin Li int src_width_uv = Abs(src_width);
641*4e366538SXin Li int src_height_uv = Abs(src_height);
642*4e366538SXin Li
643*4e366538SXin Li int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
644*4e366538SXin Li int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
645*4e366538SXin Li
646*4e366538SXin Li int src_stride_y = Abs(src_width);
647*4e366538SXin Li int src_stride_uv = src_width_uv;
648*4e366538SXin Li
649*4e366538SXin Li align_buffer_page_end(src_y, src_y_plane_size);
650*4e366538SXin Li align_buffer_page_end(src_u, src_uv_plane_size);
651*4e366538SXin Li align_buffer_page_end(src_v, src_uv_plane_size);
652*4e366538SXin Li align_buffer_page_end(src_y_16, src_y_plane_size * 2);
653*4e366538SXin Li align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
654*4e366538SXin Li align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
655*4e366538SXin Li if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
656*4e366538SXin Li printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
657*4e366538SXin Li return 0;
658*4e366538SXin Li }
659*4e366538SXin Li uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
660*4e366538SXin Li uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
661*4e366538SXin Li uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
662*4e366538SXin Li
663*4e366538SXin Li MemRandomize(src_y, src_y_plane_size);
664*4e366538SXin Li MemRandomize(src_u, src_uv_plane_size);
665*4e366538SXin Li MemRandomize(src_v, src_uv_plane_size);
666*4e366538SXin Li
667*4e366538SXin Li for (i = 0; i < src_y_plane_size; ++i) {
668*4e366538SXin Li p_src_y_16[i] = src_y[i];
669*4e366538SXin Li }
670*4e366538SXin Li for (i = 0; i < src_uv_plane_size; ++i) {
671*4e366538SXin Li p_src_u_16[i] = src_u[i];
672*4e366538SXin Li p_src_v_16[i] = src_v[i];
673*4e366538SXin Li }
674*4e366538SXin Li
675*4e366538SXin Li int dst_width_uv = dst_width;
676*4e366538SXin Li int dst_height_uv = dst_height;
677*4e366538SXin Li
678*4e366538SXin Li int dst_y_plane_size = (dst_width) * (dst_height);
679*4e366538SXin Li int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
680*4e366538SXin Li
681*4e366538SXin Li int dst_stride_y = dst_width;
682*4e366538SXin Li int dst_stride_uv = dst_width_uv;
683*4e366538SXin Li
684*4e366538SXin Li align_buffer_page_end(dst_y_8, dst_y_plane_size);
685*4e366538SXin Li align_buffer_page_end(dst_u_8, dst_uv_plane_size);
686*4e366538SXin Li align_buffer_page_end(dst_v_8, dst_uv_plane_size);
687*4e366538SXin Li align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
688*4e366538SXin Li align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
689*4e366538SXin Li align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
690*4e366538SXin Li
691*4e366538SXin Li uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
692*4e366538SXin Li uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
693*4e366538SXin Li uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
694*4e366538SXin Li
695*4e366538SXin Li MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
696*4e366538SXin Li I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
697*4e366538SXin Li src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
698*4e366538SXin Li dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
699*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
700*4e366538SXin Li for (i = 0; i < benchmark_iterations; ++i) {
701*4e366538SXin Li I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
702*4e366538SXin Li p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
703*4e366538SXin Li dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
704*4e366538SXin Li dst_stride_uv, dst_width, dst_height, f);
705*4e366538SXin Li }
706*4e366538SXin Li
707*4e366538SXin Li // Expect an exact match.
708*4e366538SXin Li int max_diff = 0;
709*4e366538SXin Li for (i = 0; i < dst_y_plane_size; ++i) {
710*4e366538SXin Li int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
711*4e366538SXin Li if (abs_diff > max_diff) {
712*4e366538SXin Li max_diff = abs_diff;
713*4e366538SXin Li }
714*4e366538SXin Li }
715*4e366538SXin Li for (i = 0; i < dst_uv_plane_size; ++i) {
716*4e366538SXin Li int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
717*4e366538SXin Li if (abs_diff > max_diff) {
718*4e366538SXin Li max_diff = abs_diff;
719*4e366538SXin Li }
720*4e366538SXin Li abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
721*4e366538SXin Li if (abs_diff > max_diff) {
722*4e366538SXin Li max_diff = abs_diff;
723*4e366538SXin Li }
724*4e366538SXin Li }
725*4e366538SXin Li
726*4e366538SXin Li free_aligned_buffer_page_end(dst_y_8);
727*4e366538SXin Li free_aligned_buffer_page_end(dst_u_8);
728*4e366538SXin Li free_aligned_buffer_page_end(dst_v_8);
729*4e366538SXin Li free_aligned_buffer_page_end(dst_y_16);
730*4e366538SXin Li free_aligned_buffer_page_end(dst_u_16);
731*4e366538SXin Li free_aligned_buffer_page_end(dst_v_16);
732*4e366538SXin Li free_aligned_buffer_page_end(src_y);
733*4e366538SXin Li free_aligned_buffer_page_end(src_u);
734*4e366538SXin Li free_aligned_buffer_page_end(src_v);
735*4e366538SXin Li free_aligned_buffer_page_end(src_y_16);
736*4e366538SXin Li free_aligned_buffer_page_end(src_u_16);
737*4e366538SXin Li free_aligned_buffer_page_end(src_v_16);
738*4e366538SXin Li
739*4e366538SXin Li return max_diff;
740*4e366538SXin Li }
741*4e366538SXin Li
742*4e366538SXin Li // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
NV12TestFilter(int src_width,int src_height,int dst_width,int dst_height,FilterMode f,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info)743*4e366538SXin Li static int NV12TestFilter(int src_width,
744*4e366538SXin Li int src_height,
745*4e366538SXin Li int dst_width,
746*4e366538SXin Li int dst_height,
747*4e366538SXin Li FilterMode f,
748*4e366538SXin Li int benchmark_iterations,
749*4e366538SXin Li int disable_cpu_flags,
750*4e366538SXin Li int benchmark_cpu_info) {
751*4e366538SXin Li if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
752*4e366538SXin Li return 0;
753*4e366538SXin Li }
754*4e366538SXin Li
755*4e366538SXin Li int i, j;
756*4e366538SXin Li int src_width_uv = (Abs(src_width) + 1) >> 1;
757*4e366538SXin Li int src_height_uv = (Abs(src_height) + 1) >> 1;
758*4e366538SXin Li
759*4e366538SXin Li int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
760*4e366538SXin Li int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv)*2;
761*4e366538SXin Li
762*4e366538SXin Li int src_stride_y = Abs(src_width);
763*4e366538SXin Li int src_stride_uv = src_width_uv * 2;
764*4e366538SXin Li
765*4e366538SXin Li align_buffer_page_end(src_y, src_y_plane_size);
766*4e366538SXin Li align_buffer_page_end(src_uv, src_uv_plane_size);
767*4e366538SXin Li if (!src_y || !src_uv) {
768*4e366538SXin Li printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
769*4e366538SXin Li return 0;
770*4e366538SXin Li }
771*4e366538SXin Li MemRandomize(src_y, src_y_plane_size);
772*4e366538SXin Li MemRandomize(src_uv, src_uv_plane_size);
773*4e366538SXin Li
774*4e366538SXin Li int dst_width_uv = (dst_width + 1) >> 1;
775*4e366538SXin Li int dst_height_uv = (dst_height + 1) >> 1;
776*4e366538SXin Li
777*4e366538SXin Li int64_t dst_y_plane_size = (dst_width) * (dst_height);
778*4e366538SXin Li int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv)*2;
779*4e366538SXin Li
780*4e366538SXin Li int dst_stride_y = dst_width;
781*4e366538SXin Li int dst_stride_uv = dst_width_uv * 2;
782*4e366538SXin Li
783*4e366538SXin Li align_buffer_page_end(dst_y_c, dst_y_plane_size);
784*4e366538SXin Li align_buffer_page_end(dst_uv_c, dst_uv_plane_size);
785*4e366538SXin Li align_buffer_page_end(dst_y_opt, dst_y_plane_size);
786*4e366538SXin Li align_buffer_page_end(dst_uv_opt, dst_uv_plane_size);
787*4e366538SXin Li if (!dst_y_c || !dst_uv_c || !dst_y_opt || !dst_uv_opt) {
788*4e366538SXin Li printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
789*4e366538SXin Li return 0;
790*4e366538SXin Li }
791*4e366538SXin Li
792*4e366538SXin Li MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
793*4e366538SXin Li double c_time = get_time();
794*4e366538SXin Li NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, src_width, src_height,
795*4e366538SXin Li dst_y_c, dst_stride_y, dst_uv_c, dst_stride_uv, dst_width,
796*4e366538SXin Li dst_height, f);
797*4e366538SXin Li c_time = (get_time() - c_time);
798*4e366538SXin Li
799*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
800*4e366538SXin Li double opt_time = get_time();
801*4e366538SXin Li for (i = 0; i < benchmark_iterations; ++i) {
802*4e366538SXin Li NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv, src_width, src_height,
803*4e366538SXin Li dst_y_opt, dst_stride_y, dst_uv_opt, dst_stride_uv, dst_width,
804*4e366538SXin Li dst_height, f);
805*4e366538SXin Li }
806*4e366538SXin Li opt_time = (get_time() - opt_time) / benchmark_iterations;
807*4e366538SXin Li // Report performance of C vs OPT.
808*4e366538SXin Li printf("filter %d - %8d us C - %8d us OPT\n", f,
809*4e366538SXin Li static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
810*4e366538SXin Li
811*4e366538SXin Li // C version may be a little off from the optimized. Order of
812*4e366538SXin Li // operations may introduce rounding somewhere. So do a difference
813*4e366538SXin Li // of the buffers and look to see that the max difference is not
814*4e366538SXin Li // over 3.
815*4e366538SXin Li int max_diff = 0;
816*4e366538SXin Li for (i = 0; i < (dst_height); ++i) {
817*4e366538SXin Li for (j = 0; j < (dst_width); ++j) {
818*4e366538SXin Li int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
819*4e366538SXin Li dst_y_opt[(i * dst_stride_y) + j]);
820*4e366538SXin Li if (abs_diff > max_diff) {
821*4e366538SXin Li max_diff = abs_diff;
822*4e366538SXin Li }
823*4e366538SXin Li }
824*4e366538SXin Li }
825*4e366538SXin Li
826*4e366538SXin Li for (i = 0; i < (dst_height_uv); ++i) {
827*4e366538SXin Li for (j = 0; j < (dst_width_uv * 2); ++j) {
828*4e366538SXin Li int abs_diff = Abs(dst_uv_c[(i * dst_stride_uv) + j] -
829*4e366538SXin Li dst_uv_opt[(i * dst_stride_uv) + j]);
830*4e366538SXin Li if (abs_diff > max_diff) {
831*4e366538SXin Li max_diff = abs_diff;
832*4e366538SXin Li }
833*4e366538SXin Li }
834*4e366538SXin Li }
835*4e366538SXin Li
836*4e366538SXin Li free_aligned_buffer_page_end(dst_y_c);
837*4e366538SXin Li free_aligned_buffer_page_end(dst_uv_c);
838*4e366538SXin Li free_aligned_buffer_page_end(dst_y_opt);
839*4e366538SXin Li free_aligned_buffer_page_end(dst_uv_opt);
840*4e366538SXin Li free_aligned_buffer_page_end(src_y);
841*4e366538SXin Li free_aligned_buffer_page_end(src_uv);
842*4e366538SXin Li
843*4e366538SXin Li return max_diff;
844*4e366538SXin Li }
845*4e366538SXin Li
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// 2 is chroma subsample.
// DX yields a destination size that is a multiple of 2 * nom, derived from
// Abs(x). Its denom argument is unused; it is accepted so DX and SX take the
// same argument list.
// SX yields the matching source size, a multiple of 2 * denom. x is used with
// its sign.
// Every macro argument is parenthesized so that expression arguments expand
// with the intended precedence.
#define DX(x, nom, denom) \
  static_cast<int>(((Abs(x) / (nom) + 1) / 2) * (nom) * 2)
#define SX(x, nom, denom) \
  static_cast<int>((((x) / (nom) + 1) / 2) * (denom) * 2)
851*4e366538SXin Li
// Defines the scale-down test cases for one factor and one filter.
// Source sizes are SX() and destination sizes are DX() of the benchmark
// dimensions. The 8 bit I420, I444 and NV12 cases are always registered under
// their plain names; DISABLED_ is pasted only in front of the two 12 bit case
// names. Every case expects the reported difference to be at most max_diff.
#define TEST_FACTOR1(DISABLED_, name, filter, nom, denom, max_diff)           \
  TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) {                 \
    const int src_w = SX(benchmark_width_, nom, denom);                       \
    const int src_h = SX(benchmark_height_, nom, denom);                      \
    const int dst_w = DX(benchmark_width_, nom, denom);                       \
    const int dst_h = DX(benchmark_height_, nom, denom);                      \
    int diff = I420TestFilter(src_w, src_h, dst_w, dst_h, kFilter##filter,    \
                              benchmark_iterations_, disable_cpu_flags_,      \
                              benchmark_cpu_info_);                           \
    EXPECT_LE(diff, max_diff);                                                \
  }                                                                           \
  TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) {                 \
    const int src_w = SX(benchmark_width_, nom, denom);                       \
    const int src_h = SX(benchmark_height_, nom, denom);                      \
    const int dst_w = DX(benchmark_width_, nom, denom);                       \
    const int dst_h = DX(benchmark_height_, nom, denom);                      \
    int diff = I444TestFilter(src_w, src_h, dst_w, dst_h, kFilter##filter,    \
                              benchmark_iterations_, disable_cpu_flags_,      \
                              benchmark_cpu_info_);                           \
    EXPECT_LE(diff, max_diff);                                                \
  }                                                                           \
  TEST_F(LibYUVScaleTest, DISABLED_##I420ScaleDownBy##name##_##filter##_12) { \
    const int src_w = SX(benchmark_width_, nom, denom);                       \
    const int src_h = SX(benchmark_height_, nom, denom);                      \
    const int dst_w = DX(benchmark_width_, nom, denom);                       \
    const int dst_h = DX(benchmark_height_, nom, denom);                      \
    int diff = I420TestFilter_12(src_w, src_h, dst_w, dst_h, kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_,   \
                                 benchmark_cpu_info_);                        \
    EXPECT_LE(diff, max_diff);                                                \
  }                                                                           \
  TEST_F(LibYUVScaleTest, DISABLED_##I444ScaleDownBy##name##_##filter##_12) { \
    const int src_w = SX(benchmark_width_, nom, denom);                       \
    const int src_h = SX(benchmark_height_, nom, denom);                      \
    const int dst_w = DX(benchmark_width_, nom, denom);                       \
    const int dst_h = DX(benchmark_height_, nom, denom);                      \
    int diff = I444TestFilter_12(src_w, src_h, dst_w, dst_h, kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_,   \
                                 benchmark_cpu_info_);                        \
    EXPECT_LE(diff, max_diff);                                                \
  }                                                                           \
  TEST_F(LibYUVScaleTest, NV12ScaleDownBy##name##_##filter) {                 \
    const int src_w = SX(benchmark_width_, nom, denom);                       \
    const int src_h = SX(benchmark_height_, nom, denom);                      \
    const int dst_w = DX(benchmark_width_, nom, denom);                       \
    const int dst_h = DX(benchmark_height_, nom, denom);                      \
    int diff = NV12TestFilter(src_w, src_h, dst_w, dst_h, kFilter##filter,    \
                              benchmark_iterations_, disable_cpu_flags_,      \
                              benchmark_cpu_info_);                           \
    EXPECT_LE(diff, max_diff);                                                \
  }
893*4e366538SXin Li
// Instantiates TEST_FACTOR1 for one scale factor, once per filter under test.
// Unfiltered scaling is expected to be exact; filtering uses different fixed
// point implementations for SSSE3, Neon and C, so a difference of up to 3 is
// accepted for Linear and Bilinear and boxdiff for Box.
// Without DISABLE_SLOW_TESTS all 4 filters are instantiated with an empty
// DISABLED_ argument. With it, DISABLED_ is passed and the filter list is
// all 4 when ENABLE_FULL_TESTS is defined, otherwise Bilinear and Box only.
#if !defined(DISABLE_SLOW_TESTS)
#define TEST_FACTOR(name, nom, denom, boxdiff)      \
  TEST_FACTOR1(, name, None, nom, denom, 0)         \
  TEST_FACTOR1(, name, Linear, nom, denom, 3)       \
  TEST_FACTOR1(, name, Bilinear, nom, denom, 3)     \
  TEST_FACTOR1(, name, Box, nom, denom, boxdiff)
#elif defined(ENABLE_FULL_TESTS)
#define TEST_FACTOR(name, nom, denom, boxdiff)              \
  TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0)        \
  TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3)      \
  TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3)    \
  TEST_FACTOR1(DISABLED_, name, Box, nom, denom, boxdiff)
#else
#define TEST_FACTOR(name, nom, denom, boxdiff)              \
  TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3)    \
  TEST_FACTOR1(DISABLED_, name, Box, nom, denom, boxdiff)
#endif
915*4e366538SXin Li
916*4e366538SXin Li TEST_FACTOR(2, 1, 2, 0)
917*4e366538SXin Li TEST_FACTOR(4, 1, 4, 0)
918*4e366538SXin Li #ifndef DISABLE_SLOW_TESTS
919*4e366538SXin Li TEST_FACTOR(8, 1, 8, 0)
920*4e366538SXin Li #endif
921*4e366538SXin Li TEST_FACTOR(3by4, 3, 4, 1)
922*4e366538SXin Li TEST_FACTOR(3by8, 3, 8, 1)
923*4e366538SXin Li TEST_FACTOR(3, 1, 3, 0)
924*4e366538SXin Li #undef TEST_FACTOR1
925*4e366538SXin Li #undef TEST_FACTOR
926*4e366538SXin Li #undef SX
927*4e366538SXin Li #undef DX
928*4e366538SXin Li
// Defines the fixed-size test cases for one width x height and one filter.
// The "To" cases scale from the benchmark dimensions to width x height; the
// "From" cases scale from width x height to the absolute benchmark dimensions.
// The 8 bit I420, I444 and NV12 cases are always registered under their plain
// names; DISABLED_ is pasted only in front of the 12 bit and 16 bit case
// names. Every case expects the reported difference to be at most max_diff.
#define TEST_SCALETO1(DISABLED_, name, width, height, filter, max_diff)        \
  TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) {       \
    const int src_w = benchmark_width_;                                        \
    const int src_h = benchmark_height_;                                       \
    int diff = I420TestFilter(src_w, src_h, width, height, kFilter##filter,    \
                              benchmark_iterations_, disable_cpu_flags_,       \
                              benchmark_cpu_info_);                            \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) {       \
    const int src_w = benchmark_width_;                                        \
    const int src_h = benchmark_height_;                                       \
    int diff = I444TestFilter(src_w, src_h, width, height, kFilter##filter,    \
                              benchmark_iterations_, disable_cpu_flags_,       \
                              benchmark_cpu_info_);                            \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest,                                                      \
         DISABLED_##I420##name##To##width##x##height##_##filter##_12) {        \
    const int src_w = benchmark_width_;                                        \
    const int src_h = benchmark_height_;                                       \
    int diff = I420TestFilter_12(src_w, src_h, width, height, kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_,    \
                                 benchmark_cpu_info_);                         \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest,                                                      \
         DISABLED_##I444##name##To##width##x##height##_##filter##_12) {        \
    const int src_w = benchmark_width_;                                        \
    const int src_h = benchmark_height_;                                       \
    int diff = I444TestFilter_12(src_w, src_h, width, height, kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_,    \
                                 benchmark_cpu_info_);                         \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest,                                                      \
         DISABLED_##I420##name##To##width##x##height##_##filter##_16) {        \
    const int src_w = benchmark_width_;                                        \
    const int src_h = benchmark_height_;                                       \
    int diff = I420TestFilter_16(src_w, src_h, width, height, kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_,    \
                                 benchmark_cpu_info_);                         \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest,                                                      \
         DISABLED_##I444##name##To##width##x##height##_##filter##_16) {        \
    const int src_w = benchmark_width_;                                        \
    const int src_h = benchmark_height_;                                       \
    int diff = I444TestFilter_16(src_w, src_h, width, height, kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_,    \
                                 benchmark_cpu_info_);                         \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest, NV12##name##To##width##x##height##_##filter) {       \
    const int src_w = benchmark_width_;                                        \
    const int src_h = benchmark_height_;                                       \
    int diff = NV12TestFilter(src_w, src_h, width, height, kFilter##filter,    \
                              benchmark_iterations_, disable_cpu_flags_,       \
                              benchmark_cpu_info_);                            \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) {     \
    const int dst_w = Abs(benchmark_width_);                                   \
    const int dst_h = Abs(benchmark_height_);                                  \
    int diff = I420TestFilter(width, height, dst_w, dst_h, kFilter##filter,    \
                              benchmark_iterations_, disable_cpu_flags_,       \
                              benchmark_cpu_info_);                            \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) {     \
    const int dst_w = Abs(benchmark_width_);                                   \
    const int dst_h = Abs(benchmark_height_);                                  \
    int diff = I444TestFilter(width, height, dst_w, dst_h, kFilter##filter,    \
                              benchmark_iterations_, disable_cpu_flags_,       \
                              benchmark_cpu_info_);                            \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest,                                                      \
         DISABLED_##I420##name##From##width##x##height##_##filter##_12) {      \
    const int dst_w = Abs(benchmark_width_);                                   \
    const int dst_h = Abs(benchmark_height_);                                  \
    int diff = I420TestFilter_12(width, height, dst_w, dst_h, kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_,    \
                                 benchmark_cpu_info_);                         \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest,                                                      \
         DISABLED_##I444##name##From##width##x##height##_##filter##_12) {      \
    const int dst_w = Abs(benchmark_width_);                                   \
    const int dst_h = Abs(benchmark_height_);                                  \
    int diff = I444TestFilter_12(width, height, dst_w, dst_h, kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_,    \
                                 benchmark_cpu_info_);                         \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest,                                                      \
         DISABLED_##I420##name##From##width##x##height##_##filter##_16) {      \
    const int dst_w = Abs(benchmark_width_);                                   \
    const int dst_h = Abs(benchmark_height_);                                  \
    int diff = I420TestFilter_16(width, height, dst_w, dst_h, kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_,    \
                                 benchmark_cpu_info_);                         \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest,                                                      \
         DISABLED_##I444##name##From##width##x##height##_##filter##_16) {      \
    const int dst_w = Abs(benchmark_width_);                                   \
    const int dst_h = Abs(benchmark_height_);                                  \
    int diff = I444TestFilter_16(width, height, dst_w, dst_h, kFilter##filter, \
                                 benchmark_iterations_, disable_cpu_flags_,    \
                                 benchmark_cpu_info_);                         \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest, NV12##name##From##width##x##height##_##filter) {     \
    const int dst_w = Abs(benchmark_width_);                                   \
    const int dst_h = Abs(benchmark_height_);                                  \
    int diff = NV12TestFilter(width, height, dst_w, dst_h, kFilter##filter,    \
                              benchmark_iterations_, disable_cpu_flags_,       \
                              benchmark_cpu_info_);                            \
    EXPECT_LE(diff, max_diff);                                                 \
  }
1029*4e366538SXin Li
// Instantiates TEST_SCALETO1 for one target size, once per filter under test.
// The unfiltered case must match exactly; every filtered case accepts a
// difference of up to 3.
// Without DISABLE_SLOW_TESTS all 4 filters are instantiated with an empty
// DISABLED_ argument. With it, DISABLED_ is passed and the filter list is
// all 4 when ENABLE_FULL_TESTS is defined, otherwise Bilinear and Box only.
#if !defined(DISABLE_SLOW_TESTS)
#define TEST_SCALETO(name, width, height)               \
  TEST_SCALETO1(, name, width, height, None, 0)         \
  TEST_SCALETO1(, name, width, height, Linear, 3)       \
  TEST_SCALETO1(, name, width, height, Bilinear, 3)     \
  TEST_SCALETO1(, name, width, height, Box, 3)
#elif defined(ENABLE_FULL_TESTS)
#define TEST_SCALETO(name, width, height)                       \
  TEST_SCALETO1(DISABLED_, name, width, height, None, 0)        \
  TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3)      \
  TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3)    \
  TEST_SCALETO1(DISABLED_, name, width, height, Box, 3)
#else
#define TEST_SCALETO(name, width, height)                       \
  TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3)    \
  TEST_SCALETO1(DISABLED_, name, width, height, Box, 3)
#endif
1050*4e366538SXin Li
1051*4e366538SXin Li TEST_SCALETO(Scale, 1, 1)
1052*4e366538SXin Li TEST_SCALETO(Scale, 569, 480)
1053*4e366538SXin Li TEST_SCALETO(Scale, 640, 360)
1054*4e366538SXin Li #ifndef DISABLE_SLOW_TESTS
1055*4e366538SXin Li TEST_SCALETO(Scale, 256, 144) /* 128x72 * 2 */
1056*4e366538SXin Li TEST_SCALETO(Scale, 320, 240)
1057*4e366538SXin Li TEST_SCALETO(Scale, 1280, 720)
1058*4e366538SXin Li TEST_SCALETO(Scale, 1920, 1080)
1059*4e366538SXin Li #endif // DISABLE_SLOW_TESTS
1060*4e366538SXin Li #undef TEST_SCALETO1
1061*4e366538SXin Li #undef TEST_SCALETO
1062*4e366538SXin Li
// Defines the test cases that scale the benchmark image to a destination
// whose width and height are swapped, for one filter.
// The source keeps the signed benchmark dimensions. The destination uses
// their absolute values, as the other destination sizes in this file do,
// because the filter helpers compute destination plane sizes without Abs().
// The 8 bit I420, I444 and NV12 cases are always registered under their plain
// names; DISABLED_ is pasted only in front of the 12 bit and 16 bit case
// names. Every case expects the reported difference to be at most max_diff.
#define TEST_SCALESWAPXY1(DISABLED_, name, filter, max_diff)                   \
  TEST_F(LibYUVScaleTest, I420##name##SwapXY_##filter) {                       \
    int diff = I420TestFilter(benchmark_width_, benchmark_height_,             \
                              Abs(benchmark_height_), Abs(benchmark_width_),   \
                              kFilter##filter, benchmark_iterations_,          \
                              disable_cpu_flags_, benchmark_cpu_info_);        \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest, I444##name##SwapXY_##filter) {                       \
    int diff = I444TestFilter(benchmark_width_, benchmark_height_,             \
                              Abs(benchmark_height_), Abs(benchmark_width_),   \
                              kFilter##filter, benchmark_iterations_,          \
                              disable_cpu_flags_, benchmark_cpu_info_);        \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest, DISABLED_##I420##name##SwapXY_##filter##_12) {       \
    int diff = I420TestFilter_12(benchmark_width_, benchmark_height_,          \
                                 Abs(benchmark_height_),                       \
                                 Abs(benchmark_width_), kFilter##filter,       \
                                 benchmark_iterations_, disable_cpu_flags_,    \
                                 benchmark_cpu_info_);                         \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest, DISABLED_##I444##name##SwapXY_##filter##_12) {       \
    int diff = I444TestFilter_12(benchmark_width_, benchmark_height_,          \
                                 Abs(benchmark_height_),                       \
                                 Abs(benchmark_width_), kFilter##filter,       \
                                 benchmark_iterations_, disable_cpu_flags_,    \
                                 benchmark_cpu_info_);                         \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest, DISABLED_##I420##name##SwapXY_##filter##_16) {       \
    int diff = I420TestFilter_16(benchmark_width_, benchmark_height_,          \
                                 Abs(benchmark_height_),                       \
                                 Abs(benchmark_width_), kFilter##filter,       \
                                 benchmark_iterations_, disable_cpu_flags_,    \
                                 benchmark_cpu_info_);                         \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest, DISABLED_##I444##name##SwapXY_##filter##_16) {       \
    int diff = I444TestFilter_16(benchmark_width_, benchmark_height_,          \
                                 Abs(benchmark_height_),                       \
                                 Abs(benchmark_width_), kFilter##filter,       \
                                 benchmark_iterations_, disable_cpu_flags_,    \
                                 benchmark_cpu_info_);                         \
    EXPECT_LE(diff, max_diff);                                                 \
  }                                                                            \
  TEST_F(LibYUVScaleTest, NV12##name##SwapXY_##filter) {                       \
    int diff = NV12TestFilter(benchmark_width_, benchmark_height_,             \
                              Abs(benchmark_height_), Abs(benchmark_width_),   \
                              kFilter##filter, benchmark_iterations_,          \
                              disable_cpu_flags_, benchmark_cpu_info_);        \
    EXPECT_LE(diff, max_diff);                                                 \
  }
1113*4e366538SXin Li
1114*4e366538SXin Li // Test scale to a specified size with all 4 filters.
1115*4e366538SXin Li #ifndef DISABLE_SLOW_TESTS
1116*4e366538SXin Li TEST_SCALESWAPXY1(, Scale, None, 0)
1117*4e366538SXin Li TEST_SCALESWAPXY1(, Scale, Linear, 3)
1118*4e366538SXin Li TEST_SCALESWAPXY1(, Scale, Bilinear, 3)
1119*4e366538SXin Li TEST_SCALESWAPXY1(, Scale, Box, 3)
1120*4e366538SXin Li #else
1121*4e366538SXin Li #if defined(ENABLE_FULL_TESTS)
1122*4e366538SXin Li TEST_SCALESWAPXY1(DISABLED_, Scale, None, 0)
1123*4e366538SXin Li TEST_SCALESWAPXY1(DISABLED_, Scale, Linear, 3)
1124*4e366538SXin Li TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3)
1125*4e366538SXin Li TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3)
1126*4e366538SXin Li #else
1127*4e366538SXin Li TEST_SCALESWAPXY1(DISABLED_, Scale, Bilinear, 3)
1128*4e366538SXin Li TEST_SCALESWAPXY1(DISABLED_, Scale, Box, 3)
1129*4e366538SXin Li #endif
1130*4e366538SXin Li #endif
1131*4e366538SXin Li #undef TEST_SCALESWAPXY1
1132*4e366538SXin Li
1133*4e366538SXin Li } // namespace libyuv
1134