/*
 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10*4e366538SXin Li
11*4e366538SXin Li #include <math.h>
12*4e366538SXin Li #include <stdlib.h>
13*4e366538SXin Li #include <time.h>
14*4e366538SXin Li
15*4e366538SXin Li #include "../unit_test/unit_test.h"
16*4e366538SXin Li #include "libyuv/compare.h"
17*4e366538SXin Li #include "libyuv/convert.h"
18*4e366538SXin Li #include "libyuv/convert_argb.h"
19*4e366538SXin Li #include "libyuv/convert_from.h"
20*4e366538SXin Li #include "libyuv/convert_from_argb.h"
21*4e366538SXin Li #include "libyuv/cpu_id.h"
22*4e366538SXin Li #include "libyuv/planar_functions.h"
23*4e366538SXin Li #include "libyuv/rotate.h"
24*4e366538SXin Li #include "libyuv/scale.h"
25*4e366538SXin Li
26*4e366538SXin Li #ifdef ENABLE_ROW_TESTS
27*4e366538SXin Li // row.h defines SIMD_ALIGNED, overriding unit_test.h
28*4e366538SXin Li // TODO(fbarchard): Remove row.h from unittests. Test public functions.
29*4e366538SXin Li #include "libyuv/row.h" /* For ScaleSumSamples_Neon */
30*4e366538SXin Li #endif
31*4e366538SXin Li
32*4e366538SXin Li #if defined(LIBYUV_BIT_EXACT)
33*4e366538SXin Li #define EXPECTED_UNATTENUATE_DIFF 0
34*4e366538SXin Li #else
35*4e366538SXin Li #define EXPECTED_UNATTENUATE_DIFF 2
36*4e366538SXin Li #endif
37*4e366538SXin Li
38*4e366538SXin Li namespace libyuv {
39*4e366538SXin Li
// Checks ARGBAttenuate and ARGBUnattenuate against fixed expected values for
// five probe pixels, verifies that pixels with alpha 255 come back unchanged
// from ARGBAttenuate, and confirms that attenuate -> unattenuate -> attenuate
// on a 1280 pixel ramp lands within 1 of the first attenuation.
TEST_F(LibYUVPlanarTest, TestAttenuate) {
  const int kSize = 1280 * 4;
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(atten_pixels, kSize);
  align_buffer_page_end(unatten_pixels, kSize);
  align_buffer_page_end(atten2_pixels, kSize);

  const int kProbeBytes = 5 * 4;
  // Probe pixels, 4 bytes each (last byte is alpha).
  // Pixel 0 tests that unattenuation clamps.
  // Pixels 1..3 share one colour with alpha 0, 128 and 255; pixel 4 is all 255.
  static const uint8_t kProbe[5 * 4] = {
      200u, 129u, 127u, 128u,  //
      16u,  64u,  192u, 0u,    //
      16u,  64u,  192u, 128u,  //
      16u,  64u,  192u, 255u,  //
      255u, 255u, 255u, 255u,  //
  };
  for (int b = 0; b < kProbeBytes; ++b) {
    orig_pixels[b] = kProbe[b];
  }

  // Expected result of unattenuating the probe pixels.
  static const unsigned kUnattenExpected[5 * 4] = {
      255u, 255u, 254u, 128u,  //
      0u,   0u,   0u,   0u,    //
      32u,  128u, 255u, 128u,  //
      16u,  64u,  192u, 255u,  //
      255u, 255u, 255u, 255u,  //
  };
  ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 5, 1);
  for (int b = 0; b < kProbeBytes; ++b) {
    EXPECT_EQ(kUnattenExpected[b], unatten_pixels[b]);
  }

  // Expected result of attenuating the probe pixels.
  static const unsigned kAttenExpected[5 * 4] = {
      100u, 65u,  64u,  128u,  //
      0u,   0u,   0u,   0u,    //
      8u,   32u,  96u,  128u,  //
      16u,  64u,  192u, 255u,  //
      255u, 255u, 255u, 255u,  //
  };
  ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 5, 1);
  for (int b = 0; b < kProbeBytes; ++b) {
    EXPECT_EQ(kAttenExpected[b], atten_pixels[b]);
  }

  // test 255: every value of the first channel with alpha 255 must be
  // returned unchanged.
  for (int i = 0; i < 256; ++i) {
    const int base = i * 4;
    orig_pixels[base + 0] = i;
    orig_pixels[base + 1] = 0;
    orig_pixels[base + 2] = 0;
    orig_pixels[base + 3] = 255;
  }
  ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 256, 1);
  for (int i = 0; i < 256; ++i) {
    const int base = i * 4;
    EXPECT_EQ(orig_pixels[base + 0], atten_pixels[base + 0]);
    EXPECT_EQ(0, atten_pixels[base + 1]);
    EXPECT_EQ(0, atten_pixels[base + 2]);
    EXPECT_EQ(255, atten_pixels[base + 3]);
  }

  // Ramp over 1280 pixels; values are truncated to 8 bits on store.
  for (int i = 0; i < 1280; ++i) {
    const int base = i * 4;
    orig_pixels[base + 0] = i;
    orig_pixels[base + 1] = i / 2;
    orig_pixels[base + 2] = i / 3;
    orig_pixels[base + 3] = i;
  }
  ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 1280, 1);
  ARGBUnattenuate(atten_pixels, 0, unatten_pixels, 0, 1280, 1);
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, 1280, 1);
  }
  // Byte-by-byte comparison of the first and second attenuation.
  for (int b = 0; b < 1280 * 4; ++b) {
    EXPECT_NEAR(atten_pixels[b], atten2_pixels[b], 1);
  }

  // Make sure transparent, 50% and opaque are fully accurate.
  // Each row: pixel index followed by the four expected bytes.
  static const int kSpotChecks[3][5] = {
      {0, 0, 0, 0, 0},
      {128, 64, 32, 21, 128},
      {255, 255, 127, 85, 255},
  };
  for (int s = 0; s < 3; ++s) {
    const int base = kSpotChecks[s][0] * 4;
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kSpotChecks[s][c + 1], atten_pixels[base + c]);
    }
  }

  free_aligned_buffer_page_end(atten2_pixels);
  free_aligned_buffer_page_end(unatten_pixels);
  free_aligned_buffer_page_end(atten_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
165*4e366538SXin Li
TestAttenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)166*4e366538SXin Li static int TestAttenuateI(int width,
167*4e366538SXin Li int height,
168*4e366538SXin Li int benchmark_iterations,
169*4e366538SXin Li int disable_cpu_flags,
170*4e366538SXin Li int benchmark_cpu_info,
171*4e366538SXin Li int invert,
172*4e366538SXin Li int off) {
173*4e366538SXin Li if (width < 1) {
174*4e366538SXin Li width = 1;
175*4e366538SXin Li }
176*4e366538SXin Li const int kBpp = 4;
177*4e366538SXin Li const int kStride = width * kBpp;
178*4e366538SXin Li align_buffer_page_end(src_argb, kStride * height + off);
179*4e366538SXin Li align_buffer_page_end(dst_argb_c, kStride * height);
180*4e366538SXin Li align_buffer_page_end(dst_argb_opt, kStride * height);
181*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
182*4e366538SXin Li src_argb[i + off] = (fastrand() & 0xff);
183*4e366538SXin Li }
184*4e366538SXin Li memset(dst_argb_c, 0, kStride * height);
185*4e366538SXin Li memset(dst_argb_opt, 0, kStride * height);
186*4e366538SXin Li
187*4e366538SXin Li MaskCpuFlags(disable_cpu_flags);
188*4e366538SXin Li ARGBAttenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
189*4e366538SXin Li invert * height);
190*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info);
191*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
192*4e366538SXin Li ARGBAttenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
193*4e366538SXin Li invert * height);
194*4e366538SXin Li }
195*4e366538SXin Li int max_diff = 0;
196*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
197*4e366538SXin Li int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
198*4e366538SXin Li static_cast<int>(dst_argb_opt[i]));
199*4e366538SXin Li if (abs_diff > max_diff) {
200*4e366538SXin Li max_diff = abs_diff;
201*4e366538SXin Li }
202*4e366538SXin Li }
203*4e366538SXin Li free_aligned_buffer_page_end(src_argb);
204*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_c);
205*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_opt);
206*4e366538SXin Li return max_diff;
207*4e366538SXin Li }
208*4e366538SXin Li
// ARGBAttenuate with a width one pixel larger than the benchmark width; the
// two passes of TestAttenuateI must match exactly.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestAttenuateI(
      kWidth, benchmark_height_, benchmark_iterations_, disable_cpu_flags_,
      benchmark_cpu_info_, kInvert, kOffset);

  EXPECT_EQ(max_diff, 0);
}
216*4e366538SXin Li
// ARGBAttenuate with the source pointer offset by one byte; the two passes of
// TestAttenuateI must match exactly.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff = TestAttenuateI(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(max_diff, 0);
}
223*4e366538SXin Li
// ARGBAttenuate with invert = -1, so a negated height is passed to the
// library; the two passes of TestAttenuateI must match exactly.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff = TestAttenuateI(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(max_diff, 0);
}
230*4e366538SXin Li
// ARGBAttenuate at the benchmark size with no offset and no inversion; the
// two passes of TestAttenuateI must match exactly.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestAttenuateI(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(max_diff, 0);
}
237*4e366538SXin Li
TestUnattenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)238*4e366538SXin Li static int TestUnattenuateI(int width,
239*4e366538SXin Li int height,
240*4e366538SXin Li int benchmark_iterations,
241*4e366538SXin Li int disable_cpu_flags,
242*4e366538SXin Li int benchmark_cpu_info,
243*4e366538SXin Li int invert,
244*4e366538SXin Li int off) {
245*4e366538SXin Li if (width < 1) {
246*4e366538SXin Li width = 1;
247*4e366538SXin Li }
248*4e366538SXin Li const int kBpp = 4;
249*4e366538SXin Li const int kStride = width * kBpp;
250*4e366538SXin Li align_buffer_page_end(src_argb, kStride * height + off);
251*4e366538SXin Li align_buffer_page_end(dst_argb_c, kStride * height);
252*4e366538SXin Li align_buffer_page_end(dst_argb_opt, kStride * height);
253*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
254*4e366538SXin Li src_argb[i + off] = (fastrand() & 0xff);
255*4e366538SXin Li }
256*4e366538SXin Li ARGBAttenuate(src_argb + off, kStride, src_argb + off, kStride, width,
257*4e366538SXin Li height);
258*4e366538SXin Li memset(dst_argb_c, 0, kStride * height);
259*4e366538SXin Li memset(dst_argb_opt, 0, kStride * height);
260*4e366538SXin Li
261*4e366538SXin Li MaskCpuFlags(disable_cpu_flags);
262*4e366538SXin Li ARGBUnattenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
263*4e366538SXin Li invert * height);
264*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info);
265*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
266*4e366538SXin Li ARGBUnattenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
267*4e366538SXin Li invert * height);
268*4e366538SXin Li }
269*4e366538SXin Li int max_diff = 0;
270*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
271*4e366538SXin Li int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
272*4e366538SXin Li static_cast<int>(dst_argb_opt[i]));
273*4e366538SXin Li if (abs_diff > max_diff) {
274*4e366538SXin Li max_diff = abs_diff;
275*4e366538SXin Li }
276*4e366538SXin Li }
277*4e366538SXin Li free_aligned_buffer_page_end(src_argb);
278*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_c);
279*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_opt);
280*4e366538SXin Li return max_diff;
281*4e366538SXin Li }
282*4e366538SXin Li
// ARGBUnattenuate with a width one pixel larger than the benchmark width; the
// two passes may differ by at most EXPECTED_UNATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestUnattenuateI(
      kWidth, benchmark_height_, benchmark_iterations_, disable_cpu_flags_,
      benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
}
289*4e366538SXin Li
// ARGBUnattenuate with the source pointer offset by one byte; the two passes
// may differ by at most EXPECTED_UNATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff = TestUnattenuateI(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
}
296*4e366538SXin Li
// ARGBUnattenuate with invert = -1, so a negated height is passed to the
// library; the two passes may differ by at most EXPECTED_UNATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff = TestUnattenuateI(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
}
303*4e366538SXin Li
// ARGBUnattenuate at the benchmark size with no offset and no inversion; the
// two passes may differ by at most EXPECTED_UNATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestUnattenuateI(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
}
310*4e366538SXin Li
// Feeds a 16x16 image in which every pixel is {1, 2, 3, 255} to
// ARGBComputeCumulativeSum and expects each output entry to equal the channel
// value times the number of pixels in the rectangle from (0, 0) to (x, y).
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
  SIMD_ALIGNED(uint8_t orig_pixels[16][16][4]);
  SIMD_ALIGNED(int32_t added_pixels[16][16][4]);

  for (int row = 0; row < 16; ++row) {
    for (int col = 0; col < 16; ++col) {
      uint8_t* px = orig_pixels[row][col];
      px[0] = 1u;
      px[1] = 2u;
      px[2] = 3u;
      px[3] = 255u;
    }
  }

  ARGBComputeCumulativeSum(&orig_pixels[0][0][0], 16 * 4,
                           &added_pixels[0][0][0], 16 * 4, 16, 16);

  for (int row = 0; row < 16; ++row) {
    for (int col = 0; col < 16; ++col) {
      // Number of pixels in the rectangle ending at (col, row), inclusive.
      const int area = (col + 1) * (row + 1);
      EXPECT_EQ(area, added_pixels[row][col][0]);
      EXPECT_EQ(area * 2, added_pixels[row][col][1]);
      EXPECT_EQ(area * 3, added_pixels[row][col][2]);
      EXPECT_EQ(area * 255, added_pixels[row][col][3]);
    }
  }
}
336*4e366538SXin Li
337*4e366538SXin Li // near is for legacy platforms.
// Runs the in-place ARGBGray on six reference pixels and checks the result:
// the three colour bytes must equal (or be within 1 of) the expected gray
// value and the fourth byte must be unchanged. A 1280 pixel ramp is then
// converted benchmark_pixels_div1280_ times without checking the output.
TEST_F(LibYUVPlanarTest, TestARGBGray) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Reference inputs: blue, green, red, black, white and a mixed colour.
  static const uint8_t kInput[6][4] = {
      {255u, 0u, 0u, 128u},      // Test blue
      {0u, 255u, 0u, 0u},        // Test green
      {0u, 0u, 255u, 255u},      // Test red
      {0u, 0u, 0u, 255u},        // Test black
      {255u, 255u, 255u, 255u},  // Test white
      {16u, 64u, 192u, 224u},    // Test color
  };
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }

  // Do 16 to test asm version.
  ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1);

  // Expected gray level and alpha per pixel. |exact| selects EXPECT_EQ for
  // the gray bytes; otherwise a tolerance of 1 is allowed.
  static const struct {
    unsigned gray;
    unsigned alpha;
    bool exact;
  } kExpected[6] = {
      {29u, 128u, false}, {149u, 0u, true},    {77u, 255u, false},
      {0u, 255u, true},   {255u, 255u, true},  {97u, 224u, false},
  };
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 3; ++c) {
      if (kExpected[p].exact) {
        EXPECT_EQ(kExpected[p].gray, orig_pixels[p][c]);
      } else {
        EXPECT_NEAR(kExpected[p].gray, orig_pixels[p][c], 1);
      }
    }
    EXPECT_EQ(kExpected[p].alpha, orig_pixels[p][3]);
  }

  // Ramp input for the benchmark loop; values are truncated to 8 bits.
  for (int i = 0; i < 1280; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBGray(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
  }
}
408*4e366538SXin Li
// Runs ARGBGrayTo on six reference pixels and checks every output byte to
// within 1 of the expected value, benchmarks it on a 1280 pixel ramp, and
// finally checks that the in-place ARGBGray leaves pixels whose four bytes
// are all equal unchanged for every value 0..255.
TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t gray_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Reference inputs: blue, green, red, black, white and a mixed colour.
  static const uint8_t kInput[6][4] = {
      {255u, 0u, 0u, 128u},      // Test blue
      {0u, 255u, 0u, 0u},        // Test green
      {0u, 0u, 255u, 255u},      // Test red
      {0u, 0u, 0u, 255u},        // Test black
      {255u, 255u, 255u, 255u},  // Test white
      {16u, 64u, 192u, 224u},    // Test color
  };
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }

  // Do 16 to test asm version.
  ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1);

  // Expected output bytes; each is compared with a tolerance of 1.
  static const unsigned kExpected[6][4] = {
      {30u, 30u, 30u, 128u},     {149u, 149u, 149u, 0u},
      {76u, 76u, 76u, 255u},     {0u, 0u, 0u, 255u},
      {255u, 255u, 255u, 255u},  {96u, 96u, 96u, 224u},
  };
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_NEAR(kExpected[p][c], gray_pixels[p][c], 1);
    }
  }

  // Ramp input for the benchmark loop; values are truncated to 8 bits.
  for (int i = 0; i < 1280; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 1280, 1);
  }

  // Pixels whose four bytes are all equal must survive ARGBGray unchanged.
  for (int i = 0; i < 256; ++i) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[i][c] = i;
    }
  }
  ARGBGray(&orig_pixels[0][0], 0, 0, 0, 256, 1);
  for (int i = 0; i < 256; ++i) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(i, orig_pixels[i][c]);
    }
  }
}
494*4e366538SXin Li
// Runs the in-place ARGBSepia on six reference pixels and checks every output
// byte exactly, then converts a 1280 pixel ramp benchmark_pixels_div1280_
// times without checking the output.
TEST_F(LibYUVPlanarTest, TestARGBSepia) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Reference inputs: blue, green, red, black, white and a mixed colour.
  static const uint8_t kInput[6][4] = {
      {255u, 0u, 0u, 128u},      // Test blue
      {0u, 255u, 0u, 0u},        // Test green
      {0u, 0u, 255u, 255u},      // Test red
      {0u, 0u, 0u, 255u},        // Test black
      {255u, 255u, 255u, 255u},  // Test white
      {16u, 64u, 192u, 224u},    // Test color
  };
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }

  // Do 16 to test asm version.
  ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 16, 1);

  // Expected output bytes, compared exactly.
  static const unsigned kExpected[6][4] = {
      {33u, 43u, 47u, 128u},     {135u, 175u, 195u, 0u},
      {69u, 89u, 99u, 255u},     {0u, 0u, 0u, 255u},
      {239u, 255u, 255u, 255u},  {88u, 114u, 127u, 224u},
  };
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c]);
    }
  }

  // Ramp input for the benchmark loop; values are truncated to 8 bits.
  for (int i = 0; i < 1280; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
  }
}
566*4e366538SXin Li
// Applies a sepia matrix with ARGBColorMatrix to four reference pixels and
// checks every output byte exactly. It then converts a 1280 pixel ramp once
// with disable_cpu_flags_ masked and benchmark_pixels_div1280_ times with
// benchmark_cpu_info_ masked, and requires the two outputs to be identical.
TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);

  // Matrix for Sepia.
  SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
      17 / 2, 68 / 2, 35 / 2, 0, 22 / 2, 88 / 2, 45 / 2, 0,
      24 / 2, 98 / 2, 50 / 2, 0, 0,      0,      0,      64,  // Copy alpha.
  };
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Reference inputs: blue, green, red and a mixed colour.
  static const uint8_t kInput[4][4] = {
      {255u, 0u, 0u, 128u},    // Test blue
      {0u, 255u, 0u, 0u},      // Test green
      {0u, 0u, 255u, 255u},    // Test red
      {16u, 64u, 192u, 224u},  // Test color
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }

  // Do 16 to test asm version.
  ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                  &kRGBToSepia[0], 16, 1);

  // Expected output bytes, compared exactly.
  static const unsigned kExpected[4][4] = {
      {31u, 43u, 47u, 128u},
      {135u, 175u, 195u, 0u},
      {67u, 87u, 99u, 255u},
      {87u, 112u, 127u, 224u},
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], dst_pixels_opt[p][c]);
    }
  }

  // Ramp input for the comparison; values are truncated to 8 bits.
  for (int i = 0; i < 1280; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = i;
    px[1] = i / 2;
    px[2] = i / 3;
    px[3] = i;
  }

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
                  &kRGBToSepia[0], 1280, 1);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                    &kRGBToSepia[0], 1280, 1);
  }

  for (int i = 0; i < 1280; ++i) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(dst_pixels_c[i][c], dst_pixels_opt[i][c]);
    }
  }
}
642*4e366538SXin Li
// Applies a sepia matrix in place through RGBColorMatrix.  Four known pixels
// are checked against fixed expected bytes, then the same call is repeated on
// a full 1280 pixel row for benchmarking (that output is not verified).
TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) {
  const int kRowPixels = 1280;
  const int kKnownPixels = 4;
  SIMD_ALIGNED(uint8_t orig_pixels[kRowPixels][4]);

  // Sepia matrix.
  SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
      17, 68, 35, 0, 22, 88, 45, 0,
      24, 98, 50, 0, 0,  0,  0,  0,  // Last 4 unused; pad matrix to 16 bytes.
  };

  // One row per pixel: blue, green, red, then a mixed color.
  const uint8_t kInput[kKnownPixels][4] = {
      {255u, 0u, 0u, 128u},
      {0u, 255u, 0u, 0u},
      {0u, 0u, 255u, 255u},
      {16u, 64u, 192u, 224u},
  };
  // Bytes expected in place of kInput after the call.
  const unsigned int kExpected[kKnownPixels][4] = {
      {31u, 43u, 47u, 128u},
      {135u, 175u, 195u, 0u},
      {67u, 87u, 99u, 255u},
      {87u, 112u, 127u, 224u},
  };

  memset(orig_pixels, 0, sizeof(orig_pixels));
  for (int p = 0; p < kKnownPixels; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }

  // Do 16 to test asm version.
  RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 16, 1);
  for (int p = 0; p < kKnownPixels; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c]);
    }
  }

  // Refill the whole row with a ramp; values wrap when stored as 8 bits.
  for (int x = 0; x < kRowPixels; ++x) {
    uint8_t* const px = orig_pixels[x];
    px[0] = x;
    px[1] = x / 2;
    px[2] = x / 3;
    px[3] = x;
  }
  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, kRowPixels,
                   1);
  }
}
702*4e366538SXin Li
// Runs ARGBColorTable in place on four known pixels and checks every channel,
// alpha included, against the table.  The expected bytes below equal
// kARGBTable[value * 4 + channel].  Afterwards the call is repeated on a full
// 1280 pixel row for benchmarking (that output is not verified).
TEST_F(LibYUVPlanarTest, TestARGBColorTable) {
  const int kRowPixels = 1280;
  const int kKnownPixels = 4;
  SIMD_ALIGNED(uint8_t orig_pixels[kRowPixels][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Lookup table.  Only the first 16 entries are given; the remaining
  // entries are zero-initialized.
  static const uint8_t kARGBTable[256 * 4] = {
      1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
  };

  const uint8_t kInput[kKnownPixels][4] = {
      {0u, 0u, 0u, 0u},
      {1u, 1u, 1u, 1u},
      {2u, 2u, 2u, 2u},
      {0u, 1u, 2u, 3u},
  };
  const unsigned int kExpected[kKnownPixels][4] = {
      {1u, 2u, 3u, 4u},
      {5u, 6u, 7u, 8u},
      {9u, 10u, 11u, 12u},
      {1u, 6u, 11u, 16u},
  };

  for (int p = 0; p < kKnownPixels; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }

  // Do 16 to test asm version.
  ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
  for (int p = 0; p < kKnownPixels; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c]);
    }
  }

  // Refill the whole row with a ramp; values wrap when stored as 8 bits.
  for (int x = 0; x < kRowPixels; ++x) {
    uint8_t* const px = orig_pixels[x];
    px[0] = x;
    px[1] = x / 2;
    px[2] = x / 3;
    px[3] = x;
  }
  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, kRowPixels,
                   1);
  }
}
757*4e366538SXin Li
758*4e366538SXin Li // Same as TestARGBColorTable except alpha does not change.
TEST_F(LibYUVPlanarTest,TestRGBColorTable)759*4e366538SXin Li TEST_F(LibYUVPlanarTest, TestRGBColorTable) {
760*4e366538SXin Li SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
761*4e366538SXin Li memset(orig_pixels, 0, sizeof(orig_pixels));
762*4e366538SXin Li
763*4e366538SXin Li // Matrix for Sepia.
764*4e366538SXin Li static const uint8_t kARGBTable[256 * 4] = {
765*4e366538SXin Li 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
766*4e366538SXin Li };
767*4e366538SXin Li
768*4e366538SXin Li orig_pixels[0][0] = 0u;
769*4e366538SXin Li orig_pixels[0][1] = 0u;
770*4e366538SXin Li orig_pixels[0][2] = 0u;
771*4e366538SXin Li orig_pixels[0][3] = 0u;
772*4e366538SXin Li orig_pixels[1][0] = 1u;
773*4e366538SXin Li orig_pixels[1][1] = 1u;
774*4e366538SXin Li orig_pixels[1][2] = 1u;
775*4e366538SXin Li orig_pixels[1][3] = 1u;
776*4e366538SXin Li orig_pixels[2][0] = 2u;
777*4e366538SXin Li orig_pixels[2][1] = 2u;
778*4e366538SXin Li orig_pixels[2][2] = 2u;
779*4e366538SXin Li orig_pixels[2][3] = 2u;
780*4e366538SXin Li orig_pixels[3][0] = 0u;
781*4e366538SXin Li orig_pixels[3][1] = 1u;
782*4e366538SXin Li orig_pixels[3][2] = 2u;
783*4e366538SXin Li orig_pixels[3][3] = 3u;
784*4e366538SXin Li // Do 16 to test asm version.
785*4e366538SXin Li RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
786*4e366538SXin Li EXPECT_EQ(1u, orig_pixels[0][0]);
787*4e366538SXin Li EXPECT_EQ(2u, orig_pixels[0][1]);
788*4e366538SXin Li EXPECT_EQ(3u, orig_pixels[0][2]);
789*4e366538SXin Li EXPECT_EQ(0u, orig_pixels[0][3]); // Alpha unchanged.
790*4e366538SXin Li EXPECT_EQ(5u, orig_pixels[1][0]);
791*4e366538SXin Li EXPECT_EQ(6u, orig_pixels[1][1]);
792*4e366538SXin Li EXPECT_EQ(7u, orig_pixels[1][2]);
793*4e366538SXin Li EXPECT_EQ(1u, orig_pixels[1][3]); // Alpha unchanged.
794*4e366538SXin Li EXPECT_EQ(9u, orig_pixels[2][0]);
795*4e366538SXin Li EXPECT_EQ(10u, orig_pixels[2][1]);
796*4e366538SXin Li EXPECT_EQ(11u, orig_pixels[2][2]);
797*4e366538SXin Li EXPECT_EQ(2u, orig_pixels[2][3]); // Alpha unchanged.
798*4e366538SXin Li EXPECT_EQ(1u, orig_pixels[3][0]);
799*4e366538SXin Li EXPECT_EQ(6u, orig_pixels[3][1]);
800*4e366538SXin Li EXPECT_EQ(11u, orig_pixels[3][2]);
801*4e366538SXin Li EXPECT_EQ(3u, orig_pixels[3][3]); // Alpha unchanged.
802*4e366538SXin Li
803*4e366538SXin Li for (int i = 0; i < 1280; ++i) {
804*4e366538SXin Li orig_pixels[i][0] = i;
805*4e366538SXin Li orig_pixels[i][1] = i / 2;
806*4e366538SXin Li orig_pixels[i][2] = i / 3;
807*4e366538SXin Li orig_pixels[i][3] = i;
808*4e366538SXin Li }
809*4e366538SXin Li for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
810*4e366538SXin Li RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1);
811*4e366538SXin Li }
812*4e366538SXin Li }
813*4e366538SXin Li
// Quantizes a ramp in place with an interval of 8 and an offset of 4, then
// checks that the first three channels land on interval * (v / interval) +
// offset while the fourth channel is left as written.  The call is then
// repeated for benchmarking without further checks.
TEST_F(LibYUVPlanarTest, TestARGBQuantize) {
  const int kRowPixels = 1280;
  const int kInterval = 8;
  const int kOffset = kInterval / 2;
  const int kScale = (65536 + kOffset) / kInterval;
  SIMD_ALIGNED(uint8_t orig_pixels[kRowPixels][4]);

  // Ramp input; values wrap when stored as 8 bits.
  for (int x = 0; x < kRowPixels; ++x) {
    uint8_t* const px = orig_pixels[x];
    px[0] = x;
    px[1] = x / 2;
    px[2] = x / 3;
    px[3] = x;
  }
  ARGBQuantize(&orig_pixels[0][0], 0, kScale, kInterval, kOffset, 0, 0,
               kRowPixels, 1);

  for (int x = 0; x < kRowPixels; ++x) {
    const uint8_t* const px = orig_pixels[x];
    EXPECT_EQ((x / kInterval * kInterval + kOffset) & 255, px[0]);
    EXPECT_EQ((x / 2 / kInterval * kInterval + kOffset) & 255, px[1]);
    EXPECT_EQ((x / 3 / kInterval * kInterval + kOffset) & 255, px[2]);
    EXPECT_EQ(x & 255, px[3]);
  }
  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    ARGBQuantize(&orig_pixels[0][0], 0, kScale, kInterval, kOffset, 0, 0,
                 kRowPixels, 1);
  }
}
837*4e366538SXin Li
// Mirrors a random ARGB image once with disable_cpu_flags_ masked in and
// benchmark_iterations_ times with benchmark_cpu_info_, then requires the two
// outputs to be identical byte for byte.
TEST_F(LibYUVPlanarTest, ARGBMirror_Opt) {
  const int kStride = benchmark_width_ * 4;
  const int kBytes = benchmark_width_ * benchmark_height_ * 4;
  align_buffer_page_end(src_argb, kBytes);
  align_buffer_page_end(dst_argb_opt, kBytes);
  align_buffer_page_end(dst_argb_c, kBytes);

  MemRandomize(src_argb, kBytes);

  // Reference output.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBMirror(src_argb, kStride, dst_argb_c, kStride, benchmark_width_,
             benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Output under test; repeated for timing.
  for (int run = 0; run < benchmark_iterations_; ++run) {
    ARGBMirror(src_argb, kStride, dst_argb_opt, kStride, benchmark_width_,
               benchmark_height_);
  }

  for (int b = 0; b < kBytes; ++b) {
    EXPECT_EQ(dst_argb_c[b], dst_argb_opt[b]);
  }

  free_aligned_buffer_page_end(src_argb);
  free_aligned_buffer_page_end(dst_argb_opt);
  free_aligned_buffer_page_end(dst_argb_c);
}
861*4e366538SXin Li
// Mirrors a random single-byte-per-pixel plane once with disable_cpu_flags_
// masked in and benchmark_iterations_ times with benchmark_cpu_info_, then
// requires the two outputs to be identical byte for byte.
TEST_F(LibYUVPlanarTest, MirrorPlane_Opt) {
  const int kStride = benchmark_width_;
  const int kBytes = benchmark_width_ * benchmark_height_;
  align_buffer_page_end(src_plane, kBytes);
  align_buffer_page_end(dst_plane_opt, kBytes);
  align_buffer_page_end(dst_plane_c, kBytes);

  MemRandomize(src_plane, kBytes);

  // Reference output.
  MaskCpuFlags(disable_cpu_flags_);
  MirrorPlane(src_plane, kStride, dst_plane_c, kStride, benchmark_width_,
              benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Output under test; repeated for timing.
  for (int run = 0; run < benchmark_iterations_; ++run) {
    MirrorPlane(src_plane, kStride, dst_plane_opt, kStride, benchmark_width_,
                benchmark_height_);
  }

  for (int b = 0; b < kBytes; ++b) {
    EXPECT_EQ(dst_plane_c[b], dst_plane_opt[b]);
  }

  free_aligned_buffer_page_end(src_plane);
  free_aligned_buffer_page_end(dst_plane_opt);
  free_aligned_buffer_page_end(dst_plane_c);
}
884*4e366538SXin Li
// Mirrors a random two-byte-per-pixel (UV) plane once with disable_cpu_flags_
// masked in and benchmark_iterations_ times with benchmark_cpu_info_, then
// requires the two outputs to be identical byte for byte.
TEST_F(LibYUVPlanarTest, MirrorUVPlane_Opt) {
  const int kStride = benchmark_width_ * 2;
  const int kBytes = benchmark_width_ * benchmark_height_ * 2;
  align_buffer_page_end(src_uv, kBytes);
  align_buffer_page_end(dst_uv_opt, kBytes);
  align_buffer_page_end(dst_uv_c, kBytes);

  MemRandomize(src_uv, kBytes);

  // Reference output.
  MaskCpuFlags(disable_cpu_flags_);
  MirrorUVPlane(src_uv, kStride, dst_uv_c, kStride, benchmark_width_,
                benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Output under test; repeated for timing.
  for (int run = 0; run < benchmark_iterations_; ++run) {
    MirrorUVPlane(src_uv, kStride, dst_uv_opt, kStride, benchmark_width_,
                  benchmark_height_);
  }

  for (int b = 0; b < kBytes; ++b) {
    EXPECT_EQ(dst_uv_c[b], dst_uv_opt[b]);
  }

  free_aligned_buffer_page_end(src_uv);
  free_aligned_buffer_page_end(dst_uv_opt);
  free_aligned_buffer_page_end(dst_uv_c);
}
908*4e366538SXin Li
// Checks ARGBShade on four known pixels with three shade values, then runs it
// over a full 1280 pixel row for benchmarking (that output is not verified).
TEST_F(LibYUVPlanarTest, TestShade) {
  const int kRowPixels = 1280;
  const int kKnownPixels = 4;
  SIMD_ALIGNED(uint8_t orig_pixels[kRowPixels][4]);
  SIMD_ALIGNED(uint8_t shade_pixels[kRowPixels][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  const uint8_t kInput[kKnownPixels][4] = {
      {10u, 20u, 40u, 80u},
      {0u, 0u, 0u, 255u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  for (int p = 0; p < kKnownPixels; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }

  // Shade 0x80ffffff: the first three channels are expected unchanged and
  // the fourth halved (80 -> 40, 255 -> 128).
  // Do 8 pixels to allow opt version to be used.
  const unsigned int kAlphaHalved[kKnownPixels][4] = {
      {10u, 20u, 40u, 40u},
      {0u, 0u, 0u, 128u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80ffffff);
  for (int p = 0; p < kKnownPixels; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kAlphaHalved[p][c], shade_pixels[p][c]);
    }
  }

  // Shade 0x80808080: every channel of the first pixel is expected halved.
  const unsigned int kAllHalved[4] = {5u, 10u, 20u, 40u};
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80808080);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(kAllHalved[c], shade_pixels[0][c]);
  }

  // Shade 0x10204080: every channel of the first pixel is expected to be 5.
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x10204080);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(5u, shade_pixels[0][c]);
  }

  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, kRowPixels, 1,
              0x80808080);
  }
}
966*4e366538SXin Li
// Checks ARGBInterpolate between two 4 pixel rows at interpolation values
// 128, 0 and 192, then runs it over full 1280 pixel rows for benchmarking
// (that output is not verified).
TEST_F(LibYUVPlanarTest, TestARGBInterpolate) {
  const int kRowPixels = 1280;
  const int kKnownPixels = 4;
  SIMD_ALIGNED(uint8_t orig_pixels_0[kRowPixels][4]);
  SIMD_ALIGNED(uint8_t orig_pixels_1[kRowPixels][4]);
  SIMD_ALIGNED(uint8_t interpolate_pixels[kRowPixels][4]);
  memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
  memset(orig_pixels_1, 0, sizeof(orig_pixels_1));

  const uint8_t kFirstRow[kKnownPixels][4] = {
      {16u, 32u, 64u, 128u},
      {0u, 0u, 0u, 255u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  const uint8_t kSecondRow[kKnownPixels][4] = {
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
      {255u, 255u, 255u, 255u},
  };
  for (int p = 0; p < kKnownPixels; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels_0[p][c] = kFirstRow[p][c];
      orig_pixels_1[p][c] = kSecondRow[p][c];
    }
  }

  // Interpolation 128: every output is expected halfway between the rows.
  const unsigned int kHalfway[kKnownPixels][4] = {
      {8u, 16u, 32u, 64u},
      {0u, 0u, 0u, 128u},
      {0u, 0u, 0u, 0u},
      {128u, 128u, 128u, 128u},
  };
  ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
                  &interpolate_pixels[0][0], 0, 4, 1, 128);
  for (int p = 0; p < kKnownPixels; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kHalfway[p][c], interpolate_pixels[p][c]);
    }
  }

  // Interpolation 0: the first pixel is expected to equal the first row.
  const unsigned int kFirstOnly[4] = {16u, 32u, 64u, 128u};
  ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
                  &interpolate_pixels[0][0], 0, 4, 1, 0);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(kFirstOnly[c], interpolate_pixels[0][c]);
  }

  // Interpolation 192: the first pixel is expected at a quarter of the first
  // row's value (the second row is zero there).
  const unsigned int kQuarter[4] = {4u, 8u, 16u, 32u};
  ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
                  &interpolate_pixels[0][0], 0, 4, 1, 192);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(kQuarter[c], interpolate_pixels[0][c]);
  }

  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
                    &interpolate_pixels[0][0], 0, kRowPixels, 1, 128);
  }
}
1047*4e366538SXin Li
// Checks InterpolatePlane between two 16 byte rows at interpolation values
// 128, 0 and 192, then runs it over full 1280 byte rows with value 123 for
// benchmarking (that output is not verified).
TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
  const int kRowBytes = 1280;
  const int kKnownBytes = 16;
  SIMD_ALIGNED(uint8_t orig_pixels_0[kRowBytes]);
  SIMD_ALIGNED(uint8_t orig_pixels_1[kRowBytes]);
  SIMD_ALIGNED(uint8_t interpolate_pixels[kRowBytes]);
  memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
  memset(orig_pixels_1, 0, sizeof(orig_pixels_1));

  const uint8_t kFirstRow[kKnownBytes] = {
      16u, 32u, 64u, 128u, 0u, 0u, 0u, 255u,
      0u,  0u,  0u,  0u,   0u, 0u, 0u, 0u,
  };
  const uint8_t kSecondRow[kKnownBytes] = {
      0u, 0u, 0u, 0u, 0u,   0u,   0u,   0u,
      0u, 0u, 0u, 0u, 255u, 255u, 255u, 255u,
  };
  for (int b = 0; b < kKnownBytes; ++b) {
    orig_pixels_0[b] = kFirstRow[b];
    orig_pixels_1[b] = kSecondRow[b];
  }

  // Interpolation 128: every output is expected halfway between the rows.
  const unsigned int kHalfway[kKnownBytes] = {
      8u, 16u, 32u, 64u, 0u,   0u,   0u,   128u,
      0u, 0u,  0u,  0u,  128u, 128u, 128u, 128u,
  };
  InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
                   &interpolate_pixels[0], 0, 16, 1, 128);
  for (int b = 0; b < kKnownBytes; ++b) {
    EXPECT_EQ(kHalfway[b], interpolate_pixels[b]);
  }

  // Interpolation 0: the first four bytes are expected to equal the first
  // row.
  const unsigned int kFirstOnly[4] = {16u, 32u, 64u, 128u};
  InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
                   &interpolate_pixels[0], 0, 16, 1, 0);
  for (int b = 0; b < 4; ++b) {
    EXPECT_EQ(kFirstOnly[b], interpolate_pixels[b]);
  }

  // Interpolation 192: the first four bytes are expected at a quarter of the
  // first row's value (the second row is zero there).
  const unsigned int kQuarter[4] = {4u, 8u, 16u, 32u};
  InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
                   &interpolate_pixels[0], 0, 16, 1, 192);
  for (int b = 0; b < 4; ++b) {
    EXPECT_EQ(kQuarter[b], interpolate_pixels[b]);
  }

  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
                     &interpolate_pixels[0], 0, kRowBytes, 1, 123);
  }
}
1128*4e366538SXin Li
// 16 bit counterpart of the InterpolatePlane test: checks InterpolatePlane_16
// between two 16 sample rows at interpolation values 128, 0 and 192, then
// runs it over full 1280 sample rows with value 123 for benchmarking (that
// output is not verified).
TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) {
  const int kRowSamples = 1280;
  const int kKnownSamples = 16;
  SIMD_ALIGNED(uint16_t orig_pixels_0[kRowSamples]);
  SIMD_ALIGNED(uint16_t orig_pixels_1[kRowSamples]);
  SIMD_ALIGNED(uint16_t interpolate_pixels[kRowSamples]);
  memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
  memset(orig_pixels_1, 0, sizeof(orig_pixels_1));

  const uint16_t kFirstRow[kKnownSamples] = {
      16u, 32u, 64u, 128u, 0u, 0u, 0u, 255u,
      0u,  0u,  0u,  0u,   0u, 0u, 0u, 0u,
  };
  const uint16_t kSecondRow[kKnownSamples] = {
      0u, 0u, 0u, 0u, 0u,   0u,   0u,   0u,
      0u, 0u, 0u, 0u, 255u, 255u, 255u, 255u,
  };
  for (int s = 0; s < kKnownSamples; ++s) {
    orig_pixels_0[s] = kFirstRow[s];
    orig_pixels_1[s] = kSecondRow[s];
  }

  // Interpolation 128: every output is expected halfway between the rows.
  const unsigned int kHalfway[kKnownSamples] = {
      8u, 16u, 32u, 64u, 0u,   0u,   0u,   128u,
      0u, 0u,  0u,  0u,  128u, 128u, 128u, 128u,
  };
  InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
                      &interpolate_pixels[0], 0, 16, 1, 128);
  for (int s = 0; s < kKnownSamples; ++s) {
    EXPECT_EQ(kHalfway[s], interpolate_pixels[s]);
  }

  // Interpolation 0: the first four samples are expected to equal the first
  // row.
  const unsigned int kFirstOnly[4] = {16u, 32u, 64u, 128u};
  InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
                      &interpolate_pixels[0], 0, 16, 1, 0);
  for (int s = 0; s < 4; ++s) {
    EXPECT_EQ(kFirstOnly[s], interpolate_pixels[s]);
  }

  // Interpolation 192: the first four samples are expected at a quarter of
  // the first row's value (the second row is zero there).
  const unsigned int kQuarter[4] = {4u, 8u, 16u, 32u};
  InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
                      &interpolate_pixels[0], 0, 16, 1, 192);
  for (int s = 0; s < 4; ++s) {
    EXPECT_EQ(kQuarter[s], interpolate_pixels[s]);
  }

  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
                        &interpolate_pixels[0], 0, kRowSamples, 1, 123);
  }
}
1209*4e366538SXin Li
// Generates a test named ARGBInterpolate<TERP><N>.  The test fills two source
// buffers with fastrand() bytes, calls ARGBInterpolate once with
// disable_cpu_flags_ masked in and benchmark_iterations_ times with
// benchmark_cpu_info_, and requires both destinations to match byte for byte.
//   BPP_A, STRIDE_A: bytes per pixel and stride rounding for both sources.
//   BPP_B, STRIDE_B: bytes per pixel and stride rounding for the destination.
//   W1280: row width in pixels.
//   TERP: interpolation value passed to ARGBInterpolate.
//   N: suffix appended to the test name.
//   NEG: sign token placed in front of the height argument.
//   OFF: byte offset added to both source pointers.
//   FMT_A and FMT_B are not referenced by the expansion.
#define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \
                 N, NEG, OFF)                                                 \
  TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) {                        \
    const int kCols = W1280;                                                  \
    const int kRows = benchmark_height_;                                      \
    const int kSrcStride =                                                    \
        (kCols * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;                 \
    const int kDstStride =                                                    \
        (kCols * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;                 \
    const int kSrcBytes = kSrcStride * kRows;                                 \
    const int kDstBytes = kDstStride * kRows;                                 \
    align_buffer_page_end(src_first, kSrcBytes + OFF);                        \
    align_buffer_page_end(src_second, kSrcBytes + OFF);                       \
    align_buffer_page_end(dst_reference, kDstBytes);                          \
    align_buffer_page_end(dst_optimized, kDstBytes);                          \
    for (int i = 0; i < kSrcBytes; ++i) {                                     \
      src_first[i + OFF] = (fastrand() & 0xff);                               \
      src_second[i + OFF] = (fastrand() & 0xff);                              \
    }                                                                         \
    MaskCpuFlags(disable_cpu_flags_);                                         \
    ARGBInterpolate(src_first + OFF, kSrcStride, src_second + OFF,            \
                    kSrcStride, dst_reference, kDstStride, kCols,             \
                    NEG kRows, TERP);                                         \
    MaskCpuFlags(benchmark_cpu_info_);                                        \
    for (int run = 0; run < benchmark_iterations_; ++run) {                   \
      ARGBInterpolate(src_first + OFF, kSrcStride, src_second + OFF,          \
                      kSrcStride, dst_optimized, kDstStride, kCols,           \
                      NEG kRows, TERP);                                       \
    }                                                                         \
    for (int i = 0; i < kDstBytes; ++i) {                                     \
      EXPECT_EQ(dst_reference[i], dst_optimized[i]);                          \
    }                                                                         \
    free_aligned_buffer_page_end(src_first);                                  \
    free_aligned_buffer_page_end(src_second);                                 \
    free_aligned_buffer_page_end(dst_reference);                              \
    free_aligned_buffer_page_end(dst_optimized);                              \
  }
1243*4e366538SXin Li
1244*4e366538SXin Li #define TESTINTERPOLATE(TERP) \
1245*4e366538SXin Li TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ + 1, TERP, _Any, +, 0) \
1246*4e366538SXin Li TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \
1247*4e366538SXin Li TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0) \
1248*4e366538SXin Li TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0)
1249*4e366538SXin Li
1250*4e366538SXin Li TESTINTERPOLATE(0)
1251*4e366538SXin Li TESTINTERPOLATE(64)
1252*4e366538SXin Li TESTINTERPOLATE(128)
1253*4e366538SXin Li TESTINTERPOLATE(192)
1254*4e366538SXin Li TESTINTERPOLATE(255)
1255*4e366538SXin Li
TestBlend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int attenuate)1256*4e366538SXin Li static int TestBlend(int width,
1257*4e366538SXin Li int height,
1258*4e366538SXin Li int benchmark_iterations,
1259*4e366538SXin Li int disable_cpu_flags,
1260*4e366538SXin Li int benchmark_cpu_info,
1261*4e366538SXin Li int invert,
1262*4e366538SXin Li int off,
1263*4e366538SXin Li int attenuate) {
1264*4e366538SXin Li if (width < 1) {
1265*4e366538SXin Li width = 1;
1266*4e366538SXin Li }
1267*4e366538SXin Li const int kBpp = 4;
1268*4e366538SXin Li const int kStride = width * kBpp;
1269*4e366538SXin Li align_buffer_page_end(src_argb_a, kStride * height + off);
1270*4e366538SXin Li align_buffer_page_end(src_argb_b, kStride * height + off);
1271*4e366538SXin Li align_buffer_page_end(dst_argb_c, kStride * height);
1272*4e366538SXin Li align_buffer_page_end(dst_argb_opt, kStride * height);
1273*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
1274*4e366538SXin Li src_argb_a[i + off] = (fastrand() & 0xff);
1275*4e366538SXin Li src_argb_b[i + off] = (fastrand() & 0xff);
1276*4e366538SXin Li }
1277*4e366538SXin Li MemRandomize(src_argb_a, kStride * height + off);
1278*4e366538SXin Li MemRandomize(src_argb_b, kStride * height + off);
1279*4e366538SXin Li if (attenuate) {
1280*4e366538SXin Li ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
1281*4e366538SXin Li height);
1282*4e366538SXin Li }
1283*4e366538SXin Li memset(dst_argb_c, 255, kStride * height);
1284*4e366538SXin Li memset(dst_argb_opt, 255, kStride * height);
1285*4e366538SXin Li
1286*4e366538SXin Li MaskCpuFlags(disable_cpu_flags);
1287*4e366538SXin Li ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1288*4e366538SXin Li kStride, width, invert * height);
1289*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info);
1290*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
1291*4e366538SXin Li ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride,
1292*4e366538SXin Li dst_argb_opt, kStride, width, invert * height);
1293*4e366538SXin Li }
1294*4e366538SXin Li int max_diff = 0;
1295*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
1296*4e366538SXin Li int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1297*4e366538SXin Li static_cast<int>(dst_argb_opt[i]));
1298*4e366538SXin Li if (abs_diff > max_diff) {
1299*4e366538SXin Li max_diff = abs_diff;
1300*4e366538SXin Li }
1301*4e366538SXin Li }
1302*4e366538SXin Li free_aligned_buffer_page_end(src_argb_a);
1303*4e366538SXin Li free_aligned_buffer_page_end(src_argb_b);
1304*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_c);
1305*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_opt);
1306*4e366538SXin Li return max_diff;
1307*4e366538SXin Li }
1308*4e366538SXin Li
// ARGBBlend with a width one pixel larger than benchmark_width_.
TEST_F(LibYUVPlanarTest, ARGBBlend_Any) {
  const int kInvert = +1;
  const int kOff = 0;
  const int kAttenuate = 1;
  const int max_diff = TestBlend(
      benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kAttenuate);
  EXPECT_LE(max_diff, 1);
}

// ARGBBlend with both source pointers offset by one byte.
TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) {
  const int kInvert = +1;
  const int kOff = 1;
  const int kAttenuate = 1;
  const int max_diff = TestBlend(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kAttenuate);
  EXPECT_LE(max_diff, 1);
}

// ARGBBlend with a negated height.
TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) {
  const int kInvert = -1;
  const int kOff = 0;
  const int kAttenuate = 1;
  const int max_diff = TestBlend(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kAttenuate);
  EXPECT_LE(max_diff, 1);
}

// ARGBBlend on a first source that has not been passed through ARGBAttenuate.
TEST_F(LibYUVPlanarTest, ARGBBlend_Unattenuated) {
  const int kInvert = +1;
  const int kOff = 0;
  const int kAttenuate = 0;
  const int max_diff = TestBlend(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kAttenuate);
  EXPECT_LE(max_diff, 1);
}

// ARGBBlend at the benchmark size with no offset and a positive height.
TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
  const int kInvert = +1;
  const int kOff = 0;
  const int kAttenuate = 1;
  const int max_diff = TestBlend(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff, kAttenuate);
  EXPECT_LE(max_diff, 1);
}
1343*4e366538SXin Li
TestBlendPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1344*4e366538SXin Li static void TestBlendPlane(int width,
1345*4e366538SXin Li int height,
1346*4e366538SXin Li int benchmark_iterations,
1347*4e366538SXin Li int disable_cpu_flags,
1348*4e366538SXin Li int benchmark_cpu_info,
1349*4e366538SXin Li int invert,
1350*4e366538SXin Li int off) {
1351*4e366538SXin Li if (width < 1) {
1352*4e366538SXin Li width = 1;
1353*4e366538SXin Li }
1354*4e366538SXin Li const int kBpp = 1;
1355*4e366538SXin Li const int kStride = width * kBpp;
1356*4e366538SXin Li align_buffer_page_end(src_argb_a, kStride * height + off);
1357*4e366538SXin Li align_buffer_page_end(src_argb_b, kStride * height + off);
1358*4e366538SXin Li align_buffer_page_end(src_argb_alpha, kStride * height + off);
1359*4e366538SXin Li align_buffer_page_end(dst_argb_c, kStride * height + off);
1360*4e366538SXin Li align_buffer_page_end(dst_argb_opt, kStride * height + off);
1361*4e366538SXin Li memset(dst_argb_c, 255, kStride * height + off);
1362*4e366538SXin Li memset(dst_argb_opt, 255, kStride * height + off);
1363*4e366538SXin Li
1364*4e366538SXin Li // Test source is maintained exactly if alpha is 255.
1365*4e366538SXin Li for (int i = 0; i < width; ++i) {
1366*4e366538SXin Li src_argb_a[i + off] = i & 255;
1367*4e366538SXin Li src_argb_b[i + off] = 255 - (i & 255);
1368*4e366538SXin Li }
1369*4e366538SXin Li memset(src_argb_alpha + off, 255, width);
1370*4e366538SXin Li BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1371*4e366538SXin Li src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1372*4e366538SXin Li for (int i = 0; i < width; ++i) {
1373*4e366538SXin Li EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
1374*4e366538SXin Li }
1375*4e366538SXin Li // Test destination is maintained exactly if alpha is 0.
1376*4e366538SXin Li memset(src_argb_alpha + off, 0, width);
1377*4e366538SXin Li BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1378*4e366538SXin Li src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1379*4e366538SXin Li for (int i = 0; i < width; ++i) {
1380*4e366538SXin Li EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
1381*4e366538SXin Li }
1382*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
1383*4e366538SXin Li src_argb_a[i + off] = (fastrand() & 0xff);
1384*4e366538SXin Li src_argb_b[i + off] = (fastrand() & 0xff);
1385*4e366538SXin Li src_argb_alpha[i + off] = (fastrand() & 0xff);
1386*4e366538SXin Li }
1387*4e366538SXin Li
1388*4e366538SXin Li MaskCpuFlags(disable_cpu_flags);
1389*4e366538SXin Li BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1390*4e366538SXin Li src_argb_alpha + off, width, dst_argb_c + off, width, width,
1391*4e366538SXin Li invert * height);
1392*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info);
1393*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
1394*4e366538SXin Li BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1395*4e366538SXin Li src_argb_alpha + off, width, dst_argb_opt + off, width, width,
1396*4e366538SXin Li invert * height);
1397*4e366538SXin Li }
1398*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
1399*4e366538SXin Li EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
1400*4e366538SXin Li }
1401*4e366538SXin Li free_aligned_buffer_page_end(src_argb_a);
1402*4e366538SXin Li free_aligned_buffer_page_end(src_argb_b);
1403*4e366538SXin Li free_aligned_buffer_page_end(src_argb_alpha);
1404*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_c);
1405*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_opt);
1406*4e366538SXin Li }
1407*4e366538SXin Li
// BlendPlane at the benchmark size with no pointer offset.
TEST_F(LibYUVPlanarTest, BlendPlane_Opt) {
  const int kInvert = +1;
  const int kOff = 0;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
}

// BlendPlane with every plane pointer offset by one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
  const int kInvert = +1;
  const int kOff = 1;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
}

// BlendPlane with a width one pixel larger than benchmark_width_ (and a
// one-byte pointer offset).
TEST_F(LibYUVPlanarTest, BlendPlane_Any) {
  const int kInvert = +1;
  const int kOff = 1;
  TestBlendPlane(benchmark_width_ + 1, benchmark_height_,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_, kInvert, kOff);
}

// BlendPlane with a negated height (and a one-byte pointer offset).
TEST_F(LibYUVPlanarTest, BlendPlane_Invert) {
  const int kInvert = -1;
  const int kOff = 1;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
}
1424*4e366538SXin Li
// Integer quotient of v by a, rounded up when v is non-negative and a is
// positive.
#define SUBSAMPLE(v, a) (((v) + (a) - 1) / (a))
1426*4e366538SXin Li
TestI420Blend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1427*4e366538SXin Li static void TestI420Blend(int width,
1428*4e366538SXin Li int height,
1429*4e366538SXin Li int benchmark_iterations,
1430*4e366538SXin Li int disable_cpu_flags,
1431*4e366538SXin Li int benchmark_cpu_info,
1432*4e366538SXin Li int invert,
1433*4e366538SXin Li int off) {
1434*4e366538SXin Li width = ((width) > 0) ? (width) : 1;
1435*4e366538SXin Li const int kStrideUV = SUBSAMPLE(width, 2);
1436*4e366538SXin Li const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2);
1437*4e366538SXin Li align_buffer_page_end(src_y0, width * height + off);
1438*4e366538SXin Li align_buffer_page_end(src_u0, kSizeUV + off);
1439*4e366538SXin Li align_buffer_page_end(src_v0, kSizeUV + off);
1440*4e366538SXin Li align_buffer_page_end(src_y1, width * height + off);
1441*4e366538SXin Li align_buffer_page_end(src_u1, kSizeUV + off);
1442*4e366538SXin Li align_buffer_page_end(src_v1, kSizeUV + off);
1443*4e366538SXin Li align_buffer_page_end(src_a, width * height + off);
1444*4e366538SXin Li align_buffer_page_end(dst_y_c, width * height + off);
1445*4e366538SXin Li align_buffer_page_end(dst_u_c, kSizeUV + off);
1446*4e366538SXin Li align_buffer_page_end(dst_v_c, kSizeUV + off);
1447*4e366538SXin Li align_buffer_page_end(dst_y_opt, width * height + off);
1448*4e366538SXin Li align_buffer_page_end(dst_u_opt, kSizeUV + off);
1449*4e366538SXin Li align_buffer_page_end(dst_v_opt, kSizeUV + off);
1450*4e366538SXin Li
1451*4e366538SXin Li MemRandomize(src_y0, width * height + off);
1452*4e366538SXin Li MemRandomize(src_u0, kSizeUV + off);
1453*4e366538SXin Li MemRandomize(src_v0, kSizeUV + off);
1454*4e366538SXin Li MemRandomize(src_y1, width * height + off);
1455*4e366538SXin Li MemRandomize(src_u1, kSizeUV + off);
1456*4e366538SXin Li MemRandomize(src_v1, kSizeUV + off);
1457*4e366538SXin Li MemRandomize(src_a, width * height + off);
1458*4e366538SXin Li memset(dst_y_c, 255, width * height + off);
1459*4e366538SXin Li memset(dst_u_c, 255, kSizeUV + off);
1460*4e366538SXin Li memset(dst_v_c, 255, kSizeUV + off);
1461*4e366538SXin Li memset(dst_y_opt, 255, width * height + off);
1462*4e366538SXin Li memset(dst_u_opt, 255, kSizeUV + off);
1463*4e366538SXin Li memset(dst_v_opt, 255, kSizeUV + off);
1464*4e366538SXin Li
1465*4e366538SXin Li MaskCpuFlags(disable_cpu_flags);
1466*4e366538SXin Li I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1467*4e366538SXin Li kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1468*4e366538SXin Li src_v1 + off, kStrideUV, src_a + off, width, dst_y_c + off, width,
1469*4e366538SXin Li dst_u_c + off, kStrideUV, dst_v_c + off, kStrideUV, width,
1470*4e366538SXin Li invert * height);
1471*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info);
1472*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
1473*4e366538SXin Li I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1474*4e366538SXin Li kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1475*4e366538SXin Li src_v1 + off, kStrideUV, src_a + off, width, dst_y_opt + off,
1476*4e366538SXin Li width, dst_u_opt + off, kStrideUV, dst_v_opt + off, kStrideUV,
1477*4e366538SXin Li width, invert * height);
1478*4e366538SXin Li }
1479*4e366538SXin Li for (int i = 0; i < width * height; ++i) {
1480*4e366538SXin Li EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]);
1481*4e366538SXin Li }
1482*4e366538SXin Li for (int i = 0; i < kSizeUV; ++i) {
1483*4e366538SXin Li EXPECT_EQ(dst_u_c[i + off], dst_u_opt[i + off]);
1484*4e366538SXin Li EXPECT_EQ(dst_v_c[i + off], dst_v_opt[i + off]);
1485*4e366538SXin Li }
1486*4e366538SXin Li free_aligned_buffer_page_end(src_y0);
1487*4e366538SXin Li free_aligned_buffer_page_end(src_u0);
1488*4e366538SXin Li free_aligned_buffer_page_end(src_v0);
1489*4e366538SXin Li free_aligned_buffer_page_end(src_y1);
1490*4e366538SXin Li free_aligned_buffer_page_end(src_u1);
1491*4e366538SXin Li free_aligned_buffer_page_end(src_v1);
1492*4e366538SXin Li free_aligned_buffer_page_end(src_a);
1493*4e366538SXin Li free_aligned_buffer_page_end(dst_y_c);
1494*4e366538SXin Li free_aligned_buffer_page_end(dst_u_c);
1495*4e366538SXin Li free_aligned_buffer_page_end(dst_v_c);
1496*4e366538SXin Li free_aligned_buffer_page_end(dst_y_opt);
1497*4e366538SXin Li free_aligned_buffer_page_end(dst_u_opt);
1498*4e366538SXin Li free_aligned_buffer_page_end(dst_v_opt);
1499*4e366538SXin Li }
1500*4e366538SXin Li
// I420Blend at the benchmark size with no pointer offset.
TEST_F(LibYUVPlanarTest, I420Blend_Opt) {
  const int kInvert = +1;
  const int kOff = 0;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
}

// I420Blend with every plane pointer offset by one byte.
TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
  const int kInvert = +1;
  const int kOff = 1;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
}

// I420Blend with a width one pixel larger than benchmark_width_.
// TODO(fbarchard): DISABLED because _Any uses C.  Avoid C and re-enable.
TEST_F(LibYUVPlanarTest, DISABLED_I420Blend_Any) {
  const int kInvert = +1;
  const int kOff = 0;
  TestI420Blend(benchmark_width_ + 1, benchmark_height_,
                benchmark_iterations_, disable_cpu_flags_,
                benchmark_cpu_info_, kInvert, kOff);
}

// I420Blend with a negated height.
TEST_F(LibYUVPlanarTest, I420Blend_Invert) {
  const int kInvert = -1;
  const int kOff = 0;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
}
1519*4e366538SXin Li
// Checks ARGBAffineRow_C against known values and, where the SSE2 row is
// compiled in and the CPU reports SSE2, checks that ARGBAffineRow_SSE2
// produces the same row and benchmarks it.
TEST_F(LibYUVPlanarTest, TestAffine) {
  SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
  SIMD_ALIGNED(uint8_t interpolate_pixels_C[1280][4]);

  // Every channel of pixel i holds the low 8 bits of i, so an output pixel's
  // value identifies the source pixel it was taken from.
  for (int i = 0; i < 1280; ++i) {
    for (int j = 0; j < 4; ++j) {
      orig_pixels_0[i][j] = static_cast<uint8_t>(i);
    }
  }

  // Presumably {u, v, du, dv}: start at the origin and advance 0.75 source
  // pixels per output pixel. The expectations below agree with that reading
  // (128 * 0.75 = 96, 255 * 0.75 = 191.25).
  float uv_step[4] = {0.f, 0.f, 0.75f, 0.f};

  ARGBAffineRow_C(&orig_pixels_0[0][0], 0, &interpolate_pixels_C[0][0], uv_step,
                  1280);
  EXPECT_EQ(0u, interpolate_pixels_C[0][0]);
  EXPECT_EQ(96u, interpolate_pixels_C[128][0]);
  EXPECT_EQ(191u, interpolate_pixels_C[255][3]);

#if defined(HAS_ARGBAFFINEROW_SSE2)
  // The SSE2 row is only executed once the CPU flag has been checked, so the
  // comparison as well as the benchmark loop sit inside the guard.
  int has_sse2 = TestCpuFlag(kCpuHasSSE2);
  if (has_sse2) {
    SIMD_ALIGNED(uint8_t interpolate_pixels_Opt[1280][4]);
    ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
                       uv_step, 1280);
    EXPECT_EQ(0, memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4));

    for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
      ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
                         uv_step, 1280);
    }
  }
#endif
}
1553*4e366538SXin Li
// Copies the interior of a bordered plane with CopyPlane under both CPU-flag
// settings and expects the two destination buffers to end up identical.
TEST_F(LibYUVPlanarTest, TestCopyPlane) {
  const int yw = benchmark_width_;
  const int yh = benchmark_height_;
  const int b = 12;  // border, in pixels, on every side of the image
  const int y_st = yw + b * 2;  // row size of the bordered plane
  const int y_plane_size = y_st * (yh + b * 2);

  align_buffer_page_end(orig_y, y_plane_size);
  align_buffer_page_end(dst_c, y_plane_size);
  align_buffer_page_end(dst_opt, y_plane_size);

  memset(orig_y, 0, y_plane_size);
  memset(dst_c, 0, y_plane_size);
  memset(dst_opt, 0, y_plane_size);

  // Random bytes inside the border of the source; the border itself stays 0.
  for (int row = b; row < (yh + b); ++row) {
    uint8_t* const src_row = orig_y + row * y_st;
    for (int col = b; col < (yw + b); ++col) {
      src_row[col] = fastrand() & 0xff;
    }
  }

  // The same random byte (0..127) at each position of both destinations.
  for (int n = 0; n < y_plane_size; ++n) {
    const uint8_t random_number = fastrand() & 0x7f;
    dst_c[n] = random_number;
    dst_opt[n] = random_number;
  }

  // Offset of the first pixel inside the border.
  const int y_off = b * y_st + b;
  // NOTE(review): the destination stride is 8 bytes while yw bytes are copied
  // per row, so destination rows overlap whenever yw > 8 - confirm that this
  // is intended.
  const int stride = 8;

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int run = 0; run < benchmark_iterations_; run++) {
    CopyPlane(orig_y + y_off, y_st, dst_c + y_off, stride, yw, yh);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; run++) {
    CopyPlane(orig_y + y_off, y_st, dst_opt + y_off, stride, yw, yh);
  }

  // Count the bytes, border included, on which the two results disagree.
  int err = 0;
  for (int n = 0; n < y_plane_size; ++n) {
    err += (dst_c[n] != dst_opt[n]) ? 1 : 0;
  }

  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);

  EXPECT_EQ(0, err);
}
1613*4e366538SXin Li
// Copies a random plane with CopyPlane under both CPU-flag settings, using
// the width as the stride on both sides, and expects identical output.
TEST_F(LibYUVPlanarTest, CopyPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int y_plane_size = kWidth * kHeight;

  align_buffer_page_end(orig_y, y_plane_size);
  align_buffer_page_end(dst_c, y_plane_size);
  align_buffer_page_end(dst_opt, y_plane_size);

  MemRandomize(orig_y, y_plane_size);
  // Different fill bytes, so the buffers only match where a copy wrote them.
  memset(dst_c, 1, y_plane_size);
  memset(dst_opt, 2, y_plane_size);

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int run = 0; run < benchmark_iterations_; run++) {
    CopyPlane(orig_y, kWidth, dst_c, kWidth, kWidth, kHeight);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; run++) {
    CopyPlane(orig_y, kWidth, dst_opt, kWidth, kWidth, kHeight);
  }

  for (int i = 0; i < y_plane_size; ++i) {
    EXPECT_EQ(dst_c[i], dst_opt[i]);
  }

  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);
}
1647*4e366538SXin Li
// Verifies that CopyPlane leaves both source and destination untouched when
// the rectangle has a zero width or a zero height.
TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) {
  uint8_t src = 42;
  uint8_t dst = 0;

  // First pass with optimizations disabled, second pass with them enabled.
  const int kCpuMasks[2] = {disable_cpu_flags_, benchmark_cpu_info_};
  for (int pass = 0; pass < 2; ++pass) {
    MaskCpuFlags(kCpuMasks[pass]);

    // Zero width, zero height and zero strides.
    CopyPlane(&src, 0, &dst, 0, 0, 0);
    EXPECT_EQ(src, 42);
    EXPECT_EQ(dst, 0);

    // Width of one, zero height.
    CopyPlane(&src, 1, &dst, 1, 1, 0);
    EXPECT_EQ(src, 42);
    EXPECT_EQ(dst, 0);

    // Zero width, height of one.
    CopyPlane(&src, 1, &dst, 1, 0, 1);
    EXPECT_EQ(src, 42);
    EXPECT_EQ(dst, 0);
  }
}
1682*4e366538SXin Li
// Runs DetilePlane (tile height 16) on a random tiled plane under both
// CPU-flag settings and expects identical output.
TEST_F(LibYUVPlanarTest, TestDetilePlane) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;

  // The tiled source is sized with both dimensions rounded up to a multiple
  // of 16.
  const int tile_width = (kWidth + 15) & ~15;
  const int tile_height = (kHeight + 15) & ~15;
  const int tile_plane_size = tile_width * tile_height;
  const int y_plane_size = kWidth * kHeight;

  align_buffer_page_end(tile_y, tile_plane_size);
  align_buffer_page_end(dst_c, y_plane_size);
  align_buffer_page_end(dst_opt, y_plane_size);

  MemRandomize(tile_y, tile_plane_size);
  memset(dst_c, 0, y_plane_size);
  memset(dst_opt, 0, y_plane_size);

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int run = 0; run < benchmark_iterations_; run++) {
    DetilePlane(tile_y, tile_width, dst_c, kWidth, kWidth, kHeight, 16);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; run++) {
    DetilePlane(tile_y, tile_width, dst_opt, kWidth, kWidth, kHeight, 16);
  }

  for (int i = 0; i < y_plane_size; ++i) {
    EXPECT_EQ(dst_c[i], dst_opt[i]);
  }

  free_aligned_buffer_page_end(tile_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);
}
1721*4e366538SXin Li
// Runs DetilePlane_16 (tile height 16) on a random tiled plane of 16-bit
// samples under both CPU-flag settings and expects identical output bytes.
TEST_F(LibYUVPlanarTest, TestDetilePlane_16) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;

  // The tiled source is sized with both dimensions rounded up to a multiple
  // of 16; every buffer size is doubled for two bytes per sample.
  const int tile_width = (kWidth + 15) & ~15;
  const int tile_height = (kHeight + 15) & ~15;
  const int tile_plane_size = tile_width * tile_height * 2;
  const int y_plane_size = kWidth * kHeight * 2;

  align_buffer_page_end(tile_y, tile_plane_size);
  align_buffer_page_end(dst_c, y_plane_size);
  align_buffer_page_end(dst_opt, y_plane_size);

  MemRandomize(tile_y, tile_plane_size);
  memset(dst_c, 0, y_plane_size);
  memset(dst_opt, 0, y_plane_size);

  // 16-bit views of the byte buffers allocated above.
  const uint16_t* const tiled16 = reinterpret_cast<const uint16_t*>(tile_y);
  uint16_t* const out16_c = reinterpret_cast<uint16_t*>(dst_c);
  uint16_t* const out16_opt = reinterpret_cast<uint16_t*>(dst_opt);

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int run = 0; run < benchmark_iterations_; run++) {
    DetilePlane_16(tiled16, tile_width, out16_c, kWidth, kWidth, kHeight, 16);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; run++) {
    DetilePlane_16(tiled16, tile_width, out16_opt, kWidth, kWidth, kHeight,
                   16);
  }

  // Compared byte by byte over the whole output.
  for (int i = 0; i < y_plane_size; ++i) {
    EXPECT_EQ(dst_c[i], dst_opt[i]);
  }

  free_aligned_buffer_page_end(tile_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);
}
1760*4e366538SXin Li
1761*4e366538SXin Li // Compares DetileSplitUV to 2 step Detile + SplitUV
TEST_F(LibYUVPlanarTest,TestDetileSplitUVPlane_Correctness)1762*4e366538SXin Li TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
1763*4e366538SXin Li int i, j;
1764*4e366538SXin Li
1765*4e366538SXin Li // orig is tiled. Allocate enough memory for tiles.
1766*4e366538SXin Li int tile_width = (benchmark_width_ + 15) & ~15;
1767*4e366538SXin Li int tile_height = (benchmark_height_ + 15) & ~15;
1768*4e366538SXin Li int tile_plane_size = tile_width * tile_height;
1769*4e366538SXin Li int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
1770*4e366538SXin Li align_buffer_page_end(tile_uv, tile_plane_size);
1771*4e366538SXin Li align_buffer_page_end(detiled_uv, tile_plane_size);
1772*4e366538SXin Li align_buffer_page_end(dst_u_two_stage, uv_plane_size);
1773*4e366538SXin Li align_buffer_page_end(dst_u_opt, uv_plane_size);
1774*4e366538SXin Li align_buffer_page_end(dst_v_two_stage, uv_plane_size);
1775*4e366538SXin Li align_buffer_page_end(dst_v_opt, uv_plane_size);
1776*4e366538SXin Li
1777*4e366538SXin Li MemRandomize(tile_uv, tile_plane_size);
1778*4e366538SXin Li memset(detiled_uv, 0, tile_plane_size);
1779*4e366538SXin Li memset(dst_u_two_stage, 0, uv_plane_size);
1780*4e366538SXin Li memset(dst_u_opt, 0, uv_plane_size);
1781*4e366538SXin Li memset(dst_v_two_stage, 0, uv_plane_size);
1782*4e366538SXin Li memset(dst_v_opt, 0, uv_plane_size);
1783*4e366538SXin Li
1784*4e366538SXin Li DetileSplitUVPlane(tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2,
1785*4e366538SXin Li dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_,
1786*4e366538SXin Li benchmark_height_, 16);
1787*4e366538SXin Li
1788*4e366538SXin Li // Benchmark 2 step conversion for comparison.
1789*4e366538SXin Li for (j = 0; j < benchmark_iterations_; j++) {
1790*4e366538SXin Li DetilePlane(tile_uv, tile_width, detiled_uv, benchmark_width_,
1791*4e366538SXin Li benchmark_width_, benchmark_height_, 16);
1792*4e366538SXin Li SplitUVPlane(detiled_uv, tile_width, dst_u_two_stage,
1793*4e366538SXin Li (benchmark_width_ + 1) / 2, dst_v_two_stage,
1794*4e366538SXin Li (benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2,
1795*4e366538SXin Li benchmark_height_);
1796*4e366538SXin Li }
1797*4e366538SXin Li
1798*4e366538SXin Li for (i = 0; i < uv_plane_size; ++i) {
1799*4e366538SXin Li EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]);
1800*4e366538SXin Li EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
1801*4e366538SXin Li }
1802*4e366538SXin Li
1803*4e366538SXin Li free_aligned_buffer_page_end(tile_uv);
1804*4e366538SXin Li free_aligned_buffer_page_end(detiled_uv);
1805*4e366538SXin Li free_aligned_buffer_page_end(dst_u_two_stage);
1806*4e366538SXin Li free_aligned_buffer_page_end(dst_u_opt);
1807*4e366538SXin Li free_aligned_buffer_page_end(dst_v_two_stage);
1808*4e366538SXin Li free_aligned_buffer_page_end(dst_v_opt);
1809*4e366538SXin Li }
1810*4e366538SXin Li
// Runs DetileSplitUVPlane once after MaskCpuFlags(disable_cpu_flags_) and
// benchmark_iterations_ times after MaskCpuFlags(benchmark_cpu_info_) on the
// same random tiled input, then requires the U and V outputs to match exactly.
TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
  // The source is tiled: size it with both dimensions rounded up to a
  // multiple of 16.
  const int tile_width = (benchmark_width_ + 15) & ~15;
  const int tile_height = (benchmark_height_ + 15) & ~15;
  const int tile_plane_size = tile_width * tile_height;
  // Each destination plane uses a stride of half the width, rounded up.
  const int half_width = (benchmark_width_ + 1) / 2;
  const int uv_plane_size = half_width * benchmark_height_;

  align_buffer_page_end(tile_uv, tile_plane_size);
  align_buffer_page_end(dst_u_c, uv_plane_size);
  align_buffer_page_end(dst_u_opt, uv_plane_size);
  align_buffer_page_end(dst_v_c, uv_plane_size);
  align_buffer_page_end(dst_v_opt, uv_plane_size);

  // Random source, zeroed destinations.
  MemRandomize(tile_uv, tile_plane_size);
  memset(dst_u_c, 0, uv_plane_size);
  memset(dst_u_opt, 0, uv_plane_size);
  memset(dst_v_c, 0, uv_plane_size);
  memset(dst_v_opt, 0, uv_plane_size);

  // Reference pass with all optimizations disabled.
  MaskCpuFlags(disable_cpu_flags_);
  DetileSplitUVPlane(tile_uv, tile_width, dst_u_c, half_width, dst_v_c,
                     half_width, benchmark_width_, benchmark_height_, 16);

  // Benchmarked passes with optimizations enabled.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = benchmark_iterations_; run > 0; --run) {
    DetileSplitUVPlane(tile_uv, tile_width, dst_u_opt, half_width, dst_v_opt,
                       half_width, benchmark_width_, benchmark_height_, 16);
  }

  for (int i = 0; i < uv_plane_size; ++i) {
    EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);
    EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
  }

  free_aligned_buffer_page_end(tile_uv);
  free_aligned_buffer_page_end(dst_u_c);
  free_aligned_buffer_page_end(dst_u_opt);
  free_aligned_buffer_page_end(dst_v_c);
  free_aligned_buffer_page_end(dst_v_opt);
}
1858*4e366538SXin Li
TestMultiply(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1859*4e366538SXin Li static int TestMultiply(int width,
1860*4e366538SXin Li int height,
1861*4e366538SXin Li int benchmark_iterations,
1862*4e366538SXin Li int disable_cpu_flags,
1863*4e366538SXin Li int benchmark_cpu_info,
1864*4e366538SXin Li int invert,
1865*4e366538SXin Li int off) {
1866*4e366538SXin Li if (width < 1) {
1867*4e366538SXin Li width = 1;
1868*4e366538SXin Li }
1869*4e366538SXin Li const int kBpp = 4;
1870*4e366538SXin Li const int kStride = width * kBpp;
1871*4e366538SXin Li align_buffer_page_end(src_argb_a, kStride * height + off);
1872*4e366538SXin Li align_buffer_page_end(src_argb_b, kStride * height + off);
1873*4e366538SXin Li align_buffer_page_end(dst_argb_c, kStride * height);
1874*4e366538SXin Li align_buffer_page_end(dst_argb_opt, kStride * height);
1875*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
1876*4e366538SXin Li src_argb_a[i + off] = (fastrand() & 0xff);
1877*4e366538SXin Li src_argb_b[i + off] = (fastrand() & 0xff);
1878*4e366538SXin Li }
1879*4e366538SXin Li memset(dst_argb_c, 0, kStride * height);
1880*4e366538SXin Li memset(dst_argb_opt, 0, kStride * height);
1881*4e366538SXin Li
1882*4e366538SXin Li MaskCpuFlags(disable_cpu_flags);
1883*4e366538SXin Li ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1884*4e366538SXin Li kStride, width, invert * height);
1885*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info);
1886*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
1887*4e366538SXin Li ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride,
1888*4e366538SXin Li dst_argb_opt, kStride, width, invert * height);
1889*4e366538SXin Li }
1890*4e366538SXin Li int max_diff = 0;
1891*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
1892*4e366538SXin Li int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1893*4e366538SXin Li static_cast<int>(dst_argb_opt[i]));
1894*4e366538SXin Li if (abs_diff > max_diff) {
1895*4e366538SXin Li max_diff = abs_diff;
1896*4e366538SXin Li }
1897*4e366538SXin Li }
1898*4e366538SXin Li free_aligned_buffer_page_end(src_argb_a);
1899*4e366538SXin Li free_aligned_buffer_page_end(src_argb_b);
1900*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_c);
1901*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_opt);
1902*4e366538SXin Li return max_diff;
1903*4e366538SXin Li }
1904*4e366538SXin Li
// ARGBMultiply at width benchmark_width_ + 1; the outputs compared by
// TestMultiply may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 0;      // No byte offset on the source pointers.
  const int max_diff = TestMultiply(benchmark_width_ + 1, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1911*4e366538SXin Li
// ARGBMultiply with both source pointers advanced by one byte; the outputs
// compared by TestMultiply may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 1;      // Sources start one byte into their buffers.
  const int max_diff = TestMultiply(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1918*4e366538SXin Li
// ARGBMultiply with a negated height; the outputs compared by TestMultiply may
// differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) {
  const int kInvert = -1;  // Height is negated before the call.
  const int kOff = 0;      // No byte offset on the source pointers.
  const int max_diff = TestMultiply(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1925*4e366538SXin Li
// ARGBMultiply at the benchmark dimensions with no offset and positive
// height; the outputs compared by TestMultiply may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 0;      // No byte offset on the source pointers.
  const int max_diff = TestMultiply(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1932*4e366538SXin Li
TestAdd(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1933*4e366538SXin Li static int TestAdd(int width,
1934*4e366538SXin Li int height,
1935*4e366538SXin Li int benchmark_iterations,
1936*4e366538SXin Li int disable_cpu_flags,
1937*4e366538SXin Li int benchmark_cpu_info,
1938*4e366538SXin Li int invert,
1939*4e366538SXin Li int off) {
1940*4e366538SXin Li if (width < 1) {
1941*4e366538SXin Li width = 1;
1942*4e366538SXin Li }
1943*4e366538SXin Li const int kBpp = 4;
1944*4e366538SXin Li const int kStride = width * kBpp;
1945*4e366538SXin Li align_buffer_page_end(src_argb_a, kStride * height + off);
1946*4e366538SXin Li align_buffer_page_end(src_argb_b, kStride * height + off);
1947*4e366538SXin Li align_buffer_page_end(dst_argb_c, kStride * height);
1948*4e366538SXin Li align_buffer_page_end(dst_argb_opt, kStride * height);
1949*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
1950*4e366538SXin Li src_argb_a[i + off] = (fastrand() & 0xff);
1951*4e366538SXin Li src_argb_b[i + off] = (fastrand() & 0xff);
1952*4e366538SXin Li }
1953*4e366538SXin Li memset(dst_argb_c, 0, kStride * height);
1954*4e366538SXin Li memset(dst_argb_opt, 0, kStride * height);
1955*4e366538SXin Li
1956*4e366538SXin Li MaskCpuFlags(disable_cpu_flags);
1957*4e366538SXin Li ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1958*4e366538SXin Li kStride, width, invert * height);
1959*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info);
1960*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
1961*4e366538SXin Li ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_opt,
1962*4e366538SXin Li kStride, width, invert * height);
1963*4e366538SXin Li }
1964*4e366538SXin Li int max_diff = 0;
1965*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
1966*4e366538SXin Li int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1967*4e366538SXin Li static_cast<int>(dst_argb_opt[i]));
1968*4e366538SXin Li if (abs_diff > max_diff) {
1969*4e366538SXin Li max_diff = abs_diff;
1970*4e366538SXin Li }
1971*4e366538SXin Li }
1972*4e366538SXin Li free_aligned_buffer_page_end(src_argb_a);
1973*4e366538SXin Li free_aligned_buffer_page_end(src_argb_b);
1974*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_c);
1975*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_opt);
1976*4e366538SXin Li return max_diff;
1977*4e366538SXin Li }
1978*4e366538SXin Li
// ARGBAdd at width benchmark_width_ + 1; the outputs compared by TestAdd may
// differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBAdd_Any) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 0;      // No byte offset on the source pointers.
  const int max_diff = TestAdd(benchmark_width_ + 1, benchmark_height_,
                               benchmark_iterations_, disable_cpu_flags_,
                               benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1985*4e366538SXin Li
// ARGBAdd with both source pointers advanced by one byte; the outputs
// compared by TestAdd may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 1;      // Sources start one byte into their buffers.
  const int max_diff = TestAdd(benchmark_width_, benchmark_height_,
                               benchmark_iterations_, disable_cpu_flags_,
                               benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1992*4e366538SXin Li
// ARGBAdd with a negated height; the outputs compared by TestAdd may differ
// by at most 1.
TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) {
  const int kInvert = -1;  // Height is negated before the call.
  const int kOff = 0;      // No byte offset on the source pointers.
  const int max_diff = TestAdd(benchmark_width_, benchmark_height_,
                               benchmark_iterations_, disable_cpu_flags_,
                               benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1999*4e366538SXin Li
// ARGBAdd at the benchmark dimensions with no offset and positive height; the
// outputs compared by TestAdd may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 0;      // No byte offset on the source pointers.
  const int max_diff = TestAdd(benchmark_width_, benchmark_height_,
                               benchmark_iterations_, disable_cpu_flags_,
                               benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
2006*4e366538SXin Li
TestSubtract(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)2007*4e366538SXin Li static int TestSubtract(int width,
2008*4e366538SXin Li int height,
2009*4e366538SXin Li int benchmark_iterations,
2010*4e366538SXin Li int disable_cpu_flags,
2011*4e366538SXin Li int benchmark_cpu_info,
2012*4e366538SXin Li int invert,
2013*4e366538SXin Li int off) {
2014*4e366538SXin Li if (width < 1) {
2015*4e366538SXin Li width = 1;
2016*4e366538SXin Li }
2017*4e366538SXin Li const int kBpp = 4;
2018*4e366538SXin Li const int kStride = width * kBpp;
2019*4e366538SXin Li align_buffer_page_end(src_argb_a, kStride * height + off);
2020*4e366538SXin Li align_buffer_page_end(src_argb_b, kStride * height + off);
2021*4e366538SXin Li align_buffer_page_end(dst_argb_c, kStride * height);
2022*4e366538SXin Li align_buffer_page_end(dst_argb_opt, kStride * height);
2023*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
2024*4e366538SXin Li src_argb_a[i + off] = (fastrand() & 0xff);
2025*4e366538SXin Li src_argb_b[i + off] = (fastrand() & 0xff);
2026*4e366538SXin Li }
2027*4e366538SXin Li memset(dst_argb_c, 0, kStride * height);
2028*4e366538SXin Li memset(dst_argb_opt, 0, kStride * height);
2029*4e366538SXin Li
2030*4e366538SXin Li MaskCpuFlags(disable_cpu_flags);
2031*4e366538SXin Li ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
2032*4e366538SXin Li kStride, width, invert * height);
2033*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info);
2034*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
2035*4e366538SXin Li ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride,
2036*4e366538SXin Li dst_argb_opt, kStride, width, invert * height);
2037*4e366538SXin Li }
2038*4e366538SXin Li int max_diff = 0;
2039*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
2040*4e366538SXin Li int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2041*4e366538SXin Li static_cast<int>(dst_argb_opt[i]));
2042*4e366538SXin Li if (abs_diff > max_diff) {
2043*4e366538SXin Li max_diff = abs_diff;
2044*4e366538SXin Li }
2045*4e366538SXin Li }
2046*4e366538SXin Li free_aligned_buffer_page_end(src_argb_a);
2047*4e366538SXin Li free_aligned_buffer_page_end(src_argb_b);
2048*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_c);
2049*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_opt);
2050*4e366538SXin Li return max_diff;
2051*4e366538SXin Li }
2052*4e366538SXin Li
// ARGBSubtract at width benchmark_width_ + 1; the outputs compared by
// TestSubtract may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 0;      // No byte offset on the source pointers.
  const int max_diff = TestSubtract(benchmark_width_ + 1, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
2059*4e366538SXin Li
// ARGBSubtract with both source pointers advanced by one byte; the outputs
// compared by TestSubtract may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 1;      // Sources start one byte into their buffers.
  const int max_diff = TestSubtract(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
2066*4e366538SXin Li
// ARGBSubtract with a negated height; the outputs compared by TestSubtract may
// differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) {
  const int kInvert = -1;  // Height is negated before the call.
  const int kOff = 0;      // No byte offset on the source pointers.
  const int max_diff = TestSubtract(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
2073*4e366538SXin Li
// ARGBSubtract at the benchmark dimensions with no offset and positive
// height; the outputs compared by TestSubtract may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 0;      // No byte offset on the source pointers.
  const int max_diff = TestSubtract(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
2080*4e366538SXin Li
TestSobel(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)2081*4e366538SXin Li static int TestSobel(int width,
2082*4e366538SXin Li int height,
2083*4e366538SXin Li int benchmark_iterations,
2084*4e366538SXin Li int disable_cpu_flags,
2085*4e366538SXin Li int benchmark_cpu_info,
2086*4e366538SXin Li int invert,
2087*4e366538SXin Li int off) {
2088*4e366538SXin Li if (width < 1) {
2089*4e366538SXin Li width = 1;
2090*4e366538SXin Li }
2091*4e366538SXin Li const int kBpp = 4;
2092*4e366538SXin Li const int kStride = width * kBpp;
2093*4e366538SXin Li align_buffer_page_end(src_argb_a, kStride * height + off);
2094*4e366538SXin Li align_buffer_page_end(dst_argb_c, kStride * height);
2095*4e366538SXin Li align_buffer_page_end(dst_argb_opt, kStride * height);
2096*4e366538SXin Li memset(src_argb_a, 0, kStride * height + off);
2097*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
2098*4e366538SXin Li src_argb_a[i + off] = (fastrand() & 0xff);
2099*4e366538SXin Li }
2100*4e366538SXin Li memset(dst_argb_c, 0, kStride * height);
2101*4e366538SXin Li memset(dst_argb_opt, 0, kStride * height);
2102*4e366538SXin Li
2103*4e366538SXin Li MaskCpuFlags(disable_cpu_flags);
2104*4e366538SXin Li ARGBSobel(src_argb_a + off, kStride, dst_argb_c, kStride, width,
2105*4e366538SXin Li invert * height);
2106*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info);
2107*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
2108*4e366538SXin Li ARGBSobel(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
2109*4e366538SXin Li invert * height);
2110*4e366538SXin Li }
2111*4e366538SXin Li int max_diff = 0;
2112*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
2113*4e366538SXin Li int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2114*4e366538SXin Li static_cast<int>(dst_argb_opt[i]));
2115*4e366538SXin Li if (abs_diff > max_diff) {
2116*4e366538SXin Li max_diff = abs_diff;
2117*4e366538SXin Li }
2118*4e366538SXin Li }
2119*4e366538SXin Li free_aligned_buffer_page_end(src_argb_a);
2120*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_c);
2121*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_opt);
2122*4e366538SXin Li return max_diff;
2123*4e366538SXin Li }
2124*4e366538SXin Li
// ARGBSobel at width benchmark_width_ + 1; the outputs compared by TestSobel
// must be identical.
TEST_F(LibYUVPlanarTest, ARGBSobel_Any) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 0;      // No byte offset on the source pointer.
  const int max_diff = TestSobel(benchmark_width_ + 1, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2131*4e366538SXin Li
// ARGBSobel with the source pointer advanced by one byte; the outputs
// compared by TestSobel must be identical.
TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 1;      // Source starts one byte into its buffer.
  const int max_diff = TestSobel(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2138*4e366538SXin Li
// ARGBSobel with a negated height; the outputs compared by TestSobel must be
// identical.
TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) {
  const int kInvert = -1;  // Height is negated before the call.
  const int kOff = 0;      // No byte offset on the source pointer.
  const int max_diff = TestSobel(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2145*4e366538SXin Li
// ARGBSobel at the benchmark dimensions with no offset and positive height;
// the outputs compared by TestSobel must be identical.
TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 0;      // No byte offset on the source pointer.
  const int max_diff = TestSobel(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
                                 benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2152*4e366538SXin Li
TestSobelToPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)2153*4e366538SXin Li static int TestSobelToPlane(int width,
2154*4e366538SXin Li int height,
2155*4e366538SXin Li int benchmark_iterations,
2156*4e366538SXin Li int disable_cpu_flags,
2157*4e366538SXin Li int benchmark_cpu_info,
2158*4e366538SXin Li int invert,
2159*4e366538SXin Li int off) {
2160*4e366538SXin Li if (width < 1) {
2161*4e366538SXin Li width = 1;
2162*4e366538SXin Li }
2163*4e366538SXin Li const int kSrcBpp = 4;
2164*4e366538SXin Li const int kDstBpp = 1;
2165*4e366538SXin Li const int kSrcStride = (width * kSrcBpp + 15) & ~15;
2166*4e366538SXin Li const int kDstStride = (width * kDstBpp + 15) & ~15;
2167*4e366538SXin Li align_buffer_page_end(src_argb_a, kSrcStride * height + off);
2168*4e366538SXin Li align_buffer_page_end(dst_argb_c, kDstStride * height);
2169*4e366538SXin Li align_buffer_page_end(dst_argb_opt, kDstStride * height);
2170*4e366538SXin Li memset(src_argb_a, 0, kSrcStride * height + off);
2171*4e366538SXin Li for (int i = 0; i < kSrcStride * height; ++i) {
2172*4e366538SXin Li src_argb_a[i + off] = (fastrand() & 0xff);
2173*4e366538SXin Li }
2174*4e366538SXin Li memset(dst_argb_c, 0, kDstStride * height);
2175*4e366538SXin Li memset(dst_argb_opt, 0, kDstStride * height);
2176*4e366538SXin Li
2177*4e366538SXin Li MaskCpuFlags(disable_cpu_flags);
2178*4e366538SXin Li ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_c, kDstStride, width,
2179*4e366538SXin Li invert * height);
2180*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info);
2181*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
2182*4e366538SXin Li ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_opt, kDstStride,
2183*4e366538SXin Li width, invert * height);
2184*4e366538SXin Li }
2185*4e366538SXin Li int max_diff = 0;
2186*4e366538SXin Li for (int i = 0; i < kDstStride * height; ++i) {
2187*4e366538SXin Li int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2188*4e366538SXin Li static_cast<int>(dst_argb_opt[i]));
2189*4e366538SXin Li if (abs_diff > max_diff) {
2190*4e366538SXin Li max_diff = abs_diff;
2191*4e366538SXin Li }
2192*4e366538SXin Li }
2193*4e366538SXin Li free_aligned_buffer_page_end(src_argb_a);
2194*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_c);
2195*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_opt);
2196*4e366538SXin Li return max_diff;
2197*4e366538SXin Li }
2198*4e366538SXin Li
// ARGBSobelToPlane at width benchmark_width_ + 1; the outputs compared by
// TestSobelToPlane must be identical.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 0;      // No byte offset on the source pointer.
  const int max_diff = TestSobelToPlane(
      benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2205*4e366538SXin Li
// ARGBSobelToPlane with the source pointer advanced by one byte; the outputs
// compared by TestSobelToPlane must be identical.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 1;      // Source starts one byte into its buffer.
  const int max_diff = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2212*4e366538SXin Li
// ARGBSobelToPlane with a negated height; the outputs compared by
// TestSobelToPlane must be identical.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) {
  const int kInvert = -1;  // Height is negated before the call.
  const int kOff = 0;      // No byte offset on the source pointer.
  const int max_diff = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2219*4e366538SXin Li
// ARGBSobelToPlane at the benchmark dimensions with no offset and positive
// height; the outputs compared by TestSobelToPlane must be identical.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 0;      // No byte offset on the source pointer.
  const int max_diff = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2226*4e366538SXin Li
TestSobelXY(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)2227*4e366538SXin Li static int TestSobelXY(int width,
2228*4e366538SXin Li int height,
2229*4e366538SXin Li int benchmark_iterations,
2230*4e366538SXin Li int disable_cpu_flags,
2231*4e366538SXin Li int benchmark_cpu_info,
2232*4e366538SXin Li int invert,
2233*4e366538SXin Li int off) {
2234*4e366538SXin Li if (width < 1) {
2235*4e366538SXin Li width = 1;
2236*4e366538SXin Li }
2237*4e366538SXin Li const int kBpp = 4;
2238*4e366538SXin Li const int kStride = width * kBpp;
2239*4e366538SXin Li align_buffer_page_end(src_argb_a, kStride * height + off);
2240*4e366538SXin Li align_buffer_page_end(dst_argb_c, kStride * height);
2241*4e366538SXin Li align_buffer_page_end(dst_argb_opt, kStride * height);
2242*4e366538SXin Li memset(src_argb_a, 0, kStride * height + off);
2243*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
2244*4e366538SXin Li src_argb_a[i + off] = (fastrand() & 0xff);
2245*4e366538SXin Li }
2246*4e366538SXin Li memset(dst_argb_c, 0, kStride * height);
2247*4e366538SXin Li memset(dst_argb_opt, 0, kStride * height);
2248*4e366538SXin Li
2249*4e366538SXin Li MaskCpuFlags(disable_cpu_flags);
2250*4e366538SXin Li ARGBSobelXY(src_argb_a + off, kStride, dst_argb_c, kStride, width,
2251*4e366538SXin Li invert * height);
2252*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info);
2253*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
2254*4e366538SXin Li ARGBSobelXY(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
2255*4e366538SXin Li invert * height);
2256*4e366538SXin Li }
2257*4e366538SXin Li int max_diff = 0;
2258*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
2259*4e366538SXin Li int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2260*4e366538SXin Li static_cast<int>(dst_argb_opt[i]));
2261*4e366538SXin Li if (abs_diff > max_diff) {
2262*4e366538SXin Li max_diff = abs_diff;
2263*4e366538SXin Li }
2264*4e366538SXin Li }
2265*4e366538SXin Li free_aligned_buffer_page_end(src_argb_a);
2266*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_c);
2267*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_opt);
2268*4e366538SXin Li return max_diff;
2269*4e366538SXin Li }
2270*4e366538SXin Li
// ARGBSobelXY at width benchmark_width_ + 1; the outputs compared by
// TestSobelXY must be identical.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 0;      // No byte offset on the source pointer.
  const int max_diff = TestSobelXY(benchmark_width_ + 1, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2277*4e366538SXin Li
// ARGBSobelXY with the source pointer advanced by one byte; the outputs
// compared by TestSobelXY must be identical.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 1;      // Source starts one byte into its buffer.
  const int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2284*4e366538SXin Li
// ARGBSobelXY with a negated height; the outputs compared by TestSobelXY must
// be identical.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) {
  const int kInvert = -1;  // Height is negated before the call.
  const int kOff = 0;      // No byte offset on the source pointer.
  const int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2291*4e366538SXin Li
// ARGBSobelXY at the benchmark dimensions with no offset and positive height;
// the outputs compared by TestSobelXY must be identical.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 0;      // No byte offset on the source pointer.
  const int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2298*4e366538SXin Li
TestBlur(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int radius)2299*4e366538SXin Li static int TestBlur(int width,
2300*4e366538SXin Li int height,
2301*4e366538SXin Li int benchmark_iterations,
2302*4e366538SXin Li int disable_cpu_flags,
2303*4e366538SXin Li int benchmark_cpu_info,
2304*4e366538SXin Li int invert,
2305*4e366538SXin Li int off,
2306*4e366538SXin Li int radius) {
2307*4e366538SXin Li if (width < 1) {
2308*4e366538SXin Li width = 1;
2309*4e366538SXin Li }
2310*4e366538SXin Li const int kBpp = 4;
2311*4e366538SXin Li const int kStride = width * kBpp;
2312*4e366538SXin Li align_buffer_page_end(src_argb_a, kStride * height + off);
2313*4e366538SXin Li align_buffer_page_end(dst_cumsum, width * height * 16);
2314*4e366538SXin Li align_buffer_page_end(dst_argb_c, kStride * height);
2315*4e366538SXin Li align_buffer_page_end(dst_argb_opt, kStride * height);
2316*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
2317*4e366538SXin Li src_argb_a[i + off] = (fastrand() & 0xff);
2318*4e366538SXin Li }
2319*4e366538SXin Li memset(dst_cumsum, 0, width * height * 16);
2320*4e366538SXin Li memset(dst_argb_c, 0, kStride * height);
2321*4e366538SXin Li memset(dst_argb_opt, 0, kStride * height);
2322*4e366538SXin Li
2323*4e366538SXin Li MaskCpuFlags(disable_cpu_flags);
2324*4e366538SXin Li ARGBBlur(src_argb_a + off, kStride, dst_argb_c, kStride,
2325*4e366538SXin Li reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
2326*4e366538SXin Li invert * height, radius);
2327*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info);
2328*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
2329*4e366538SXin Li ARGBBlur(src_argb_a + off, kStride, dst_argb_opt, kStride,
2330*4e366538SXin Li reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
2331*4e366538SXin Li invert * height, radius);
2332*4e366538SXin Li }
2333*4e366538SXin Li int max_diff = 0;
2334*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
2335*4e366538SXin Li int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2336*4e366538SXin Li static_cast<int>(dst_argb_opt[i]));
2337*4e366538SXin Li if (abs_diff > max_diff) {
2338*4e366538SXin Li max_diff = abs_diff;
2339*4e366538SXin Li }
2340*4e366538SXin Li }
2341*4e366538SXin Li free_aligned_buffer_page_end(src_argb_a);
2342*4e366538SXin Li free_aligned_buffer_page_end(dst_cumsum);
2343*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_c);
2344*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_opt);
2345*4e366538SXin Li return max_diff;
2346*4e366538SXin Li }
2347*4e366538SXin Li
// DISABLED_ARM(name) expands to `name` unchanged, except when
// DISABLE_SLOW_TESTS is defined and the target is neither x86-64 nor i386;
// in that case it expands to DISABLED_name.
#if defined(DISABLE_SLOW_TESTS) && !defined(__x86_64__) && !defined(__i386__)
#define DISABLED_ARM(name) DISABLED_##name
#else
#define DISABLED_ARM(name) name
#endif

// Radius passed to TestBlur by the ARGBBlur_* tests.
static const int kBlurSize = 55;
// ARGBBlur with radius kBlurSize at width benchmark_width_ + 1; the outputs
// compared by TestBlur may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Any)) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 0;      // No byte offset on the source pointer.
  const int max_diff = TestBlur(benchmark_width_ + 1, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, kInvert, kOff, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
2361*4e366538SXin Li
// ARGBBlur with radius kBlurSize and the source pointer advanced by one byte;
// the outputs compared by TestBlur may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Unaligned)) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 1;      // Source starts one byte into its buffer.
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, kInvert, kOff, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
2368*4e366538SXin Li
// ARGBBlur with radius kBlurSize and a negated height; the outputs compared
// by TestBlur may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Invert)) {
  const int kInvert = -1;  // Height is negated before the call.
  const int kOff = 0;      // No byte offset on the source pointer.
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, kInvert, kOff, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
2375*4e366538SXin Li
// ARGBBlur with radius kBlurSize at the benchmark dimensions, no offset and
// positive height; the outputs compared by TestBlur may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Opt)) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 0;      // No byte offset on the source pointer.
  const int max_diff = TestBlur(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, disable_cpu_flags_,
                                benchmark_cpu_info_, kInvert, kOff, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
2382*4e366538SXin Li
// Radius passed to TestBlur by the ARGBBlurSmall_* tests.
static const int kBlurSmallSize = 5;
// ARGBBlur with radius kBlurSmallSize at width benchmark_width_ + 1; the
// outputs compared by TestBlur may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Any)) {
  const int kInvert = +1;  // Height is passed with its sign unchanged.
  const int kOff = 0;      // No byte offset on the source pointer.
  const int max_diff =
      TestBlur(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff,
               kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
2390*4e366538SXin Li
// Small-blur variant that passes 1 as the seventh TestBlur argument
// (presumably a byte offset that misaligns the buffers - confirm in TestBlur).
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Unaligned)) {
  const int worst_diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, +1, 1, kBlurSmallSize);
  EXPECT_LE(worst_diff, 1);
}
2397*4e366538SXin Li
// Small-blur variant that passes -1 as the sixth TestBlur argument
// (presumably an invert/negative-height selector - confirm in TestBlur).
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Invert)) {
  const int worst_diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, -1, 0, kBlurSmallSize);
  EXPECT_LE(worst_diff, 1);
}
2404*4e366538SXin Li
// Runs TestBlur at the benchmark dimensions with kBlurSmallSize and requires
// the reported difference to be no larger than 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Opt)) {
  const int worst_diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize);
  EXPECT_LE(worst_diff, 1);
}
2411*4e366538SXin Li
// Checks ARGBPolynomial in two stages:
//  1. five sample pixels are transformed and compared with fixed expected
//     values;
//  2. a 1280 pixel ramp is transformed once with disable_cpu_flags_ applied
//     and benchmark_pixels_div1280_ times with benchmark_cpu_info_ applied,
//     and both outputs must match byte for byte.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(TestARGBPolynomial)) {
  const int kRowPixels = 1280;
  const int kNumSamples = 5;
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Four coefficients per polynomial term, one term per row.
  SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = {
      0.94230f,  -3.03300f,    -2.92500f,    0.f,  // C0
      0.584500f, 1.112000f,    1.535000f,    1.f,  // C1 x
      0.001313f, -0.002503f,   -0.004496f,   0.f,  // C2 x * x
      0.0f,      0.000006965f, 0.000008781f, 0.f,  // C3 x * x * x
  };

  // Input samples, stored in the same byte order the original test wrote them.
  static const uint8_t kSamples[5][4] = {
      {255u, 0u, 0u, 128u},     // Test blue
      {0u, 255u, 0u, 0u},       // Test green
      {0u, 0u, 255u, 255u},     // Test red
      {255u, 255u, 255u, 255u}, // Test white
      {16u, 64u, 192u, 224u},   // Test color
  };
  // Expected output for each sample; kept unsigned so EXPECT_EQ compares the
  // same types as before.
  static const unsigned int kExpected[5][4] = {
      {235u, 0u, 0u, 128u},
      {0u, 233u, 0u, 0u},
      {0u, 0u, 241u, 255u},
      {235u, 233u, 241u, 255u},
      {10u, 59u, 188u, 224u},
  };
  memcpy(&orig_pixels[0][0], &kSamples[0][0], sizeof(kSamples));

  // Do 16 to test asm version.
  ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                 &kWarmifyPolynomial[0], 16, 1);
  for (int px = 0; px < kNumSamples; ++px) {
    for (int ch = 0; ch < 4; ++ch) {
      EXPECT_EQ(kExpected[px][ch], dst_pixels_opt[px][ch]);
    }
  }

  // Ramp input; values wrap when narrowed to uint8_t.
  for (int px = 0; px < kRowPixels; ++px) {
    orig_pixels[px][0] = static_cast<uint8_t>(px);
    orig_pixels[px][1] = static_cast<uint8_t>(px / 2);
    orig_pixels[px][2] = static_cast<uint8_t>(px / 3);
    orig_pixels[px][3] = static_cast<uint8_t>(px);
  }

  MaskCpuFlags(disable_cpu_flags_);
  ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
                 &kWarmifyPolynomial[0], kRowPixels, 1);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                   &kWarmifyPolynomial[0], kRowPixels, 1);
  }

  for (int px = 0; px < kRowPixels; ++px) {
    for (int ch = 0; ch < 4; ++ch) {
      EXPECT_EQ(dst_pixels_c[px][ch], dst_pixels_opt[px][ch]);
    }
  }
}
2498*4e366538SXin Li
TestHalfFloatPlane(int benchmark_width,int benchmark_height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,float scale,int mask)2499*4e366538SXin Li int TestHalfFloatPlane(int benchmark_width,
2500*4e366538SXin Li int benchmark_height,
2501*4e366538SXin Li int benchmark_iterations,
2502*4e366538SXin Li int disable_cpu_flags,
2503*4e366538SXin Li int benchmark_cpu_info,
2504*4e366538SXin Li float scale,
2505*4e366538SXin Li int mask) {
2506*4e366538SXin Li int i, j;
2507*4e366538SXin Li const int y_plane_size = benchmark_width * benchmark_height * 2;
2508*4e366538SXin Li
2509*4e366538SXin Li align_buffer_page_end(orig_y, y_plane_size * 3);
2510*4e366538SXin Li uint8_t* dst_opt = orig_y + y_plane_size;
2511*4e366538SXin Li uint8_t* dst_c = orig_y + y_plane_size * 2;
2512*4e366538SXin Li
2513*4e366538SXin Li MemRandomize(orig_y, y_plane_size);
2514*4e366538SXin Li memset(dst_c, 0, y_plane_size);
2515*4e366538SXin Li memset(dst_opt, 1, y_plane_size);
2516*4e366538SXin Li
2517*4e366538SXin Li for (i = 0; i < y_plane_size / 2; ++i) {
2518*4e366538SXin Li reinterpret_cast<uint16_t*>(orig_y)[i] &= mask;
2519*4e366538SXin Li }
2520*4e366538SXin Li
2521*4e366538SXin Li // Disable all optimizations.
2522*4e366538SXin Li MaskCpuFlags(disable_cpu_flags);
2523*4e366538SXin Li for (j = 0; j < benchmark_iterations; j++) {
2524*4e366538SXin Li HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
2525*4e366538SXin Li reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2,
2526*4e366538SXin Li scale, benchmark_width, benchmark_height);
2527*4e366538SXin Li }
2528*4e366538SXin Li
2529*4e366538SXin Li // Enable optimizations.
2530*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info);
2531*4e366538SXin Li for (j = 0; j < benchmark_iterations; j++) {
2532*4e366538SXin Li HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
2533*4e366538SXin Li reinterpret_cast<uint16_t*>(dst_opt), benchmark_width * 2,
2534*4e366538SXin Li scale, benchmark_width, benchmark_height);
2535*4e366538SXin Li }
2536*4e366538SXin Li
2537*4e366538SXin Li int max_diff = 0;
2538*4e366538SXin Li for (i = 0; i < y_plane_size / 2; ++i) {
2539*4e366538SXin Li int abs_diff =
2540*4e366538SXin Li abs(static_cast<int>(reinterpret_cast<uint16_t*>(dst_c)[i]) -
2541*4e366538SXin Li static_cast<int>(reinterpret_cast<uint16_t*>(dst_opt)[i]));
2542*4e366538SXin Li if (abs_diff > max_diff) {
2543*4e366538SXin Li max_diff = abs_diff;
2544*4e366538SXin Li }
2545*4e366538SXin Li }
2546*4e366538SXin Li
2547*4e366538SXin Li free_aligned_buffer_page_end(orig_y);
2548*4e366538SXin Li return max_diff;
2549*4e366538SXin Li }
2550*4e366538SXin Li
#if defined(__arm__)
// Reads FPSCR with vmrs, ORs in 0x1000000 (bit 24) and writes the result back
// with vmsr. The previous FPSCR value is not saved or restored.
// NOTE(review): bit 24 is presumably the flush-to-zero control the function
// name refers to - confirm against the ARM architecture reference.
static void EnableFlushDenormalToZero(void) {
  uint32_t fpscr_bits;  // Scratch register operand; written by the asm only.
  __asm__ __volatile__(
      "vmrs %0, fpscr \n"
      "orr %0, %0, #0x1000000 \n"
      "vmsr fpscr, %0 \n"
      : "=r"(fpscr_bits)::"memory");
}
#endif
2561*4e366538SXin Li
// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
// exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally
// happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.
2565*4e366538SXin Li
// Full 16 bit input scaled by 1/65536; both code paths must agree exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
// 32 bit arm rounding on denormal case is off by 1 compared to C.
// NOTE(review): the FPSCR change made here is not undone by this test.
#if defined(__arm__)
  EnableFlushDenormalToZero();
#endif
  const float kScale = 1.0f / 65536.0f;
  const int kMask = 65535;
  const int worst_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, worst_diff);
}
2576*4e366538SXin Li
// Full 16 bit input with a scale of 1; a difference of up to 1 is accepted.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
  const float kScale = 1.0f;
  const int kMask = 65535;
  const int worst_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_LE(worst_diff, 1);
}
2583*4e366538SXin Li
// Full 16 bit input scaled by 1/4096; both code paths must agree exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
  const float kScale = 1.0f / 4096.0f;
  const int kMask = 65535;
  const int worst_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, worst_diff);
}
2590*4e366538SXin Li
// Input masked to 10 bits and scaled by 1/1024; both code paths must agree
// exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
  const float kScale = 1.0f / 1024.0f;
  const int kMask = 1023;
  const int worst_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, worst_diff);
}
2597*4e366538SXin Li
// Input masked to 9 bits and scaled by 1/512; both code paths must agree
// exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
  const float kScale = 1.0f / 512.0f;
  const int kMask = 511;
  const int worst_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, worst_diff);
}
2604*4e366538SXin Li
// Input masked to 12 bits and scaled by 1/4096; both code paths must agree
// exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
  const float kScale = 1.0f / 4096.0f;
  const int kMask = 4095;
  const int worst_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, worst_diff);
}
2611*4e366538SXin Li
// Input masked to 12 bits and scaled by 1/4095 (not a power of two); both
// code paths must still agree exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
  const float kScale = 1.0f / 4095.0f;
  const int kMask = 4095;
  const int worst_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, worst_diff);
}
2618*4e366538SXin Li
// Input masked to 11 bits with a scale of 1; both code paths must agree
// exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {
  const float kScale = 1.0f;
  const int kMask = 2047;
  const int worst_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, worst_diff);
}
2625*4e366538SXin Li
// Input masked to 12 bits with a scale of 1; a difference of up to 1 is
// accepted.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) {
  const float kScale = 1.0f;
  const int kMask = 4095;
  const int worst_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_LE(worst_diff, 1);
}
2632*4e366538SXin Li
TestByteToFloat(int benchmark_width,int benchmark_height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,float scale)2633*4e366538SXin Li float TestByteToFloat(int benchmark_width,
2634*4e366538SXin Li int benchmark_height,
2635*4e366538SXin Li int benchmark_iterations,
2636*4e366538SXin Li int disable_cpu_flags,
2637*4e366538SXin Li int benchmark_cpu_info,
2638*4e366538SXin Li float scale) {
2639*4e366538SXin Li int i, j;
2640*4e366538SXin Li const int y_plane_size = benchmark_width * benchmark_height;
2641*4e366538SXin Li
2642*4e366538SXin Li align_buffer_page_end(orig_y, y_plane_size * (1 + 4 + 4));
2643*4e366538SXin Li float* dst_opt = reinterpret_cast<float*>(orig_y + y_plane_size);
2644*4e366538SXin Li float* dst_c = reinterpret_cast<float*>(orig_y + y_plane_size * 5);
2645*4e366538SXin Li
2646*4e366538SXin Li MemRandomize(orig_y, y_plane_size);
2647*4e366538SXin Li memset(dst_c, 0, y_plane_size * 4);
2648*4e366538SXin Li memset(dst_opt, 1, y_plane_size * 4);
2649*4e366538SXin Li
2650*4e366538SXin Li // Disable all optimizations.
2651*4e366538SXin Li MaskCpuFlags(disable_cpu_flags);
2652*4e366538SXin Li ByteToFloat(orig_y, dst_c, scale, y_plane_size);
2653*4e366538SXin Li
2654*4e366538SXin Li // Enable optimizations.
2655*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info);
2656*4e366538SXin Li for (j = 0; j < benchmark_iterations; j++) {
2657*4e366538SXin Li ByteToFloat(orig_y, dst_opt, scale, y_plane_size);
2658*4e366538SXin Li }
2659*4e366538SXin Li
2660*4e366538SXin Li float max_diff = 0;
2661*4e366538SXin Li for (i = 0; i < y_plane_size; ++i) {
2662*4e366538SXin Li float abs_diff = fabs(dst_c[i] - dst_opt[i]);
2663*4e366538SXin Li if (abs_diff > max_diff) {
2664*4e366538SXin Li max_diff = abs_diff;
2665*4e366538SXin Li }
2666*4e366538SXin Li }
2667*4e366538SXin Li
2668*4e366538SXin Li free_aligned_buffer_page_end(orig_y);
2669*4e366538SXin Li return max_diff;
2670*4e366538SXin Li }
2671*4e366538SXin Li
// ByteToFloat with a scale of 1 must produce identical floats on both code
// paths.
TEST_F(LibYUVPlanarTest, TestByteToFloat) {
  const float kScale = 1.0f;
  const float worst_diff =
      TestByteToFloat(benchmark_width_, benchmark_height_,
                      benchmark_iterations_, disable_cpu_flags_,
                      benchmark_cpu_info_, kScale);
  EXPECT_EQ(0.f, worst_diff);
}
2678*4e366538SXin Li
// Checks ARGBLumaColorTable in two stages:
//  1. four sample pixels are mapped through a 32768 entry table whose entry i
//     holds the low byte of 3 * i, and compared with fixed expected values;
//  2. a 1280 pixel ramp is mapped once with disable_cpu_flags_ applied and
//     benchmark_pixels_div1280_ times with benchmark_cpu_info_ applied, and
//     both outputs must match byte for byte.
TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
  const int kRowPixels = 1280;
  const int kTableSize = 32768;
  const int kNumSamples = 4;
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  align_buffer_page_end(lumacolortable, 32768);
  for (int entry = 0; entry < kTableSize; ++entry) {
    // Same values as a running counter stepped by 3 and narrowed to a byte.
    lumacolortable[entry] = static_cast<uint8_t>(entry * 3);
  }

  // Input samples, stored in the same byte order the original test wrote them.
  static const uint8_t kSamples[4][4] = {
      {255u, 0u, 0u, 128u},   // Test blue
      {0u, 255u, 0u, 0u},     // Test green
      {0u, 0u, 255u, 255u},   // Test red
      {16u, 64u, 192u, 224u}, // Test color
  };
  // Expected output for each sample; kept unsigned so EXPECT_EQ compares the
  // same types as before.
  static const unsigned int kExpected[4][4] = {
      {253u, 0u, 0u, 128u},
      {0u, 253u, 0u, 0u},
      {0u, 0u, 253u, 255u},
      {48u, 192u, 64u, 224u},
  };
  memcpy(&orig_pixels[0][0], &kSamples[0][0], sizeof(kSamples));

  // Do 16 to test asm version.
  ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                     &lumacolortable[0], 16, 1);
  for (int px = 0; px < kNumSamples; ++px) {
    for (int ch = 0; ch < 4; ++ch) {
      EXPECT_EQ(kExpected[px][ch], dst_pixels_opt[px][ch]);
    }
  }

  // Ramp input; values wrap when narrowed to uint8_t.
  for (int px = 0; px < kRowPixels; ++px) {
    orig_pixels[px][0] = static_cast<uint8_t>(px);
    orig_pixels[px][1] = static_cast<uint8_t>(px / 2);
    orig_pixels[px][2] = static_cast<uint8_t>(px / 3);
    orig_pixels[px][3] = static_cast<uint8_t>(px);
  }

  MaskCpuFlags(disable_cpu_flags_);
  ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
                     lumacolortable, kRowPixels, 1);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                       lumacolortable, kRowPixels, 1);
  }
  for (int px = 0; px < kRowPixels; ++px) {
    for (int ch = 0; ch < 4; ++ch) {
      EXPECT_EQ(dst_pixels_c[px][ch], dst_pixels_opt[px][ch]);
    }
  }

  free_aligned_buffer_page_end(lumacolortable);
}
2756*4e366538SXin Li
// Runs ARGBCopyAlpha from a random source into two destinations that start
// with identical random contents - once with disable_cpu_flags_ applied and
// benchmark_iterations_ times with benchmark_cpu_info_ applied - and requires
// the destinations to match byte for byte afterwards.
TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
  const int kStride = benchmark_width_ * 4;
  const int kSize = benchmark_width_ * benchmark_height_ * 4;
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  MemRandomize(orig_pixels, kSize);
  MemRandomize(dst_pixels_opt, kSize);
  // Both destinations begin with the same bytes.
  memcpy(dst_pixels_c, dst_pixels_opt, kSize);

  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyAlpha(orig_pixels, kStride, dst_pixels_c, kStride, benchmark_width_,
                benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ARGBCopyAlpha(orig_pixels, kStride, dst_pixels_opt, kStride,
                  benchmark_width_, benchmark_height_);
  }
  for (int offset = 0; offset < kSize; ++offset) {
    EXPECT_EQ(dst_pixels_c[offset], dst_pixels_opt[offset]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2784*4e366538SXin Li
// Runs ARGBExtractAlpha from a random ARGB source into two one-byte-per-pixel
// destinations, with disable_cpu_flags_ and with benchmark_cpu_info_ applied,
// prints the time of each path and requires the outputs to match exactly.
// Each path makes one untimed call before the timed call(s).
TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kArgbStride = benchmark_width_ * 4;
  const int kArgbBytes = kPixels * 4;
  align_buffer_page_end(src_pixels, kArgbBytes);
  align_buffer_page_end(dst_pixels_opt, kPixels);
  align_buffer_page_end(dst_pixels_c, kPixels);

  MemRandomize(src_pixels, kArgbBytes);
  MemRandomize(dst_pixels_opt, kPixels);
  memcpy(dst_pixels_c, dst_pixels_opt, kPixels);

  // Path with disable_cpu_flags_: one untimed call, then one timed call.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBExtractAlpha(src_pixels, kArgbStride, dst_pixels_c, benchmark_width_,
                   benchmark_width_, benchmark_height_);
  const double c_start = get_time();
  ARGBExtractAlpha(src_pixels, kArgbStride, dst_pixels_c, benchmark_width_,
                   benchmark_width_, benchmark_height_);
  const double c_time = get_time() - c_start;

  // Path with benchmark_cpu_info_: one untimed call, then the timed loop.
  MaskCpuFlags(benchmark_cpu_info_);
  ARGBExtractAlpha(src_pixels, kArgbStride, dst_pixels_opt, benchmark_width_,
                   benchmark_width_, benchmark_height_);
  const double opt_start = get_time();
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ARGBExtractAlpha(src_pixels, kArgbStride, dst_pixels_opt, benchmark_width_,
                     benchmark_width_, benchmark_height_);
  }
  // Average time per iteration.
  const double opt_time = (get_time() - opt_start) / benchmark_iterations_;

  // Report performance of C vs OPT
  printf("%8d us C - %8d us OPT\n", static_cast<int>(c_time * 1e6),
         static_cast<int>(opt_time * 1e6));
  for (int px = 0; px < kPixels; ++px) {
    EXPECT_EQ(dst_pixels_c[px], dst_pixels_opt[px]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(src_pixels);
}
2823*4e366538SXin Li
// Runs ARGBCopyYToAlpha from a random one-byte-per-pixel source into two ARGB
// destinations that start with identical random contents, with
// disable_cpu_flags_ and with benchmark_cpu_info_ applied, prints the time of
// each path and requires the destinations to match exactly.
// Each path makes one untimed call before the timed call(s).
TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kArgbStride = benchmark_width_ * 4;
  const int kArgbBytes = kPixels * 4;
  align_buffer_page_end(orig_pixels, kPixels);
  align_buffer_page_end(dst_pixels_opt, kArgbBytes);
  align_buffer_page_end(dst_pixels_c, kArgbBytes);

  MemRandomize(orig_pixels, kPixels);
  MemRandomize(dst_pixels_opt, kArgbBytes);
  memcpy(dst_pixels_c, dst_pixels_opt, kArgbBytes);

  // Path with disable_cpu_flags_: one untimed call, then one timed call.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_c, kArgbStride,
                   benchmark_width_, benchmark_height_);
  const double c_start = get_time();
  ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_c, kArgbStride,
                   benchmark_width_, benchmark_height_);
  const double c_time = get_time() - c_start;

  // Path with benchmark_cpu_info_: one untimed call, then the timed loop.
  MaskCpuFlags(benchmark_cpu_info_);
  ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_opt, kArgbStride,
                   benchmark_width_, benchmark_height_);
  const double opt_start = get_time();
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_opt,
                     kArgbStride, benchmark_width_, benchmark_height_);
  }
  // Average time per iteration.
  const double opt_time = (get_time() - opt_start) / benchmark_iterations_;

  // Report performance of C vs OPT
  printf("%8d us C - %8d us OPT\n", static_cast<int>(c_time * 1e6),
         static_cast<int>(opt_time * 1e6));
  for (int offset = 0; offset < kArgbBytes; ++offset) {
    EXPECT_EQ(dst_pixels_c[offset], dst_pixels_opt[offset]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2863*4e366538SXin Li
TestARGBRect(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int bpp)2864*4e366538SXin Li static int TestARGBRect(int width,
2865*4e366538SXin Li int height,
2866*4e366538SXin Li int benchmark_iterations,
2867*4e366538SXin Li int disable_cpu_flags,
2868*4e366538SXin Li int benchmark_cpu_info,
2869*4e366538SXin Li int invert,
2870*4e366538SXin Li int off,
2871*4e366538SXin Li int bpp) {
2872*4e366538SXin Li if (width < 1) {
2873*4e366538SXin Li width = 1;
2874*4e366538SXin Li }
2875*4e366538SXin Li const int kStride = width * bpp;
2876*4e366538SXin Li const int kSize = kStride * height;
2877*4e366538SXin Li const uint32_t v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff);
2878*4e366538SXin Li
2879*4e366538SXin Li align_buffer_page_end(dst_argb_c, kSize + off);
2880*4e366538SXin Li align_buffer_page_end(dst_argb_opt, kSize + off);
2881*4e366538SXin Li
2882*4e366538SXin Li MemRandomize(dst_argb_c + off, kSize);
2883*4e366538SXin Li memcpy(dst_argb_opt + off, dst_argb_c + off, kSize);
2884*4e366538SXin Li
2885*4e366538SXin Li MaskCpuFlags(disable_cpu_flags);
2886*4e366538SXin Li if (bpp == 4) {
2887*4e366538SXin Li ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32);
2888*4e366538SXin Li } else {
2889*4e366538SXin Li SetPlane(dst_argb_c + off, kStride, width, invert * height, v32);
2890*4e366538SXin Li }
2891*4e366538SXin Li
2892*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info);
2893*4e366538SXin Li for (int i = 0; i < benchmark_iterations; ++i) {
2894*4e366538SXin Li if (bpp == 4) {
2895*4e366538SXin Li ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32);
2896*4e366538SXin Li } else {
2897*4e366538SXin Li SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32);
2898*4e366538SXin Li }
2899*4e366538SXin Li }
2900*4e366538SXin Li int max_diff = 0;
2901*4e366538SXin Li for (int i = 0; i < kStride * height; ++i) {
2902*4e366538SXin Li int abs_diff = abs(static_cast<int>(dst_argb_c[i + off]) -
2903*4e366538SXin Li static_cast<int>(dst_argb_opt[i + off]));
2904*4e366538SXin Li if (abs_diff > max_diff) {
2905*4e366538SXin Li max_diff = abs_diff;
2906*4e366538SXin Li }
2907*4e366538SXin Li }
2908*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_c);
2909*4e366538SXin Li free_aligned_buffer_page_end(dst_argb_opt);
2910*4e366538SXin Li return max_diff;
2911*4e366538SXin Li }
2912*4e366538SXin Li
// ARGBRect (bpp 4) on a row one pixel wider than benchmark_width_; both code
// paths must produce identical bytes.
TEST_F(LibYUVPlanarTest, ARGBRect_Any) {
  const int kTestWidth = benchmark_width_ + 1;
  const int worst_diff =
      TestARGBRect(kTestWidth, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 4);
  EXPECT_EQ(0, worst_diff);
}
2919*4e366538SXin Li
// ARGBRect (bpp 4) with the buffers offset by one byte; both code paths must
// produce identical bytes.
TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) {
  const int worst_diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 4);
  EXPECT_EQ(0, worst_diff);
}
2926*4e366538SXin Li
// ARGBRect (bpp 4) with invert = -1, so a negative height is passed to the
// fill; both code paths must produce identical bytes.
TEST_F(LibYUVPlanarTest, ARGBRect_Invert) {
  const int worst_diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 4);
  EXPECT_EQ(0, worst_diff);
}
2933*4e366538SXin Li
// ARGBRect (bpp 4) at the benchmark dimensions; both code paths must produce
// identical bytes.
TEST_F(LibYUVPlanarTest, ARGBRect_Opt) {
  const int worst_diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 4);
  EXPECT_EQ(0, worst_diff);
}
2940*4e366538SXin Li
// SetPlane (bpp 1) on a row one pixel wider than benchmark_width_; both code
// paths must produce identical bytes.
TEST_F(LibYUVPlanarTest, SetPlane_Any) {
  const int kTestWidth = benchmark_width_ + 1;
  const int worst_diff =
      TestARGBRect(kTestWidth, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1);
  EXPECT_EQ(0, worst_diff);
}
2947*4e366538SXin Li
// SetPlane (bpp 1) with the buffers offset by one byte; both code paths must
// produce identical bytes.
TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) {
  const int worst_diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 1);
  EXPECT_EQ(0, worst_diff);
}
2954*4e366538SXin Li
// SetPlane (bpp 1) with invert = -1, so a negative height is passed to the
// fill; both code paths must produce identical bytes.
TEST_F(LibYUVPlanarTest, SetPlane_Invert) {
  const int worst_diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 1);
  EXPECT_EQ(0, worst_diff);
}
2961*4e366538SXin Li
// SetPlane (bpp 1) at the benchmark dimensions; both code paths must produce
// identical bytes.
TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
  const int worst_diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1);
  EXPECT_EQ(0, worst_diff);
}
2968*4e366538SXin Li
// Runs MergeUVPlane on two random source planes - once with
// disable_cpu_flags_ applied and benchmark_iterations_ times with
// benchmark_cpu_info_ applied - and requires both outputs to match byte for
// byte. The destinations are randomised independently beforehand.
TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kDstStride = benchmark_width_ * 2;
  const int kDstBytes = kPixels * 2;
  align_buffer_page_end(src_pixels_u, kPixels);
  align_buffer_page_end(src_pixels_v, kPixels);
  align_buffer_page_end(dst_pixels_opt, kDstBytes);
  align_buffer_page_end(dst_pixels_c, kDstBytes);

  MemRandomize(src_pixels_u, kPixels);
  MemRandomize(src_pixels_v, kPixels);
  MemRandomize(dst_pixels_opt, kDstBytes);
  MemRandomize(dst_pixels_c, kDstBytes);

  MaskCpuFlags(disable_cpu_flags_);
  MergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v, benchmark_width_,
               dst_pixels_c, kDstStride, benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    MergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
                 benchmark_width_, dst_pixels_opt, kDstStride,
                 benchmark_width_, benchmark_height_);
  }

  for (int offset = 0; offset < kDstBytes; ++offset) {
    EXPECT_EQ(dst_pixels_c[offset], dst_pixels_opt[offset]);
  }

  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3002*4e366538SXin Li
3003*4e366538SXin Li // 16 bit channel split and merge
TEST_F(LibYUVPlanarTest,MergeUVPlane_16_Opt)3004*4e366538SXin Li TEST_F(LibYUVPlanarTest, MergeUVPlane_16_Opt) {
3005*4e366538SXin Li const int kPixels = benchmark_width_ * benchmark_height_;
3006*4e366538SXin Li align_buffer_page_end(src_pixels_u, kPixels * 2);
3007*4e366538SXin Li align_buffer_page_end(src_pixels_v, kPixels * 2);
3008*4e366538SXin Li align_buffer_page_end(dst_pixels_opt, kPixels * 2 * 2);
3009*4e366538SXin Li align_buffer_page_end(dst_pixels_c, kPixels * 2 * 2);
3010*4e366538SXin Li MemRandomize(src_pixels_u, kPixels * 2);
3011*4e366538SXin Li MemRandomize(src_pixels_v, kPixels * 2);
3012*4e366538SXin Li MemRandomize(dst_pixels_opt, kPixels * 2 * 2);
3013*4e366538SXin Li MemRandomize(dst_pixels_c, kPixels * 2 * 2);
3014*4e366538SXin Li
3015*4e366538SXin Li MaskCpuFlags(disable_cpu_flags_);
3016*4e366538SXin Li MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_,
3017*4e366538SXin Li (const uint16_t*)src_pixels_v, benchmark_width_,
3018*4e366538SXin Li (uint16_t*)dst_pixels_c, benchmark_width_ * 2,
3019*4e366538SXin Li benchmark_width_, benchmark_height_, 12);
3020*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info_);
3021*4e366538SXin Li
3022*4e366538SXin Li for (int i = 0; i < benchmark_iterations_; ++i) {
3023*4e366538SXin Li MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_,
3024*4e366538SXin Li (const uint16_t*)src_pixels_v, benchmark_width_,
3025*4e366538SXin Li (uint16_t*)dst_pixels_opt, benchmark_width_ * 2,
3026*4e366538SXin Li benchmark_width_, benchmark_height_, 12);
3027*4e366538SXin Li }
3028*4e366538SXin Li
3029*4e366538SXin Li for (int i = 0; i < kPixels * 2 * 2; ++i) {
3030*4e366538SXin Li EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
3031*4e366538SXin Li }
3032*4e366538SXin Li free_aligned_buffer_page_end(src_pixels_u);
3033*4e366538SXin Li free_aligned_buffer_page_end(src_pixels_v);
3034*4e366538SXin Li free_aligned_buffer_page_end(dst_pixels_opt);
3035*4e366538SXin Li free_aligned_buffer_page_end(dst_pixels_c);
3036*4e366538SXin Li }
3037*4e366538SXin Li
// Calls SplitUVPlane once under MaskCpuFlags(disable_cpu_flags_) into the
// *_c planes and benchmark_iterations_ times under
// MaskCpuFlags(benchmark_cpu_info_) into the *_opt planes, then requires the
// U planes and the V planes to match byte for byte.
TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcBytes = kPixels * 2;
  const int kSrcStride = benchmark_width_ * 2;  // two bytes in per pixel
  const int kDstStride = benchmark_width_;

  align_buffer_page_end(src_uv, kSrcBytes);
  align_buffer_page_end(dst_pixels_u_c, kPixels);
  align_buffer_page_end(dst_pixels_v_c, kPixels);
  align_buffer_page_end(dst_pixels_u_opt, kPixels);
  align_buffer_page_end(dst_pixels_v_opt, kPixels);

  MemRandomize(src_uv, kSrcBytes);
  MemRandomize(dst_pixels_u_c, kPixels);
  MemRandomize(dst_pixels_v_c, kPixels);
  MemRandomize(dst_pixels_u_opt, kPixels);
  MemRandomize(dst_pixels_v_opt, kPixels);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  SplitUVPlane(src_uv, kSrcStride, dst_pixels_u_c, kDstStride, dst_pixels_v_c,
               kDstStride, benchmark_width_, benchmark_height_);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    SplitUVPlane(src_uv, kSrcStride, dst_pixels_u_opt, kDstStride,
                 dst_pixels_v_opt, kDstStride, benchmark_width_,
                 benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]);
    EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]);
  }

  free_aligned_buffer_page_end(src_uv);
  free_aligned_buffer_page_end(dst_pixels_u_c);
  free_aligned_buffer_page_end(dst_pixels_v_c);
  free_aligned_buffer_page_end(dst_pixels_u_opt);
  free_aligned_buffer_page_end(dst_pixels_v_opt);
}
3075*4e366538SXin Li
3076*4e366538SXin Li // 16 bit channel split
TEST_F(LibYUVPlanarTest,SplitUVPlane_16_Opt)3077*4e366538SXin Li TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) {
3078*4e366538SXin Li const int kPixels = benchmark_width_ * benchmark_height_;
3079*4e366538SXin Li align_buffer_page_end(src_pixels, kPixels * 2 * 2);
3080*4e366538SXin Li align_buffer_page_end(dst_pixels_u_c, kPixels * 2);
3081*4e366538SXin Li align_buffer_page_end(dst_pixels_v_c, kPixels * 2);
3082*4e366538SXin Li align_buffer_page_end(dst_pixels_u_opt, kPixels * 2);
3083*4e366538SXin Li align_buffer_page_end(dst_pixels_v_opt, kPixels * 2);
3084*4e366538SXin Li MemRandomize(src_pixels, kPixels * 2 * 2);
3085*4e366538SXin Li MemRandomize(dst_pixels_u_c, kPixels * 2);
3086*4e366538SXin Li MemRandomize(dst_pixels_v_c, kPixels * 2);
3087*4e366538SXin Li MemRandomize(dst_pixels_u_opt, kPixels * 2);
3088*4e366538SXin Li MemRandomize(dst_pixels_v_opt, kPixels * 2);
3089*4e366538SXin Li
3090*4e366538SXin Li MaskCpuFlags(disable_cpu_flags_);
3091*4e366538SXin Li SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
3092*4e366538SXin Li (uint16_t*)dst_pixels_u_c, benchmark_width_,
3093*4e366538SXin Li (uint16_t*)dst_pixels_v_c, benchmark_width_, benchmark_width_,
3094*4e366538SXin Li benchmark_height_, 10);
3095*4e366538SXin Li MaskCpuFlags(benchmark_cpu_info_);
3096*4e366538SXin Li
3097*4e366538SXin Li for (int i = 0; i < benchmark_iterations_; ++i) {
3098*4e366538SXin Li SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
3099*4e366538SXin Li (uint16_t*)dst_pixels_u_opt, benchmark_width_,
3100*4e366538SXin Li (uint16_t*)dst_pixels_v_opt, benchmark_width_,
3101*4e366538SXin Li benchmark_width_, benchmark_height_, 10);
3102*4e366538SXin Li }
3103*4e366538SXin Li
3104*4e366538SXin Li for (int i = 0; i < kPixels * 2; ++i) {
3105*4e366538SXin Li EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]);
3106*4e366538SXin Li EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]);
3107*4e366538SXin Li }
3108*4e366538SXin Li free_aligned_buffer_page_end(src_pixels);
3109*4e366538SXin Li free_aligned_buffer_page_end(dst_pixels_u_c);
3110*4e366538SXin Li free_aligned_buffer_page_end(dst_pixels_v_c);
3111*4e366538SXin Li free_aligned_buffer_page_end(dst_pixels_u_opt);
3112*4e366538SXin Li free_aligned_buffer_page_end(dst_pixels_v_opt);
3113*4e366538SXin Li }
3114*4e366538SXin Li
// Calls SwapUVPlane once under MaskCpuFlags(disable_cpu_flags_) as the
// reference and benchmark_iterations_ times under
// MaskCpuFlags(benchmark_cpu_info_), then requires the two destination
// buffers to match byte for byte. Source and destination both use two bytes
// per pixel.
TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kBytes = kPixels * 2;
  const int kStride = benchmark_width_ * 2;

  align_buffer_page_end(src_uv, kBytes);
  align_buffer_page_end(dst_pixels_opt, kBytes);
  align_buffer_page_end(dst_pixels_c, kBytes);

  MemRandomize(src_uv, kBytes);
  MemRandomize(dst_pixels_opt, kBytes);
  MemRandomize(dst_pixels_c, kBytes);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  SwapUVPlane(src_uv, kStride, dst_pixels_c, kStride, benchmark_width_,
              benchmark_height_);

  // Benchmarked pass.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    SwapUVPlane(src_uv, kStride, dst_pixels_opt, kStride, benchmark_width_,
                benchmark_height_);
  }

  for (int i = 0; i < kBytes; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_uv);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3144*4e366538SXin Li
// Benchmarks MergeRGBPlane. The 3-byte-per-pixel source is split into three
// planes and merged back once under MaskCpuFlags(disable_cpu_flags_) into
// dst_pixels_c. Under MaskCpuFlags(benchmark_cpu_info_) it is split again and
// merged benchmark_iterations_ times into dst_pixels_opt. The two merged
// buffers must match byte for byte.
TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kRgbBytes = kPixels * 3;
  const int kRgbStride = benchmark_width_ * 3;
  const int kPlaneStride = benchmark_width_;

  align_buffer_page_end(src_rgb, kRgbBytes);
  align_buffer_page_end(tmp_r, kPixels);
  align_buffer_page_end(tmp_g, kPixels);
  align_buffer_page_end(tmp_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kRgbBytes);
  align_buffer_page_end(dst_pixels_c, kRgbBytes);

  MemRandomize(src_rgb, kRgbBytes);
  MemRandomize(tmp_r, kPixels);
  MemRandomize(tmp_g, kPixels);
  MemRandomize(tmp_b, kPixels);
  MemRandomize(dst_pixels_opt, kRgbBytes);
  MemRandomize(dst_pixels_c, kRgbBytes);

  // Reference pass: split, then merge.
  MaskCpuFlags(disable_cpu_flags_);
  SplitRGBPlane(src_rgb, kRgbStride, tmp_r, kPlaneStride, tmp_g, kPlaneStride,
                tmp_b, kPlaneStride, benchmark_width_, benchmark_height_);
  MergeRGBPlane(tmp_r, kPlaneStride, tmp_g, kPlaneStride, tmp_b, kPlaneStride,
                dst_pixels_c, kRgbStride, benchmark_width_, benchmark_height_);

  // Benchmarked pass: one split to refill the planes, then the timed merges.
  MaskCpuFlags(benchmark_cpu_info_);
  SplitRGBPlane(src_rgb, kRgbStride, tmp_r, kPlaneStride, tmp_g, kPlaneStride,
                tmp_b, kPlaneStride, benchmark_width_, benchmark_height_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    MergeRGBPlane(tmp_r, kPlaneStride, tmp_g, kPlaneStride, tmp_b,
                  kPlaneStride, dst_pixels_opt, kRgbStride, benchmark_width_,
                  benchmark_height_);
  }

  for (int i = 0; i < kRgbBytes; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_rgb);
  free_aligned_buffer_page_end(tmp_r);
  free_aligned_buffer_page_end(tmp_g);
  free_aligned_buffer_page_end(tmp_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3193*4e366538SXin Li
// Benchmarks SplitRGBPlane and checks it through a round trip. The
// 3-byte-per-pixel source is split and merged back once under
// MaskCpuFlags(disable_cpu_flags_) into dst_pixels_c. Under
// MaskCpuFlags(benchmark_cpu_info_) it is split benchmark_iterations_ times
// and merged once into dst_pixels_opt. The two merged buffers must match byte
// for byte.
TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  align_buffer_page_end(src_pixels, kPixels * 3);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 3);
  align_buffer_page_end(dst_pixels_c, kPixels * 3);

  MemRandomize(src_pixels, kPixels * 3);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 3);
  MemRandomize(dst_pixels_c, kPixels * 3);

  // Reference pass: split, then merge into dst_pixels_c.
  MaskCpuFlags(disable_cpu_flags_);
  SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r,
                benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
                benchmark_width_, benchmark_width_, benchmark_height_);
  MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
                tmp_pixels_b, benchmark_width_, dst_pixels_c,
                benchmark_width_ * 3, benchmark_width_, benchmark_height_);

  // The planes still hold the reference split at this point. Scramble them so
  // the benchmarked split has to write every byte itself; otherwise bytes it
  // skipped would keep the reference values and the comparison would pass.
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);

  // Benchmarked pass: timed splits, then one merge into dst_pixels_opt.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int i = 0; i < benchmark_iterations_; ++i) {
    SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_r,
                  benchmark_width_, tmp_pixels_g, benchmark_width_,
                  tmp_pixels_b, benchmark_width_, benchmark_width_,
                  benchmark_height_);
  }
  MergeRGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
                tmp_pixels_b, benchmark_width_, dst_pixels_opt,
                benchmark_width_ * 3, benchmark_width_, benchmark_height_);

  for (int i = 0; i < kPixels * 3; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3241*4e366538SXin Li
// Benchmarks MergeARGBPlane with all four planes present. The
// 4-byte-per-pixel source is split into R, G, B and A planes and merged back
// once under MaskCpuFlags(disable_cpu_flags_) into dst_pixels_c. Under
// MaskCpuFlags(benchmark_cpu_info_) it is split again and merged
// benchmark_iterations_ times into dst_pixels_opt. The two merged buffers must
// match byte for byte.
TEST_F(LibYUVPlanarTest, MergeARGBPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kArgbBytes = kPixels * 4;
  const int kArgbStride = benchmark_width_ * 4;
  const int kPlaneStride = benchmark_width_;

  align_buffer_page_end(src_argb, kArgbBytes);
  align_buffer_page_end(tmp_r, kPixels);
  align_buffer_page_end(tmp_g, kPixels);
  align_buffer_page_end(tmp_b, kPixels);
  align_buffer_page_end(tmp_a, kPixels);
  align_buffer_page_end(dst_pixels_opt, kArgbBytes);
  align_buffer_page_end(dst_pixels_c, kArgbBytes);

  MemRandomize(src_argb, kArgbBytes);
  MemRandomize(tmp_r, kPixels);
  MemRandomize(tmp_g, kPixels);
  MemRandomize(tmp_b, kPixels);
  MemRandomize(tmp_a, kPixels);
  MemRandomize(dst_pixels_opt, kArgbBytes);
  MemRandomize(dst_pixels_c, kArgbBytes);

  // Reference pass: split, then merge.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_argb, kArgbStride, tmp_r, kPlaneStride, tmp_g,
                 kPlaneStride, tmp_b, kPlaneStride, tmp_a, kPlaneStride,
                 benchmark_width_, benchmark_height_);
  MergeARGBPlane(tmp_r, kPlaneStride, tmp_g, kPlaneStride, tmp_b, kPlaneStride,
                 tmp_a, kPlaneStride, dst_pixels_c, kArgbStride,
                 benchmark_width_, benchmark_height_);

  // Benchmarked pass: one split to refill the planes, then the timed merges.
  MaskCpuFlags(benchmark_cpu_info_);
  SplitARGBPlane(src_argb, kArgbStride, tmp_r, kPlaneStride, tmp_g,
                 kPlaneStride, tmp_b, kPlaneStride, tmp_a, kPlaneStride,
                 benchmark_width_, benchmark_height_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    MergeARGBPlane(tmp_r, kPlaneStride, tmp_g, kPlaneStride, tmp_b,
                   kPlaneStride, tmp_a, kPlaneStride, dst_pixels_opt,
                   kArgbStride, benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kArgbBytes; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_argb);
  free_aligned_buffer_page_end(tmp_r);
  free_aligned_buffer_page_end(tmp_g);
  free_aligned_buffer_page_end(tmp_b);
  free_aligned_buffer_page_end(tmp_a);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3295*4e366538SXin Li
// Benchmarks SplitARGBPlane with all four planes present and checks it through
// a round trip. The 4-byte-per-pixel source is split and merged back once
// under MaskCpuFlags(disable_cpu_flags_) into dst_pixels_c. Under
// MaskCpuFlags(benchmark_cpu_info_) it is split benchmark_iterations_ times
// and merged once into dst_pixels_opt. The two merged buffers must match byte
// for byte.
TEST_F(LibYUVPlanarTest, SplitARGBPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  align_buffer_page_end(src_pixels, kPixels * 4);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(tmp_pixels_a, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 4);
  align_buffer_page_end(dst_pixels_c, kPixels * 4);

  MemRandomize(src_pixels, kPixels * 4);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(tmp_pixels_a, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 4);
  MemRandomize(dst_pixels_c, kPixels * 4);

  // Reference pass: split, then merge into dst_pixels_c.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
                 benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
                 benchmark_width_, tmp_pixels_a, benchmark_width_,
                 benchmark_width_, benchmark_height_);
  MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
                 tmp_pixels_b, benchmark_width_, tmp_pixels_a, benchmark_width_,
                 dst_pixels_c, benchmark_width_ * 4, benchmark_width_,
                 benchmark_height_);

  // The planes still hold the reference split at this point. Scramble them so
  // the benchmarked split has to write every byte itself; otherwise bytes it
  // skipped would keep the reference values and the comparison would pass.
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(tmp_pixels_a, kPixels);

  // Benchmarked pass: timed splits, then one merge into dst_pixels_opt.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int i = 0; i < benchmark_iterations_; ++i) {
    SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
                   benchmark_width_, tmp_pixels_g, benchmark_width_,
                   tmp_pixels_b, benchmark_width_, tmp_pixels_a,
                   benchmark_width_, benchmark_width_, benchmark_height_);
  }

  MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
                 tmp_pixels_b, benchmark_width_, tmp_pixels_a, benchmark_width_,
                 dst_pixels_opt, benchmark_width_ * 4, benchmark_width_,
                 benchmark_height_);

  for (int i = 0; i < kPixels * 4; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(tmp_pixels_a);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3349*4e366538SXin Li
// Benchmarks MergeARGBPlane without an alpha plane: every SplitARGBPlane and
// MergeARGBPlane call passes NULL and a stride of 0 where MergeARGBPlane_Opt
// passes its alpha plane. Otherwise the flow is the same: a reference split
// and merge under MaskCpuFlags(disable_cpu_flags_), another split plus
// benchmark_iterations_ merges under MaskCpuFlags(benchmark_cpu_info_), and a
// byte-for-byte comparison of the merged buffers.
TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kArgbBytes = kPixels * 4;
  const int kArgbStride = benchmark_width_ * 4;
  const int kPlaneStride = benchmark_width_;

  align_buffer_page_end(src_argb, kArgbBytes);
  align_buffer_page_end(tmp_r, kPixels);
  align_buffer_page_end(tmp_g, kPixels);
  align_buffer_page_end(tmp_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kArgbBytes);
  align_buffer_page_end(dst_pixels_c, kArgbBytes);

  MemRandomize(src_argb, kArgbBytes);
  MemRandomize(tmp_r, kPixels);
  MemRandomize(tmp_g, kPixels);
  MemRandomize(tmp_b, kPixels);
  MemRandomize(dst_pixels_opt, kArgbBytes);
  MemRandomize(dst_pixels_c, kArgbBytes);

  // Reference pass: split, then merge.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_argb, kArgbStride, tmp_r, kPlaneStride, tmp_g,
                 kPlaneStride, tmp_b, kPlaneStride, NULL, 0, benchmark_width_,
                 benchmark_height_);
  MergeARGBPlane(tmp_r, kPlaneStride, tmp_g, kPlaneStride, tmp_b, kPlaneStride,
                 NULL, 0, dst_pixels_c, kArgbStride, benchmark_width_,
                 benchmark_height_);

  // Benchmarked pass: one split to refill the planes, then the timed merges.
  MaskCpuFlags(benchmark_cpu_info_);
  SplitARGBPlane(src_argb, kArgbStride, tmp_r, kPlaneStride, tmp_g,
                 kPlaneStride, tmp_b, kPlaneStride, NULL, 0, benchmark_width_,
                 benchmark_height_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    MergeARGBPlane(tmp_r, kPlaneStride, tmp_g, kPlaneStride, tmp_b,
                   kPlaneStride, NULL, 0, dst_pixels_opt, kArgbStride,
                   benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kArgbBytes; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_argb);
  free_aligned_buffer_page_end(tmp_r);
  free_aligned_buffer_page_end(tmp_g);
  free_aligned_buffer_page_end(tmp_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3399*4e366538SXin Li
// Benchmarks SplitARGBPlane without an alpha plane: every SplitARGBPlane and
// MergeARGBPlane call passes NULL and a stride of 0 where SplitARGBPlane_Opt
// passes its alpha plane. The source is split and merged back once under
// MaskCpuFlags(disable_cpu_flags_) into dst_pixels_c. Under
// MaskCpuFlags(benchmark_cpu_info_) it is split benchmark_iterations_ times
// and merged once into dst_pixels_opt. The two merged buffers must match byte
// for byte.
TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  align_buffer_page_end(src_pixels, kPixels * 4);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 4);
  align_buffer_page_end(dst_pixels_c, kPixels * 4);

  MemRandomize(src_pixels, kPixels * 4);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 4);
  MemRandomize(dst_pixels_c, kPixels * 4);

  // Reference pass: split, then merge into dst_pixels_c.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
                 benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
                 benchmark_width_, NULL, 0, benchmark_width_,
                 benchmark_height_);
  MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
                 tmp_pixels_b, benchmark_width_, NULL, 0, dst_pixels_c,
                 benchmark_width_ * 4, benchmark_width_, benchmark_height_);

  // The planes still hold the reference split at this point. Scramble them so
  // the benchmarked split has to write every byte itself; otherwise bytes it
  // skipped would keep the reference values and the comparison would pass.
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);

  // Benchmarked pass: timed splits, then one merge into dst_pixels_opt.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int i = 0; i < benchmark_iterations_; ++i) {
    SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
                   benchmark_width_, tmp_pixels_g, benchmark_width_,
                   tmp_pixels_b, benchmark_width_, NULL, 0, benchmark_width_,
                   benchmark_height_);
  }

  MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
                 tmp_pixels_b, benchmark_width_, NULL, 0, dst_pixels_opt,
                 benchmark_width_ * 4, benchmark_width_, benchmark_height_);

  for (int i = 0; i < kPixels * 4; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3448*4e366538SXin Li
// Merge 4 channels.
//
// TESTQPLANARTOPI defines a LibYUVPlanarTest case named
// FUNC##Plane_##DEPTH##N. The case:
//   - allocates four source planes of kPixels STYPE samples plus OFF extra
//     bytes each, fills them with MemRandomize, and views each one starting
//     OFF bytes into its allocation;
//   - allocates two destinations of kPixels * 4 DTYPE values and zeroes them;
//   - calls FUNC##Plane once under MaskCpuFlags(disable_cpu_flags_) and
//     benchmark_iterations_ times under MaskCpuFlags(benchmark_cpu_info_);
//   - expects the two destinations to be equal element by element.
// W1280 supplies the width, NEG is pasted directly in front of
// benchmark_height_ in the calls, and DEPTH is forwarded as the last argument.
#define TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF)        \
  TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) {                          \
    const int kWidth = W1280;                                                 \
    const int kPixels = kWidth * benchmark_height_;                           \
    const size_t kSrcBytes = kPixels * sizeof(STYPE) + OFF;                   \
    const size_t kDstBytes = kPixels * 4 * sizeof(DTYPE);                     \
    align_buffer_page_end(src_memory_r, kSrcBytes);                           \
    align_buffer_page_end(src_memory_g, kSrcBytes);                           \
    align_buffer_page_end(src_memory_b, kSrcBytes);                           \
    align_buffer_page_end(src_memory_a, kSrcBytes);                           \
    align_buffer_page_end(dst_memory_c, kDstBytes);                           \
    align_buffer_page_end(dst_memory_opt, kDstBytes);                         \
    MemRandomize(src_memory_r, kSrcBytes);                                    \
    MemRandomize(src_memory_g, kSrcBytes);                                    \
    MemRandomize(src_memory_b, kSrcBytes);                                    \
    MemRandomize(src_memory_a, kSrcBytes);                                    \
    memset(dst_memory_c, 0, kDstBytes);                                       \
    memset(dst_memory_opt, 0, kDstBytes);                                     \
    STYPE* src_r = reinterpret_cast<STYPE*>(src_memory_r + OFF);              \
    STYPE* src_g = reinterpret_cast<STYPE*>(src_memory_g + OFF);              \
    STYPE* src_b = reinterpret_cast<STYPE*>(src_memory_b + OFF);              \
    STYPE* src_a = reinterpret_cast<STYPE*>(src_memory_a + OFF);              \
    DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c);             \
    DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt);         \
    const int kDstStride = kWidth * 4;                                        \
    MaskCpuFlags(disable_cpu_flags_);                                         \
    FUNC##Plane(src_r, kWidth, src_g, kWidth, src_b, kWidth, src_a, kWidth,   \
                dst_pixels_c, kDstStride, kWidth, NEG benchmark_height_,      \
                DEPTH);                                                       \
    MaskCpuFlags(benchmark_cpu_info_);                                        \
    for (int iter = 0; iter < benchmark_iterations_; ++iter) {                \
      FUNC##Plane(src_r, kWidth, src_g, kWidth, src_b, kWidth, src_a, kWidth, \
                  dst_pixels_opt, kDstStride, kWidth, NEG benchmark_height_,  \
                  DEPTH);                                                     \
    }                                                                         \
    for (int i = 0; i < kPixels * 4; ++i) {                                   \
      EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);                          \
    }                                                                         \
    free_aligned_buffer_page_end(src_memory_r);                               \
    free_aligned_buffer_page_end(src_memory_g);                               \
    free_aligned_buffer_page_end(src_memory_b);                               \
    free_aligned_buffer_page_end(src_memory_a);                               \
    free_aligned_buffer_page_end(dst_memory_c);                               \
    free_aligned_buffer_page_end(dst_memory_opt);                             \
  }
3492*4e366538SXin Li
// Test generator: merge planar R, G and B into a 4 channel destination while
// passing NULL (stride 0) for the alpha plane, i.e. the opaque-alpha variant.
// The output produced after MaskCpuFlags(disable_cpu_flags_) is compared,
// element by element, with the output of benchmark_iterations_ runs made
// after MaskCpuFlags(benchmark_cpu_info_).
//   FUNC         prefix of the function under test; FUNC##Plane is called.
//   STYPE/DTYPE  source / destination element types.
//   DEPTH        value forwarded as the last argument of FUNC##Plane.
//   W1280        width expression; N is the suffix of the test name.
//   NEG          sign token written in front of benchmark_height_.
//   OFF          byte offset added to the start of every source plane.
#define TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF)       \
  TEST_F(LibYUVPlanarTest, FUNC##Plane_Opaque_##DEPTH##N) {                   \
    const int kWidth = W1280;                                                 \
    const int kPixels = kWidth * benchmark_height_;                           \
    const size_t kSrcBytes = kPixels * sizeof(STYPE) + OFF;                   \
    const size_t kDstBytes = kPixels * 4 * sizeof(DTYPE);                     \
    align_buffer_page_end(plane_r, kSrcBytes);                                \
    align_buffer_page_end(plane_g, kSrcBytes);                                \
    align_buffer_page_end(plane_b, kSrcBytes);                                \
    align_buffer_page_end(out_c, kDstBytes);                                  \
    align_buffer_page_end(out_opt, kDstBytes);                                \
    MemRandomize(plane_r, kSrcBytes);                                         \
    MemRandomize(plane_g, kSrcBytes);                                         \
    MemRandomize(plane_b, kSrcBytes);                                         \
    memset(out_c, 0, kDstBytes);                                              \
    memset(out_opt, 0, kDstBytes);                                            \
    STYPE* in_r = reinterpret_cast<STYPE*>(plane_r + OFF);                    \
    STYPE* in_g = reinterpret_cast<STYPE*>(plane_g + OFF);                    \
    STYPE* in_b = reinterpret_cast<STYPE*>(plane_b + OFF);                    \
    DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(out_c);                    \
    DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(out_opt);                \
    MaskCpuFlags(disable_cpu_flags_);                                         \
    FUNC##Plane(in_r, kWidth, in_g, kWidth, in_b, kWidth, NULL, 0,            \
                dst_pixels_c, kWidth * 4, kWidth, NEG benchmark_height_,      \
                DEPTH);                                                       \
    MaskCpuFlags(benchmark_cpu_info_);                                        \
    for (int pass = benchmark_iterations_; pass > 0; --pass) {                \
      FUNC##Plane(in_r, kWidth, in_g, kWidth, in_b, kWidth, NULL, 0,          \
                  dst_pixels_opt, kWidth * 4, kWidth,                         \
                  NEG benchmark_height_, DEPTH);                              \
    }                                                                         \
    for (int i = 0; i < kPixels * 4; ++i) {                                   \
      EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);                          \
    }                                                                         \
    free_aligned_buffer_page_end(plane_r);                                    \
    free_aligned_buffer_page_end(plane_g);                                    \
    free_aligned_buffer_page_end(plane_b);                                    \
    free_aligned_buffer_page_end(out_c);                                      \
    free_aligned_buffer_page_end(out_opt);                                    \
  }
3532*4e366538SXin Li
3533*4e366538SXin Li #define TESTQPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
3534*4e366538SXin Li TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \
3535*4e366538SXin Li TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
3536*4e366538SXin Li 2) \
3537*4e366538SXin Li TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
3538*4e366538SXin Li TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0) \
3539*4e366538SXin Li TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, \
3540*4e366538SXin Li 0) \
3541*4e366538SXin Li TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
3542*4e366538SXin Li 2) \
3543*4e366538SXin Li TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
3544*4e366538SXin Li TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)
3545*4e366538SXin Li
3546*4e366538SXin Li TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 10)
3547*4e366538SXin Li TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 12)
3548*4e366538SXin Li TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 16)
3549*4e366538SXin Li TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 10)
3550*4e366538SXin Li TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 12)
3551*4e366538SXin Li TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 16)
3552*4e366538SXin Li
// Test generator: merge three planar channels (R, G, B) into a destination
// holding 4 DTYPE elements per pixel. The two destination buffers are
// pre-filled with different byte values (1 and 2), then the output produced
// after MaskCpuFlags(disable_cpu_flags_) is compared, element by element,
// with the output of benchmark_iterations_ runs made after
// MaskCpuFlags(benchmark_cpu_info_).
//   FUNC         prefix of the function under test; FUNC##Plane is called.
//   STYPE/DTYPE  source / destination element types.
//   DEPTH        value forwarded as the last argument of FUNC##Plane.
//   W1280        width expression; N is the suffix of the test name.
//   NEG          sign token written in front of benchmark_height_.
//   OFF          byte offset added to the start of every source plane.
#define TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF)        \
  TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) {                          \
    const int kWidth = W1280;                                                 \
    const int kPixels = kWidth * benchmark_height_;                           \
    const size_t kSrcBytes = kPixels * sizeof(STYPE) + OFF;                   \
    const size_t kDstBytes = kPixels * 4 * sizeof(DTYPE);                     \
    align_buffer_page_end(plane_r, kSrcBytes);                                \
    align_buffer_page_end(plane_g, kSrcBytes);                                \
    align_buffer_page_end(plane_b, kSrcBytes);                                \
    align_buffer_page_end(out_c, kDstBytes);                                  \
    align_buffer_page_end(out_opt, kDstBytes);                                \
    MemRandomize(plane_r, kSrcBytes);                                         \
    MemRandomize(plane_g, kSrcBytes);                                         \
    MemRandomize(plane_b, kSrcBytes);                                         \
    memset(out_c, 1, kDstBytes);                                              \
    memset(out_opt, 2, kDstBytes);                                            \
    STYPE* in_r = reinterpret_cast<STYPE*>(plane_r + OFF);                    \
    STYPE* in_g = reinterpret_cast<STYPE*>(plane_g + OFF);                    \
    STYPE* in_b = reinterpret_cast<STYPE*>(plane_b + OFF);                    \
    DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(out_c);                    \
    DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(out_opt);                \
    MaskCpuFlags(disable_cpu_flags_);                                         \
    FUNC##Plane(in_r, kWidth, in_g, kWidth, in_b, kWidth, dst_pixels_c,       \
                kWidth * 4, kWidth, NEG benchmark_height_, DEPTH);            \
    MaskCpuFlags(benchmark_cpu_info_);                                        \
    for (int pass = benchmark_iterations_; pass > 0; --pass) {                \
      FUNC##Plane(in_r, kWidth, in_g, kWidth, in_b, kWidth,                   \
                  dst_pixels_opt, kWidth * 4, kWidth,                         \
                  NEG benchmark_height_, DEPTH);                              \
    }                                                                         \
    for (int i = 0; i < kPixels * 4; ++i) {                                   \
      EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);                          \
    }                                                                         \
    free_aligned_buffer_page_end(plane_r);                                    \
    free_aligned_buffer_page_end(plane_g);                                    \
    free_aligned_buffer_page_end(plane_b);                                    \
    free_aligned_buffer_page_end(out_c);                                      \
    free_aligned_buffer_page_end(out_opt);                                    \
  }
3591*4e366538SXin Li
3592*4e366538SXin Li #define TESTTPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
3593*4e366538SXin Li TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \
3594*4e366538SXin Li TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
3595*4e366538SXin Li 2) \
3596*4e366538SXin Li TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
3597*4e366538SXin Li TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)
3598*4e366538SXin Li
3599*4e366538SXin Li TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 10)
3600*4e366538SXin Li TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 12)
3601*4e366538SXin Li TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16)
3602*4e366538SXin Li
3603*4e366538SXin Li // TODO(fbarchard): improve test for platforms and cpu detect
3604*4e366538SXin Li #ifdef HAS_MERGEUVROW_16_AVX2
// Row-level check: MergeUVRow_16_AVX2 (or MergeUVRow_16_C again when
// TestCpuFlag reports no AVX2) must write the same bytes as the
// MergeUVRow_16_C reference for randomized 16-bit U and V input.
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
  // Sample count, rounded up to a multiple of 8.
  const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7;
  const int kSrcBytes = kPixels * 2;      // one 2-byte sample per pixel
  const int kDstBytes = kPixels * 2 * 2;  // two 2-byte samples per pixel

  align_buffer_page_end(plane_u, kSrcBytes);
  align_buffer_page_end(plane_v, kSrcBytes);
  align_buffer_page_end(dst_pixels_uv_opt, kDstBytes);
  align_buffer_page_end(dst_pixels_uv_c, kDstBytes);

  MemRandomize(plane_u, kSrcBytes);
  MemRandomize(plane_v, kSrcBytes);
  // Different fill bytes: an output byte that neither path writes will
  // show up as a mismatch in the comparison below.
  memset(dst_pixels_uv_opt, 0, kDstBytes);
  memset(dst_pixels_uv_c, 1, kDstBytes);

  const uint16_t* u16 = reinterpret_cast<const uint16_t*>(plane_u);
  const uint16_t* v16 = reinterpret_cast<const uint16_t*>(plane_v);
  uint16_t* uv16_c = reinterpret_cast<uint16_t*>(dst_pixels_uv_c);
  uint16_t* uv16_opt = reinterpret_cast<uint16_t*>(dst_pixels_uv_opt);

  // Reference output from the C row function.
  MergeUVRow_16_C(u16, v16, uv16_c, 16, kPixels);

  const int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  for (int pass = benchmark_iterations_; pass > 0; --pass) {
    if (!has_avx2) {
      MergeUVRow_16_C(u16, v16, uv16_opt, 16, kPixels);
    } else {
      MergeUVRow_16_AVX2(u16, v16, uv16_opt, 16, kPixels);
    }
  }

  for (int i = 0; i < kDstBytes; ++i) {
    EXPECT_EQ(dst_pixels_uv_opt[i], dst_pixels_uv_c[i]);
  }

  free_aligned_buffer_page_end(plane_u);
  free_aligned_buffer_page_end(plane_v);
  free_aligned_buffer_page_end(dst_pixels_uv_opt);
  free_aligned_buffer_page_end(dst_pixels_uv_c);
}
3647*4e366538SXin Li #endif
3648*4e366538SXin Li
3649*4e366538SXin Li // TODO(fbarchard): Improve test for more platforms.
3650*4e366538SXin Li #ifdef HAS_MULTIPLYROW_16_AVX2
// Row-level check: MultiplyRow_16_AVX2 (or MultiplyRow_16_C again when
// TestCpuFlag reports no AVX2) must write the same bytes as the
// MultiplyRow_16_C reference for randomized 16-bit input and scale 64.
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
  // Sample count, rounded up to a multiple of 32.
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kPlaneBytes = kPixels * 2;  // one 2-byte sample per pixel

  align_buffer_page_end(plane_y, kPlaneBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPlaneBytes);
  align_buffer_page_end(dst_pixels_y_c, kPlaneBytes);

  MemRandomize(plane_y, kPlaneBytes);
  // Different fill bytes so unwritten output is detected as a mismatch.
  memset(dst_pixels_y_opt, 0, kPlaneBytes);
  memset(dst_pixels_y_c, 1, kPlaneBytes);

  const uint16_t* y16 = reinterpret_cast<const uint16_t*>(plane_y);
  uint16_t* out16_c = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* out16_opt = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  // Reference output from the C row function.
  MultiplyRow_16_C(y16, out16_c, 64, kPixels);

  const int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  for (int pass = benchmark_iterations_; pass > 0; --pass) {
    if (!has_avx2) {
      MultiplyRow_16_C(y16, out16_opt, 64, kPixels);
    } else {
      MultiplyRow_16_AVX2(y16, out16_opt, 64, kPixels);
    }
  }

  for (int i = 0; i < kPlaneBytes; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(plane_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3687*4e366538SXin Li #endif // HAS_MULTIPLYROW_16_AVX2
3688*4e366538SXin Li
// Convert16To8Plane with scale 16384: the output produced with
// disable_cpu_flags_ masked must equal the output produced with
// benchmark_cpu_info_, for a randomized 16-bit source plane.
TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcBytes = kPixels * 2;  // 2 bytes per source sample

  align_buffer_page_end(plane_y16, kSrcBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(plane_y16, kSrcBytes);
  // Different fill bytes so unwritten output is detected as a mismatch.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  const uint16_t* src16 = reinterpret_cast<const uint16_t*>(plane_y16);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  Convert16To8Plane(src16, benchmark_width_, dst_pixels_y_c, benchmark_width_,
                    16384, benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked pass(es).
  for (int pass = benchmark_iterations_; pass > 0; --pass) {
    Convert16To8Plane(src16, benchmark_width_, dst_pixels_y_opt,
                      benchmark_width_, 16384, benchmark_width_,
                      benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(plane_y16);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3719*4e366538SXin Li
// YUY2ToY: the output produced with disable_cpu_flags_ masked must equal the
// output produced with benchmark_cpu_info_, for a randomized source buffer
// of 2 bytes per pixel.
TEST_F(LibYUVPlanarTest, YUY2ToY) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcBytes = kPixels * 2;
  const int kSrcStride = benchmark_width_ * 2;

  align_buffer_page_end(src_yuy2, kSrcBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_yuy2, kSrcBytes);
  // Different fill bytes so unwritten output is detected as a mismatch.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  YUY2ToY(src_yuy2, kSrcStride, dst_pixels_y_c, benchmark_width_,
          benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked pass(es).
  for (int pass = benchmark_iterations_; pass > 0; --pass) {
    YUY2ToY(src_yuy2, kSrcStride, dst_pixels_y_opt, benchmark_width_,
            benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_yuy2);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3748*4e366538SXin Li
// UYVYToY: the output produced with disable_cpu_flags_ masked must equal the
// output produced with benchmark_cpu_info_, for a randomized source buffer
// of 2 bytes per pixel.
TEST_F(LibYUVPlanarTest, UYVYToY) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcBytes = kPixels * 2;
  const int kSrcStride = benchmark_width_ * 2;

  align_buffer_page_end(src_uyvy, kSrcBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_uyvy, kSrcBytes);
  // Different fill bytes so unwritten output is detected as a mismatch.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  UYVYToY(src_uyvy, kSrcStride, dst_pixels_y_c, benchmark_width_,
          benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked pass(es).
  for (int pass = benchmark_iterations_; pass > 0; --pass) {
    UYVYToY(src_uyvy, kSrcStride, dst_pixels_y_opt, benchmark_width_,
            benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_uyvy);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3777*4e366538SXin Li
3778*4e366538SXin Li #ifdef ENABLE_ROW_TESTS
3779*4e366538SXin Li // TODO(fbarchard): Improve test for more platforms.
3780*4e366538SXin Li #ifdef HAS_CONVERT16TO8ROW_AVX2
// Row-level check of Convert16To8Row with scale 16384. The AVX2 row function
// is used when TestCpuFlag reports AVX2, otherwise SSSE3 when reported,
// otherwise the C function again; the result must equal the
// Convert16To8Row_C reference byte for byte.
TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
  // AVX2 does multiple of 32, so round count up
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kSrcBytes = kPixels * 2;  // 2 bytes per source sample

  align_buffer_page_end(plane_y16, kSrcBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(plane_y16, kSrcBytes);
  // Keep only the low 10 bits of every source sample.
  uint16_t* samples = reinterpret_cast<uint16_t*>(plane_y16);
  for (int n = 0; n < kPixels; ++n) {
    samples[n] &= 1023;
  }
  const uint16_t* src16 = samples;

  // Different fill bytes so unwritten output is detected as a mismatch.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  // Reference output from the C row function.
  Convert16To8Row_C(src16, dst_pixels_y_c, 16384, kPixels);

  const int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  const int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
  for (int pass = benchmark_iterations_; pass > 0; --pass) {
    if (!has_avx2 && !has_ssse3) {
      Convert16To8Row_C(src16, dst_pixels_y_opt, 16384, kPixels);
    } else if (has_avx2) {
      Convert16To8Row_AVX2(src16, dst_pixels_y_opt, 16384, kPixels);
    } else {
      Convert16To8Row_SSSE3(src16, dst_pixels_y_opt, 16384, kPixels);
    }
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(plane_y16);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3823*4e366538SXin Li #endif // HAS_CONVERT16TO8ROW_AVX2
3824*4e366538SXin Li
3825*4e366538SXin Li #ifdef HAS_UYVYTOYROW_NEON
// Row-level check: UYVYToYRow_NEON must write the same bytes as the
// UYVYToYRow_C reference for a randomized source row of 2 bytes per pixel.
TEST_F(LibYUVPlanarTest, UYVYToYRow_Opt) {
  // NEON does multiple of 16, so round count up
  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  const int kSrcBytes = kPixels * 2;

  align_buffer_page_end(src_uyvy, kSrcBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_uyvy, kSrcBytes);
  // Different fill bytes so unwritten output is detected as a mismatch.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  // Reference output from the C row function.
  UYVYToYRow_C(src_uyvy, dst_pixels_y_c, kPixels);

  for (int pass = benchmark_iterations_; pass > 0; --pass) {
    UYVYToYRow_NEON(src_uyvy, dst_pixels_y_opt, kPixels);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_uyvy);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3851*4e366538SXin Li #endif // HAS_UYVYTOYROW_NEON
3852*4e366538SXin Li
3853*4e366538SXin Li #endif // ENABLE_ROW_TESTS
3854*4e366538SXin Li
// Convert8To16Plane with scale 1024: the output produced with
// disable_cpu_flags_ masked must equal, byte for byte, the output produced
// with benchmark_cpu_info_, for a randomized 8-bit source plane.
TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kDstBytes = kPixels * 2;  // 2 bytes per destination sample

  align_buffer_page_end(plane_y8, kPixels);
  align_buffer_page_end(dst_pixels_y_opt, kDstBytes);
  align_buffer_page_end(dst_pixels_y_c, kDstBytes);

  MemRandomize(plane_y8, kPixels);
  // Different fill bytes so unwritten output is detected as a mismatch.
  memset(dst_pixels_y_opt, 0, kDstBytes);
  memset(dst_pixels_y_c, 1, kDstBytes);

  uint16_t* out16_c = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* out16_opt = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  Convert8To16Plane(plane_y8, benchmark_width_, out16_c, benchmark_width_,
                    1024, benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked pass(es).
  for (int pass = benchmark_iterations_; pass > 0; --pass) {
    Convert8To16Plane(plane_y8, benchmark_width_, out16_opt, benchmark_width_,
                      1024, benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kDstBytes; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(plane_y8);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3887*4e366538SXin Li
3888*4e366538SXin Li #ifdef ENABLE_ROW_TESTS
3889*4e366538SXin Li // TODO(fbarchard): Improve test for more platforms.
3890*4e366538SXin Li #ifdef HAS_CONVERT8TO16ROW_AVX2
// Row-level check of Convert8To16Row with scale 1024. The AVX2 row function
// is used when TestCpuFlag reports AVX2, otherwise SSE2 when reported,
// otherwise the C function again; the result must equal the
// Convert8To16Row_C reference byte for byte.
TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) {
  // Sample count, rounded up to a multiple of 32.
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kDstBytes = kPixels * 2;  // 2 bytes per destination sample

  align_buffer_page_end(plane_y8, kPixels);
  align_buffer_page_end(dst_pixels_y_opt, kDstBytes);
  align_buffer_page_end(dst_pixels_y_c, kDstBytes);

  MemRandomize(plane_y8, kPixels);
  // Different fill bytes so unwritten output is detected as a mismatch.
  memset(dst_pixels_y_opt, 0, kDstBytes);
  memset(dst_pixels_y_c, 1, kDstBytes);

  uint16_t* out16_c = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* out16_opt = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  // Reference output from the C row function.
  Convert8To16Row_C(plane_y8, out16_c, 1024, kPixels);

  const int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  const int has_sse2 = TestCpuFlag(kCpuHasSSE2);
  for (int pass = benchmark_iterations_; pass > 0; --pass) {
    if (!has_avx2 && !has_sse2) {
      Convert8To16Row_C(plane_y8, out16_opt, 1024, kPixels);
    } else if (has_avx2) {
      Convert8To16Row_AVX2(plane_y8, out16_opt, 1024, kPixels);
    } else {
      Convert8To16Row_SSE2(plane_y8, out16_opt, 1024, kPixels);
    }
  }

  for (int i = 0; i < kDstBytes; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(plane_y8);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3930*4e366538SXin Li #endif // HAS_CONVERT8TO16ROW_AVX2
3931*4e366538SXin Li
TestScaleMaxSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)3932*4e366538SXin Li float TestScaleMaxSamples(int benchmark_width,
3933*4e366538SXin Li int benchmark_height,
3934*4e366538SXin Li int benchmark_iterations,
3935*4e366538SXin Li float scale,
3936*4e366538SXin Li bool opt) {
3937*4e366538SXin Li int i, j;
3938*4e366538SXin Li float max_c, max_opt = 0.f;
3939*4e366538SXin Li // NEON does multiple of 8, so round count up
3940*4e366538SXin Li const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
3941*4e366538SXin Li align_buffer_page_end(orig_y, kPixels * 4 * 3 + 48);
3942*4e366538SXin Li uint8_t* dst_c = orig_y + kPixels * 4 + 16;
3943*4e366538SXin Li uint8_t* dst_opt = orig_y + kPixels * 4 * 2 + 32;
3944*4e366538SXin Li
3945*4e366538SXin Li // Randomize works but may contain some denormals affecting performance.
3946*4e366538SXin Li // MemRandomize(orig_y, kPixels * 4);
3947*4e366538SXin Li // large values are problematic. audio is really -1 to 1.
3948*4e366538SXin Li for (i = 0; i < kPixels; ++i) {
3949*4e366538SXin Li (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
3950*4e366538SXin Li }
3951*4e366538SXin Li memset(dst_c, 0, kPixels * 4);
3952*4e366538SXin Li memset(dst_opt, 1, kPixels * 4);
3953*4e366538SXin Li
3954*4e366538SXin Li max_c = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
3955*4e366538SXin Li reinterpret_cast<float*>(dst_c), scale, kPixels);
3956*4e366538SXin Li
3957*4e366538SXin Li for (j = 0; j < benchmark_iterations; j++) {
3958*4e366538SXin Li if (opt) {
3959*4e366538SXin Li #ifdef HAS_SCALESUMSAMPLES_NEON
3960*4e366538SXin Li max_opt = ScaleMaxSamples_NEON(reinterpret_cast<float*>(orig_y),
3961*4e366538SXin Li reinterpret_cast<float*>(dst_opt), scale,
3962*4e366538SXin Li kPixels);
3963*4e366538SXin Li #else
3964*4e366538SXin Li max_opt =
3965*4e366538SXin Li ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
3966*4e366538SXin Li reinterpret_cast<float*>(dst_opt), scale, kPixels);
3967*4e366538SXin Li #endif
3968*4e366538SXin Li } else {
3969*4e366538SXin Li max_opt =
3970*4e366538SXin Li ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
3971*4e366538SXin Li reinterpret_cast<float*>(dst_opt), scale, kPixels);
3972*4e366538SXin Li }
3973*4e366538SXin Li }
3974*4e366538SXin Li
3975*4e366538SXin Li float max_diff = FAbs(max_opt - max_c);
3976*4e366538SXin Li for (i = 0; i < kPixels; ++i) {
3977*4e366538SXin Li float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
3978*4e366538SXin Li (reinterpret_cast<float*>(dst_opt)[i]));
3979*4e366538SXin Li if (abs_diff > max_diff) {
3980*4e366538SXin Li max_diff = abs_diff;
3981*4e366538SXin Li }
3982*4e366538SXin Li }
3983*4e366538SXin Li
3984*4e366538SXin Li free_aligned_buffer_page_end(orig_y);
3985*4e366538SXin Li return max_diff;
3986*4e366538SXin Li }
3987*4e366538SXin Li
// TestScaleMaxSamples with opt == false and scale 1.2: the reported maximum
// difference must be zero.
TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_C) {
  const float kScale = 1.2f;
  const bool kUseOpt = false;
  float diff = TestScaleMaxSamples(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
3993*4e366538SXin Li
// TestScaleMaxSamples with opt == true and scale 1.2: the reported maximum
// difference must be zero.
TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_Opt) {
  const float kScale = 1.2f;
  const bool kUseOpt = true;
  float diff = TestScaleMaxSamples(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
3999*4e366538SXin Li
TestScaleSumSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)4000*4e366538SXin Li float TestScaleSumSamples(int benchmark_width,
4001*4e366538SXin Li int benchmark_height,
4002*4e366538SXin Li int benchmark_iterations,
4003*4e366538SXin Li float scale,
4004*4e366538SXin Li bool opt) {
4005*4e366538SXin Li int i, j;
4006*4e366538SXin Li float sum_c, sum_opt = 0.f;
4007*4e366538SXin Li // NEON does multiple of 8, so round count up
4008*4e366538SXin Li const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
4009*4e366538SXin Li align_buffer_page_end(orig_y, kPixels * 4 * 3);
4010*4e366538SXin Li uint8_t* dst_c = orig_y + kPixels * 4;
4011*4e366538SXin Li uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
4012*4e366538SXin Li
4013*4e366538SXin Li // Randomize works but may contain some denormals affecting performance.
4014*4e366538SXin Li // MemRandomize(orig_y, kPixels * 4);
4015*4e366538SXin Li // large values are problematic. audio is really -1 to 1.
4016*4e366538SXin Li for (i = 0; i < kPixels; ++i) {
4017*4e366538SXin Li (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
4018*4e366538SXin Li }
4019*4e366538SXin Li memset(dst_c, 0, kPixels * 4);
4020*4e366538SXin Li memset(dst_opt, 1, kPixels * 4);
4021*4e366538SXin Li
4022*4e366538SXin Li sum_c = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
4023*4e366538SXin Li reinterpret_cast<float*>(dst_c), scale, kPixels);
4024*4e366538SXin Li
4025*4e366538SXin Li for (j = 0; j < benchmark_iterations; j++) {
4026*4e366538SXin Li if (opt) {
4027*4e366538SXin Li #ifdef HAS_SCALESUMSAMPLES_NEON
4028*4e366538SXin Li sum_opt = ScaleSumSamples_NEON(reinterpret_cast<float*>(orig_y),
4029*4e366538SXin Li reinterpret_cast<float*>(dst_opt), scale,
4030*4e366538SXin Li kPixels);
4031*4e366538SXin Li #else
4032*4e366538SXin Li sum_opt =
4033*4e366538SXin Li ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
4034*4e366538SXin Li reinterpret_cast<float*>(dst_opt), scale, kPixels);
4035*4e366538SXin Li #endif
4036*4e366538SXin Li } else {
4037*4e366538SXin Li sum_opt =
4038*4e366538SXin Li ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
4039*4e366538SXin Li reinterpret_cast<float*>(dst_opt), scale, kPixels);
4040*4e366538SXin Li }
4041*4e366538SXin Li }
4042*4e366538SXin Li
4043*4e366538SXin Li float mse_opt = sum_opt / kPixels * 4;
4044*4e366538SXin Li float mse_c = sum_c / kPixels * 4;
4045*4e366538SXin Li float mse_error = FAbs(mse_opt - mse_c) / mse_c;
4046*4e366538SXin Li
4047*4e366538SXin Li // If the sum of a float is more than 4 million, small adds are round down on
4048*4e366538SXin Li // float and produce different results with vectorized sum vs scalar sum.
4049*4e366538SXin Li // Ignore the difference if the sum is large.
4050*4e366538SXin Li float max_diff = 0.f;
4051*4e366538SXin Li if (mse_error > 0.0001 && sum_c < 4000000) { // allow .01% difference of mse
4052*4e366538SXin Li max_diff = mse_error;
4053*4e366538SXin Li }
4054*4e366538SXin Li
4055*4e366538SXin Li for (i = 0; i < kPixels; ++i) {
4056*4e366538SXin Li float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
4057*4e366538SXin Li (reinterpret_cast<float*>(dst_opt)[i]));
4058*4e366538SXin Li if (abs_diff > max_diff) {
4059*4e366538SXin Li max_diff = abs_diff;
4060*4e366538SXin Li }
4061*4e366538SXin Li }
4062*4e366538SXin Li
4063*4e366538SXin Li free_aligned_buffer_page_end(orig_y);
4064*4e366538SXin Li return max_diff;
4065*4e366538SXin Li }
4066*4e366538SXin Li
// Runs the ScaleSumSamples comparison with opt == false, in which case the
// helper calls ScaleSumSamples_C for both results, and requires that the
// reported difference is exactly zero.
TEST_F(LibYUVPlanarTest, TestScaleSumSamples_C) {
  const float kScale = 1.2f;
  const bool kUseOpt = false;
  float diff = TestScaleSumSamples(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
4072*4e366538SXin Li
// Runs the ScaleSumSamples comparison with opt == true: the helper then uses
// ScaleSumSamples_NEON when HAS_SCALESUMSAMPLES_NEON is defined and falls back
// to ScaleSumSamples_C otherwise. The reported difference must be zero.
TEST_F(LibYUVPlanarTest, TestScaleSumSamples_Opt) {
  const float kScale = 1.2f;
  const bool kUseOpt = true;
  float diff = TestScaleSumSamples(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
4078*4e366538SXin Li
TestScaleSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)4079*4e366538SXin Li float TestScaleSamples(int benchmark_width,
4080*4e366538SXin Li int benchmark_height,
4081*4e366538SXin Li int benchmark_iterations,
4082*4e366538SXin Li float scale,
4083*4e366538SXin Li bool opt) {
4084*4e366538SXin Li int i, j;
4085*4e366538SXin Li // NEON does multiple of 8, so round count up
4086*4e366538SXin Li const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
4087*4e366538SXin Li align_buffer_page_end(orig_y, kPixels * 4 * 3);
4088*4e366538SXin Li uint8_t* dst_c = orig_y + kPixels * 4;
4089*4e366538SXin Li uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
4090*4e366538SXin Li
4091*4e366538SXin Li // Randomize works but may contain some denormals affecting performance.
4092*4e366538SXin Li // MemRandomize(orig_y, kPixels * 4);
4093*4e366538SXin Li // large values are problematic. audio is really -1 to 1.
4094*4e366538SXin Li for (i = 0; i < kPixels; ++i) {
4095*4e366538SXin Li (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
4096*4e366538SXin Li }
4097*4e366538SXin Li memset(dst_c, 0, kPixels * 4);
4098*4e366538SXin Li memset(dst_opt, 1, kPixels * 4);
4099*4e366538SXin Li
4100*4e366538SXin Li ScaleSamples_C(reinterpret_cast<float*>(orig_y),
4101*4e366538SXin Li reinterpret_cast<float*>(dst_c), scale, kPixels);
4102*4e366538SXin Li
4103*4e366538SXin Li for (j = 0; j < benchmark_iterations; j++) {
4104*4e366538SXin Li if (opt) {
4105*4e366538SXin Li #ifdef HAS_SCALESUMSAMPLES_NEON
4106*4e366538SXin Li ScaleSamples_NEON(reinterpret_cast<float*>(orig_y),
4107*4e366538SXin Li reinterpret_cast<float*>(dst_opt), scale, kPixels);
4108*4e366538SXin Li #else
4109*4e366538SXin Li ScaleSamples_C(reinterpret_cast<float*>(orig_y),
4110*4e366538SXin Li reinterpret_cast<float*>(dst_opt), scale, kPixels);
4111*4e366538SXin Li #endif
4112*4e366538SXin Li } else {
4113*4e366538SXin Li ScaleSamples_C(reinterpret_cast<float*>(orig_y),
4114*4e366538SXin Li reinterpret_cast<float*>(dst_opt), scale, kPixels);
4115*4e366538SXin Li }
4116*4e366538SXin Li }
4117*4e366538SXin Li
4118*4e366538SXin Li float max_diff = 0.f;
4119*4e366538SXin Li for (i = 0; i < kPixels; ++i) {
4120*4e366538SXin Li float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
4121*4e366538SXin Li (reinterpret_cast<float*>(dst_opt)[i]));
4122*4e366538SXin Li if (abs_diff > max_diff) {
4123*4e366538SXin Li max_diff = abs_diff;
4124*4e366538SXin Li }
4125*4e366538SXin Li }
4126*4e366538SXin Li
4127*4e366538SXin Li free_aligned_buffer_page_end(orig_y);
4128*4e366538SXin Li return max_diff;
4129*4e366538SXin Li }
4130*4e366538SXin Li
// Runs TestScaleSamples with opt == false, so ScaleSamples_C produces both
// outputs, and requires that they are identical.
TEST_F(LibYUVPlanarTest, TestScaleSamples_C) {
  const float kScale = 1.2f;
  const bool kUseOpt = false;
  float diff = TestScaleSamples(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
4136*4e366538SXin Li
// Runs TestScaleSamples with opt == true (ScaleSamples_NEON when
// HAS_SCALESUMSAMPLES_NEON is defined, ScaleSamples_C otherwise) and requires
// the result to match the C reference exactly.
TEST_F(LibYUVPlanarTest, TestScaleSamples_Opt) {
  const float kScale = 1.2f;
  const bool kUseOpt = true;
  float diff = TestScaleSamples(benchmark_width_, benchmark_height_,
                                benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
4142*4e366538SXin Li
TestCopySamples(int benchmark_width,int benchmark_height,int benchmark_iterations,bool opt)4143*4e366538SXin Li float TestCopySamples(int benchmark_width,
4144*4e366538SXin Li int benchmark_height,
4145*4e366538SXin Li int benchmark_iterations,
4146*4e366538SXin Li bool opt) {
4147*4e366538SXin Li int i, j;
4148*4e366538SXin Li // NEON does multiple of 16 floats, so round count up
4149*4e366538SXin Li const int kPixels = (benchmark_width * benchmark_height + 15) & ~15;
4150*4e366538SXin Li align_buffer_page_end(orig_y, kPixels * 4 * 3);
4151*4e366538SXin Li uint8_t* dst_c = orig_y + kPixels * 4;
4152*4e366538SXin Li uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
4153*4e366538SXin Li
4154*4e366538SXin Li // Randomize works but may contain some denormals affecting performance.
4155*4e366538SXin Li // MemRandomize(orig_y, kPixels * 4);
4156*4e366538SXin Li // large values are problematic. audio is really -1 to 1.
4157*4e366538SXin Li for (i = 0; i < kPixels; ++i) {
4158*4e366538SXin Li (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
4159*4e366538SXin Li }
4160*4e366538SXin Li memset(dst_c, 0, kPixels * 4);
4161*4e366538SXin Li memset(dst_opt, 1, kPixels * 4);
4162*4e366538SXin Li
4163*4e366538SXin Li memcpy(reinterpret_cast<void*>(dst_c), reinterpret_cast<void*>(orig_y),
4164*4e366538SXin Li kPixels * 4);
4165*4e366538SXin Li
4166*4e366538SXin Li for (j = 0; j < benchmark_iterations; j++) {
4167*4e366538SXin Li if (opt) {
4168*4e366538SXin Li #ifdef HAS_COPYROW_NEON
4169*4e366538SXin Li CopyRow_NEON(orig_y, dst_opt, kPixels * 4);
4170*4e366538SXin Li #else
4171*4e366538SXin Li CopyRow_C(orig_y, dst_opt, kPixels * 4);
4172*4e366538SXin Li #endif
4173*4e366538SXin Li } else {
4174*4e366538SXin Li CopyRow_C(orig_y, dst_opt, kPixels * 4);
4175*4e366538SXin Li }
4176*4e366538SXin Li }
4177*4e366538SXin Li
4178*4e366538SXin Li float max_diff = 0.f;
4179*4e366538SXin Li for (i = 0; i < kPixels; ++i) {
4180*4e366538SXin Li float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
4181*4e366538SXin Li (reinterpret_cast<float*>(dst_opt)[i]));
4182*4e366538SXin Li if (abs_diff > max_diff) {
4183*4e366538SXin Li max_diff = abs_diff;
4184*4e366538SXin Li }
4185*4e366538SXin Li }
4186*4e366538SXin Li
4187*4e366538SXin Li free_aligned_buffer_page_end(orig_y);
4188*4e366538SXin Li return max_diff;
4189*4e366538SXin Li }
4190*4e366538SXin Li
// Runs TestCopySamples with opt == false, so CopyRow_C is compared against the
// memcpy reference; the copies must be identical.
TEST_F(LibYUVPlanarTest, TestCopySamples_C) {
  const bool kUseOpt = false;
  float diff = TestCopySamples(benchmark_width_, benchmark_height_,
                               benchmark_iterations_, kUseOpt);
  EXPECT_EQ(0, diff);
}
4196*4e366538SXin Li
// Runs TestCopySamples with opt == true (CopyRow_NEON when HAS_COPYROW_NEON is
// defined, CopyRow_C otherwise); the copy must match the memcpy reference.
TEST_F(LibYUVPlanarTest, TestCopySamples_Opt) {
  const bool kUseOpt = true;
  float diff = TestCopySamples(benchmark_width_, benchmark_height_,
                               benchmark_iterations_, kUseOpt);
  EXPECT_EQ(0, diff);
}
4202*4e366538SXin Li
4203*4e366538SXin Li extern "C" void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width);
4204*4e366538SXin Li extern "C" void GaussRow_C(const uint32_t* src, uint16_t* dst, int width);
4205*4e366538SXin Li
TEST_F(LibYUVPlanarTest,TestGaussRow_Opt)4206*4e366538SXin Li TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
4207*4e366538SXin Li SIMD_ALIGNED(uint32_t orig_pixels[1280 + 8]);
4208*4e366538SXin Li SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
4209*4e366538SXin Li SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
4210*4e366538SXin Li
4211*4e366538SXin Li memset(orig_pixels, 0, sizeof(orig_pixels));
4212*4e366538SXin Li memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
4213*4e366538SXin Li memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
4214*4e366538SXin Li
4215*4e366538SXin Li for (int i = 0; i < 1280 + 8; ++i) {
4216*4e366538SXin Li orig_pixels[i] = i * 256;
4217*4e366538SXin Li }
4218*4e366538SXin Li GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 1280);
4219*4e366538SXin Li for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
4220*4e366538SXin Li #if !defined(LIBYUV_DISABLE_NEON) && \
4221*4e366538SXin Li (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
4222*4e366538SXin Li int has_neon = TestCpuFlag(kCpuHasNEON);
4223*4e366538SXin Li if (has_neon) {
4224*4e366538SXin Li GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 1280);
4225*4e366538SXin Li } else {
4226*4e366538SXin Li GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 1280);
4227*4e366538SXin Li }
4228*4e366538SXin Li #else
4229*4e366538SXin Li GaussRow_C(&orig_pixels[0], &dst_pixels_opt[0], 1280);
4230*4e366538SXin Li #endif
4231*4e366538SXin Li }
4232*4e366538SXin Li
4233*4e366538SXin Li for (int i = 0; i < 1280; ++i) {
4234*4e366538SXin Li EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
4235*4e366538SXin Li }
4236*4e366538SXin Li
4237*4e366538SXin Li EXPECT_EQ(dst_pixels_c[0],
4238*4e366538SXin Li static_cast<uint16_t>(0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1));
4239*4e366538SXin Li EXPECT_EQ(dst_pixels_c[639], static_cast<uint16_t>(10256));
4240*4e366538SXin Li }
4241*4e366538SXin Li
4242*4e366538SXin Li extern "C" void GaussCol_NEON(const uint16_t* src0,
4243*4e366538SXin Li const uint16_t* src1,
4244*4e366538SXin Li const uint16_t* src2,
4245*4e366538SXin Li const uint16_t* src3,
4246*4e366538SXin Li const uint16_t* src4,
4247*4e366538SXin Li uint32_t* dst,
4248*4e366538SXin Li int width);
4249*4e366538SXin Li
4250*4e366538SXin Li extern "C" void GaussCol_C(const uint16_t* src0,
4251*4e366538SXin Li const uint16_t* src1,
4252*4e366538SXin Li const uint16_t* src2,
4253*4e366538SXin Li const uint16_t* src3,
4254*4e366538SXin Li const uint16_t* src4,
4255*4e366538SXin Li uint32_t* dst,
4256*4e366538SXin Li int width);
4257*4e366538SXin Li
TEST_F(LibYUVPlanarTest,TestGaussCol_Opt)4258*4e366538SXin Li TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
4259*4e366538SXin Li SIMD_ALIGNED(uint16_t orig_pixels[1280 * 5]);
4260*4e366538SXin Li SIMD_ALIGNED(uint32_t dst_pixels_c[1280]);
4261*4e366538SXin Li SIMD_ALIGNED(uint32_t dst_pixels_opt[1280]);
4262*4e366538SXin Li
4263*4e366538SXin Li memset(orig_pixels, 0, sizeof(orig_pixels));
4264*4e366538SXin Li memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
4265*4e366538SXin Li memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
4266*4e366538SXin Li
4267*4e366538SXin Li for (int i = 0; i < 1280 * 5; ++i) {
4268*4e366538SXin Li orig_pixels[i] = static_cast<float>(i);
4269*4e366538SXin Li }
4270*4e366538SXin Li GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
4271*4e366538SXin Li &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], &dst_pixels_c[0],
4272*4e366538SXin Li 1280);
4273*4e366538SXin Li for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
4274*4e366538SXin Li #if !defined(LIBYUV_DISABLE_NEON) && \
4275*4e366538SXin Li (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
4276*4e366538SXin Li int has_neon = TestCpuFlag(kCpuHasNEON);
4277*4e366538SXin Li if (has_neon) {
4278*4e366538SXin Li GaussCol_NEON(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
4279*4e366538SXin Li &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
4280*4e366538SXin Li &dst_pixels_opt[0], 1280);
4281*4e366538SXin Li } else {
4282*4e366538SXin Li GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
4283*4e366538SXin Li &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
4284*4e366538SXin Li &dst_pixels_opt[0], 1280);
4285*4e366538SXin Li }
4286*4e366538SXin Li #else
4287*4e366538SXin Li GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
4288*4e366538SXin Li &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
4289*4e366538SXin Li &dst_pixels_opt[0], 1280);
4290*4e366538SXin Li #endif
4291*4e366538SXin Li }
4292*4e366538SXin Li
4293*4e366538SXin Li for (int i = 0; i < 1280; ++i) {
4294*4e366538SXin Li EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
4295*4e366538SXin Li }
4296*4e366538SXin Li }
4297*4e366538SXin Li
// Compares GaussRow_F32_C with GaussRow_F32_NEON (on aarch64 NEON builds when
// the CPU reports NEON; otherwise the C function is simply run again) on a
// 1280-wide float ramp. The outputs must be bit-for-bit equal.
TEST_F(LibYUVPlanarTest, TestGaussRow_F32_Opt) {
  const int kWidth = 1280;
  const int kSrcCount = kWidth + 4;  // the source row has 4 extra elements
  SIMD_ALIGNED(float orig_pixels[kSrcCount]);
  SIMD_ALIGNED(float dst_pixels_c[kWidth]);
  SIMD_ALIGNED(float dst_pixels_opt[kWidth]);

  memset(orig_pixels, 0, sizeof(orig_pixels));
  // Distinct fill bytes so an unwritten output cannot match by accident.
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  // Linear ramp: element i holds the value i.
  for (int i = 0; i < kSrcCount; ++i) {
    orig_pixels[i] = static_cast<float>(i);
  }

  GaussRow_F32_C(orig_pixels, dst_pixels_c, kWidth);

  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    if (TestCpuFlag(kCpuHasNEON)) {
      GaussRow_F32_NEON(orig_pixels, dst_pixels_opt, kWidth);
      continue;
    }
#endif
    GaussRow_F32_C(orig_pixels, dst_pixels_opt, kWidth);
  }

  for (int i = 0; i < kWidth; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }
}
4328*4e366538SXin Li
// Compares GaussCol_F32_C with GaussCol_F32_NEON (on aarch64 NEON builds when
// the CPU reports NEON; otherwise the C function is simply run again) using
// five consecutive 1280-wide float rows filled with a linear ramp.
TEST_F(LibYUVPlanarTest, TestGaussCol_F32_Opt) {
  const int kWidth = 1280;
  const int kRows = 5;
  const int kSrcBytes = kWidth * kRows * 4;  // 4 bytes per float

  SIMD_ALIGNED(float dst_pixels_c[kWidth]);
  SIMD_ALIGNED(float dst_pixels_opt[kWidth]);
  align_buffer_page_end(orig_pixels_buf, kSrcBytes);  // 5 rows
  float* orig_pixels = reinterpret_cast<float*>(orig_pixels_buf);

  memset(orig_pixels, 0, kSrcBytes);
  // Distinct fill bytes so an unwritten output cannot match by accident.
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  for (int i = 0; i < kWidth * kRows; ++i) {
    orig_pixels[i] = static_cast<float>(i);
  }

  // The five input rows are stored back to back in orig_pixels.
  const float* const row0 = orig_pixels;
  const float* const row1 = row0 + kWidth;
  const float* const row2 = row1 + kWidth;
  const float* const row3 = row2 + kWidth;
  const float* const row4 = row3 + kWidth;

  GaussCol_F32_C(row0, row1, row2, row3, row4, &dst_pixels_c[0], kWidth);

  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    if (TestCpuFlag(kCpuHasNEON)) {
      GaussCol_F32_NEON(row0, row1, row2, row3, row4, &dst_pixels_opt[0],
                        kWidth);
      continue;
    }
#endif
    GaussCol_F32_C(row0, row1, row2, row3, row4, &dst_pixels_opt[0], kWidth);
  }

  for (int i = 0; i < kWidth; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }
  free_aligned_buffer_page_end(orig_pixels_buf);
}
4369*4e366538SXin Li
// Runs the selected SwapUVRow implementation over random two-byte pairs and
// checks that the two bytes of every pair were exchanged.
TEST_F(LibYUVPlanarTest, SwapUVRow) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kBytes = kPixels * 2;  // two bytes per pair

  align_buffer_page_end(src_pixels_vu, kBytes);
  align_buffer_page_end(dst_pixels_uv, kBytes);
  MemRandomize(src_pixels_vu, kBytes);
  memset(dst_pixels_uv, 1, kBytes);

  // Start from the C implementation; on NEON builds switch to the NEON row
  // function, using the "Any" variant unless kPixels is a multiple of 16.
  void (*swap_uv_fn)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
      SwapUVRow_C;
#if defined(HAS_SWAPUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    swap_uv_fn = IS_ALIGNED(kPixels, 16) ? SwapUVRow_NEON : SwapUVRow_Any_NEON;
  }
#endif

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    swap_uv_fn(src_pixels_vu, dst_pixels_uv, kPixels);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);
    EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]);
  }

  free_aligned_buffer_page_end(src_pixels_vu);
  free_aligned_buffer_page_end(dst_pixels_uv);
}
4400*4e366538SXin Li #endif // ENABLE_ROW_TESTS
4401*4e366538SXin Li
// Runs GaussPlane_F32 once after MaskCpuFlags(disable_cpu_flags_) and
// benchmark_iterations_ times after MaskCpuFlags(benchmark_cpu_info_), then
// requires every output float of the two runs to agree within 1.0.
// NOTE(review): the first run is presumably the C reference and the second the
// optimized path -- confirm against MaskCpuFlags.
TEST_F(LibYUVPlanarTest, TestGaussPlane_F32) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSize = kPixels * 4;  // 4 bytes per float
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  float* const src_f32 = reinterpret_cast<float*>(orig_pixels);
  float* const out_c = reinterpret_cast<float*>(dst_pixels_c);
  float* const out_opt = reinterpret_cast<float*>(dst_pixels_opt);

  // Sawtooth input: the pattern repeats every 1024 samples.
  for (int n = 0; n < kPixels; ++n) {
    src_f32[n] = (n & 1023) * 3.14f;
  }
  // Distinct fill bytes so an unwritten output cannot match by accident.
  memset(dst_pixels_opt, 1, kSize);
  memset(dst_pixels_c, 2, kSize);

  MaskCpuFlags(disable_cpu_flags_);
  GaussPlane_F32(src_f32, benchmark_width_, out_c, benchmark_width_,
                 benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    GaussPlane_F32(src_f32, benchmark_width_, out_opt, benchmark_width_,
                   benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_NEAR(((float*)(dst_pixels_c))[i], ((float*)(dst_pixels_opt))[i], 1.f)
        << i;
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
4434*4e366538SXin Li
// Runs HalfMergeUVPlane once after MaskCpuFlags(disable_cpu_flags_) and
// benchmark_iterations_ times after MaskCpuFlags(benchmark_cpu_info_), then
// requires the two interleaved outputs to be byte-identical.
// The output is (width + 1) / 2 pairs wide and (height + 1) / 2 rows high.
TEST_F(LibYUVPlanarTest, HalfMergeUVPlane_Opt) {
  const int kSrcSize = benchmark_width_ * benchmark_height_;
  int dst_width = (benchmark_width_ + 1) / 2;
  int dst_height = (benchmark_height_ + 1) / 2;
  const int kDstStride = dst_width * 2;  // two bytes per output pair
  const int kDstSize = kDstStride * dst_height;

  align_buffer_page_end(src_pixels_u, kSrcSize);
  align_buffer_page_end(src_pixels_v, kSrcSize);
  align_buffer_page_end(dst_pixels_uv_opt, kDstSize);
  align_buffer_page_end(dst_pixels_uv_c, kDstSize);

  MemRandomize(src_pixels_u, kSrcSize);
  MemRandomize(src_pixels_v, kSrcSize);
  // The outputs start with unrelated random contents, so the comparison below
  // can only pass if both runs write every output byte.
  MemRandomize(dst_pixels_uv_opt, kDstSize);
  MemRandomize(dst_pixels_uv_c, kDstSize);

  MaskCpuFlags(disable_cpu_flags_);
  HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
                   benchmark_width_, dst_pixels_uv_c, kDstStride,
                   benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int i = 0; i < benchmark_iterations_; ++i) {
    HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
                     benchmark_width_, dst_pixels_uv_opt, kDstStride,
                     benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kDstSize; ++i) {
    EXPECT_EQ(dst_pixels_uv_c[i], dst_pixels_uv_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_uv_opt);
  free_aligned_buffer_page_end(dst_pixels_uv_c);
}
4475*4e366538SXin Li
// Copies a random NV12 image (a Y plane plus an interleaved UV plane at half
// resolution, rounded up) with NV12Copy and checks that both destination
// planes equal their sources byte for byte.
TEST_F(LibYUVPlanarTest, NV12Copy) {
  const int halfwidth = (benchmark_width_ + 1) >> 1;
  const int halfheight = (benchmark_height_ + 1) >> 1;
  const int kYSize = benchmark_width_ * benchmark_height_;
  const int kUVStride = halfwidth * 2;  // two bytes per chroma pair
  const int kUVSize = kUVStride * halfheight;

  align_buffer_page_end(src_y, kYSize);
  align_buffer_page_end(src_uv, kUVSize);
  align_buffer_page_end(dst_y, kYSize);
  align_buffer_page_end(dst_uv, kUVSize);

  // Sources and destinations are randomized independently, so they only
  // match afterwards if the copy really happened.
  MemRandomize(src_y, kYSize);
  MemRandomize(src_uv, kUVSize);
  MemRandomize(dst_y, kYSize);
  MemRandomize(dst_uv, kUVSize);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    NV12Copy(src_y, benchmark_width_, src_uv, kUVStride, dst_y,
             benchmark_width_, dst_uv, kUVStride, benchmark_width_,
             benchmark_height_);
  }

  for (int i = 0; i < kYSize; ++i) {
    EXPECT_EQ(src_y[i], dst_y[i]);
  }
  for (int i = 0; i < kUVSize; ++i) {
    EXPECT_EQ(src_uv[i], dst_uv[i]);
  }

  free_aligned_buffer_page_end(src_y);
  free_aligned_buffer_page_end(src_uv);
  free_aligned_buffer_page_end(dst_y);
  free_aligned_buffer_page_end(dst_uv);
}
4507*4e366538SXin Li
// Copies a random NV21 image (a Y plane plus an interleaved VU plane at half
// resolution, rounded up) with NV21Copy and checks that both destination
// planes equal their sources byte for byte.
TEST_F(LibYUVPlanarTest, NV21Copy) {
  const int halfwidth = (benchmark_width_ + 1) >> 1;
  const int halfheight = (benchmark_height_ + 1) >> 1;
  const int kYSize = benchmark_width_ * benchmark_height_;
  const int kVUStride = halfwidth * 2;  // two bytes per chroma pair
  const int kVUSize = kVUStride * halfheight;

  align_buffer_page_end(src_y, kYSize);
  align_buffer_page_end(src_vu, kVUSize);
  align_buffer_page_end(dst_y, kYSize);
  align_buffer_page_end(dst_vu, kVUSize);

  // Sources and destinations are randomized independently, so they only
  // match afterwards if the copy really happened.
  MemRandomize(src_y, kYSize);
  MemRandomize(src_vu, kVUSize);
  MemRandomize(dst_y, kYSize);
  MemRandomize(dst_vu, kVUSize);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    NV21Copy(src_y, benchmark_width_, src_vu, kVUStride, dst_y,
             benchmark_width_, dst_vu, kVUStride, benchmark_width_,
             benchmark_height_);
  }

  for (int i = 0; i < kYSize; ++i) {
    EXPECT_EQ(src_y[i], dst_y[i]);
  }
  for (int i = 0; i < kVUSize; ++i) {
    EXPECT_EQ(src_vu[i], dst_vu[i]);
  }

  free_aligned_buffer_page_end(src_y);
  free_aligned_buffer_page_end(src_vu);
  free_aligned_buffer_page_end(dst_y);
  free_aligned_buffer_page_end(dst_vu);
}
4539*4e366538SXin Li
4540*4e366538SXin Li #if defined(ENABLE_ROW_TESTS) && !defined(LIBYUV_DISABLE_NEON) && \
4541*4e366538SXin Li defined(__aarch64__)
4542*4e366538SXin Li
// Round-trip check for ConvertFP16ToFP32Row_NEON: float input is converted to
// 16-bit values, expanded back to float by the function under test, and then
// converted to 16-bit again. The two 16-bit buffers must be identical.
TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32) {
  const int y_plane_size = benchmark_width_ * benchmark_height_;
  const int kHalfBytes = y_plane_size * 2;   // 2 bytes per 16-bit value
  const int kFloatBytes = y_plane_size * 4;  // 4 bytes per float

  align_buffer_page_end(orig_f, kFloatBytes);
  align_buffer_page_end(orig_y, kHalfBytes);
  align_buffer_page_end(dst_opt, kFloatBytes);
  align_buffer_page_end(rec_opt, kHalfBytes);

  float* const src_f32 = reinterpret_cast<float*>(orig_f);
  uint16_t* const src_f16 = reinterpret_cast<uint16_t*>(orig_y);
  float* const expanded_f32 = reinterpret_cast<float*>(dst_opt);
  uint16_t* const recovered_f16 = reinterpret_cast<uint16_t*>(rec_opt);

  // Input values repeat every 10000 samples and stay below 31400.
  for (int n = 0; n < y_plane_size; ++n) {
    src_f32[n] = static_cast<float>(n % 10000) * 3.14f;
  }
  // Distinct fill bytes so an unwritten buffer cannot match by accident.
  memset(orig_y, 1, kHalfBytes);
  memset(dst_opt, 2, kFloatBytes);
  memset(rec_opt, 3, kHalfBytes);

  ConvertFP32ToFP16Row_NEON(src_f32, src_f16, y_plane_size);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ConvertFP16ToFP32Row_NEON(src_f16, expanded_f32, y_plane_size);
  }

  ConvertFP32ToFP16Row_NEON(expanded_f32, recovered_f16, y_plane_size);

  for (int i = 0; i < y_plane_size; ++i) {
    EXPECT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]);
  }

  free_aligned_buffer_page_end(orig_f);
  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_opt);
  free_aligned_buffer_page_end(rec_opt);
}
4579*4e366538SXin Li
// Round-trip check for ConvertFP16ToFP32Column_NEON, called with a second
// argument of 1: float input is converted to 16-bit values, expanded back to
// float by the function under test, and then converted to 16-bit again. The
// two 16-bit buffers must be identical.
TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32Column) {
  const int y_plane_size = benchmark_width_ * benchmark_height_;
  const int kHalfBytes = y_plane_size * 2;   // 2 bytes per 16-bit value
  const int kFloatBytes = y_plane_size * 4;  // 4 bytes per float

  align_buffer_page_end(orig_f, kFloatBytes);
  align_buffer_page_end(orig_y, kHalfBytes);
  align_buffer_page_end(dst_opt, kFloatBytes);
  align_buffer_page_end(rec_opt, kHalfBytes);

  float* const src_f32 = reinterpret_cast<float*>(orig_f);
  uint16_t* const src_f16 = reinterpret_cast<uint16_t*>(orig_y);
  float* const expanded_f32 = reinterpret_cast<float*>(dst_opt);
  uint16_t* const recovered_f16 = reinterpret_cast<uint16_t*>(rec_opt);

  // Input values repeat every 10000 samples and stay below 31400.
  for (int n = 0; n < y_plane_size; ++n) {
    src_f32[n] = static_cast<float>(n % 10000) * 3.14f;
  }
  // Distinct fill bytes so an unwritten buffer cannot match by accident.
  memset(orig_y, 1, kHalfBytes);
  memset(dst_opt, 2, kFloatBytes);
  memset(rec_opt, 3, kHalfBytes);

  ConvertFP32ToFP16Row_NEON(src_f32, src_f16, y_plane_size);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ConvertFP16ToFP32Column_NEON(src_f16, 1, expanded_f32, y_plane_size);
  }

  ConvertFP32ToFP16Row_NEON(expanded_f32, recovered_f16, y_plane_size);

  for (int i = 0; i < y_plane_size; ++i) {
    EXPECT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]);
  }

  free_aligned_buffer_page_end(orig_f);
  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_opt);
  free_aligned_buffer_page_end(rec_opt);
}
4616*4e366538SXin Li
4617*4e366538SXin Li #endif // defined(ENABLE_ROW_TESTS) && defined(__aarch64__)
4618*4e366538SXin Li
4619*4e366538SXin Li } // namespace libyuv
4620