xref: /aosp_15_r20/external/libyuv/unit_test/planar_test.cc (revision 4e366538070a3a6c5c163c31b791eab742e1657a)
1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 #include <stdlib.h>
13 #include <time.h>
14 
15 #include "../unit_test/unit_test.h"
16 #include "libyuv/compare.h"
17 #include "libyuv/convert.h"
18 #include "libyuv/convert_argb.h"
19 #include "libyuv/convert_from.h"
20 #include "libyuv/convert_from_argb.h"
21 #include "libyuv/cpu_id.h"
22 #include "libyuv/planar_functions.h"
23 #include "libyuv/rotate.h"
24 #include "libyuv/scale.h"
25 
26 #ifdef ENABLE_ROW_TESTS
27 // row.h defines SIMD_ALIGNED, overriding unit_test.h
28 // TODO(fbarchard): Remove row.h from unittests.  Test public functions.
29 #include "libyuv/row.h" /* For ScaleSumSamples_Neon */
30 #endif
31 
32 #if defined(LIBYUV_BIT_EXACT)
33 #define EXPECTED_UNATTENUATE_DIFF 0
34 #else
35 #define EXPECTED_UNATTENUATE_DIFF 2
36 #endif
37 
38 namespace libyuv {
39 
TEST_F(LibYUVPlanarTest,TestAttenuate)40 TEST_F(LibYUVPlanarTest, TestAttenuate) {
41   const int kSize = 1280 * 4;
42   align_buffer_page_end(orig_pixels, kSize);
43   align_buffer_page_end(atten_pixels, kSize);
44   align_buffer_page_end(unatten_pixels, kSize);
45   align_buffer_page_end(atten2_pixels, kSize);
46 
47   // Test unattenuation clamps
48   orig_pixels[0 * 4 + 0] = 200u;
49   orig_pixels[0 * 4 + 1] = 129u;
50   orig_pixels[0 * 4 + 2] = 127u;
51   orig_pixels[0 * 4 + 3] = 128u;
52   // Test unattenuation transparent and opaque are unaffected
53   orig_pixels[1 * 4 + 0] = 16u;
54   orig_pixels[1 * 4 + 1] = 64u;
55   orig_pixels[1 * 4 + 2] = 192u;
56   orig_pixels[1 * 4 + 3] = 0u;
57   orig_pixels[2 * 4 + 0] = 16u;
58   orig_pixels[2 * 4 + 1] = 64u;
59   orig_pixels[2 * 4 + 2] = 192u;
60   orig_pixels[2 * 4 + 3] = 128u;
61   orig_pixels[3 * 4 + 0] = 16u;
62   orig_pixels[3 * 4 + 1] = 64u;
63   orig_pixels[3 * 4 + 2] = 192u;
64   orig_pixels[3 * 4 + 3] = 255u;
65   orig_pixels[4 * 4 + 0] = 255u;
66   orig_pixels[4 * 4 + 1] = 255u;
67   orig_pixels[4 * 4 + 2] = 255u;
68   orig_pixels[4 * 4 + 3] = 255u;
69 
70   ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 5, 1);
71   EXPECT_EQ(255u, unatten_pixels[0 * 4 + 0]);
72   EXPECT_EQ(255u, unatten_pixels[0 * 4 + 1]);
73   EXPECT_EQ(254u, unatten_pixels[0 * 4 + 2]);
74   EXPECT_EQ(128u, unatten_pixels[0 * 4 + 3]);
75   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 0]);
76   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 1]);
77   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 2]);
78   EXPECT_EQ(0u, unatten_pixels[1 * 4 + 3]);
79   EXPECT_EQ(32u, unatten_pixels[2 * 4 + 0]);
80   EXPECT_EQ(128u, unatten_pixels[2 * 4 + 1]);
81   EXPECT_EQ(255u, unatten_pixels[2 * 4 + 2]);
82   EXPECT_EQ(128u, unatten_pixels[2 * 4 + 3]);
83   EXPECT_EQ(16u, unatten_pixels[3 * 4 + 0]);
84   EXPECT_EQ(64u, unatten_pixels[3 * 4 + 1]);
85   EXPECT_EQ(192u, unatten_pixels[3 * 4 + 2]);
86   EXPECT_EQ(255u, unatten_pixels[3 * 4 + 3]);
87   EXPECT_EQ(255u, unatten_pixels[4 * 4 + 0]);
88   EXPECT_EQ(255u, unatten_pixels[4 * 4 + 1]);
89   EXPECT_EQ(255u, unatten_pixels[4 * 4 + 2]);
90   EXPECT_EQ(255u, unatten_pixels[4 * 4 + 3]);
91 
92   ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 5, 1);
93   EXPECT_EQ(100u, atten_pixels[0 * 4 + 0]);
94   EXPECT_EQ(65u, atten_pixels[0 * 4 + 1]);
95   EXPECT_EQ(64u, atten_pixels[0 * 4 + 2]);
96   EXPECT_EQ(128u, atten_pixels[0 * 4 + 3]);
97   EXPECT_EQ(0u, atten_pixels[1 * 4 + 0]);
98   EXPECT_EQ(0u, atten_pixels[1 * 4 + 1]);
99   EXPECT_EQ(0u, atten_pixels[1 * 4 + 2]);
100   EXPECT_EQ(0u, atten_pixels[1 * 4 + 3]);
101   EXPECT_EQ(8u, atten_pixels[2 * 4 + 0]);
102   EXPECT_EQ(32u, atten_pixels[2 * 4 + 1]);
103   EXPECT_EQ(96u, atten_pixels[2 * 4 + 2]);
104   EXPECT_EQ(128u, atten_pixels[2 * 4 + 3]);
105   EXPECT_EQ(16u, atten_pixels[3 * 4 + 0]);
106   EXPECT_EQ(64u, atten_pixels[3 * 4 + 1]);
107   EXPECT_EQ(192u, atten_pixels[3 * 4 + 2]);
108   EXPECT_EQ(255u, atten_pixels[3 * 4 + 3]);
109   EXPECT_EQ(255u, atten_pixels[4 * 4 + 0]);
110   EXPECT_EQ(255u, atten_pixels[4 * 4 + 1]);
111   EXPECT_EQ(255u, atten_pixels[4 * 4 + 2]);
112   EXPECT_EQ(255u, atten_pixels[4 * 4 + 3]);
113 
114   // test 255
115   for (int i = 0; i < 256; ++i) {
116     orig_pixels[i * 4 + 0] = i;
117     orig_pixels[i * 4 + 1] = 0;
118     orig_pixels[i * 4 + 2] = 0;
119     orig_pixels[i * 4 + 3] = 255;
120   }
121   ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 256, 1);
122   for (int i = 0; i < 256; ++i) {
123     EXPECT_EQ(orig_pixels[i * 4 + 0], atten_pixels[i * 4 + 0]);
124     EXPECT_EQ(0, atten_pixels[i * 4 + 1]);
125     EXPECT_EQ(0, atten_pixels[i * 4 + 2]);
126     EXPECT_EQ(255, atten_pixels[i * 4 + 3]);
127   }
128 
129   for (int i = 0; i < 1280; ++i) {
130     orig_pixels[i * 4 + 0] = i;
131     orig_pixels[i * 4 + 1] = i / 2;
132     orig_pixels[i * 4 + 2] = i / 3;
133     orig_pixels[i * 4 + 3] = i;
134   }
135   ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 1280, 1);
136   ARGBUnattenuate(atten_pixels, 0, unatten_pixels, 0, 1280, 1);
137   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
138     ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, 1280, 1);
139   }
140   for (int i = 0; i < 1280; ++i) {
141     EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 1);
142     EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 1);
143     EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 1);
144     EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 1);
145   }
146   // Make sure transparent, 50% and opaque are fully accurate.
147   EXPECT_EQ(0, atten_pixels[0 * 4 + 0]);
148   EXPECT_EQ(0, atten_pixels[0 * 4 + 1]);
149   EXPECT_EQ(0, atten_pixels[0 * 4 + 2]);
150   EXPECT_EQ(0, atten_pixels[0 * 4 + 3]);
151   EXPECT_EQ(64, atten_pixels[128 * 4 + 0]);
152   EXPECT_EQ(32, atten_pixels[128 * 4 + 1]);
153   EXPECT_EQ(21, atten_pixels[128 * 4 + 2]);
154   EXPECT_EQ(128, atten_pixels[128 * 4 + 3]);
155   EXPECT_EQ(255, atten_pixels[255 * 4 + 0]);
156   EXPECT_EQ(127, atten_pixels[255 * 4 + 1]);
157   EXPECT_EQ(85, atten_pixels[255 * 4 + 2]);
158   EXPECT_EQ(255, atten_pixels[255 * 4 + 3]);
159 
160   free_aligned_buffer_page_end(atten2_pixels);
161   free_aligned_buffer_page_end(unatten_pixels);
162   free_aligned_buffer_page_end(atten_pixels);
163   free_aligned_buffer_page_end(orig_pixels);
164 }
165 
TestAttenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)166 static int TestAttenuateI(int width,
167                           int height,
168                           int benchmark_iterations,
169                           int disable_cpu_flags,
170                           int benchmark_cpu_info,
171                           int invert,
172                           int off) {
173   if (width < 1) {
174     width = 1;
175   }
176   const int kBpp = 4;
177   const int kStride = width * kBpp;
178   align_buffer_page_end(src_argb, kStride * height + off);
179   align_buffer_page_end(dst_argb_c, kStride * height);
180   align_buffer_page_end(dst_argb_opt, kStride * height);
181   for (int i = 0; i < kStride * height; ++i) {
182     src_argb[i + off] = (fastrand() & 0xff);
183   }
184   memset(dst_argb_c, 0, kStride * height);
185   memset(dst_argb_opt, 0, kStride * height);
186 
187   MaskCpuFlags(disable_cpu_flags);
188   ARGBAttenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
189                 invert * height);
190   MaskCpuFlags(benchmark_cpu_info);
191   for (int i = 0; i < benchmark_iterations; ++i) {
192     ARGBAttenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
193                   invert * height);
194   }
195   int max_diff = 0;
196   for (int i = 0; i < kStride * height; ++i) {
197     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
198                        static_cast<int>(dst_argb_opt[i]));
199     if (abs_diff > max_diff) {
200       max_diff = abs_diff;
201     }
202   }
203   free_aligned_buffer_page_end(src_argb);
204   free_aligned_buffer_page_end(dst_argb_c);
205   free_aligned_buffer_page_end(dst_argb_opt);
206   return max_diff;
207 }
208 
// ARGBAttenuate at a width one pixel larger than the benchmark width: both
// runs inside TestAttenuateI must produce identical bytes.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int worst = TestAttenuateI(kWidth, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, kInvert, kOffset);

  EXPECT_EQ(worst, 0);
}
216 
// ARGBAttenuate with the source pointer shifted by one byte: both runs inside
// TestAttenuateI must produce identical bytes.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int worst = TestAttenuateI(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(worst, 0);
}
223 
// ARGBAttenuate called with a negated height: both runs inside TestAttenuateI
// must produce identical bytes.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int worst = TestAttenuateI(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(worst, 0);
}
230 
// ARGBAttenuate at the plain benchmark size with no offset or inversion: both
// runs inside TestAttenuateI must produce identical bytes.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int worst = TestAttenuateI(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, disable_cpu_flags_,
                                   benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(worst, 0);
}
237 
TestUnattenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)238 static int TestUnattenuateI(int width,
239                             int height,
240                             int benchmark_iterations,
241                             int disable_cpu_flags,
242                             int benchmark_cpu_info,
243                             int invert,
244                             int off) {
245   if (width < 1) {
246     width = 1;
247   }
248   const int kBpp = 4;
249   const int kStride = width * kBpp;
250   align_buffer_page_end(src_argb, kStride * height + off);
251   align_buffer_page_end(dst_argb_c, kStride * height);
252   align_buffer_page_end(dst_argb_opt, kStride * height);
253   for (int i = 0; i < kStride * height; ++i) {
254     src_argb[i + off] = (fastrand() & 0xff);
255   }
256   ARGBAttenuate(src_argb + off, kStride, src_argb + off, kStride, width,
257                 height);
258   memset(dst_argb_c, 0, kStride * height);
259   memset(dst_argb_opt, 0, kStride * height);
260 
261   MaskCpuFlags(disable_cpu_flags);
262   ARGBUnattenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
263                   invert * height);
264   MaskCpuFlags(benchmark_cpu_info);
265   for (int i = 0; i < benchmark_iterations; ++i) {
266     ARGBUnattenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
267                     invert * height);
268   }
269   int max_diff = 0;
270   for (int i = 0; i < kStride * height; ++i) {
271     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
272                        static_cast<int>(dst_argb_opt[i]));
273     if (abs_diff > max_diff) {
274       max_diff = abs_diff;
275     }
276   }
277   free_aligned_buffer_page_end(src_argb);
278   free_aligned_buffer_page_end(dst_argb_c);
279   free_aligned_buffer_page_end(dst_argb_opt);
280   return max_diff;
281 }
282 
// ARGBUnattenuate at a width one pixel larger than the benchmark width: the
// two runs inside TestUnattenuateI may differ by at most
// EXPECTED_UNATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int worst = TestUnattenuateI(kWidth, benchmark_height_,
                                     benchmark_iterations_, disable_cpu_flags_,
                                     benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst, EXPECTED_UNATTENUATE_DIFF);
}
289 
// ARGBUnattenuate with the source pointer shifted by one byte: the two runs
// inside TestUnattenuateI may differ by at most EXPECTED_UNATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int worst = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                     benchmark_iterations_, disable_cpu_flags_,
                                     benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst, EXPECTED_UNATTENUATE_DIFF);
}
296 
// ARGBUnattenuate called with a negated height: the two runs inside
// TestUnattenuateI may differ by at most EXPECTED_UNATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int worst = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                     benchmark_iterations_, disable_cpu_flags_,
                                     benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst, EXPECTED_UNATTENUATE_DIFF);
}
303 
// ARGBUnattenuate at the plain benchmark size with no offset or inversion:
// the two runs inside TestUnattenuateI may differ by at most
// EXPECTED_UNATTENUATE_DIFF.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int worst = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                     benchmark_iterations_, disable_cpu_flags_,
                                     benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(worst, EXPECTED_UNATTENUATE_DIFF);
}
310 
// Feeds a constant 16x16 image (channels 1, 2, 3, 255) through
// ARGBComputeCumulativeSum and checks that entry (x, y) of each channel equals
// the channel value times (x + 1) * (y + 1).
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
  const int kDim = 16;
  SIMD_ALIGNED(uint8_t orig_pixels[kDim][kDim][4]);
  SIMD_ALIGNED(int32_t added_pixels[kDim][kDim][4]);

  for (int row = 0; row < kDim; ++row) {
    for (int col = 0; col < kDim; ++col) {
      uint8_t* const px = orig_pixels[row][col];
      px[0] = 1u;
      px[1] = 2u;
      px[2] = 3u;
      px[3] = 255u;
    }
  }

  ARGBComputeCumulativeSum(&orig_pixels[0][0][0], kDim * 4,
                           &added_pixels[0][0][0], kDim * 4, kDim, kDim);

  for (int row = 0; row < kDim; ++row) {
    for (int col = 0; col < kDim; ++col) {
      // Number of source pixels in the rectangle from (0, 0) to (col, row).
      const int covered = (col + 1) * (row + 1);
      const int32_t* const sum = added_pixels[row][col];
      EXPECT_EQ(covered, sum[0]);
      EXPECT_EQ(covered * 2, sum[1]);
      EXPECT_EQ(covered * 3, sum[2]);
      EXPECT_EQ(covered * 255, sum[3]);
    }
  }
}
336 
337 // near is for legacy platforms.
TEST_F(LibYUVPlanarTest,TestARGBGray)338 TEST_F(LibYUVPlanarTest, TestARGBGray) {
339   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
340   memset(orig_pixels, 0, sizeof(orig_pixels));
341 
342   // Test blue
343   orig_pixels[0][0] = 255u;
344   orig_pixels[0][1] = 0u;
345   orig_pixels[0][2] = 0u;
346   orig_pixels[0][3] = 128u;
347   // Test green
348   orig_pixels[1][0] = 0u;
349   orig_pixels[1][1] = 255u;
350   orig_pixels[1][2] = 0u;
351   orig_pixels[1][3] = 0u;
352   // Test red
353   orig_pixels[2][0] = 0u;
354   orig_pixels[2][1] = 0u;
355   orig_pixels[2][2] = 255u;
356   orig_pixels[2][3] = 255u;
357   // Test black
358   orig_pixels[3][0] = 0u;
359   orig_pixels[3][1] = 0u;
360   orig_pixels[3][2] = 0u;
361   orig_pixels[3][3] = 255u;
362   // Test white
363   orig_pixels[4][0] = 255u;
364   orig_pixels[4][1] = 255u;
365   orig_pixels[4][2] = 255u;
366   orig_pixels[4][3] = 255u;
367   // Test color
368   orig_pixels[5][0] = 16u;
369   orig_pixels[5][1] = 64u;
370   orig_pixels[5][2] = 192u;
371   orig_pixels[5][3] = 224u;
372   // Do 16 to test asm version.
373   ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1);
374   EXPECT_NEAR(29u, orig_pixels[0][0], 1);
375   EXPECT_NEAR(29u, orig_pixels[0][1], 1);
376   EXPECT_NEAR(29u, orig_pixels[0][2], 1);
377   EXPECT_EQ(128u, orig_pixels[0][3]);
378   EXPECT_EQ(149u, orig_pixels[1][0]);
379   EXPECT_EQ(149u, orig_pixels[1][1]);
380   EXPECT_EQ(149u, orig_pixels[1][2]);
381   EXPECT_EQ(0u, orig_pixels[1][3]);
382   EXPECT_NEAR(77u, orig_pixels[2][0], 1);
383   EXPECT_NEAR(77u, orig_pixels[2][1], 1);
384   EXPECT_NEAR(77u, orig_pixels[2][2], 1);
385   EXPECT_EQ(255u, orig_pixels[2][3]);
386   EXPECT_EQ(0u, orig_pixels[3][0]);
387   EXPECT_EQ(0u, orig_pixels[3][1]);
388   EXPECT_EQ(0u, orig_pixels[3][2]);
389   EXPECT_EQ(255u, orig_pixels[3][3]);
390   EXPECT_EQ(255u, orig_pixels[4][0]);
391   EXPECT_EQ(255u, orig_pixels[4][1]);
392   EXPECT_EQ(255u, orig_pixels[4][2]);
393   EXPECT_EQ(255u, orig_pixels[4][3]);
394   EXPECT_NEAR(97u, orig_pixels[5][0], 1);
395   EXPECT_NEAR(97u, orig_pixels[5][1], 1);
396   EXPECT_NEAR(97u, orig_pixels[5][2], 1);
397   EXPECT_EQ(224u, orig_pixels[5][3]);
398   for (int i = 0; i < 1280; ++i) {
399     orig_pixels[i][0] = i;
400     orig_pixels[i][1] = i / 2;
401     orig_pixels[i][2] = i / 3;
402     orig_pixels[i][3] = i;
403   }
404   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
405     ARGBGray(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
406   }
407 }
408 
TEST_F(LibYUVPlanarTest,TestARGBGrayTo)409 TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
410   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
411   SIMD_ALIGNED(uint8_t gray_pixels[1280][4]);
412   memset(orig_pixels, 0, sizeof(orig_pixels));
413 
414   // Test blue
415   orig_pixels[0][0] = 255u;
416   orig_pixels[0][1] = 0u;
417   orig_pixels[0][2] = 0u;
418   orig_pixels[0][3] = 128u;
419   // Test green
420   orig_pixels[1][0] = 0u;
421   orig_pixels[1][1] = 255u;
422   orig_pixels[1][2] = 0u;
423   orig_pixels[1][3] = 0u;
424   // Test red
425   orig_pixels[2][0] = 0u;
426   orig_pixels[2][1] = 0u;
427   orig_pixels[2][2] = 255u;
428   orig_pixels[2][3] = 255u;
429   // Test black
430   orig_pixels[3][0] = 0u;
431   orig_pixels[3][1] = 0u;
432   orig_pixels[3][2] = 0u;
433   orig_pixels[3][3] = 255u;
434   // Test white
435   orig_pixels[4][0] = 255u;
436   orig_pixels[4][1] = 255u;
437   orig_pixels[4][2] = 255u;
438   orig_pixels[4][3] = 255u;
439   // Test color
440   orig_pixels[5][0] = 16u;
441   orig_pixels[5][1] = 64u;
442   orig_pixels[5][2] = 192u;
443   orig_pixels[5][3] = 224u;
444   // Do 16 to test asm version.
445   ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1);
446   EXPECT_NEAR(30u, gray_pixels[0][0], 1);
447   EXPECT_NEAR(30u, gray_pixels[0][1], 1);
448   EXPECT_NEAR(30u, gray_pixels[0][2], 1);
449   EXPECT_NEAR(128u, gray_pixels[0][3], 1);
450   EXPECT_NEAR(149u, gray_pixels[1][0], 1);
451   EXPECT_NEAR(149u, gray_pixels[1][1], 1);
452   EXPECT_NEAR(149u, gray_pixels[1][2], 1);
453   EXPECT_NEAR(0u, gray_pixels[1][3], 1);
454   EXPECT_NEAR(76u, gray_pixels[2][0], 1);
455   EXPECT_NEAR(76u, gray_pixels[2][1], 1);
456   EXPECT_NEAR(76u, gray_pixels[2][2], 1);
457   EXPECT_NEAR(255u, gray_pixels[2][3], 1);
458   EXPECT_NEAR(0u, gray_pixels[3][0], 1);
459   EXPECT_NEAR(0u, gray_pixels[3][1], 1);
460   EXPECT_NEAR(0u, gray_pixels[3][2], 1);
461   EXPECT_NEAR(255u, gray_pixels[3][3], 1);
462   EXPECT_NEAR(255u, gray_pixels[4][0], 1);
463   EXPECT_NEAR(255u, gray_pixels[4][1], 1);
464   EXPECT_NEAR(255u, gray_pixels[4][2], 1);
465   EXPECT_NEAR(255u, gray_pixels[4][3], 1);
466   EXPECT_NEAR(96u, gray_pixels[5][0], 1);
467   EXPECT_NEAR(96u, gray_pixels[5][1], 1);
468   EXPECT_NEAR(96u, gray_pixels[5][2], 1);
469   EXPECT_NEAR(224u, gray_pixels[5][3], 1);
470   for (int i = 0; i < 1280; ++i) {
471     orig_pixels[i][0] = i;
472     orig_pixels[i][1] = i / 2;
473     orig_pixels[i][2] = i / 3;
474     orig_pixels[i][3] = i;
475   }
476   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
477     ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 1280, 1);
478   }
479 
480   for (int i = 0; i < 256; ++i) {
481     orig_pixels[i][0] = i;
482     orig_pixels[i][1] = i;
483     orig_pixels[i][2] = i;
484     orig_pixels[i][3] = i;
485   }
486   ARGBGray(&orig_pixels[0][0], 0, 0, 0, 256, 1);
487   for (int i = 0; i < 256; ++i) {
488     EXPECT_EQ(i, orig_pixels[i][0]);
489     EXPECT_EQ(i, orig_pixels[i][1]);
490     EXPECT_EQ(i, orig_pixels[i][2]);
491     EXPECT_EQ(i, orig_pixels[i][3]);
492   }
493 }
494 
TEST_F(LibYUVPlanarTest,TestARGBSepia)495 TEST_F(LibYUVPlanarTest, TestARGBSepia) {
496   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
497   memset(orig_pixels, 0, sizeof(orig_pixels));
498 
499   // Test blue
500   orig_pixels[0][0] = 255u;
501   orig_pixels[0][1] = 0u;
502   orig_pixels[0][2] = 0u;
503   orig_pixels[0][3] = 128u;
504   // Test green
505   orig_pixels[1][0] = 0u;
506   orig_pixels[1][1] = 255u;
507   orig_pixels[1][2] = 0u;
508   orig_pixels[1][3] = 0u;
509   // Test red
510   orig_pixels[2][0] = 0u;
511   orig_pixels[2][1] = 0u;
512   orig_pixels[2][2] = 255u;
513   orig_pixels[2][3] = 255u;
514   // Test black
515   orig_pixels[3][0] = 0u;
516   orig_pixels[3][1] = 0u;
517   orig_pixels[3][2] = 0u;
518   orig_pixels[3][3] = 255u;
519   // Test white
520   orig_pixels[4][0] = 255u;
521   orig_pixels[4][1] = 255u;
522   orig_pixels[4][2] = 255u;
523   orig_pixels[4][3] = 255u;
524   // Test color
525   orig_pixels[5][0] = 16u;
526   orig_pixels[5][1] = 64u;
527   orig_pixels[5][2] = 192u;
528   orig_pixels[5][3] = 224u;
529   // Do 16 to test asm version.
530   ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 16, 1);
531   EXPECT_EQ(33u, orig_pixels[0][0]);
532   EXPECT_EQ(43u, orig_pixels[0][1]);
533   EXPECT_EQ(47u, orig_pixels[0][2]);
534   EXPECT_EQ(128u, orig_pixels[0][3]);
535   EXPECT_EQ(135u, orig_pixels[1][0]);
536   EXPECT_EQ(175u, orig_pixels[1][1]);
537   EXPECT_EQ(195u, orig_pixels[1][2]);
538   EXPECT_EQ(0u, orig_pixels[1][3]);
539   EXPECT_EQ(69u, orig_pixels[2][0]);
540   EXPECT_EQ(89u, orig_pixels[2][1]);
541   EXPECT_EQ(99u, orig_pixels[2][2]);
542   EXPECT_EQ(255u, orig_pixels[2][3]);
543   EXPECT_EQ(0u, orig_pixels[3][0]);
544   EXPECT_EQ(0u, orig_pixels[3][1]);
545   EXPECT_EQ(0u, orig_pixels[3][2]);
546   EXPECT_EQ(255u, orig_pixels[3][3]);
547   EXPECT_EQ(239u, orig_pixels[4][0]);
548   EXPECT_EQ(255u, orig_pixels[4][1]);
549   EXPECT_EQ(255u, orig_pixels[4][2]);
550   EXPECT_EQ(255u, orig_pixels[4][3]);
551   EXPECT_EQ(88u, orig_pixels[5][0]);
552   EXPECT_EQ(114u, orig_pixels[5][1]);
553   EXPECT_EQ(127u, orig_pixels[5][2]);
554   EXPECT_EQ(224u, orig_pixels[5][3]);
555 
556   for (int i = 0; i < 1280; ++i) {
557     orig_pixels[i][0] = i;
558     orig_pixels[i][1] = i / 2;
559     orig_pixels[i][2] = i / 3;
560     orig_pixels[i][3] = i;
561   }
562   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
563     ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
564   }
565 }
566 
TEST_F(LibYUVPlanarTest,TestARGBColorMatrix)567 TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) {
568   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
569   SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
570   SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
571 
572   // Matrix for Sepia.
573   SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
574       17 / 2, 68 / 2, 35 / 2, 0, 22 / 2, 88 / 2, 45 / 2, 0,
575       24 / 2, 98 / 2, 50 / 2, 0, 0,      0,      0,      64,  // Copy alpha.
576   };
577   memset(orig_pixels, 0, sizeof(orig_pixels));
578 
579   // Test blue
580   orig_pixels[0][0] = 255u;
581   orig_pixels[0][1] = 0u;
582   orig_pixels[0][2] = 0u;
583   orig_pixels[0][3] = 128u;
584   // Test green
585   orig_pixels[1][0] = 0u;
586   orig_pixels[1][1] = 255u;
587   orig_pixels[1][2] = 0u;
588   orig_pixels[1][3] = 0u;
589   // Test red
590   orig_pixels[2][0] = 0u;
591   orig_pixels[2][1] = 0u;
592   orig_pixels[2][2] = 255u;
593   orig_pixels[2][3] = 255u;
594   // Test color
595   orig_pixels[3][0] = 16u;
596   orig_pixels[3][1] = 64u;
597   orig_pixels[3][2] = 192u;
598   orig_pixels[3][3] = 224u;
599   // Do 16 to test asm version.
600   ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
601                   &kRGBToSepia[0], 16, 1);
602   EXPECT_EQ(31u, dst_pixels_opt[0][0]);
603   EXPECT_EQ(43u, dst_pixels_opt[0][1]);
604   EXPECT_EQ(47u, dst_pixels_opt[0][2]);
605   EXPECT_EQ(128u, dst_pixels_opt[0][3]);
606   EXPECT_EQ(135u, dst_pixels_opt[1][0]);
607   EXPECT_EQ(175u, dst_pixels_opt[1][1]);
608   EXPECT_EQ(195u, dst_pixels_opt[1][2]);
609   EXPECT_EQ(0u, dst_pixels_opt[1][3]);
610   EXPECT_EQ(67u, dst_pixels_opt[2][0]);
611   EXPECT_EQ(87u, dst_pixels_opt[2][1]);
612   EXPECT_EQ(99u, dst_pixels_opt[2][2]);
613   EXPECT_EQ(255u, dst_pixels_opt[2][3]);
614   EXPECT_EQ(87u, dst_pixels_opt[3][0]);
615   EXPECT_EQ(112u, dst_pixels_opt[3][1]);
616   EXPECT_EQ(127u, dst_pixels_opt[3][2]);
617   EXPECT_EQ(224u, dst_pixels_opt[3][3]);
618 
619   for (int i = 0; i < 1280; ++i) {
620     orig_pixels[i][0] = i;
621     orig_pixels[i][1] = i / 2;
622     orig_pixels[i][2] = i / 3;
623     orig_pixels[i][3] = i;
624   }
625   MaskCpuFlags(disable_cpu_flags_);
626   ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
627                   &kRGBToSepia[0], 1280, 1);
628   MaskCpuFlags(benchmark_cpu_info_);
629 
630   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
631     ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
632                     &kRGBToSepia[0], 1280, 1);
633   }
634 
635   for (int i = 0; i < 1280; ++i) {
636     EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
637     EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
638     EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
639     EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
640   }
641 }
642 
// Applies a sepia matrix in place with RGBColorMatrix to four reference pixels
// and checks every output byte exactly (the fourth byte of each pixel is
// expected to be unchanged), then benchmarks the call on a 1280 pixel ramp.
TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) {
  const int kWidth = 1280;
  SIMD_ALIGNED(uint8_t orig_pixels[kWidth][4]);

  // Matrix for Sepia.
  SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
      17, 68, 35, 0, 22, 88, 45, 0,
      24, 98, 50, 0, 0,  0,  0,  0,  // Unused but makes matrix 16 bytes.
  };
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8_t kInput[4][4] = {
      {255u, 0u, 0u, 128u},    // Blue.
      {0u, 255u, 0u, 0u},      // Green.
      {0u, 0u, 255u, 255u},    // Red.
      {16u, 64u, 192u, 224u},  // Color.
  };
  static const unsigned int kExpected[4][4] = {
      {31u, 43u, 47u, 128u},    // Blue.
      {135u, 175u, 195u, 0u},   // Green.
      {67u, 87u, 99u, 255u},    // Red.
      {87u, 112u, 127u, 224u},  // Color.
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }

  // Do 16 to test asm version.
  RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 16, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Benchmark over a full-width ramp; the output is not checked.
  for (int i = 0; i < kWidth; ++i) {
    uint8_t* const px = orig_pixels[i];
    px[0] = static_cast<uint8_t>(i);
    px[1] = static_cast<uint8_t>(i / 2);
    px[2] = static_cast<uint8_t>(i / 3);
    px[3] = static_cast<uint8_t>(i);
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, kWidth, 1);
  }
}
702 
// Maps four reference pixels in place through a lookup table with
// ARGBColorTable and checks every output byte exactly, then benchmarks the
// call on a 1280 pixel ramp.
TEST_F(LibYUVPlanarTest, TestARGBColorTable) {
  const int kWidth = 1280;
  SIMD_ALIGNED(uint8_t orig_pixels[kWidth][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Lookup table of 256 four-byte entries; only the first four entries are
  // non-zero. The expectations below show that channel c with input value v
  // maps to kARGBTable[v * 4 + c].
  static const uint8_t kARGBTable[256 * 4] = {
      1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
  };

  static const uint8_t kInput[4][4] = {
      {0u, 0u, 0u, 0u},
      {1u, 1u, 1u, 1u},
      {2u, 2u, 2u, 2u},
      {0u, 1u, 2u, 3u},
  };
  static const unsigned int kExpected[4][4] = {
      {1u, 2u, 3u, 4u},
      {5u, 6u, 7u, 8u},
      {9u, 10u, 11u, 12u},
      {1u, 6u, 11u, 16u},
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }

  // Do 16 to test asm version.
  ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Benchmark over a full-width ramp; the output is not checked.
  for (int i = 0; i < kWidth; ++i) {
    uint8_t* const px = orig_pixels[i];
    px[0] = static_cast<uint8_t>(i);
    px[1] = static_cast<uint8_t>(i / 2);
    px[2] = static_cast<uint8_t>(i / 3);
    px[3] = static_cast<uint8_t>(i);
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, kWidth, 1);
  }
}
757 
758 // Same as TestARGBColorTable except alpha does not change.
// Maps four reference pixels in place through a lookup table with
// RGBColorTable and checks every output byte exactly; the fourth byte of each
// pixel must come back unchanged. Then benchmarks the call on a 1280 pixel
// ramp.
TEST_F(LibYUVPlanarTest, TestRGBColorTable) {
  const int kWidth = 1280;
  SIMD_ALIGNED(uint8_t orig_pixels[kWidth][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Lookup table of 256 four-byte entries; only the first four entries are
  // non-zero. The expectations below show that channel c (0..2) with input
  // value v maps to kARGBTable[v * 4 + c].
  static const uint8_t kARGBTable[256 * 4] = {
      1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
  };

  static const uint8_t kInput[4][4] = {
      {0u, 0u, 0u, 0u},
      {1u, 1u, 1u, 1u},
      {2u, 2u, 2u, 2u},
      {0u, 1u, 2u, 3u},
  };
  // The last column equals the input: alpha unchanged.
  static const unsigned int kExpected[4][4] = {
      {1u, 2u, 3u, 0u},
      {5u, 6u, 7u, 1u},
      {9u, 10u, 11u, 2u},
      {1u, 6u, 11u, 3u},
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }

  // Do 16 to test asm version.
  RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c])
          << "pixel " << p << " channel " << c;
    }
  }

  // Benchmark over a full-width ramp; the output is not checked.
  for (int i = 0; i < kWidth; ++i) {
    uint8_t* const px = orig_pixels[i];
    px[0] = static_cast<uint8_t>(i);
    px[1] = static_cast<uint8_t>(i / 2);
    px[2] = static_cast<uint8_t>(i / 3);
    px[3] = static_cast<uint8_t>(i);
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, kWidth, 1);
  }
}
813 
// Quantizes a 1280 pixel ramp in place with ARGBQuantize using an interval of
// 8 and an offset of 4. The first three channels must land on
// (value / 8 * 8 + 4) and the fourth byte must be unchanged. The same call is
// then repeated as a benchmark.
TEST_F(LibYUVPlanarTest, TestARGBQuantize) {
  const int kWidth = 1280;
  const int kIntervalSize = 8;
  const int kIntervalOffset = kIntervalSize / 2;
  const int kScale = (65536 + kIntervalOffset) / kIntervalSize;
  SIMD_ALIGNED(uint8_t orig_pixels[kWidth][4]);

  for (int i = 0; i < kWidth; ++i) {
    uint8_t* const px = orig_pixels[i];
    px[0] = static_cast<uint8_t>(i);
    px[1] = static_cast<uint8_t>(i / 2);
    px[2] = static_cast<uint8_t>(i / 3);
    px[3] = static_cast<uint8_t>(i);
  }
  ARGBQuantize(&orig_pixels[0][0], 0, kScale, kIntervalSize, kIntervalOffset,
               0, 0, kWidth, 1);

  for (int i = 0; i < kWidth; ++i) {
    const uint8_t* const px = orig_pixels[i];
    EXPECT_EQ((i / kIntervalSize * kIntervalSize + kIntervalOffset) & 255,
              px[0]);
    EXPECT_EQ((i / 2 / kIntervalSize * kIntervalSize + kIntervalOffset) & 255,
              px[1]);
    EXPECT_EQ((i / 3 / kIntervalSize * kIntervalSize + kIntervalOffset) & 255,
              px[2]);
    EXPECT_EQ(i & 255, px[3]);
  }
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBQuantize(&orig_pixels[0][0], 0, kScale, kIntervalSize, kIntervalOffset,
                 0, 0, kWidth, 1);
  }
}
837 
// Mirrors a random ARGB image once with CPU features masked by
// disable_cpu_flags_ (reference) and benchmark_iterations_ times with
// benchmark_cpu_info_, then requires both outputs to match byte for byte.
TEST_F(LibYUVPlanarTest, ARGBMirror_Opt) {
  const int kImageBytes = benchmark_width_ * benchmark_height_ * 4;
  const int kRowBytes = benchmark_width_ * 4;
  align_buffer_page_end(src_pixels, kImageBytes);
  align_buffer_page_end(dst_pixels_opt, kImageBytes);
  align_buffer_page_end(dst_pixels_c, kImageBytes);

  MemRandomize(src_pixels, kImageBytes);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBMirror(src_pixels, kRowBytes, dst_pixels_c, kRowBytes, benchmark_width_,
             benchmark_height_);

  // Pass under test, repeated for timing.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; ++run) {
    ARGBMirror(src_pixels, kRowBytes, dst_pixels_opt, kRowBytes,
               benchmark_width_, benchmark_height_);
  }

  for (int pos = 0; pos < kImageBytes; ++pos) {
    EXPECT_EQ(dst_pixels_c[pos], dst_pixels_opt[pos]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
861 
// Mirrors a random single-byte-per-pixel plane once with CPU features masked
// by disable_cpu_flags_ (reference) and benchmark_iterations_ times with
// benchmark_cpu_info_, then requires both outputs to match byte for byte.
TEST_F(LibYUVPlanarTest, MirrorPlane_Opt) {
  const int kPlaneBytes = benchmark_width_ * benchmark_height_;
  align_buffer_page_end(src_pixels, kPlaneBytes);
  align_buffer_page_end(dst_pixels_opt, kPlaneBytes);
  align_buffer_page_end(dst_pixels_c, kPlaneBytes);

  MemRandomize(src_pixels, kPlaneBytes);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  MirrorPlane(src_pixels, benchmark_width_, dst_pixels_c, benchmark_width_,
              benchmark_width_, benchmark_height_);

  // Pass under test, repeated for timing.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; ++run) {
    MirrorPlane(src_pixels, benchmark_width_, dst_pixels_opt, benchmark_width_,
                benchmark_width_, benchmark_height_);
  }

  for (int pos = 0; pos < kPlaneBytes; ++pos) {
    EXPECT_EQ(dst_pixels_c[pos], dst_pixels_opt[pos]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
884 
// Mirrors a random two-byte-per-pixel (interleaved UV) plane once with CPU
// features masked by disable_cpu_flags_ (reference) and benchmark_iterations_
// times with benchmark_cpu_info_, then requires both outputs to match byte
// for byte.
TEST_F(LibYUVPlanarTest, MirrorUVPlane_Opt) {
  const int kPlaneBytes = benchmark_width_ * benchmark_height_ * 2;
  const int kRowBytes = benchmark_width_ * 2;
  align_buffer_page_end(src_pixels, kPlaneBytes);
  align_buffer_page_end(dst_pixels_opt, kPlaneBytes);
  align_buffer_page_end(dst_pixels_c, kPlaneBytes);

  MemRandomize(src_pixels, kPlaneBytes);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  MirrorUVPlane(src_pixels, kRowBytes, dst_pixels_c, kRowBytes,
                benchmark_width_, benchmark_height_);

  // Pass under test, repeated for timing.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; ++run) {
    MirrorUVPlane(src_pixels, kRowBytes, dst_pixels_opt, kRowBytes,
                  benchmark_width_, benchmark_height_);
  }

  for (int pos = 0; pos < kPlaneBytes; ++pos) {
    EXPECT_EQ(dst_pixels_c[pos], dst_pixels_opt[pos]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
908 
// Checks ARGBShade() on four hand-picked pixels with three shade values, then
// runs it over a 1280-pixel row as a benchmark.
TEST_F(LibYUVPlanarTest, TestShade) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t shade_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Source pixels under test; the rest of the row stays zero.
  static const uint8_t kSource[4][4] = {
      {10u, 20u, 40u, 80u},
      {0u, 0u, 0u, 255u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kSource[p][c];
    }
  }

  // Shade 0x80ffffff: the last channel is expected to be halved while the
  // other three channels pass through unchanged.
  static const unsigned int kExpectedAlphaHalved[4][4] = {
      {10u, 20u, 40u, 40u},
      {0u, 0u, 0u, 128u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  // Do 8 pixels to allow opt version to be used.
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80ffffff);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpectedAlphaHalved[p][c], shade_pixels[p][c]);
    }
  }

  // Shade 0x80808080: every channel of the first pixel is expected to halve.
  static const unsigned int kExpectedAllHalved[4] = {5u, 10u, 20u, 40u};
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80808080);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(kExpectedAllHalved[c], shade_pixels[0][c]);
  }

  // Shade 0x10204080: per-channel factors chosen so that every channel of the
  // first pixel is expected to come out as 5.
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x10204080);
  for (int c = 0; c < 4; ++c) {
    EXPECT_EQ(5u, shade_pixels[0][c]);
  }

  // Benchmark: results are not inspected.
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 1280, 1,
              0x80808080);
  }
}
966 
TEST_F(LibYUVPlanarTest,TestARGBInterpolate)967 TEST_F(LibYUVPlanarTest, TestARGBInterpolate) {
968   SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
969   SIMD_ALIGNED(uint8_t orig_pixels_1[1280][4]);
970   SIMD_ALIGNED(uint8_t interpolate_pixels[1280][4]);
971   memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
972   memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
973 
974   orig_pixels_0[0][0] = 16u;
975   orig_pixels_0[0][1] = 32u;
976   orig_pixels_0[0][2] = 64u;
977   orig_pixels_0[0][3] = 128u;
978   orig_pixels_0[1][0] = 0u;
979   orig_pixels_0[1][1] = 0u;
980   orig_pixels_0[1][2] = 0u;
981   orig_pixels_0[1][3] = 255u;
982   orig_pixels_0[2][0] = 0u;
983   orig_pixels_0[2][1] = 0u;
984   orig_pixels_0[2][2] = 0u;
985   orig_pixels_0[2][3] = 0u;
986   orig_pixels_0[3][0] = 0u;
987   orig_pixels_0[3][1] = 0u;
988   orig_pixels_0[3][2] = 0u;
989   orig_pixels_0[3][3] = 0u;
990 
991   orig_pixels_1[0][0] = 0u;
992   orig_pixels_1[0][1] = 0u;
993   orig_pixels_1[0][2] = 0u;
994   orig_pixels_1[0][3] = 0u;
995   orig_pixels_1[1][0] = 0u;
996   orig_pixels_1[1][1] = 0u;
997   orig_pixels_1[1][2] = 0u;
998   orig_pixels_1[1][3] = 0u;
999   orig_pixels_1[2][0] = 0u;
1000   orig_pixels_1[2][1] = 0u;
1001   orig_pixels_1[2][2] = 0u;
1002   orig_pixels_1[2][3] = 0u;
1003   orig_pixels_1[3][0] = 255u;
1004   orig_pixels_1[3][1] = 255u;
1005   orig_pixels_1[3][2] = 255u;
1006   orig_pixels_1[3][3] = 255u;
1007 
1008   ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
1009                   &interpolate_pixels[0][0], 0, 4, 1, 128);
1010   EXPECT_EQ(8u, interpolate_pixels[0][0]);
1011   EXPECT_EQ(16u, interpolate_pixels[0][1]);
1012   EXPECT_EQ(32u, interpolate_pixels[0][2]);
1013   EXPECT_EQ(64u, interpolate_pixels[0][3]);
1014   EXPECT_EQ(0u, interpolate_pixels[1][0]);
1015   EXPECT_EQ(0u, interpolate_pixels[1][1]);
1016   EXPECT_EQ(0u, interpolate_pixels[1][2]);
1017   EXPECT_EQ(128u, interpolate_pixels[1][3]);
1018   EXPECT_EQ(0u, interpolate_pixels[2][0]);
1019   EXPECT_EQ(0u, interpolate_pixels[2][1]);
1020   EXPECT_EQ(0u, interpolate_pixels[2][2]);
1021   EXPECT_EQ(0u, interpolate_pixels[2][3]);
1022   EXPECT_EQ(128u, interpolate_pixels[3][0]);
1023   EXPECT_EQ(128u, interpolate_pixels[3][1]);
1024   EXPECT_EQ(128u, interpolate_pixels[3][2]);
1025   EXPECT_EQ(128u, interpolate_pixels[3][3]);
1026 
1027   ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
1028                   &interpolate_pixels[0][0], 0, 4, 1, 0);
1029   EXPECT_EQ(16u, interpolate_pixels[0][0]);
1030   EXPECT_EQ(32u, interpolate_pixels[0][1]);
1031   EXPECT_EQ(64u, interpolate_pixels[0][2]);
1032   EXPECT_EQ(128u, interpolate_pixels[0][3]);
1033 
1034   ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
1035                   &interpolate_pixels[0][0], 0, 4, 1, 192);
1036 
1037   EXPECT_EQ(4u, interpolate_pixels[0][0]);
1038   EXPECT_EQ(8u, interpolate_pixels[0][1]);
1039   EXPECT_EQ(16u, interpolate_pixels[0][2]);
1040   EXPECT_EQ(32u, interpolate_pixels[0][3]);
1041 
1042   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
1043     ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0,
1044                     &interpolate_pixels[0][0], 0, 1280, 1, 128);
1045   }
1046 }
1047 
TEST_F(LibYUVPlanarTest,TestInterpolatePlane)1048 TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
1049   SIMD_ALIGNED(uint8_t orig_pixels_0[1280]);
1050   SIMD_ALIGNED(uint8_t orig_pixels_1[1280]);
1051   SIMD_ALIGNED(uint8_t interpolate_pixels[1280]);
1052   memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
1053   memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
1054 
1055   orig_pixels_0[0] = 16u;
1056   orig_pixels_0[1] = 32u;
1057   orig_pixels_0[2] = 64u;
1058   orig_pixels_0[3] = 128u;
1059   orig_pixels_0[4] = 0u;
1060   orig_pixels_0[5] = 0u;
1061   orig_pixels_0[6] = 0u;
1062   orig_pixels_0[7] = 255u;
1063   orig_pixels_0[8] = 0u;
1064   orig_pixels_0[9] = 0u;
1065   orig_pixels_0[10] = 0u;
1066   orig_pixels_0[11] = 0u;
1067   orig_pixels_0[12] = 0u;
1068   orig_pixels_0[13] = 0u;
1069   orig_pixels_0[14] = 0u;
1070   orig_pixels_0[15] = 0u;
1071 
1072   orig_pixels_1[0] = 0u;
1073   orig_pixels_1[1] = 0u;
1074   orig_pixels_1[2] = 0u;
1075   orig_pixels_1[3] = 0u;
1076   orig_pixels_1[4] = 0u;
1077   orig_pixels_1[5] = 0u;
1078   orig_pixels_1[6] = 0u;
1079   orig_pixels_1[7] = 0u;
1080   orig_pixels_1[8] = 0u;
1081   orig_pixels_1[9] = 0u;
1082   orig_pixels_1[10] = 0u;
1083   orig_pixels_1[11] = 0u;
1084   orig_pixels_1[12] = 255u;
1085   orig_pixels_1[13] = 255u;
1086   orig_pixels_1[14] = 255u;
1087   orig_pixels_1[15] = 255u;
1088 
1089   InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1090                    &interpolate_pixels[0], 0, 16, 1, 128);
1091   EXPECT_EQ(8u, interpolate_pixels[0]);
1092   EXPECT_EQ(16u, interpolate_pixels[1]);
1093   EXPECT_EQ(32u, interpolate_pixels[2]);
1094   EXPECT_EQ(64u, interpolate_pixels[3]);
1095   EXPECT_EQ(0u, interpolate_pixels[4]);
1096   EXPECT_EQ(0u, interpolate_pixels[5]);
1097   EXPECT_EQ(0u, interpolate_pixels[6]);
1098   EXPECT_EQ(128u, interpolate_pixels[7]);
1099   EXPECT_EQ(0u, interpolate_pixels[8]);
1100   EXPECT_EQ(0u, interpolate_pixels[9]);
1101   EXPECT_EQ(0u, interpolate_pixels[10]);
1102   EXPECT_EQ(0u, interpolate_pixels[11]);
1103   EXPECT_EQ(128u, interpolate_pixels[12]);
1104   EXPECT_EQ(128u, interpolate_pixels[13]);
1105   EXPECT_EQ(128u, interpolate_pixels[14]);
1106   EXPECT_EQ(128u, interpolate_pixels[15]);
1107 
1108   InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1109                    &interpolate_pixels[0], 0, 16, 1, 0);
1110   EXPECT_EQ(16u, interpolate_pixels[0]);
1111   EXPECT_EQ(32u, interpolate_pixels[1]);
1112   EXPECT_EQ(64u, interpolate_pixels[2]);
1113   EXPECT_EQ(128u, interpolate_pixels[3]);
1114 
1115   InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1116                    &interpolate_pixels[0], 0, 16, 1, 192);
1117 
1118   EXPECT_EQ(4u, interpolate_pixels[0]);
1119   EXPECT_EQ(8u, interpolate_pixels[1]);
1120   EXPECT_EQ(16u, interpolate_pixels[2]);
1121   EXPECT_EQ(32u, interpolate_pixels[3]);
1122 
1123   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
1124     InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1125                      &interpolate_pixels[0], 0, 1280, 1, 123);
1126   }
1127 }
1128 
TEST_F(LibYUVPlanarTest,TestInterpolatePlane_16)1129 TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) {
1130   SIMD_ALIGNED(uint16_t orig_pixels_0[1280]);
1131   SIMD_ALIGNED(uint16_t orig_pixels_1[1280]);
1132   SIMD_ALIGNED(uint16_t interpolate_pixels[1280]);
1133   memset(orig_pixels_0, 0, sizeof(orig_pixels_0));
1134   memset(orig_pixels_1, 0, sizeof(orig_pixels_1));
1135 
1136   orig_pixels_0[0] = 16u;
1137   orig_pixels_0[1] = 32u;
1138   orig_pixels_0[2] = 64u;
1139   orig_pixels_0[3] = 128u;
1140   orig_pixels_0[4] = 0u;
1141   orig_pixels_0[5] = 0u;
1142   orig_pixels_0[6] = 0u;
1143   orig_pixels_0[7] = 255u;
1144   orig_pixels_0[8] = 0u;
1145   orig_pixels_0[9] = 0u;
1146   orig_pixels_0[10] = 0u;
1147   orig_pixels_0[11] = 0u;
1148   orig_pixels_0[12] = 0u;
1149   orig_pixels_0[13] = 0u;
1150   orig_pixels_0[14] = 0u;
1151   orig_pixels_0[15] = 0u;
1152 
1153   orig_pixels_1[0] = 0u;
1154   orig_pixels_1[1] = 0u;
1155   orig_pixels_1[2] = 0u;
1156   orig_pixels_1[3] = 0u;
1157   orig_pixels_1[4] = 0u;
1158   orig_pixels_1[5] = 0u;
1159   orig_pixels_1[6] = 0u;
1160   orig_pixels_1[7] = 0u;
1161   orig_pixels_1[8] = 0u;
1162   orig_pixels_1[9] = 0u;
1163   orig_pixels_1[10] = 0u;
1164   orig_pixels_1[11] = 0u;
1165   orig_pixels_1[12] = 255u;
1166   orig_pixels_1[13] = 255u;
1167   orig_pixels_1[14] = 255u;
1168   orig_pixels_1[15] = 255u;
1169 
1170   InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1171                       &interpolate_pixels[0], 0, 16, 1, 128);
1172   EXPECT_EQ(8u, interpolate_pixels[0]);
1173   EXPECT_EQ(16u, interpolate_pixels[1]);
1174   EXPECT_EQ(32u, interpolate_pixels[2]);
1175   EXPECT_EQ(64u, interpolate_pixels[3]);
1176   EXPECT_EQ(0u, interpolate_pixels[4]);
1177   EXPECT_EQ(0u, interpolate_pixels[5]);
1178   EXPECT_EQ(0u, interpolate_pixels[6]);
1179   EXPECT_EQ(128u, interpolate_pixels[7]);
1180   EXPECT_EQ(0u, interpolate_pixels[8]);
1181   EXPECT_EQ(0u, interpolate_pixels[9]);
1182   EXPECT_EQ(0u, interpolate_pixels[10]);
1183   EXPECT_EQ(0u, interpolate_pixels[11]);
1184   EXPECT_EQ(128u, interpolate_pixels[12]);
1185   EXPECT_EQ(128u, interpolate_pixels[13]);
1186   EXPECT_EQ(128u, interpolate_pixels[14]);
1187   EXPECT_EQ(128u, interpolate_pixels[15]);
1188 
1189   InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1190                       &interpolate_pixels[0], 0, 16, 1, 0);
1191   EXPECT_EQ(16u, interpolate_pixels[0]);
1192   EXPECT_EQ(32u, interpolate_pixels[1]);
1193   EXPECT_EQ(64u, interpolate_pixels[2]);
1194   EXPECT_EQ(128u, interpolate_pixels[3]);
1195 
1196   InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1197                       &interpolate_pixels[0], 0, 16, 1, 192);
1198 
1199   EXPECT_EQ(4u, interpolate_pixels[0]);
1200   EXPECT_EQ(8u, interpolate_pixels[1]);
1201   EXPECT_EQ(16u, interpolate_pixels[2]);
1202   EXPECT_EQ(32u, interpolate_pixels[3]);
1203 
1204   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
1205     InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0,
1206                         &interpolate_pixels[0], 0, 1280, 1, 123);
1207   }
1208 }
1209 
// Defines the test ARGBInterpolate<TERP><N>.  Two random source images are
// interpolated with fraction TERP, once with CPU features masked by
// disable_cpu_flags_ (reference) and benchmark_iterations_ times with
// benchmark_cpu_info_; both destinations must match byte for byte.
//   BPP_x / STRIDE_x  bytes per pixel and row-stride rounding for the source
//                     (A) and destination (B) side.
//   W1280             image width expression.
//   NEG               sign token placed in front of the height argument.
//   OFF               byte offset added to both source pointers.
// FMT_A and FMT_B are accepted but not referenced by the expansion.
#define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \
                 N, NEG, OFF)                                                 \
  TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) {                        \
    const int kCols = W1280;                                                  \
    const int kRows = benchmark_height_;                                      \
    const int kSrcStride =                                                    \
        (kCols * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;                 \
    const int kDstStride =                                                    \
        (kCols * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;                 \
    const int kSrcBytes = kSrcStride * kRows;                                 \
    const int kDstBytes = kDstStride * kRows;                                 \
    align_buffer_page_end(src_first, kSrcBytes + OFF);                        \
    align_buffer_page_end(src_second, kSrcBytes + OFF);                       \
    align_buffer_page_end(dst_ref, kDstBytes);                                \
    align_buffer_page_end(dst_fast, kDstBytes);                               \
    for (int idx = 0; idx < kSrcBytes; ++idx) {                               \
      src_first[idx + OFF] = (fastrand() & 0xff);                             \
      src_second[idx + OFF] = (fastrand() & 0xff);                            \
    }                                                                         \
    MaskCpuFlags(disable_cpu_flags_);                                         \
    ARGBInterpolate(src_first + OFF, kSrcStride, src_second + OFF,            \
                    kSrcStride, dst_ref, kDstStride, kCols, NEG kRows, TERP); \
    MaskCpuFlags(benchmark_cpu_info_);                                        \
    for (int run = 0; run < benchmark_iterations_; ++run) {                   \
      ARGBInterpolate(src_first + OFF, kSrcStride, src_second + OFF,          \
                      kSrcStride, dst_fast, kDstStride, kCols, NEG kRows,     \
                      TERP);                                                  \
    }                                                                         \
    for (int idx = 0; idx < kDstBytes; ++idx) {                               \
      EXPECT_EQ(dst_ref[idx], dst_fast[idx]);                                 \
    }                                                                         \
    free_aligned_buffer_page_end(src_first);                                  \
    free_aligned_buffer_page_end(src_second);                                 \
    free_aligned_buffer_page_end(dst_ref);                                    \
    free_aligned_buffer_page_end(dst_fast);                                   \
  }
1243 
1244 #define TESTINTERPOLATE(TERP)                                                \
1245   TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ + 1, TERP, _Any, +, 0)   \
1246   TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \
1247   TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0)    \
1248   TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0)
1249 
1250 TESTINTERPOLATE(0)
1251 TESTINTERPOLATE(64)
1252 TESTINTERPOLATE(128)
1253 TESTINTERPOLATE(192)
1254 TESTINTERPOLATE(255)
1255 
TestBlend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int attenuate)1256 static int TestBlend(int width,
1257                      int height,
1258                      int benchmark_iterations,
1259                      int disable_cpu_flags,
1260                      int benchmark_cpu_info,
1261                      int invert,
1262                      int off,
1263                      int attenuate) {
1264   if (width < 1) {
1265     width = 1;
1266   }
1267   const int kBpp = 4;
1268   const int kStride = width * kBpp;
1269   align_buffer_page_end(src_argb_a, kStride * height + off);
1270   align_buffer_page_end(src_argb_b, kStride * height + off);
1271   align_buffer_page_end(dst_argb_c, kStride * height);
1272   align_buffer_page_end(dst_argb_opt, kStride * height);
1273   for (int i = 0; i < kStride * height; ++i) {
1274     src_argb_a[i + off] = (fastrand() & 0xff);
1275     src_argb_b[i + off] = (fastrand() & 0xff);
1276   }
1277   MemRandomize(src_argb_a, kStride * height + off);
1278   MemRandomize(src_argb_b, kStride * height + off);
1279   if (attenuate) {
1280     ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
1281                   height);
1282   }
1283   memset(dst_argb_c, 255, kStride * height);
1284   memset(dst_argb_opt, 255, kStride * height);
1285 
1286   MaskCpuFlags(disable_cpu_flags);
1287   ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1288             kStride, width, invert * height);
1289   MaskCpuFlags(benchmark_cpu_info);
1290   for (int i = 0; i < benchmark_iterations; ++i) {
1291     ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride,
1292               dst_argb_opt, kStride, width, invert * height);
1293   }
1294   int max_diff = 0;
1295   for (int i = 0; i < kStride * height; ++i) {
1296     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1297                        static_cast<int>(dst_argb_opt[i]));
1298     if (abs_diff > max_diff) {
1299       max_diff = abs_diff;
1300     }
1301   }
1302   free_aligned_buffer_page_end(src_argb_a);
1303   free_aligned_buffer_page_end(src_argb_b);
1304   free_aligned_buffer_page_end(dst_argb_c);
1305   free_aligned_buffer_page_end(dst_argb_opt);
1306   return max_diff;
1307 }
1308 
// ARGBBlend on an image one pixel wider than the benchmark width, with the
// first source attenuated; the two code paths may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBBlend_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kAttenuate = 1;
  const int worst_diff = TestBlend(
      benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kAttenuate);
  EXPECT_LE(worst_diff, 1);
}
1315 
// ARGBBlend with both source pointers offset by one byte and the first source
// attenuated; the two code paths may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int kAttenuate = 1;
  const int worst_diff = TestBlend(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kAttenuate);
  EXPECT_LE(worst_diff, 1);
}
1322 
// ARGBBlend with a negated height and the first source attenuated; the two
// code paths may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int kAttenuate = 1;
  const int worst_diff = TestBlend(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kAttenuate);
  EXPECT_LE(worst_diff, 1);
}
1329 
// ARGBBlend on sources that are left un-attenuated; the two code paths may
// differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBBlend_Unattenuated) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kAttenuate = 0;
  const int worst_diff = TestBlend(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kAttenuate);
  EXPECT_LE(worst_diff, 1);
}
1336 
// ARGBBlend on a benchmark-sized image with the first source attenuated; the
// two code paths may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kAttenuate = 1;
  const int worst_diff = TestBlend(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kAttenuate);
  EXPECT_LE(worst_diff, 1);
}
1343 
TestBlendPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1344 static void TestBlendPlane(int width,
1345                            int height,
1346                            int benchmark_iterations,
1347                            int disable_cpu_flags,
1348                            int benchmark_cpu_info,
1349                            int invert,
1350                            int off) {
1351   if (width < 1) {
1352     width = 1;
1353   }
1354   const int kBpp = 1;
1355   const int kStride = width * kBpp;
1356   align_buffer_page_end(src_argb_a, kStride * height + off);
1357   align_buffer_page_end(src_argb_b, kStride * height + off);
1358   align_buffer_page_end(src_argb_alpha, kStride * height + off);
1359   align_buffer_page_end(dst_argb_c, kStride * height + off);
1360   align_buffer_page_end(dst_argb_opt, kStride * height + off);
1361   memset(dst_argb_c, 255, kStride * height + off);
1362   memset(dst_argb_opt, 255, kStride * height + off);
1363 
1364   // Test source is maintained exactly if alpha is 255.
1365   for (int i = 0; i < width; ++i) {
1366     src_argb_a[i + off] = i & 255;
1367     src_argb_b[i + off] = 255 - (i & 255);
1368   }
1369   memset(src_argb_alpha + off, 255, width);
1370   BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1371              src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1372   for (int i = 0; i < width; ++i) {
1373     EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
1374   }
1375   // Test destination is maintained exactly if alpha is 0.
1376   memset(src_argb_alpha + off, 0, width);
1377   BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1378              src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1379   for (int i = 0; i < width; ++i) {
1380     EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
1381   }
1382   for (int i = 0; i < kStride * height; ++i) {
1383     src_argb_a[i + off] = (fastrand() & 0xff);
1384     src_argb_b[i + off] = (fastrand() & 0xff);
1385     src_argb_alpha[i + off] = (fastrand() & 0xff);
1386   }
1387 
1388   MaskCpuFlags(disable_cpu_flags);
1389   BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1390              src_argb_alpha + off, width, dst_argb_c + off, width, width,
1391              invert * height);
1392   MaskCpuFlags(benchmark_cpu_info);
1393   for (int i = 0; i < benchmark_iterations; ++i) {
1394     BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1395                src_argb_alpha + off, width, dst_argb_opt + off, width, width,
1396                invert * height);
1397   }
1398   for (int i = 0; i < kStride * height; ++i) {
1399     EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
1400   }
1401   free_aligned_buffer_page_end(src_argb_a);
1402   free_aligned_buffer_page_end(src_argb_b);
1403   free_aligned_buffer_page_end(src_argb_alpha);
1404   free_aligned_buffer_page_end(dst_argb_c);
1405   free_aligned_buffer_page_end(dst_argb_opt);
1406 }
1407 
// BlendPlane on a benchmark-sized plane with no pointer offset.
TEST_F(LibYUVPlanarTest, BlendPlane_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane with every plane pointer offset by one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane on a plane one pixel wider than the benchmark width, with every
// plane pointer offset by one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Any) {
  const int kInvert = +1;
  const int kOffset = 1;
  TestBlendPlane(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane with a negated height and every plane pointer offset by one
// byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Invert) {
  const int kInvert = -1;
  const int kOffset = 1;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1424 
// Divides v by a, rounding up (for non-negative v and positive a).
#define SUBSAMPLE(v, a) (((v) + (a) - 1) / (a))
1426 
TestI420Blend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1427 static void TestI420Blend(int width,
1428                           int height,
1429                           int benchmark_iterations,
1430                           int disable_cpu_flags,
1431                           int benchmark_cpu_info,
1432                           int invert,
1433                           int off) {
1434   width = ((width) > 0) ? (width) : 1;
1435   const int kStrideUV = SUBSAMPLE(width, 2);
1436   const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2);
1437   align_buffer_page_end(src_y0, width * height + off);
1438   align_buffer_page_end(src_u0, kSizeUV + off);
1439   align_buffer_page_end(src_v0, kSizeUV + off);
1440   align_buffer_page_end(src_y1, width * height + off);
1441   align_buffer_page_end(src_u1, kSizeUV + off);
1442   align_buffer_page_end(src_v1, kSizeUV + off);
1443   align_buffer_page_end(src_a, width * height + off);
1444   align_buffer_page_end(dst_y_c, width * height + off);
1445   align_buffer_page_end(dst_u_c, kSizeUV + off);
1446   align_buffer_page_end(dst_v_c, kSizeUV + off);
1447   align_buffer_page_end(dst_y_opt, width * height + off);
1448   align_buffer_page_end(dst_u_opt, kSizeUV + off);
1449   align_buffer_page_end(dst_v_opt, kSizeUV + off);
1450 
1451   MemRandomize(src_y0, width * height + off);
1452   MemRandomize(src_u0, kSizeUV + off);
1453   MemRandomize(src_v0, kSizeUV + off);
1454   MemRandomize(src_y1, width * height + off);
1455   MemRandomize(src_u1, kSizeUV + off);
1456   MemRandomize(src_v1, kSizeUV + off);
1457   MemRandomize(src_a, width * height + off);
1458   memset(dst_y_c, 255, width * height + off);
1459   memset(dst_u_c, 255, kSizeUV + off);
1460   memset(dst_v_c, 255, kSizeUV + off);
1461   memset(dst_y_opt, 255, width * height + off);
1462   memset(dst_u_opt, 255, kSizeUV + off);
1463   memset(dst_v_opt, 255, kSizeUV + off);
1464 
1465   MaskCpuFlags(disable_cpu_flags);
1466   I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1467             kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1468             src_v1 + off, kStrideUV, src_a + off, width, dst_y_c + off, width,
1469             dst_u_c + off, kStrideUV, dst_v_c + off, kStrideUV, width,
1470             invert * height);
1471   MaskCpuFlags(benchmark_cpu_info);
1472   for (int i = 0; i < benchmark_iterations; ++i) {
1473     I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1474               kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1475               src_v1 + off, kStrideUV, src_a + off, width, dst_y_opt + off,
1476               width, dst_u_opt + off, kStrideUV, dst_v_opt + off, kStrideUV,
1477               width, invert * height);
1478   }
1479   for (int i = 0; i < width * height; ++i) {
1480     EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]);
1481   }
1482   for (int i = 0; i < kSizeUV; ++i) {
1483     EXPECT_EQ(dst_u_c[i + off], dst_u_opt[i + off]);
1484     EXPECT_EQ(dst_v_c[i + off], dst_v_opt[i + off]);
1485   }
1486   free_aligned_buffer_page_end(src_y0);
1487   free_aligned_buffer_page_end(src_u0);
1488   free_aligned_buffer_page_end(src_v0);
1489   free_aligned_buffer_page_end(src_y1);
1490   free_aligned_buffer_page_end(src_u1);
1491   free_aligned_buffer_page_end(src_v1);
1492   free_aligned_buffer_page_end(src_a);
1493   free_aligned_buffer_page_end(dst_y_c);
1494   free_aligned_buffer_page_end(dst_u_c);
1495   free_aligned_buffer_page_end(dst_v_c);
1496   free_aligned_buffer_page_end(dst_y_opt);
1497   free_aligned_buffer_page_end(dst_u_opt);
1498   free_aligned_buffer_page_end(dst_v_opt);
1499 }
1500 
// I420Blend on a benchmark-sized image with no pointer offset.
TEST_F(LibYUVPlanarTest, I420Blend_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// I420Blend with every plane pointer offset by one byte.
TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1509 
1510 // TODO(fbarchard): DISABLED because _Any uses C.  Avoid C and re-enable.
TEST_F(LibYUVPlanarTest, DISABLED_I420Blend_Any) {
  // Image one pixel wider than the benchmark width, no pointer offset.
  const int kInvert = +1;
  const int kOffset = 0;
  TestI420Blend(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// I420Blend with a negated height and no pointer offset.
TEST_F(LibYUVPlanarTest, I420Blend_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1519 
// Checks the ARGB affine row function with a horizontal step of 0.75 source
// pixels per destination pixel, and, where an SSE2 version is compiled in and
// the CPU reports SSE2, checks that version against the C result and
// benchmarks it.
TEST_F(LibYUVPlanarTest, TestAffine) {
  SIMD_ALIGNED(uint8_t orig_pixels_0[1280][4]);
  SIMD_ALIGNED(uint8_t interpolate_pixels_C[1280][4]);

  // Every channel of source pixel i holds i (wrapped to 8 bits), so an output
  // byte identifies the source pixel it was copied from.
  for (int i = 0; i < 1280; ++i) {
    for (int j = 0; j < 4; ++j) {
      orig_pixels_0[i][j] = i;
    }
  }

  // NOTE(review): presumably {u, v, du, dv}; the assertions below are
  // consistent with starting at pixel 0 and advancing 0.75 pixels per output.
  float uv_step[4] = {0.f, 0.f, 0.75f, 0.f};

  ARGBAffineRow_C(&orig_pixels_0[0][0], 0, &interpolate_pixels_C[0][0], uv_step,
                  1280);
  EXPECT_EQ(0u, interpolate_pixels_C[0][0]);
  EXPECT_EQ(96u, interpolate_pixels_C[128][0]);   // 128 * 0.75
  EXPECT_EQ(191u, interpolate_pixels_C[255][3]);  // 255 * 0.75, truncated

#if defined(HAS_ARGBAFFINEROW_SSE2)
  // The SSE2 row function is called directly, so every call to it (the
  // correctness check as well as the benchmark loop) sits behind the runtime
  // CPU check.
  if (TestCpuFlag(kCpuHasSSE2)) {
    SIMD_ALIGNED(uint8_t interpolate_pixels_Opt[1280][4]);
    ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
                       uv_step, 1280);
    EXPECT_EQ(0,
              memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4));

    for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
      ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0],
                         uv_step, 1280);
    }
  }
#endif
}
1553 
// Copies the interior of a bordered plane with CopyPlane, once with
// optimizations masked off and once with them enabled, and expects the two
// destination buffers to be byte-identical.
TEST_F(LibYUVPlanarTest, TestCopyPlane) {
  const int yw = benchmark_width_;
  const int yh = benchmark_height_;
  const int b = 12;  // Border, in bytes/rows, on every side of the image.
  const int padded_width = yw + b * 2;

  const int y_plane_size = padded_width * (yh + b * 2);
  align_buffer_page_end(orig_y, y_plane_size);
  align_buffer_page_end(dst_c, y_plane_size);
  align_buffer_page_end(dst_opt, y_plane_size);

  memset(orig_y, 0, y_plane_size);
  memset(dst_c, 0, y_plane_size);
  memset(dst_opt, 0, y_plane_size);

  // Randomize only the interior of the source; the border stays zero.
  for (int row = b; row < (yh + b); ++row) {
    uint8_t* line = orig_y + row * padded_width;
    for (int col = b; col < (yw + b); ++col) {
      line[col] = fastrand() & 0xff;
    }
  }

  // Give both destinations the same random 7-bit background.
  for (int n = 0; n < y_plane_size; ++n) {
    const uint8_t random_number = fastrand() & 0x7f;
    dst_c[n] = random_number;
    dst_opt[n] = random_number;
  }

  const int y_off = b * padded_width + b;  // First interior byte.
  const int y_st = padded_width;           // Source row stride.
  // NOTE(review): the destination stride is 8 while the copied width is yw,
  // so destination rows overlap whenever yw > 8.  Looks unintended (y_st
  // would be the natural value) - confirm before changing.
  const int stride = 8;

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    CopyPlane(orig_y + y_off, y_st, dst_c + y_off, stride, yw, yh);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    CopyPlane(orig_y + y_off, y_st, dst_opt + y_off, stride, yw, yh);
  }

  // Count mismatching bytes across the whole buffer, border included.
  int err = 0;
  for (int n = 0; n < y_plane_size; ++n) {
    err += (dst_c[n] != dst_opt[n]) ? 1 : 0;
  }

  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);

  EXPECT_EQ(0, err);
}
1613 
// Copies a tightly packed plane (stride == width) with CopyPlane, with
// optimizations masked off and then enabled, and compares the results.
TEST_F(LibYUVPlanarTest, CopyPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int y_plane_size = kWidth * kHeight;
  align_buffer_page_end(orig_y, y_plane_size);
  align_buffer_page_end(dst_c, y_plane_size);
  align_buffer_page_end(dst_opt, y_plane_size);

  MemRandomize(orig_y, y_plane_size);
  // Different fill values so a copy that writes nothing is detected.
  memset(dst_c, 1, y_plane_size);
  memset(dst_opt, 2, y_plane_size);

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    CopyPlane(orig_y, kWidth, dst_c, kWidth, kWidth, kHeight);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    CopyPlane(orig_y, kWidth, dst_opt, kWidth, kWidth, kHeight);
  }

  for (int i = 0; i < y_plane_size; ++i) {
    EXPECT_EQ(dst_c[i], dst_opt[i]);
  }

  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);
}
1647 
// Verifies that copying a rect with a zero height or width does not touch
// source or destination memory, with optimizations masked off and enabled.
TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) {
  uint8_t src = 42;
  uint8_t dst = 0;

  // Each row is {src_stride, dst_stride, width, height}; every rect is empty.
  static const int kEmptyRects[3][4] = {
      {0, 0, 0, 0},
      {1, 1, 1, 0},
      {1, 1, 0, 1},
  };
  // First pass runs with optimizations disabled, second with them enabled.
  const int cpu_masks[2] = {disable_cpu_flags_, benchmark_cpu_info_};

  for (int pass = 0; pass < 2; ++pass) {
    MaskCpuFlags(cpu_masks[pass]);
    for (int r = 0; r < 3; ++r) {
      CopyPlane(&src, kEmptyRects[r][0], &dst, kEmptyRects[r][1],
                kEmptyRects[r][2], kEmptyRects[r][3]);
      EXPECT_EQ(src, 42);
      EXPECT_EQ(dst, 0);
    }
  }
}
1682 
// Runs DetilePlane on random tiled input with optimizations masked off and
// then enabled, and expects identical linear output.
TEST_F(LibYUVPlanarTest, TestDetilePlane) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;

  // The source is tiled, so round both dimensions up to a multiple of 16 to
  // allocate whole tiles.
  const int tile_width = (kWidth + 15) & ~15;
  const int tile_height = (kHeight + 15) & ~15;
  const int tile_plane_size = tile_width * tile_height;
  const int y_plane_size = kWidth * kHeight;
  align_buffer_page_end(tile_y, tile_plane_size);
  align_buffer_page_end(dst_c, y_plane_size);
  align_buffer_page_end(dst_opt, y_plane_size);

  MemRandomize(tile_y, tile_plane_size);
  memset(dst_c, 0, y_plane_size);
  memset(dst_opt, 0, y_plane_size);

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    DetilePlane(tile_y, tile_width, dst_c, kWidth, kWidth, kHeight, 16);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    DetilePlane(tile_y, tile_width, dst_opt, kWidth, kWidth, kHeight, 16);
  }

  for (int i = 0; i < y_plane_size; ++i) {
    EXPECT_EQ(dst_c[i], dst_opt[i]);
  }

  free_aligned_buffer_page_end(tile_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);
}
1721 
// 16-bit variant of the detile test: runs DetilePlane_16 with optimizations
// masked off and then enabled, and compares the outputs byte by byte.
TEST_F(LibYUVPlanarTest, TestDetilePlane_16) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;

  // The source is tiled, so round both dimensions up to a multiple of 16 to
  // allocate whole tiles.  Sizes are in bytes, hence the factor of 2 for
  // uint16_t samples.
  const int tile_width = (kWidth + 15) & ~15;
  const int tile_height = (kHeight + 15) & ~15;
  const int tile_plane_size = tile_width * tile_height * 2;
  const int y_plane_size = kWidth * kHeight * 2;
  align_buffer_page_end(tile_y, tile_plane_size);
  align_buffer_page_end(dst_c, y_plane_size);
  align_buffer_page_end(dst_opt, y_plane_size);

  MemRandomize(tile_y, tile_plane_size);
  memset(dst_c, 0, y_plane_size);
  memset(dst_opt, 0, y_plane_size);

  const uint16_t* tile_y_16 = reinterpret_cast<const uint16_t*>(tile_y);
  uint16_t* dst_c_16 = reinterpret_cast<uint16_t*>(dst_c);
  uint16_t* dst_opt_16 = reinterpret_cast<uint16_t*>(dst_opt);

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    DetilePlane_16(tile_y_16, tile_width, dst_c_16, kWidth, kWidth, kHeight,
                   16);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    DetilePlane_16(tile_y_16, tile_width, dst_opt_16, kWidth, kWidth, kHeight,
                   16);
  }

  for (int i = 0; i < y_plane_size; ++i) {
    EXPECT_EQ(dst_c[i], dst_opt[i]);
  }

  free_aligned_buffer_page_end(tile_y);
  free_aligned_buffer_page_end(dst_c);
  free_aligned_buffer_page_end(dst_opt);
}
1760 
1761 // Compares DetileSplitUV to 2 step Detile + SplitUV
// Checks the one-step DetileSplitUVPlane against a two-step reference that
// first detiles into an intermediate buffer and then splits it into U and V.
TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
  int i, j;

  // orig is tiled.  Allocate enough memory for tiles.
  int tile_width = (benchmark_width_ + 15) & ~15;
  int tile_height = (benchmark_height_ + 15) & ~15;
  int tile_plane_size = tile_width * tile_height;
  const int half_width = (benchmark_width_ + 1) / 2;
  int uv_plane_size = half_width * benchmark_height_;
  // Row stride of the intermediate detiled buffer.  DetilePlane (writer) and
  // SplitUVPlane (reader) must agree on it; previously the writer used
  // benchmark_width_ and the reader tile_width, which differ whenever the
  // width is not a multiple of 16.  detiled_uv holds tile_plane_size bytes,
  // so a stride of tile_width always fits.
  const int detiled_stride = tile_width;
  align_buffer_page_end(tile_uv, tile_plane_size);
  align_buffer_page_end(detiled_uv, tile_plane_size);
  align_buffer_page_end(dst_u_two_stage, uv_plane_size);
  align_buffer_page_end(dst_u_opt, uv_plane_size);
  align_buffer_page_end(dst_v_two_stage, uv_plane_size);
  align_buffer_page_end(dst_v_opt, uv_plane_size);

  MemRandomize(tile_uv, tile_plane_size);
  memset(detiled_uv, 0, tile_plane_size);
  memset(dst_u_two_stage, 0, uv_plane_size);
  memset(dst_u_opt, 0, uv_plane_size);
  memset(dst_v_two_stage, 0, uv_plane_size);
  memset(dst_v_opt, 0, uv_plane_size);

  DetileSplitUVPlane(tile_uv, tile_width, dst_u_opt, half_width, dst_v_opt,
                     half_width, benchmark_width_, benchmark_height_, 16);

  // Benchmark 2 step conversion for comparison.
  for (j = 0; j < benchmark_iterations_; j++) {
    DetilePlane(tile_uv, tile_width, detiled_uv, detiled_stride,
                benchmark_width_, benchmark_height_, 16);
    SplitUVPlane(detiled_uv, detiled_stride, dst_u_two_stage, half_width,
                 dst_v_two_stage, half_width, half_width, benchmark_height_);
  }

  for (i = 0; i < uv_plane_size; ++i) {
    EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]);
    EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
  }

  free_aligned_buffer_page_end(tile_uv);
  free_aligned_buffer_page_end(detiled_uv);
  free_aligned_buffer_page_end(dst_u_two_stage);
  free_aligned_buffer_page_end(dst_u_opt);
  free_aligned_buffer_page_end(dst_v_two_stage);
  free_aligned_buffer_page_end(dst_v_opt);
}
1810 
// Runs DetileSplitUVPlane once with optimizations masked off and then
// benchmark_iterations_ times with them enabled, and compares the U and V
// outputs.
TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kHalfWidth = (kWidth + 1) / 2;

  // The source is tiled, so round both dimensions up to a multiple of 16 to
  // allocate whole tiles.
  const int tile_width = (kWidth + 15) & ~15;
  const int tile_height = (kHeight + 15) & ~15;
  const int tile_plane_size = tile_width * tile_height;
  const int uv_plane_size = kHalfWidth * kHeight;
  align_buffer_page_end(tile_uv, tile_plane_size);
  align_buffer_page_end(dst_u_c, uv_plane_size);
  align_buffer_page_end(dst_u_opt, uv_plane_size);
  align_buffer_page_end(dst_v_c, uv_plane_size);
  align_buffer_page_end(dst_v_opt, uv_plane_size);

  MemRandomize(tile_uv, tile_plane_size);
  memset(dst_u_c, 0, uv_plane_size);
  memset(dst_u_opt, 0, uv_plane_size);
  memset(dst_v_c, 0, uv_plane_size);
  memset(dst_v_opt, 0, uv_plane_size);

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  DetileSplitUVPlane(tile_uv, tile_width, dst_u_c, kHalfWidth, dst_v_c,
                     kHalfWidth, kWidth, kHeight, 16);

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    DetileSplitUVPlane(tile_uv, tile_width, dst_u_opt, kHalfWidth, dst_v_opt,
                       kHalfWidth, kWidth, kHeight, 16);
  }

  for (int i = 0; i < uv_plane_size; ++i) {
    EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);
    EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
  }

  free_aligned_buffer_page_end(tile_uv);
  free_aligned_buffer_page_end(dst_u_c);
  free_aligned_buffer_page_end(dst_u_opt);
  free_aligned_buffer_page_end(dst_v_c);
  free_aligned_buffer_page_end(dst_v_opt);
}
1858 
TestMultiply(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1859 static int TestMultiply(int width,
1860                         int height,
1861                         int benchmark_iterations,
1862                         int disable_cpu_flags,
1863                         int benchmark_cpu_info,
1864                         int invert,
1865                         int off) {
1866   if (width < 1) {
1867     width = 1;
1868   }
1869   const int kBpp = 4;
1870   const int kStride = width * kBpp;
1871   align_buffer_page_end(src_argb_a, kStride * height + off);
1872   align_buffer_page_end(src_argb_b, kStride * height + off);
1873   align_buffer_page_end(dst_argb_c, kStride * height);
1874   align_buffer_page_end(dst_argb_opt, kStride * height);
1875   for (int i = 0; i < kStride * height; ++i) {
1876     src_argb_a[i + off] = (fastrand() & 0xff);
1877     src_argb_b[i + off] = (fastrand() & 0xff);
1878   }
1879   memset(dst_argb_c, 0, kStride * height);
1880   memset(dst_argb_opt, 0, kStride * height);
1881 
1882   MaskCpuFlags(disable_cpu_flags);
1883   ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1884                kStride, width, invert * height);
1885   MaskCpuFlags(benchmark_cpu_info);
1886   for (int i = 0; i < benchmark_iterations; ++i) {
1887     ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride,
1888                  dst_argb_opt, kStride, width, invert * height);
1889   }
1890   int max_diff = 0;
1891   for (int i = 0; i < kStride * height; ++i) {
1892     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1893                        static_cast<int>(dst_argb_opt[i]));
1894     if (abs_diff > max_diff) {
1895       max_diff = abs_diff;
1896     }
1897   }
1898   free_aligned_buffer_page_end(src_argb_a);
1899   free_aligned_buffer_page_end(src_argb_b);
1900   free_aligned_buffer_page_end(dst_argb_c);
1901   free_aligned_buffer_page_end(dst_argb_opt);
1902   return max_diff;
1903 }
1904 
TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) {
  // One pixel wider than the benchmark width; outputs may differ by at most 1.
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff =
      TestMultiply(kWidth, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1911 
TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) {
  // Source pointers are offset by one byte; outputs may differ by at most 1.
  const int kInvert = +1;
  const int kOff = 1;
  const int max_diff =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1918 
TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) {
  // Height is passed negated (invert = -1); outputs may differ by at most 1.
  const int kInvert = -1;
  const int kOff = 0;
  const int max_diff =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1925 
TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) {
  // Baseline: benchmark dimensions, no inversion, no source offset.
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1932 
TestAdd(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1933 static int TestAdd(int width,
1934                    int height,
1935                    int benchmark_iterations,
1936                    int disable_cpu_flags,
1937                    int benchmark_cpu_info,
1938                    int invert,
1939                    int off) {
1940   if (width < 1) {
1941     width = 1;
1942   }
1943   const int kBpp = 4;
1944   const int kStride = width * kBpp;
1945   align_buffer_page_end(src_argb_a, kStride * height + off);
1946   align_buffer_page_end(src_argb_b, kStride * height + off);
1947   align_buffer_page_end(dst_argb_c, kStride * height);
1948   align_buffer_page_end(dst_argb_opt, kStride * height);
1949   for (int i = 0; i < kStride * height; ++i) {
1950     src_argb_a[i + off] = (fastrand() & 0xff);
1951     src_argb_b[i + off] = (fastrand() & 0xff);
1952   }
1953   memset(dst_argb_c, 0, kStride * height);
1954   memset(dst_argb_opt, 0, kStride * height);
1955 
1956   MaskCpuFlags(disable_cpu_flags);
1957   ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1958           kStride, width, invert * height);
1959   MaskCpuFlags(benchmark_cpu_info);
1960   for (int i = 0; i < benchmark_iterations; ++i) {
1961     ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_opt,
1962             kStride, width, invert * height);
1963   }
1964   int max_diff = 0;
1965   for (int i = 0; i < kStride * height; ++i) {
1966     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1967                        static_cast<int>(dst_argb_opt[i]));
1968     if (abs_diff > max_diff) {
1969       max_diff = abs_diff;
1970     }
1971   }
1972   free_aligned_buffer_page_end(src_argb_a);
1973   free_aligned_buffer_page_end(src_argb_b);
1974   free_aligned_buffer_page_end(dst_argb_c);
1975   free_aligned_buffer_page_end(dst_argb_opt);
1976   return max_diff;
1977 }
1978 
TEST_F(LibYUVPlanarTest, ARGBAdd_Any) {
  // One pixel wider than the benchmark width; outputs may differ by at most 1.
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff =
      TestAdd(kWidth, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1985 
TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) {
  // Source pointers are offset by one byte; outputs may differ by at most 1.
  const int kInvert = +1;
  const int kOff = 1;
  const int max_diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1992 
TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) {
  // Height is passed negated (invert = -1); outputs may differ by at most 1.
  const int kInvert = -1;
  const int kOff = 0;
  const int max_diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
1999 
TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) {
  // Baseline: benchmark dimensions, no inversion, no source offset.
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
2006 
TestSubtract(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)2007 static int TestSubtract(int width,
2008                         int height,
2009                         int benchmark_iterations,
2010                         int disable_cpu_flags,
2011                         int benchmark_cpu_info,
2012                         int invert,
2013                         int off) {
2014   if (width < 1) {
2015     width = 1;
2016   }
2017   const int kBpp = 4;
2018   const int kStride = width * kBpp;
2019   align_buffer_page_end(src_argb_a, kStride * height + off);
2020   align_buffer_page_end(src_argb_b, kStride * height + off);
2021   align_buffer_page_end(dst_argb_c, kStride * height);
2022   align_buffer_page_end(dst_argb_opt, kStride * height);
2023   for (int i = 0; i < kStride * height; ++i) {
2024     src_argb_a[i + off] = (fastrand() & 0xff);
2025     src_argb_b[i + off] = (fastrand() & 0xff);
2026   }
2027   memset(dst_argb_c, 0, kStride * height);
2028   memset(dst_argb_opt, 0, kStride * height);
2029 
2030   MaskCpuFlags(disable_cpu_flags);
2031   ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
2032                kStride, width, invert * height);
2033   MaskCpuFlags(benchmark_cpu_info);
2034   for (int i = 0; i < benchmark_iterations; ++i) {
2035     ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride,
2036                  dst_argb_opt, kStride, width, invert * height);
2037   }
2038   int max_diff = 0;
2039   for (int i = 0; i < kStride * height; ++i) {
2040     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2041                        static_cast<int>(dst_argb_opt[i]));
2042     if (abs_diff > max_diff) {
2043       max_diff = abs_diff;
2044     }
2045   }
2046   free_aligned_buffer_page_end(src_argb_a);
2047   free_aligned_buffer_page_end(src_argb_b);
2048   free_aligned_buffer_page_end(dst_argb_c);
2049   free_aligned_buffer_page_end(dst_argb_opt);
2050   return max_diff;
2051 }
2052 
TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) {
  // One pixel wider than the benchmark width; outputs may differ by at most 1.
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff =
      TestSubtract(kWidth, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
2059 
TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) {
  // Source pointers are offset by one byte; outputs may differ by at most 1.
  const int kInvert = +1;
  const int kOff = 1;
  const int max_diff =
      TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
2066 
TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) {
  // Height is passed negated (invert = -1); outputs may differ by at most 1.
  const int kInvert = -1;
  const int kOff = 0;
  const int max_diff =
      TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
2073 
TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) {
  // Baseline: benchmark dimensions, no inversion, no source offset.
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff =
      TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_LE(max_diff, 1);
}
2080 
TestSobel(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)2081 static int TestSobel(int width,
2082                      int height,
2083                      int benchmark_iterations,
2084                      int disable_cpu_flags,
2085                      int benchmark_cpu_info,
2086                      int invert,
2087                      int off) {
2088   if (width < 1) {
2089     width = 1;
2090   }
2091   const int kBpp = 4;
2092   const int kStride = width * kBpp;
2093   align_buffer_page_end(src_argb_a, kStride * height + off);
2094   align_buffer_page_end(dst_argb_c, kStride * height);
2095   align_buffer_page_end(dst_argb_opt, kStride * height);
2096   memset(src_argb_a, 0, kStride * height + off);
2097   for (int i = 0; i < kStride * height; ++i) {
2098     src_argb_a[i + off] = (fastrand() & 0xff);
2099   }
2100   memset(dst_argb_c, 0, kStride * height);
2101   memset(dst_argb_opt, 0, kStride * height);
2102 
2103   MaskCpuFlags(disable_cpu_flags);
2104   ARGBSobel(src_argb_a + off, kStride, dst_argb_c, kStride, width,
2105             invert * height);
2106   MaskCpuFlags(benchmark_cpu_info);
2107   for (int i = 0; i < benchmark_iterations; ++i) {
2108     ARGBSobel(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
2109               invert * height);
2110   }
2111   int max_diff = 0;
2112   for (int i = 0; i < kStride * height; ++i) {
2113     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2114                        static_cast<int>(dst_argb_opt[i]));
2115     if (abs_diff > max_diff) {
2116       max_diff = abs_diff;
2117     }
2118   }
2119   free_aligned_buffer_page_end(src_argb_a);
2120   free_aligned_buffer_page_end(dst_argb_c);
2121   free_aligned_buffer_page_end(dst_argb_opt);
2122   return max_diff;
2123 }
2124 
TEST_F(LibYUVPlanarTest, ARGBSobel_Any) {
  // One pixel wider than the benchmark width; outputs must match exactly.
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff =
      TestSobel(kWidth, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2131 
TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) {
  // Source pointer is offset by one byte; outputs must match exactly.
  const int kInvert = +1;
  const int kOff = 1;
  const int max_diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2138 
TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) {
  // Height is passed negated (invert = -1); outputs must match exactly.
  const int kInvert = -1;
  const int kOff = 0;
  const int max_diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2145 
TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) {
  // Baseline: benchmark dimensions, no inversion, no source offset.
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2152 
TestSobelToPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)2153 static int TestSobelToPlane(int width,
2154                             int height,
2155                             int benchmark_iterations,
2156                             int disable_cpu_flags,
2157                             int benchmark_cpu_info,
2158                             int invert,
2159                             int off) {
2160   if (width < 1) {
2161     width = 1;
2162   }
2163   const int kSrcBpp = 4;
2164   const int kDstBpp = 1;
2165   const int kSrcStride = (width * kSrcBpp + 15) & ~15;
2166   const int kDstStride = (width * kDstBpp + 15) & ~15;
2167   align_buffer_page_end(src_argb_a, kSrcStride * height + off);
2168   align_buffer_page_end(dst_argb_c, kDstStride * height);
2169   align_buffer_page_end(dst_argb_opt, kDstStride * height);
2170   memset(src_argb_a, 0, kSrcStride * height + off);
2171   for (int i = 0; i < kSrcStride * height; ++i) {
2172     src_argb_a[i + off] = (fastrand() & 0xff);
2173   }
2174   memset(dst_argb_c, 0, kDstStride * height);
2175   memset(dst_argb_opt, 0, kDstStride * height);
2176 
2177   MaskCpuFlags(disable_cpu_flags);
2178   ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_c, kDstStride, width,
2179                    invert * height);
2180   MaskCpuFlags(benchmark_cpu_info);
2181   for (int i = 0; i < benchmark_iterations; ++i) {
2182     ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_opt, kDstStride,
2183                      width, invert * height);
2184   }
2185   int max_diff = 0;
2186   for (int i = 0; i < kDstStride * height; ++i) {
2187     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2188                        static_cast<int>(dst_argb_opt[i]));
2189     if (abs_diff > max_diff) {
2190       max_diff = abs_diff;
2191     }
2192   }
2193   free_aligned_buffer_page_end(src_argb_a);
2194   free_aligned_buffer_page_end(dst_argb_c);
2195   free_aligned_buffer_page_end(dst_argb_opt);
2196   return max_diff;
2197 }
2198 
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) {
  // One pixel wider than the benchmark width; outputs must match exactly.
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff =
      TestSobelToPlane(kWidth, benchmark_height_, benchmark_iterations_,
                       disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2205 
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) {
  // Source pointer is offset by one byte; outputs must match exactly.
  const int kInvert = +1;
  const int kOff = 1;
  const int max_diff = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2212 
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) {
  // Height is passed negated (invert = -1); outputs must match exactly.
  const int kInvert = -1;
  const int kOff = 0;
  const int max_diff = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2219 
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) {
  // Baseline: benchmark dimensions, no inversion, no source offset.
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2226 
TestSobelXY(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)2227 static int TestSobelXY(int width,
2228                        int height,
2229                        int benchmark_iterations,
2230                        int disable_cpu_flags,
2231                        int benchmark_cpu_info,
2232                        int invert,
2233                        int off) {
2234   if (width < 1) {
2235     width = 1;
2236   }
2237   const int kBpp = 4;
2238   const int kStride = width * kBpp;
2239   align_buffer_page_end(src_argb_a, kStride * height + off);
2240   align_buffer_page_end(dst_argb_c, kStride * height);
2241   align_buffer_page_end(dst_argb_opt, kStride * height);
2242   memset(src_argb_a, 0, kStride * height + off);
2243   for (int i = 0; i < kStride * height; ++i) {
2244     src_argb_a[i + off] = (fastrand() & 0xff);
2245   }
2246   memset(dst_argb_c, 0, kStride * height);
2247   memset(dst_argb_opt, 0, kStride * height);
2248 
2249   MaskCpuFlags(disable_cpu_flags);
2250   ARGBSobelXY(src_argb_a + off, kStride, dst_argb_c, kStride, width,
2251               invert * height);
2252   MaskCpuFlags(benchmark_cpu_info);
2253   for (int i = 0; i < benchmark_iterations; ++i) {
2254     ARGBSobelXY(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
2255                 invert * height);
2256   }
2257   int max_diff = 0;
2258   for (int i = 0; i < kStride * height; ++i) {
2259     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2260                        static_cast<int>(dst_argb_opt[i]));
2261     if (abs_diff > max_diff) {
2262       max_diff = abs_diff;
2263     }
2264   }
2265   free_aligned_buffer_page_end(src_argb_a);
2266   free_aligned_buffer_page_end(dst_argb_c);
2267   free_aligned_buffer_page_end(dst_argb_opt);
2268   return max_diff;
2269 }
2270 
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) {
  // One pixel wider than the benchmark width; outputs must match exactly.
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOff = 0;
  const int max_diff =
      TestSobelXY(kWidth, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2277 
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) {
  // Source pointer is offset by one byte; outputs must match exactly.
  const int kInvert = +1;
  const int kOff = 1;
  const int max_diff =
      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2284 
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) {
  // Height is passed negated (invert = -1); outputs must match exactly.
  const int kInvert = -1;
  const int kOff = 0;
  const int max_diff =
      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOff);
  EXPECT_EQ(0, max_diff);
}
2291 
// Runs the Sobel XY comparison at exactly the benchmark size with no offset
// and no inversion, and requires an exact match.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) {
  const int diff =
      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
  EXPECT_EQ(0, diff);
}
2298 
TestBlur(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int radius)2299 static int TestBlur(int width,
2300                     int height,
2301                     int benchmark_iterations,
2302                     int disable_cpu_flags,
2303                     int benchmark_cpu_info,
2304                     int invert,
2305                     int off,
2306                     int radius) {
2307   if (width < 1) {
2308     width = 1;
2309   }
2310   const int kBpp = 4;
2311   const int kStride = width * kBpp;
2312   align_buffer_page_end(src_argb_a, kStride * height + off);
2313   align_buffer_page_end(dst_cumsum, width * height * 16);
2314   align_buffer_page_end(dst_argb_c, kStride * height);
2315   align_buffer_page_end(dst_argb_opt, kStride * height);
2316   for (int i = 0; i < kStride * height; ++i) {
2317     src_argb_a[i + off] = (fastrand() & 0xff);
2318   }
2319   memset(dst_cumsum, 0, width * height * 16);
2320   memset(dst_argb_c, 0, kStride * height);
2321   memset(dst_argb_opt, 0, kStride * height);
2322 
2323   MaskCpuFlags(disable_cpu_flags);
2324   ARGBBlur(src_argb_a + off, kStride, dst_argb_c, kStride,
2325            reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
2326            invert * height, radius);
2327   MaskCpuFlags(benchmark_cpu_info);
2328   for (int i = 0; i < benchmark_iterations; ++i) {
2329     ARGBBlur(src_argb_a + off, kStride, dst_argb_opt, kStride,
2330              reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
2331              invert * height, radius);
2332   }
2333   int max_diff = 0;
2334   for (int i = 0; i < kStride * height; ++i) {
2335     int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2336                        static_cast<int>(dst_argb_opt[i]));
2337     if (abs_diff > max_diff) {
2338       max_diff = abs_diff;
2339     }
2340   }
2341   free_aligned_buffer_page_end(src_argb_a);
2342   free_aligned_buffer_page_end(dst_cumsum);
2343   free_aligned_buffer_page_end(dst_argb_c);
2344   free_aligned_buffer_page_end(dst_argb_opt);
2345   return max_diff;
2346 }
2347 
// DISABLED_ARM(name) expands to `name` unless DISABLE_SLOW_TESTS is defined on
// a target that is neither x86_64 nor i386; in that case it expands to
// `DISABLED_name`, a prefix gtest treats as "compiled but not run by default".
#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
#define DISABLED_ARM(name) name
#else
#define DISABLED_ARM(name) DISABLED_##name
#endif
2353 
// Blur radius passed to TestBlur by the ARGBBlur_* tests.
static const int kBlurSize = 55;
// Large-radius blur on an image one pixel wider than the benchmark width;
// the two outputs compared by TestBlur may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Any)) {
  const int kWidth = benchmark_width_ + 1;
  const int diff = TestBlur(kWidth, benchmark_height_, benchmark_iterations_,
                            disable_cpu_flags_, benchmark_cpu_info_,
                            /*invert=*/+1, /*off=*/0, /*radius=*/kBlurSize);
  EXPECT_LE(diff, 1);
}
2361 
// Large-radius blur with the source pointer offset by one byte; the two
// outputs compared by TestBlur may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Unaligned)) {
  const int kOffset = 1;
  const int diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
               kOffset, /*radius=*/kBlurSize);
  EXPECT_LE(diff, 1);
}
2368 
// Large-radius blur with a negated height passed to ARGBBlur; the two
// outputs compared by TestBlur may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Invert)) {
  const int kInvert = -1;
  const int diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, /*off=*/0,
               /*radius=*/kBlurSize);
  EXPECT_LE(diff, 1);
}
2375 
// Large-radius blur at exactly the benchmark size, no offset, no inversion;
// the two outputs compared by TestBlur may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Opt)) {
  const int diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
               /*off=*/0, /*radius=*/kBlurSize);
  EXPECT_LE(diff, 1);
}
2382 
// Blur radius passed to TestBlur by the ARGBBlurSmall_* tests.
static const int kBlurSmallSize = 5;
// Small-radius blur on an image one pixel wider than the benchmark width;
// the two outputs compared by TestBlur may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Any)) {
  const int kWidth = benchmark_width_ + 1;
  const int diff = TestBlur(kWidth, benchmark_height_, benchmark_iterations_,
                            disable_cpu_flags_, benchmark_cpu_info_,
                            /*invert=*/+1, /*off=*/0,
                            /*radius=*/kBlurSmallSize);
  EXPECT_LE(diff, 1);
}
2390 
// Small-radius blur with the source pointer offset by one byte; the two
// outputs compared by TestBlur may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Unaligned)) {
  const int kOffset = 1;
  const int diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
               kOffset, /*radius=*/kBlurSmallSize);
  EXPECT_LE(diff, 1);
}
2397 
// Small-radius blur with a negated height passed to ARGBBlur; the two
// outputs compared by TestBlur may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Invert)) {
  const int kInvert = -1;
  const int diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, /*off=*/0,
               /*radius=*/kBlurSmallSize);
  EXPECT_LE(diff, 1);
}
2404 
// Small-radius blur at exactly the benchmark size, no offset, no inversion;
// the two outputs compared by TestBlur may differ by at most 1 per byte.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Opt)) {
  const int diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
               /*off=*/0, /*radius=*/kBlurSmallSize);
  EXPECT_LE(diff, 1);
}
2411 
TEST_F(LibYUVPlanarTest,DISABLED_ARM (TestARGBPolynomial))2412 TEST_F(LibYUVPlanarTest, DISABLED_ARM(TestARGBPolynomial)) {
2413   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
2414   SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
2415   SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
2416   memset(orig_pixels, 0, sizeof(orig_pixels));
2417 
2418   SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = {
2419       0.94230f,  -3.03300f,    -2.92500f,    0.f,  // C0
2420       0.584500f, 1.112000f,    1.535000f,    1.f,  // C1 x
2421       0.001313f, -0.002503f,   -0.004496f,   0.f,  // C2 x * x
2422       0.0f,      0.000006965f, 0.000008781f, 0.f,  // C3 x * x * x
2423   };
2424 
2425   // Test blue
2426   orig_pixels[0][0] = 255u;
2427   orig_pixels[0][1] = 0u;
2428   orig_pixels[0][2] = 0u;
2429   orig_pixels[0][3] = 128u;
2430   // Test green
2431   orig_pixels[1][0] = 0u;
2432   orig_pixels[1][1] = 255u;
2433   orig_pixels[1][2] = 0u;
2434   orig_pixels[1][3] = 0u;
2435   // Test red
2436   orig_pixels[2][0] = 0u;
2437   orig_pixels[2][1] = 0u;
2438   orig_pixels[2][2] = 255u;
2439   orig_pixels[2][3] = 255u;
2440   // Test white
2441   orig_pixels[3][0] = 255u;
2442   orig_pixels[3][1] = 255u;
2443   orig_pixels[3][2] = 255u;
2444   orig_pixels[3][3] = 255u;
2445   // Test color
2446   orig_pixels[4][0] = 16u;
2447   orig_pixels[4][1] = 64u;
2448   orig_pixels[4][2] = 192u;
2449   orig_pixels[4][3] = 224u;
2450   // Do 16 to test asm version.
2451   ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
2452                  &kWarmifyPolynomial[0], 16, 1);
2453   EXPECT_EQ(235u, dst_pixels_opt[0][0]);
2454   EXPECT_EQ(0u, dst_pixels_opt[0][1]);
2455   EXPECT_EQ(0u, dst_pixels_opt[0][2]);
2456   EXPECT_EQ(128u, dst_pixels_opt[0][3]);
2457   EXPECT_EQ(0u, dst_pixels_opt[1][0]);
2458   EXPECT_EQ(233u, dst_pixels_opt[1][1]);
2459   EXPECT_EQ(0u, dst_pixels_opt[1][2]);
2460   EXPECT_EQ(0u, dst_pixels_opt[1][3]);
2461   EXPECT_EQ(0u, dst_pixels_opt[2][0]);
2462   EXPECT_EQ(0u, dst_pixels_opt[2][1]);
2463   EXPECT_EQ(241u, dst_pixels_opt[2][2]);
2464   EXPECT_EQ(255u, dst_pixels_opt[2][3]);
2465   EXPECT_EQ(235u, dst_pixels_opt[3][0]);
2466   EXPECT_EQ(233u, dst_pixels_opt[3][1]);
2467   EXPECT_EQ(241u, dst_pixels_opt[3][2]);
2468   EXPECT_EQ(255u, dst_pixels_opt[3][3]);
2469   EXPECT_EQ(10u, dst_pixels_opt[4][0]);
2470   EXPECT_EQ(59u, dst_pixels_opt[4][1]);
2471   EXPECT_EQ(188u, dst_pixels_opt[4][2]);
2472   EXPECT_EQ(224u, dst_pixels_opt[4][3]);
2473 
2474   for (int i = 0; i < 1280; ++i) {
2475     orig_pixels[i][0] = i;
2476     orig_pixels[i][1] = i / 2;
2477     orig_pixels[i][2] = i / 3;
2478     orig_pixels[i][3] = i;
2479   }
2480 
2481   MaskCpuFlags(disable_cpu_flags_);
2482   ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
2483                  &kWarmifyPolynomial[0], 1280, 1);
2484   MaskCpuFlags(benchmark_cpu_info_);
2485 
2486   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
2487     ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
2488                    &kWarmifyPolynomial[0], 1280, 1);
2489   }
2490 
2491   for (int i = 0; i < 1280; ++i) {
2492     EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
2493     EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
2494     EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
2495     EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
2496   }
2497 }
2498 
TestHalfFloatPlane(int benchmark_width,int benchmark_height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,float scale,int mask)2499 int TestHalfFloatPlane(int benchmark_width,
2500                        int benchmark_height,
2501                        int benchmark_iterations,
2502                        int disable_cpu_flags,
2503                        int benchmark_cpu_info,
2504                        float scale,
2505                        int mask) {
2506   int i, j;
2507   const int y_plane_size = benchmark_width * benchmark_height * 2;
2508 
2509   align_buffer_page_end(orig_y, y_plane_size * 3);
2510   uint8_t* dst_opt = orig_y + y_plane_size;
2511   uint8_t* dst_c = orig_y + y_plane_size * 2;
2512 
2513   MemRandomize(orig_y, y_plane_size);
2514   memset(dst_c, 0, y_plane_size);
2515   memset(dst_opt, 1, y_plane_size);
2516 
2517   for (i = 0; i < y_plane_size / 2; ++i) {
2518     reinterpret_cast<uint16_t*>(orig_y)[i] &= mask;
2519   }
2520 
2521   // Disable all optimizations.
2522   MaskCpuFlags(disable_cpu_flags);
2523   for (j = 0; j < benchmark_iterations; j++) {
2524     HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
2525                    reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2,
2526                    scale, benchmark_width, benchmark_height);
2527   }
2528 
2529   // Enable optimizations.
2530   MaskCpuFlags(benchmark_cpu_info);
2531   for (j = 0; j < benchmark_iterations; j++) {
2532     HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
2533                    reinterpret_cast<uint16_t*>(dst_opt), benchmark_width * 2,
2534                    scale, benchmark_width, benchmark_height);
2535   }
2536 
2537   int max_diff = 0;
2538   for (i = 0; i < y_plane_size / 2; ++i) {
2539     int abs_diff =
2540         abs(static_cast<int>(reinterpret_cast<uint16_t*>(dst_c)[i]) -
2541             static_cast<int>(reinterpret_cast<uint16_t*>(dst_opt)[i]));
2542     if (abs_diff > max_diff) {
2543       max_diff = abs_diff;
2544     }
2545   }
2546 
2547   free_aligned_buffer_page_end(orig_y);
2548   return max_diff;
2549 }
2550 
#if defined(__arm__)
// Reads the FPSCR register, sets bit 24 (0x1000000) and writes it back.
// NOTE(review): bit 24 is presumably the flush-to-zero (FZ) control, as the
// function name suggests - confirm against the ARM architecture manual.
static void EnableFlushDenormalToZero(void) {
  uint32_t fpscr_bits;
  __asm__ __volatile__(
      "vmrs   %0, fpscr         \n"
      "orr    %0, %0, #0x1000000        \n"
      "vmsr   fpscr, %0         \n"
      : "=r"(fpscr_bits)
      :
      : "memory");
}
#endif
2561 
// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
// exponent to be less than 0.  15 - log2(65536) = -1.  This shouldn't normally
// happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.
2565 
// Full 16 bit input with a scale of 1/65536; the two outputs compared by
// TestHalfFloatPlane must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
// 32 bit arm rounding on denormal case is off by 1 compared to C.
#if defined(__arm__)
  EnableFlushDenormalToZero();
#endif
  const float kScale = 1.0f / 65536.0f;
  const int kMask = 65535;
  const int max_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, max_diff);
}
2576 
// Full 16 bit input with a scale of 1; the two outputs compared by
// TestHalfFloatPlane may differ by at most 1.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
  const float kScale = 1.0f;
  const int kMask = 65535;
  const int max_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_LE(max_diff, 1);
}
2583 
// Full 16 bit input with a scale of 1/4096; the two outputs compared by
// TestHalfFloatPlane must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
  const float kScale = 1.0f / 4096.0f;
  const int kMask = 65535;
  const int max_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, max_diff);
}
2590 
// Input masked to 10 bits with a scale of 1/1024; the two outputs compared
// by TestHalfFloatPlane must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
  const float kScale = 1.0f / 1024.0f;
  const int kMask = 1023;
  const int max_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, max_diff);
}
2597 
// Input masked to 9 bits with a scale of 1/512; the two outputs compared by
// TestHalfFloatPlane must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
  const float kScale = 1.0f / 512.0f;
  const int kMask = 511;
  const int max_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, max_diff);
}
2604 
// Input masked to 12 bits with a scale of 1/4096; the two outputs compared
// by TestHalfFloatPlane must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
  const float kScale = 1.0f / 4096.0f;
  const int kMask = 4095;
  const int max_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, max_diff);
}
2611 
// Input masked to 12 bits with a non-power-of-two scale of 1/4095; the two
// outputs compared by TestHalfFloatPlane must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
  const float kScale = 1.0f / 4095.0f;
  const int kMask = 4095;
  const int max_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, max_diff);
}
2618 
// Input masked to 11 bits (0..2047) with a scale of 1; the two outputs
// compared by TestHalfFloatPlane must match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {
  const float kScale = 1.0f;
  const int kMask = 2047;
  const int max_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, max_diff);
}
2625 
// Input masked to 12 bits with a scale of 1; the two outputs compared by
// TestHalfFloatPlane may differ by at most 1.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) {
  const float kScale = 1.0f;
  const int kMask = 4095;
  const int max_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_LE(max_diff, 1);
}
2632 
TestByteToFloat(int benchmark_width,int benchmark_height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,float scale)2633 float TestByteToFloat(int benchmark_width,
2634                       int benchmark_height,
2635                       int benchmark_iterations,
2636                       int disable_cpu_flags,
2637                       int benchmark_cpu_info,
2638                       float scale) {
2639   int i, j;
2640   const int y_plane_size = benchmark_width * benchmark_height;
2641 
2642   align_buffer_page_end(orig_y, y_plane_size * (1 + 4 + 4));
2643   float* dst_opt = reinterpret_cast<float*>(orig_y + y_plane_size);
2644   float* dst_c = reinterpret_cast<float*>(orig_y + y_plane_size * 5);
2645 
2646   MemRandomize(orig_y, y_plane_size);
2647   memset(dst_c, 0, y_plane_size * 4);
2648   memset(dst_opt, 1, y_plane_size * 4);
2649 
2650   // Disable all optimizations.
2651   MaskCpuFlags(disable_cpu_flags);
2652   ByteToFloat(orig_y, dst_c, scale, y_plane_size);
2653 
2654   // Enable optimizations.
2655   MaskCpuFlags(benchmark_cpu_info);
2656   for (j = 0; j < benchmark_iterations; j++) {
2657     ByteToFloat(orig_y, dst_opt, scale, y_plane_size);
2658   }
2659 
2660   float max_diff = 0;
2661   for (i = 0; i < y_plane_size; ++i) {
2662     float abs_diff = fabs(dst_c[i] - dst_opt[i]);
2663     if (abs_diff > max_diff) {
2664       max_diff = abs_diff;
2665     }
2666   }
2667 
2668   free_aligned_buffer_page_end(orig_y);
2669   return max_diff;
2670 }
2671 
// ByteToFloat with a scale of 1; the two outputs compared by the
// TestByteToFloat helper must be identical.
TEST_F(LibYUVPlanarTest, TestByteToFloat) {
  const float kScale = 1.0f;
  const float max_diff = TestByteToFloat(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale);
  EXPECT_EQ(0.f, max_diff);
}
2678 
TEST_F(LibYUVPlanarTest,TestARGBLumaColorTable)2679 TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
2680   SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
2681   SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
2682   SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
2683   memset(orig_pixels, 0, sizeof(orig_pixels));
2684 
2685   align_buffer_page_end(lumacolortable, 32768);
2686   int v = 0;
2687   for (int i = 0; i < 32768; ++i) {
2688     lumacolortable[i] = v;
2689     v += 3;
2690   }
2691   // Test blue
2692   orig_pixels[0][0] = 255u;
2693   orig_pixels[0][1] = 0u;
2694   orig_pixels[0][2] = 0u;
2695   orig_pixels[0][3] = 128u;
2696   // Test green
2697   orig_pixels[1][0] = 0u;
2698   orig_pixels[1][1] = 255u;
2699   orig_pixels[1][2] = 0u;
2700   orig_pixels[1][3] = 0u;
2701   // Test red
2702   orig_pixels[2][0] = 0u;
2703   orig_pixels[2][1] = 0u;
2704   orig_pixels[2][2] = 255u;
2705   orig_pixels[2][3] = 255u;
2706   // Test color
2707   orig_pixels[3][0] = 16u;
2708   orig_pixels[3][1] = 64u;
2709   orig_pixels[3][2] = 192u;
2710   orig_pixels[3][3] = 224u;
2711   // Do 16 to test asm version.
2712   ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
2713                      &lumacolortable[0], 16, 1);
2714   EXPECT_EQ(253u, dst_pixels_opt[0][0]);
2715   EXPECT_EQ(0u, dst_pixels_opt[0][1]);
2716   EXPECT_EQ(0u, dst_pixels_opt[0][2]);
2717   EXPECT_EQ(128u, dst_pixels_opt[0][3]);
2718   EXPECT_EQ(0u, dst_pixels_opt[1][0]);
2719   EXPECT_EQ(253u, dst_pixels_opt[1][1]);
2720   EXPECT_EQ(0u, dst_pixels_opt[1][2]);
2721   EXPECT_EQ(0u, dst_pixels_opt[1][3]);
2722   EXPECT_EQ(0u, dst_pixels_opt[2][0]);
2723   EXPECT_EQ(0u, dst_pixels_opt[2][1]);
2724   EXPECT_EQ(253u, dst_pixels_opt[2][2]);
2725   EXPECT_EQ(255u, dst_pixels_opt[2][3]);
2726   EXPECT_EQ(48u, dst_pixels_opt[3][0]);
2727   EXPECT_EQ(192u, dst_pixels_opt[3][1]);
2728   EXPECT_EQ(64u, dst_pixels_opt[3][2]);
2729   EXPECT_EQ(224u, dst_pixels_opt[3][3]);
2730 
2731   for (int i = 0; i < 1280; ++i) {
2732     orig_pixels[i][0] = i;
2733     orig_pixels[i][1] = i / 2;
2734     orig_pixels[i][2] = i / 3;
2735     orig_pixels[i][3] = i;
2736   }
2737 
2738   MaskCpuFlags(disable_cpu_flags_);
2739   ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
2740                      lumacolortable, 1280, 1);
2741   MaskCpuFlags(benchmark_cpu_info_);
2742 
2743   for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
2744     ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
2745                        lumacolortable, 1280, 1);
2746   }
2747   for (int i = 0; i < 1280; ++i) {
2748     EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
2749     EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
2750     EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
2751     EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
2752   }
2753 
2754   free_aligned_buffer_page_end(lumacolortable);
2755 }
2756 
// Runs ARGBCopyAlpha from a random source into two destinations that start
// with identical random contents, once with disable_cpu_flags_ and
// benchmark_iterations_ times with benchmark_cpu_info_, and requires the two
// destinations to end up byte-identical.
TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
  const int kStride = benchmark_width_ * 4;
  const int kSize = benchmark_width_ * benchmark_height_ * 4;
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  MemRandomize(orig_pixels, kSize);
  MemRandomize(dst_pixels_opt, kSize);
  // Both destinations start from the same bytes.
  memcpy(dst_pixels_c, dst_pixels_opt, kSize);

  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyAlpha(orig_pixels, kStride, dst_pixels_c, kStride, benchmark_width_,
                benchmark_height_);

  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    ARGBCopyAlpha(orig_pixels, kStride, dst_pixels_opt, kStride,
                  benchmark_width_, benchmark_height_);
  }

  for (int n = 0; n < kSize; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2784 
// Runs ARGBExtractAlpha with disable_cpu_flags_ and with benchmark_cpu_info_,
// prints the time taken by each, and requires the two single-byte-per-pixel
// outputs to be identical.
TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcStride = benchmark_width_ * 4;
  const int kDstStride = benchmark_width_;
  align_buffer_page_end(src_pixels, kPixels * 4);
  align_buffer_page_end(dst_pixels_opt, kPixels);
  align_buffer_page_end(dst_pixels_c, kPixels);

  MemRandomize(src_pixels, kPixels * 4);
  MemRandomize(dst_pixels_opt, kPixels);
  memcpy(dst_pixels_c, dst_pixels_opt, kPixels);

  // Reference path: one untimed call followed by one timed call.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBExtractAlpha(src_pixels, kSrcStride, dst_pixels_c, kDstStride,
                   benchmark_width_, benchmark_height_);
  const double c_start = get_time();
  ARGBExtractAlpha(src_pixels, kSrcStride, dst_pixels_c, kDstStride,
                   benchmark_width_, benchmark_height_);
  const double c_time = get_time() - c_start;

  // Path under test: one untimed call, then the timed benchmark loop; the
  // reported time is the average per iteration.
  MaskCpuFlags(benchmark_cpu_info_);
  ARGBExtractAlpha(src_pixels, kSrcStride, dst_pixels_opt, kDstStride,
                   benchmark_width_, benchmark_height_);
  const double opt_start = get_time();
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    ARGBExtractAlpha(src_pixels, kSrcStride, dst_pixels_opt, kDstStride,
                     benchmark_width_, benchmark_height_);
  }
  const double opt_time = (get_time() - opt_start) / benchmark_iterations_;

  // Report performance of C vs OPT
  printf("%8d us C - %8d us OPT\n", static_cast<int>(c_time * 1e6),
         static_cast<int>(opt_time * 1e6));
  for (int n = 0; n < kPixels; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(src_pixels);
}
2823 
// Runs ARGBCopyYToAlpha with disable_cpu_flags_ and with benchmark_cpu_info_
// into two destinations that start with identical random contents, prints
// the time taken by each, and requires the destinations to end up
// byte-identical.
TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcStride = benchmark_width_;
  const int kDstStride = benchmark_width_ * 4;
  align_buffer_page_end(orig_pixels, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 4);
  align_buffer_page_end(dst_pixels_c, kPixels * 4);

  MemRandomize(orig_pixels, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 4);
  memcpy(dst_pixels_c, dst_pixels_opt, kPixels * 4);

  // Reference path: one untimed call followed by one timed call.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyYToAlpha(orig_pixels, kSrcStride, dst_pixels_c, kDstStride,
                   benchmark_width_, benchmark_height_);
  const double c_start = get_time();
  ARGBCopyYToAlpha(orig_pixels, kSrcStride, dst_pixels_c, kDstStride,
                   benchmark_width_, benchmark_height_);
  const double c_time = get_time() - c_start;

  // Path under test: one untimed call, then the timed benchmark loop; the
  // reported time is the average per iteration.
  MaskCpuFlags(benchmark_cpu_info_);
  ARGBCopyYToAlpha(orig_pixels, kSrcStride, dst_pixels_opt, kDstStride,
                   benchmark_width_, benchmark_height_);
  const double opt_start = get_time();
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    ARGBCopyYToAlpha(orig_pixels, kSrcStride, dst_pixels_opt, kDstStride,
                     benchmark_width_, benchmark_height_);
  }
  const double opt_time = (get_time() - opt_start) / benchmark_iterations_;

  // Report performance of C vs OPT
  printf("%8d us C - %8d us OPT\n", static_cast<int>(c_time * 1e6),
         static_cast<int>(opt_time * 1e6));
  for (int n = 0; n < kPixels * 4; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2863 
TestARGBRect(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int bpp)2864 static int TestARGBRect(int width,
2865                         int height,
2866                         int benchmark_iterations,
2867                         int disable_cpu_flags,
2868                         int benchmark_cpu_info,
2869                         int invert,
2870                         int off,
2871                         int bpp) {
2872   if (width < 1) {
2873     width = 1;
2874   }
2875   const int kStride = width * bpp;
2876   const int kSize = kStride * height;
2877   const uint32_t v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff);
2878 
2879   align_buffer_page_end(dst_argb_c, kSize + off);
2880   align_buffer_page_end(dst_argb_opt, kSize + off);
2881 
2882   MemRandomize(dst_argb_c + off, kSize);
2883   memcpy(dst_argb_opt + off, dst_argb_c + off, kSize);
2884 
2885   MaskCpuFlags(disable_cpu_flags);
2886   if (bpp == 4) {
2887     ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32);
2888   } else {
2889     SetPlane(dst_argb_c + off, kStride, width, invert * height, v32);
2890   }
2891 
2892   MaskCpuFlags(benchmark_cpu_info);
2893   for (int i = 0; i < benchmark_iterations; ++i) {
2894     if (bpp == 4) {
2895       ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32);
2896     } else {
2897       SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32);
2898     }
2899   }
2900   int max_diff = 0;
2901   for (int i = 0; i < kStride * height; ++i) {
2902     int abs_diff = abs(static_cast<int>(dst_argb_c[i + off]) -
2903                        static_cast<int>(dst_argb_opt[i + off]));
2904     if (abs_diff > max_diff) {
2905       max_diff = abs_diff;
2906     }
2907   }
2908   free_aligned_buffer_page_end(dst_argb_c);
2909   free_aligned_buffer_page_end(dst_argb_opt);
2910   return max_diff;
2911 }
2912 
// ARGBRect fill on an image one pixel wider than the benchmark width; the
// two buffers compared by TestARGBRect must match exactly.
TEST_F(LibYUVPlanarTest, ARGBRect_Any) {
  const int kWidth = benchmark_width_ + 1;
  const int diff =
      TestARGBRect(kWidth, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                   /*off=*/0, /*bpp=*/4);
  EXPECT_EQ(0, diff);
}
2919 
// ARGBRect fill with the destination pointer offset by one byte; the two
// buffers compared by TestARGBRect must match exactly.
TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) {
  const int kOffset = 1;
  const int diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                   kOffset, /*bpp=*/4);
  EXPECT_EQ(0, diff);
}
2926 
// ARGBRect fill with a negated height; the two buffers compared by
// TestARGBRect must match exactly.
TEST_F(LibYUVPlanarTest, ARGBRect_Invert) {
  const int kInvert = -1;
  const int diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert,
                   /*off=*/0, /*bpp=*/4);
  EXPECT_EQ(0, diff);
}
2933 
// ARGBRect fill at exactly the benchmark size, no offset, no inversion; the
// two buffers compared by TestARGBRect must match exactly.
TEST_F(LibYUVPlanarTest, ARGBRect_Opt) {
  const int diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                   /*off=*/0, /*bpp=*/4);
  EXPECT_EQ(0, diff);
}
2940 
// SetPlane fill (one byte per pixel) on an image one pixel wider than the
// benchmark width; the two buffers compared by TestARGBRect must match
// exactly.
TEST_F(LibYUVPlanarTest, SetPlane_Any) {
  const int kWidth = benchmark_width_ + 1;
  const int diff =
      TestARGBRect(kWidth, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                   /*off=*/0, /*bpp=*/1);
  EXPECT_EQ(0, diff);
}
2947 
// SetPlane fill (one byte per pixel) with the destination pointer offset by
// one byte; the two buffers compared by TestARGBRect must match exactly.
TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) {
  const int kOffset = 1;
  const int diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                   kOffset, /*bpp=*/1);
  EXPECT_EQ(0, diff);
}
2954 
// SetPlane fill (one byte per pixel) with a negated height; the two buffers
// compared by TestARGBRect must match exactly.
TEST_F(LibYUVPlanarTest, SetPlane_Invert) {
  const int kInvert = -1;
  const int diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert,
                   /*off=*/0, /*bpp=*/1);
  EXPECT_EQ(0, diff);
}
2961 
// SetPlane fill (one byte per pixel) at exactly the benchmark size, no
// offset, no inversion; the two buffers compared by TestARGBRect must match
// exactly.
TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
  const int diff =
      TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, /*invert=*/+1,
                   /*off=*/0, /*bpp=*/1);
  EXPECT_EQ(0, diff);
}
2968 
// Merges random U and V planes with MergeUVPlane, once with
// disable_cpu_flags_ and benchmark_iterations_ times with
// benchmark_cpu_info_, and requires the two interleaved outputs to be
// byte-identical.
TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kPlaneStride = benchmark_width_;
  const int kMergedStride = benchmark_width_ * 2;
  const int kMergedBytes = kPixels * 2;
  align_buffer_page_end(src_pixels_u, kPixels);
  align_buffer_page_end(src_pixels_v, kPixels);
  align_buffer_page_end(dst_pixels_opt, kMergedBytes);
  align_buffer_page_end(dst_pixels_c, kMergedBytes);

  MemRandomize(src_pixels_u, kPixels);
  MemRandomize(src_pixels_v, kPixels);
  // The destinations start with unrelated random bytes, so every output byte
  // has to be written for the comparison below to pass.
  MemRandomize(dst_pixels_opt, kMergedBytes);
  MemRandomize(dst_pixels_c, kMergedBytes);

  MaskCpuFlags(disable_cpu_flags_);
  MergeUVPlane(src_pixels_u, kPlaneStride, src_pixels_v, kPlaneStride,
               dst_pixels_c, kMergedStride, benchmark_width_,
               benchmark_height_);

  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    MergeUVPlane(src_pixels_u, kPlaneStride, src_pixels_v, kPlaneStride,
                 dst_pixels_opt, kMergedStride, benchmark_width_,
                 benchmark_height_);
  }

  for (int n = 0; n < kMergedBytes; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }

  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3002 
3003 // 16 bit channel split and merge
// Merges random 16 bit U and V planes with MergeUVPlane_16 (final argument
// 12), once with disable_cpu_flags_ and benchmark_iterations_ times with
// benchmark_cpu_info_, and requires the two outputs to be byte-identical.
TEST_F(LibYUVPlanarTest, MergeUVPlane_16_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kPlaneBytes = kPixels * 2;
  const int kMergedBytes = kPixels * 2 * 2;
  const int kPlaneStride = benchmark_width_;
  const int kMergedStride = benchmark_width_ * 2;
  const int kDepth = 12;
  align_buffer_page_end(src_pixels_u, kPlaneBytes);
  align_buffer_page_end(src_pixels_v, kPlaneBytes);
  align_buffer_page_end(dst_pixels_opt, kMergedBytes);
  align_buffer_page_end(dst_pixels_c, kMergedBytes);
  MemRandomize(src_pixels_u, kPlaneBytes);
  MemRandomize(src_pixels_v, kPlaneBytes);
  MemRandomize(dst_pixels_opt, kMergedBytes);
  MemRandomize(dst_pixels_c, kMergedBytes);

  // 16 bit views of the byte buffers.
  const uint16_t* const src_u16 =
      reinterpret_cast<const uint16_t*>(src_pixels_u);
  const uint16_t* const src_v16 =
      reinterpret_cast<const uint16_t*>(src_pixels_v);
  uint16_t* const dst_c16 = reinterpret_cast<uint16_t*>(dst_pixels_c);
  uint16_t* const dst_opt16 = reinterpret_cast<uint16_t*>(dst_pixels_opt);

  MaskCpuFlags(disable_cpu_flags_);
  MergeUVPlane_16(src_u16, kPlaneStride, src_v16, kPlaneStride, dst_c16,
                  kMergedStride, benchmark_width_, benchmark_height_, kDepth);

  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    MergeUVPlane_16(src_u16, kPlaneStride, src_v16, kPlaneStride, dst_opt16,
                    kMergedStride, benchmark_width_, benchmark_height_,
                    kDepth);
  }

  for (int n = 0; n < kMergedBytes; ++n) {
    EXPECT_EQ(dst_pixels_c[n], dst_pixels_opt[n]);
  }
  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3037 
TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
  // Splits a randomized interleaved UV buffer into separate U and V planes.
  // The split runs once with the CPU flags masked by disable_cpu_flags_
  // (outputs in dst_pixels_*_c) and benchmark_iterations_ times with
  // benchmark_cpu_info_ (outputs in dst_pixels_*_opt); the two sets of planes
  // must be identical.
  const int kPlaneStride = benchmark_width_;
  const int kUVStride = benchmark_width_ * 2;
  const int kPlaneSize = benchmark_width_ * benchmark_height_;
  const int kUVSize = kPlaneSize * 2;

  align_buffer_page_end(src_pixels, kUVSize);
  align_buffer_page_end(dst_pixels_u_c, kPlaneSize);
  align_buffer_page_end(dst_pixels_v_c, kPlaneSize);
  align_buffer_page_end(dst_pixels_u_opt, kPlaneSize);
  align_buffer_page_end(dst_pixels_v_opt, kPlaneSize);

  MemRandomize(src_pixels, kUVSize);
  MemRandomize(dst_pixels_u_c, kPlaneSize);
  MemRandomize(dst_pixels_v_c, kPlaneSize);
  MemRandomize(dst_pixels_u_opt, kPlaneSize);
  MemRandomize(dst_pixels_v_opt, kPlaneSize);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  SplitUVPlane(src_pixels, kUVStride, dst_pixels_u_c, kPlaneStride,
               dst_pixels_v_c, kPlaneStride, benchmark_width_,
               benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked pass.
  for (int iteration = 0; iteration < benchmark_iterations_; ++iteration) {
    SplitUVPlane(src_pixels, kUVStride, dst_pixels_u_opt, kPlaneStride,
                 dst_pixels_v_opt, kPlaneStride, benchmark_width_,
                 benchmark_height_);
  }

  for (int i = 0; i < kPlaneSize; ++i) {
    EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]);
    EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_u_c);
  free_aligned_buffer_page_end(dst_pixels_v_c);
  free_aligned_buffer_page_end(dst_pixels_u_opt);
  free_aligned_buffer_page_end(dst_pixels_v_opt);
}
3075 
3076 // 16 bit channel split
TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) {
  // Splits a randomized interleaved UV buffer (2 bytes per sample) into U and
  // V planes at depth 10. The split runs once with the CPU flags masked by
  // disable_cpu_flags_ (outputs in dst_pixels_*_c) and benchmark_iterations_
  // times with benchmark_cpu_info_ (outputs in dst_pixels_*_opt); the two
  // sets of planes must match byte for byte.
  const int kDepth = 10;
  const int kPlaneStride = benchmark_width_;
  const int kUVStride = benchmark_width_ * 2;
  const int kPlaneBytes = benchmark_width_ * benchmark_height_ * 2;
  const int kUVBytes = kPlaneBytes * 2;

  align_buffer_page_end(src_pixels, kUVBytes);
  align_buffer_page_end(dst_pixels_u_c, kPlaneBytes);
  align_buffer_page_end(dst_pixels_v_c, kPlaneBytes);
  align_buffer_page_end(dst_pixels_u_opt, kPlaneBytes);
  align_buffer_page_end(dst_pixels_v_opt, kPlaneBytes);
  MemRandomize(src_pixels, kUVBytes);
  MemRandomize(dst_pixels_u_c, kPlaneBytes);
  MemRandomize(dst_pixels_v_c, kPlaneBytes);
  MemRandomize(dst_pixels_u_opt, kPlaneBytes);
  MemRandomize(dst_pixels_v_opt, kPlaneBytes);

  // 16-bit views of the byte buffers, as expected by SplitUVPlane_16.
  const uint16_t* const src_uv = reinterpret_cast<const uint16_t*>(src_pixels);
  uint16_t* const dst_u_c = reinterpret_cast<uint16_t*>(dst_pixels_u_c);
  uint16_t* const dst_v_c = reinterpret_cast<uint16_t*>(dst_pixels_v_c);
  uint16_t* const dst_u_opt = reinterpret_cast<uint16_t*>(dst_pixels_u_opt);
  uint16_t* const dst_v_opt = reinterpret_cast<uint16_t*>(dst_pixels_v_opt);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  SplitUVPlane_16(src_uv, kUVStride, dst_u_c, kPlaneStride, dst_v_c,
                  kPlaneStride, benchmark_width_, benchmark_height_, kDepth);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked pass.
  for (int iteration = 0; iteration < benchmark_iterations_; ++iteration) {
    SplitUVPlane_16(src_uv, kUVStride, dst_u_opt, kPlaneStride, dst_v_opt,
                    kPlaneStride, benchmark_width_, benchmark_height_, kDepth);
  }

  for (int i = 0; i < kPlaneBytes; ++i) {
    EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]);
    EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]);
  }
  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_u_c);
  free_aligned_buffer_page_end(dst_pixels_v_c);
  free_aligned_buffer_page_end(dst_pixels_u_opt);
  free_aligned_buffer_page_end(dst_pixels_v_opt);
}
3114 
TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
  // Runs SwapUVPlane on a randomized 2-byte-per-pixel buffer: once with the
  // CPU flags masked by disable_cpu_flags_ (output in dst_pixels_c) and
  // benchmark_iterations_ times with benchmark_cpu_info_ (output in
  // dst_pixels_opt). Both outputs must be identical. The buffer size is
  // exactly width * height * 2; no rounding is applied.
  const int kUVStride = benchmark_width_ * 2;
  const int kUVSize = benchmark_width_ * benchmark_height_ * 2;

  align_buffer_page_end(src_pixels, kUVSize);
  align_buffer_page_end(dst_pixels_opt, kUVSize);
  align_buffer_page_end(dst_pixels_c, kUVSize);

  MemRandomize(src_pixels, kUVSize);
  MemRandomize(dst_pixels_opt, kUVSize);
  MemRandomize(dst_pixels_c, kUVSize);

  // Reference pass.
  MaskCpuFlags(disable_cpu_flags_);
  SwapUVPlane(src_pixels, kUVStride, dst_pixels_c, kUVStride,
              benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked pass.
  for (int iteration = 0; iteration < benchmark_iterations_; ++iteration) {
    SwapUVPlane(src_pixels, kUVStride, dst_pixels_opt, kUVStride,
                benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kUVSize; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3144 
TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
  // Round-trips a randomized 3-byte-per-pixel image through SplitRGBPlane and
  // MergeRGBPlane. With the CPU flags masked by disable_cpu_flags_ both steps
  // run once (output in dst_pixels_c). With benchmark_cpu_info_ the split runs
  // once and the merge runs benchmark_iterations_ times (output in
  // dst_pixels_opt). Both merged images must be identical. Buffer sizes are
  // exact multiples of width * height; no rounding is applied.
  const int kPlaneStride = benchmark_width_;
  const int kRGBStride = benchmark_width_ * 3;
  const int kPlaneSize = benchmark_width_ * benchmark_height_;
  const int kRGBSize = kPlaneSize * 3;

  align_buffer_page_end(src_pixels, kRGBSize);
  align_buffer_page_end(tmp_pixels_r, kPlaneSize);
  align_buffer_page_end(tmp_pixels_g, kPlaneSize);
  align_buffer_page_end(tmp_pixels_b, kPlaneSize);
  align_buffer_page_end(dst_pixels_opt, kRGBSize);
  align_buffer_page_end(dst_pixels_c, kRGBSize);

  MemRandomize(src_pixels, kRGBSize);
  MemRandomize(tmp_pixels_r, kPlaneSize);
  MemRandomize(tmp_pixels_g, kPlaneSize);
  MemRandomize(tmp_pixels_b, kPlaneSize);
  MemRandomize(dst_pixels_opt, kRGBSize);
  MemRandomize(dst_pixels_c, kRGBSize);

  // Reference pass: split, then merge.
  MaskCpuFlags(disable_cpu_flags_);
  SplitRGBPlane(src_pixels, kRGBStride, tmp_pixels_r, kPlaneStride,
                tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                benchmark_width_, benchmark_height_);
  MergeRGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                tmp_pixels_b, kPlaneStride, dst_pixels_c, kRGBStride,
                benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Refill the temporary planes before timing the merge.
  SplitRGBPlane(src_pixels, kRGBStride, tmp_pixels_r, kPlaneStride,
                tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                benchmark_width_, benchmark_height_);

  // Benchmarked pass: merge only.
  for (int iteration = 0; iteration < benchmark_iterations_; ++iteration) {
    MergeRGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                  tmp_pixels_b, kPlaneStride, dst_pixels_opt, kRGBStride,
                  benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kRGBSize; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3193 
TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
  // Round-trips a randomized 3-byte-per-pixel image through SplitRGBPlane and
  // MergeRGBPlane. With the CPU flags masked by disable_cpu_flags_ both steps
  // run once (output in dst_pixels_c). With benchmark_cpu_info_ the split runs
  // benchmark_iterations_ times and the merge runs once (output in
  // dst_pixels_opt). Both merged images must be identical. Buffer sizes are
  // exact multiples of width * height; no rounding is applied.
  const int kPlaneStride = benchmark_width_;
  const int kRGBStride = benchmark_width_ * 3;
  const int kPlaneSize = benchmark_width_ * benchmark_height_;
  const int kRGBSize = kPlaneSize * 3;

  align_buffer_page_end(src_pixels, kRGBSize);
  align_buffer_page_end(tmp_pixels_r, kPlaneSize);
  align_buffer_page_end(tmp_pixels_g, kPlaneSize);
  align_buffer_page_end(tmp_pixels_b, kPlaneSize);
  align_buffer_page_end(dst_pixels_opt, kRGBSize);
  align_buffer_page_end(dst_pixels_c, kRGBSize);

  MemRandomize(src_pixels, kRGBSize);
  MemRandomize(tmp_pixels_r, kPlaneSize);
  MemRandomize(tmp_pixels_g, kPlaneSize);
  MemRandomize(tmp_pixels_b, kPlaneSize);
  MemRandomize(dst_pixels_opt, kRGBSize);
  MemRandomize(dst_pixels_c, kRGBSize);

  // Reference pass: split, then merge.
  MaskCpuFlags(disable_cpu_flags_);
  SplitRGBPlane(src_pixels, kRGBStride, tmp_pixels_r, kPlaneStride,
                tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                benchmark_width_, benchmark_height_);
  MergeRGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                tmp_pixels_b, kPlaneStride, dst_pixels_c, kRGBStride,
                benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked pass: split only.
  for (int iteration = 0; iteration < benchmark_iterations_; ++iteration) {
    SplitRGBPlane(src_pixels, kRGBStride, tmp_pixels_r, kPlaneStride,
                  tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                  benchmark_width_, benchmark_height_);
  }
  // Merge the split planes so the result can be compared against the
  // reference image.
  MergeRGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                tmp_pixels_b, kPlaneStride, dst_pixels_opt, kRGBStride,
                benchmark_width_, benchmark_height_);

  for (int i = 0; i < kRGBSize; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3241 
TEST_F(LibYUVPlanarTest, MergeARGBPlane_Opt) {
  // Round-trips a randomized 4-byte-per-pixel image through SplitARGBPlane and
  // MergeARGBPlane using four planes (R, G, B, A). With the CPU flags masked
  // by disable_cpu_flags_ both steps run once (output in dst_pixels_c). With
  // benchmark_cpu_info_ the split runs once and the merge runs
  // benchmark_iterations_ times (output in dst_pixels_opt). Both merged
  // images must be identical.
  const int kPlaneStride = benchmark_width_;
  const int kARGBStride = benchmark_width_ * 4;
  const int kPlaneSize = benchmark_width_ * benchmark_height_;
  const int kARGBSize = kPlaneSize * 4;

  align_buffer_page_end(src_pixels, kARGBSize);
  align_buffer_page_end(tmp_pixels_r, kPlaneSize);
  align_buffer_page_end(tmp_pixels_g, kPlaneSize);
  align_buffer_page_end(tmp_pixels_b, kPlaneSize);
  align_buffer_page_end(tmp_pixels_a, kPlaneSize);
  align_buffer_page_end(dst_pixels_opt, kARGBSize);
  align_buffer_page_end(dst_pixels_c, kARGBSize);

  MemRandomize(src_pixels, kARGBSize);
  MemRandomize(tmp_pixels_r, kPlaneSize);
  MemRandomize(tmp_pixels_g, kPlaneSize);
  MemRandomize(tmp_pixels_b, kPlaneSize);
  MemRandomize(tmp_pixels_a, kPlaneSize);
  MemRandomize(dst_pixels_opt, kARGBSize);
  MemRandomize(dst_pixels_c, kARGBSize);

  // Reference pass: split, then merge.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, kARGBStride, tmp_pixels_r, kPlaneStride,
                 tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                 tmp_pixels_a, kPlaneStride, benchmark_width_,
                 benchmark_height_);
  MergeARGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                 tmp_pixels_b, kPlaneStride, tmp_pixels_a, kPlaneStride,
                 dst_pixels_c, kARGBStride, benchmark_width_,
                 benchmark_height_);

  // Refill the temporary planes before timing the merge.
  MaskCpuFlags(benchmark_cpu_info_);
  SplitARGBPlane(src_pixels, kARGBStride, tmp_pixels_r, kPlaneStride,
                 tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                 tmp_pixels_a, kPlaneStride, benchmark_width_,
                 benchmark_height_);

  // Benchmarked pass: merge only.
  for (int iteration = 0; iteration < benchmark_iterations_; ++iteration) {
    MergeARGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                   tmp_pixels_b, kPlaneStride, tmp_pixels_a, kPlaneStride,
                   dst_pixels_opt, kARGBStride, benchmark_width_,
                   benchmark_height_);
  }

  for (int i = 0; i < kARGBSize; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(tmp_pixels_a);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3295 
TEST_F(LibYUVPlanarTest, SplitARGBPlane_Opt) {
  // Round-trips a randomized 4-byte-per-pixel image through SplitARGBPlane and
  // MergeARGBPlane using four planes (R, G, B, A). With the CPU flags masked
  // by disable_cpu_flags_ both steps run once (output in dst_pixels_c). With
  // benchmark_cpu_info_ the split runs benchmark_iterations_ times and the
  // merge runs once (output in dst_pixels_opt). Both merged images must be
  // identical.
  const int kPlaneStride = benchmark_width_;
  const int kARGBStride = benchmark_width_ * 4;
  const int kPlaneSize = benchmark_width_ * benchmark_height_;
  const int kARGBSize = kPlaneSize * 4;

  align_buffer_page_end(src_pixels, kARGBSize);
  align_buffer_page_end(tmp_pixels_r, kPlaneSize);
  align_buffer_page_end(tmp_pixels_g, kPlaneSize);
  align_buffer_page_end(tmp_pixels_b, kPlaneSize);
  align_buffer_page_end(tmp_pixels_a, kPlaneSize);
  align_buffer_page_end(dst_pixels_opt, kARGBSize);
  align_buffer_page_end(dst_pixels_c, kARGBSize);

  MemRandomize(src_pixels, kARGBSize);
  MemRandomize(tmp_pixels_r, kPlaneSize);
  MemRandomize(tmp_pixels_g, kPlaneSize);
  MemRandomize(tmp_pixels_b, kPlaneSize);
  MemRandomize(tmp_pixels_a, kPlaneSize);
  MemRandomize(dst_pixels_opt, kARGBSize);
  MemRandomize(dst_pixels_c, kARGBSize);

  // Reference pass: split, then merge.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, kARGBStride, tmp_pixels_r, kPlaneStride,
                 tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                 tmp_pixels_a, kPlaneStride, benchmark_width_,
                 benchmark_height_);
  MergeARGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                 tmp_pixels_b, kPlaneStride, tmp_pixels_a, kPlaneStride,
                 dst_pixels_c, kARGBStride, benchmark_width_,
                 benchmark_height_);

  // Benchmarked pass: split only.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iteration = 0; iteration < benchmark_iterations_; ++iteration) {
    SplitARGBPlane(src_pixels, kARGBStride, tmp_pixels_r, kPlaneStride,
                   tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                   tmp_pixels_a, kPlaneStride, benchmark_width_,
                   benchmark_height_);
  }

  // Merge the split planes so the result can be compared against the
  // reference image.
  MergeARGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                 tmp_pixels_b, kPlaneStride, tmp_pixels_a, kPlaneStride,
                 dst_pixels_opt, kARGBStride, benchmark_width_,
                 benchmark_height_);

  for (int i = 0; i < kARGBSize; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(tmp_pixels_a);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3349 
TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) {
  // Round-trips a randomized 4-byte-per-pixel image through SplitARGBPlane and
  // MergeARGBPlane, passing NULL with stride 0 for the alpha plane in every
  // call. With the CPU flags masked by disable_cpu_flags_ both steps run once
  // (output in dst_pixels_c). With benchmark_cpu_info_ the split runs once and
  // the merge runs benchmark_iterations_ times (output in dst_pixels_opt).
  // Both merged images must be identical.
  const int kPlaneStride = benchmark_width_;
  const int kARGBStride = benchmark_width_ * 4;
  const int kPlaneSize = benchmark_width_ * benchmark_height_;
  const int kARGBSize = kPlaneSize * 4;
  uint8_t* const kNoAlphaPlane = NULL;
  const int kNoAlphaStride = 0;

  align_buffer_page_end(src_pixels, kARGBSize);
  align_buffer_page_end(tmp_pixels_r, kPlaneSize);
  align_buffer_page_end(tmp_pixels_g, kPlaneSize);
  align_buffer_page_end(tmp_pixels_b, kPlaneSize);
  align_buffer_page_end(dst_pixels_opt, kARGBSize);
  align_buffer_page_end(dst_pixels_c, kARGBSize);

  MemRandomize(src_pixels, kARGBSize);
  MemRandomize(tmp_pixels_r, kPlaneSize);
  MemRandomize(tmp_pixels_g, kPlaneSize);
  MemRandomize(tmp_pixels_b, kPlaneSize);
  MemRandomize(dst_pixels_opt, kARGBSize);
  MemRandomize(dst_pixels_c, kARGBSize);

  // Reference pass: split, then merge.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, kARGBStride, tmp_pixels_r, kPlaneStride,
                 tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                 kNoAlphaPlane, kNoAlphaStride, benchmark_width_,
                 benchmark_height_);
  MergeARGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                 tmp_pixels_b, kPlaneStride, kNoAlphaPlane, kNoAlphaStride,
                 dst_pixels_c, kARGBStride, benchmark_width_,
                 benchmark_height_);

  // Refill the temporary planes before timing the merge.
  MaskCpuFlags(benchmark_cpu_info_);
  SplitARGBPlane(src_pixels, kARGBStride, tmp_pixels_r, kPlaneStride,
                 tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                 kNoAlphaPlane, kNoAlphaStride, benchmark_width_,
                 benchmark_height_);

  // Benchmarked pass: merge only.
  for (int iteration = 0; iteration < benchmark_iterations_; ++iteration) {
    MergeARGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                   tmp_pixels_b, kPlaneStride, kNoAlphaPlane, kNoAlphaStride,
                   dst_pixels_opt, kARGBStride, benchmark_width_,
                   benchmark_height_);
  }

  for (int i = 0; i < kARGBSize; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3399 
TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
  // Round-trips a randomized 4-byte-per-pixel image through SplitARGBPlane and
  // MergeARGBPlane, passing NULL with stride 0 for the alpha plane in every
  // call. With the CPU flags masked by disable_cpu_flags_ both steps run once
  // (output in dst_pixels_c). With benchmark_cpu_info_ the split runs
  // benchmark_iterations_ times and the merge runs once (output in
  // dst_pixels_opt). Both merged images must be identical.
  const int kPlaneStride = benchmark_width_;
  const int kARGBStride = benchmark_width_ * 4;
  const int kPlaneSize = benchmark_width_ * benchmark_height_;
  const int kARGBSize = kPlaneSize * 4;
  uint8_t* const kNoAlphaPlane = NULL;
  const int kNoAlphaStride = 0;

  align_buffer_page_end(src_pixels, kARGBSize);
  align_buffer_page_end(tmp_pixels_r, kPlaneSize);
  align_buffer_page_end(tmp_pixels_g, kPlaneSize);
  align_buffer_page_end(tmp_pixels_b, kPlaneSize);
  align_buffer_page_end(dst_pixels_opt, kARGBSize);
  align_buffer_page_end(dst_pixels_c, kARGBSize);

  MemRandomize(src_pixels, kARGBSize);
  MemRandomize(tmp_pixels_r, kPlaneSize);
  MemRandomize(tmp_pixels_g, kPlaneSize);
  MemRandomize(tmp_pixels_b, kPlaneSize);
  MemRandomize(dst_pixels_opt, kARGBSize);
  MemRandomize(dst_pixels_c, kARGBSize);

  // Reference pass: split, then merge.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, kARGBStride, tmp_pixels_r, kPlaneStride,
                 tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                 kNoAlphaPlane, kNoAlphaStride, benchmark_width_,
                 benchmark_height_);
  MergeARGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                 tmp_pixels_b, kPlaneStride, kNoAlphaPlane, kNoAlphaStride,
                 dst_pixels_c, kARGBStride, benchmark_width_,
                 benchmark_height_);

  // Benchmarked pass: split only.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iteration = 0; iteration < benchmark_iterations_; ++iteration) {
    SplitARGBPlane(src_pixels, kARGBStride, tmp_pixels_r, kPlaneStride,
                   tmp_pixels_g, kPlaneStride, tmp_pixels_b, kPlaneStride,
                   kNoAlphaPlane, kNoAlphaStride, benchmark_width_,
                   benchmark_height_);
  }

  // Merge the split planes so the result can be compared against the
  // reference image.
  MergeARGBPlane(tmp_pixels_r, kPlaneStride, tmp_pixels_g, kPlaneStride,
                 tmp_pixels_b, kPlaneStride, kNoAlphaPlane, kNoAlphaStride,
                 dst_pixels_opt, kARGBStride, benchmark_width_,
                 benchmark_height_);

  for (int i = 0; i < kARGBSize; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3448 
// Merge 4 channels.
// Defines the test FUNC##Plane_##DEPTH##N. It fills four STYPE source planes
// (R, G, B, A) with random data, zeroes two DTYPE destinations, and calls
// FUNC##Plane once with the CPU flags masked by disable_cpu_flags_ and
// benchmark_iterations_ times with benchmark_cpu_info_. The two destinations
// are then compared element by element.
//   W1280 - expression used as the test width.
//   N     - suffix appended to the generated test name.
//   NEG   - sign token written in front of benchmark_height_ in each call.
//   OFF   - byte offset added to every source allocation and source pointer.
#define TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF)       \
  TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) {                         \
    const int kTestWidth = W1280;                                            \
    const int kTestPixels = kTestWidth * benchmark_height_;                  \
    const int kDstStride = kTestWidth * 4;                                   \
    const int kSignedHeight = NEG benchmark_height_;                         \
    const size_t kSrcBytes = kTestPixels * sizeof(STYPE) + OFF;              \
    const size_t kDstBytes = kTestPixels * 4 * sizeof(DTYPE);                \
    align_buffer_page_end(src_memory_r, kSrcBytes);                          \
    align_buffer_page_end(src_memory_g, kSrcBytes);                          \
    align_buffer_page_end(src_memory_b, kSrcBytes);                          \
    align_buffer_page_end(src_memory_a, kSrcBytes);                          \
    align_buffer_page_end(dst_memory_c, kDstBytes);                          \
    align_buffer_page_end(dst_memory_opt, kDstBytes);                        \
    MemRandomize(src_memory_r, kSrcBytes);                                   \
    MemRandomize(src_memory_g, kSrcBytes);                                   \
    MemRandomize(src_memory_b, kSrcBytes);                                   \
    MemRandomize(src_memory_a, kSrcBytes);                                   \
    memset(dst_memory_c, 0, kDstBytes);                                      \
    memset(dst_memory_opt, 0, kDstBytes);                                    \
    STYPE* const src_r = reinterpret_cast<STYPE*>(src_memory_r + OFF);       \
    STYPE* const src_g = reinterpret_cast<STYPE*>(src_memory_g + OFF);       \
    STYPE* const src_b = reinterpret_cast<STYPE*>(src_memory_b + OFF);       \
    STYPE* const src_a = reinterpret_cast<STYPE*>(src_memory_a + OFF);       \
    DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c);            \
    DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt);        \
    MaskCpuFlags(disable_cpu_flags_);                                        \
    FUNC##Plane(src_r, kTestWidth, src_g, kTestWidth, src_b, kTestWidth,     \
                src_a, kTestWidth, dst_pixels_c, kDstStride, kTestWidth,     \
                kSignedHeight, DEPTH);                                       \
    MaskCpuFlags(benchmark_cpu_info_);                                       \
    for (int iteration = 0; iteration < benchmark_iterations_;               \
         ++iteration) {                                                      \
      FUNC##Plane(src_r, kTestWidth, src_g, kTestWidth, src_b, kTestWidth,   \
                  src_a, kTestWidth, dst_pixels_opt, kDstStride, kTestWidth, \
                  kSignedHeight, DEPTH);                                     \
    }                                                                        \
    for (int i = 0; i < kTestPixels * 4; ++i) {                              \
      EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);                         \
    }                                                                        \
    free_aligned_buffer_page_end(src_memory_r);                              \
    free_aligned_buffer_page_end(src_memory_g);                              \
    free_aligned_buffer_page_end(src_memory_b);                              \
    free_aligned_buffer_page_end(src_memory_a);                              \
    free_aligned_buffer_page_end(dst_memory_c);                              \
    free_aligned_buffer_page_end(dst_memory_opt);                            \
  }
3492 
// Merge 3 channel RGB into 4 channel XRGB with opaque alpha.
// Defines the test FUNC##Plane_Opaque_##DEPTH##N. It fills three STYPE source
// planes (R, G, B) with random data, zeroes two DTYPE destinations, and calls
// FUNC##Plane with NULL and stride 0 in the alpha position: once with the CPU
// flags masked by disable_cpu_flags_ and benchmark_iterations_ times with
// benchmark_cpu_info_. The two destinations are then compared element by
// element.
//   W1280 - expression used as the test width.
//   N     - suffix appended to the generated test name.
//   NEG   - sign token written in front of benchmark_height_ in each call.
//   OFF   - byte offset added to every source allocation and source pointer.
#define TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF)    \
  TEST_F(LibYUVPlanarTest, FUNC##Plane_Opaque_##DEPTH##N) {                \
    const int kTestWidth = W1280;                                          \
    const int kTestPixels = kTestWidth * benchmark_height_;                \
    const int kDstStride = kTestWidth * 4;                                 \
    const int kSignedHeight = NEG benchmark_height_;                       \
    const size_t kSrcBytes = kTestPixels * sizeof(STYPE) + OFF;            \
    const size_t kDstBytes = kTestPixels * 4 * sizeof(DTYPE);              \
    align_buffer_page_end(src_memory_r, kSrcBytes);                        \
    align_buffer_page_end(src_memory_g, kSrcBytes);                        \
    align_buffer_page_end(src_memory_b, kSrcBytes);                        \
    align_buffer_page_end(dst_memory_c, kDstBytes);                        \
    align_buffer_page_end(dst_memory_opt, kDstBytes);                      \
    MemRandomize(src_memory_r, kSrcBytes);                                 \
    MemRandomize(src_memory_g, kSrcBytes);                                 \
    MemRandomize(src_memory_b, kSrcBytes);                                 \
    memset(dst_memory_c, 0, kDstBytes);                                    \
    memset(dst_memory_opt, 0, kDstBytes);                                  \
    STYPE* const src_r = reinterpret_cast<STYPE*>(src_memory_r + OFF);     \
    STYPE* const src_g = reinterpret_cast<STYPE*>(src_memory_g + OFF);     \
    STYPE* const src_b = reinterpret_cast<STYPE*>(src_memory_b + OFF);     \
    DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c);          \
    DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt);      \
    MaskCpuFlags(disable_cpu_flags_);                                      \
    FUNC##Plane(src_r, kTestWidth, src_g, kTestWidth, src_b, kTestWidth,   \
                NULL, 0, dst_pixels_c, kDstStride, kTestWidth,             \
                kSignedHeight, DEPTH);                                     \
    MaskCpuFlags(benchmark_cpu_info_);                                     \
    for (int iteration = 0; iteration < benchmark_iterations_;             \
         ++iteration) {                                                    \
      FUNC##Plane(src_r, kTestWidth, src_g, kTestWidth, src_b, kTestWidth, \
                  NULL, 0, dst_pixels_opt, kDstStride, kTestWidth,         \
                  kSignedHeight, DEPTH);                                   \
    }                                                                      \
    for (int i = 0; i < kTestPixels * 4; ++i) {                            \
      EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);                       \
    }                                                                      \
    free_aligned_buffer_page_end(src_memory_r);                            \
    free_aligned_buffer_page_end(src_memory_g);                            \
    free_aligned_buffer_page_end(src_memory_b);                            \
    free_aligned_buffer_page_end(dst_memory_c);                            \
    free_aligned_buffer_page_end(dst_memory_opt);                          \
  }
3532 
3533 #define TESTQPLANARTOP(FUNC, STYPE, DTYPE, DEPTH)                              \
3534   TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \
3535   TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +,  \
3536                   2)                                                           \
3537   TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0)  \
3538   TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)     \
3539   TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +,   \
3540                    0)                                                          \
3541   TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
3542                    2)                                                          \
3543   TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
3544   TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)
3545 
3546 TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 10)
3547 TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 12)
3548 TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 16)
3549 TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 10)
3550 TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 12)
3551 TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 16)
3552 
// Defines the test FUNC##Plane_##DEPTH##N. It fills three STYPE source planes
// (R, G, B) with random data and calls FUNC##Plane once with the CPU flags
// masked by disable_cpu_flags_ and benchmark_iterations_ times with
// benchmark_cpu_info_. The two destinations are pre-filled with different
// byte values (1 and 2) before the calls and are compared element by element
// afterwards.
//   W1280 - expression used as the test width.
//   N     - suffix appended to the generated test name.
//   NEG   - sign token written in front of benchmark_height_ in each call.
//   OFF   - byte offset added to every source allocation and source pointer.
#define TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF)     \
  TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) {                       \
    const int kTestWidth = W1280;                                          \
    const int kTestPixels = kTestWidth * benchmark_height_;                \
    const int kDstStride = kTestWidth * 4;                                 \
    const int kSignedHeight = NEG benchmark_height_;                       \
    const size_t kSrcBytes = kTestPixels * sizeof(STYPE) + OFF;            \
    const size_t kDstBytes = kTestPixels * 4 * sizeof(DTYPE);              \
    align_buffer_page_end(src_memory_r, kSrcBytes);                        \
    align_buffer_page_end(src_memory_g, kSrcBytes);                        \
    align_buffer_page_end(src_memory_b, kSrcBytes);                        \
    align_buffer_page_end(dst_memory_c, kDstBytes);                        \
    align_buffer_page_end(dst_memory_opt, kDstBytes);                      \
    MemRandomize(src_memory_r, kSrcBytes);                                 \
    MemRandomize(src_memory_g, kSrcBytes);                                 \
    MemRandomize(src_memory_b, kSrcBytes);                                 \
    STYPE* const src_r = reinterpret_cast<STYPE*>(src_memory_r + OFF);     \
    STYPE* const src_g = reinterpret_cast<STYPE*>(src_memory_g + OFF);     \
    STYPE* const src_b = reinterpret_cast<STYPE*>(src_memory_b + OFF);     \
    DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c);          \
    DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt);      \
    memset(dst_pixels_c, 1, kDstBytes);                                    \
    memset(dst_pixels_opt, 2, kDstBytes);                                  \
    MaskCpuFlags(disable_cpu_flags_);                                      \
    FUNC##Plane(src_r, kTestWidth, src_g, kTestWidth, src_b, kTestWidth,   \
                dst_pixels_c, kDstStride, kTestWidth, kSignedHeight,       \
                DEPTH);                                                    \
    MaskCpuFlags(benchmark_cpu_info_);                                     \
    for (int iteration = 0; iteration < benchmark_iterations_;             \
         ++iteration) {                                                    \
      FUNC##Plane(src_r, kTestWidth, src_g, kTestWidth, src_b, kTestWidth, \
                  dst_pixels_opt, kDstStride, kTestWidth, kSignedHeight,   \
                  DEPTH);                                                  \
    }                                                                      \
    for (int i = 0; i < kTestPixels * 4; ++i) {                            \
      EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);                       \
    }                                                                      \
    free_aligned_buffer_page_end(src_memory_r);                            \
    free_aligned_buffer_page_end(src_memory_g);                            \
    free_aligned_buffer_page_end(src_memory_b);                            \
    free_aligned_buffer_page_end(dst_memory_c);                            \
    free_aligned_buffer_page_end(dst_memory_opt);                          \
  }
3591 
// Expands to four TESTTPLANARTOPI instantiations for one FUNC/DEPTH pair:
//   _Any       - uses benchmark_width_ + 1 as the width argument.
//   _Unaligned - passes 2 as the last argument (presumably a byte offset that
//                misaligns the buffers - TODO confirm in TESTTPLANARTOPI).
//   _Invert    - passes '-' as the sign token; it appears to be the NEG that
//                prefixes benchmark_height_ in the FUNC##Plane calls.
//   _Opt       - benchmark_width_, '+' sign, last argument 0.
#define TESTTPLANARTOP(FUNC, STYPE, DTYPE, DEPTH)                              \
  TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \
  TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +,  \
                  2)                                                           \
  TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0)  \
  TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)

// MergeXR30 with STYPE uint16_t and DTYPE uint8_t, at depths 10, 12 and 16.
TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 10)
TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 12)
TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16)
3602 
3603 // TODO(fbarchard): improve test for platforms and cpu detect
3604 #ifdef HAS_MERGEUVROW_16_AVX2
// Row-level check: MergeUVRow_16_AVX2 must write exactly the same bytes as
// MergeUVRow_16_C. When TestCpuFlag does not report AVX2 the C row is run in
// the timed loop instead, so the comparison is C against C.
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
  // Pixel count, rounded up to the next multiple of 8.
  const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7;
  const int kPlaneBytes = kPixels * 2;   // 2 bytes per sample
  const int kUVBytes = kPixels * 2 * 2;  // two planes merged, 2 bytes each

  align_buffer_page_end(src_pixels_u, kPlaneBytes);
  align_buffer_page_end(src_pixels_v, kPlaneBytes);
  align_buffer_page_end(dst_pixels_uv_opt, kUVBytes);
  align_buffer_page_end(dst_pixels_uv_c, kUVBytes);

  MemRandomize(src_pixels_u, kPlaneBytes);
  MemRandomize(src_pixels_v, kPlaneBytes);
  // Distinct fill values: an output byte that is never written will differ.
  memset(dst_pixels_uv_opt, 0, kUVBytes);
  memset(dst_pixels_uv_c, 1, kUVBytes);

  const uint16_t* u16 = reinterpret_cast<const uint16_t*>(src_pixels_u);
  const uint16_t* v16 = reinterpret_cast<const uint16_t*>(src_pixels_v);
  uint16_t* uv16_c = reinterpret_cast<uint16_t*>(dst_pixels_uv_c);
  uint16_t* uv16_opt = reinterpret_cast<uint16_t*>(dst_pixels_uv_opt);

  // Reference result from the C row.
  MergeUVRow_16_C(u16, v16, uv16_c, 16, kPixels);

  const int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    if (!has_avx2) {
      MergeUVRow_16_C(u16, v16, uv16_opt, 16, kPixels);
    } else {
      MergeUVRow_16_AVX2(u16, v16, uv16_opt, 16, kPixels);
    }
  }

  // Byte-wise comparison over the whole merged output.
  for (int i = 0; i < kUVBytes; ++i) {
    EXPECT_EQ(dst_pixels_uv_opt[i], dst_pixels_uv_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_uv_opt);
  free_aligned_buffer_page_end(dst_pixels_uv_c);
}
3647 #endif
3648 
3649 // TODO(fbarchard): Improve test for more platforms.
3650 #ifdef HAS_MULTIPLYROW_16_AVX2
// Row-level check: MultiplyRow_16_AVX2 must match MultiplyRow_16_C byte for
// byte when called with a scale argument of 64. The C row is used in the
// timed loop when TestCpuFlag does not report AVX2.
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
  // Pixel count, rounded up to the next multiple of 32.
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kBytes = kPixels * 2;  // 2 bytes per sample

  align_buffer_page_end(src_pixels_y, kBytes);
  align_buffer_page_end(dst_pixels_y_opt, kBytes);
  align_buffer_page_end(dst_pixels_y_c, kBytes);

  MemRandomize(src_pixels_y, kBytes);
  // Distinct fill values: an output byte that is never written will differ.
  memset(dst_pixels_y_opt, 0, kBytes);
  memset(dst_pixels_y_c, 1, kBytes);

  const uint16_t* src16 = reinterpret_cast<const uint16_t*>(src_pixels_y);
  uint16_t* ref16 = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* opt16 = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  // Reference result from the C row.
  MultiplyRow_16_C(src16, ref16, 64, kPixels);

  const int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    if (!has_avx2) {
      MultiplyRow_16_C(src16, opt16, 64, kPixels);
    } else {
      MultiplyRow_16_AVX2(src16, opt16, 64, kPixels);
    }
  }

  for (int i = 0; i < kBytes; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3687 #endif  // HAS_MULTIPLYROW_16_AVX2
3688 
// Plane-level check of Convert16To8Plane: the output produced with
// disable_cpu_flags_ masked in must equal the output produced with
// benchmark_cpu_info_, for random 16-bit input and a scale argument of 16384.
TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kScale = 16384;

  align_buffer_page_end(src_pixels_y, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kPixels * 2);
  // Distinct fill values: an output byte that is never written will differ.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  const uint16_t* src16 = reinterpret_cast<const uint16_t*>(src_pixels_y);

  // Reference pass with the test's "disabled" CPU flag mask.
  MaskCpuFlags(disable_cpu_flags_);
  Convert16To8Plane(src16, benchmark_width_, dst_pixels_y_c, benchmark_width_,
                    kScale, benchmark_width_, benchmark_height_);

  // Timed passes with the benchmark CPU flag mask.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    Convert16To8Plane(src16, benchmark_width_, dst_pixels_y_opt,
                      benchmark_width_, kScale, benchmark_width_,
                      benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3719 
// Plane-level check of YUY2ToY: the output produced with disable_cpu_flags_
// must equal the output produced with benchmark_cpu_info_, for random input.
TEST_F(LibYUVPlanarTest, YUY2ToY) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcStride = benchmark_width_ * 2;  // source uses 2 bytes per pixel

  align_buffer_page_end(src_pixels_y, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kPixels * 2);
  // Distinct fill values: an output byte that is never written will differ.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  // Reference pass with the test's "disabled" CPU flag mask.
  MaskCpuFlags(disable_cpu_flags_);
  YUY2ToY(src_pixels_y, kSrcStride, dst_pixels_y_c, benchmark_width_,
          benchmark_width_, benchmark_height_);

  // Timed passes with the benchmark CPU flag mask.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    YUY2ToY(src_pixels_y, kSrcStride, dst_pixels_y_opt, benchmark_width_,
            benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3748 
// Plane-level check of UYVYToY: the output produced with disable_cpu_flags_
// must equal the output produced with benchmark_cpu_info_, for random input.
TEST_F(LibYUVPlanarTest, UYVYToY) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcStride = benchmark_width_ * 2;  // source uses 2 bytes per pixel

  align_buffer_page_end(src_pixels_y, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kPixels * 2);
  // Distinct fill values: an output byte that is never written will differ.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  // Reference pass with the test's "disabled" CPU flag mask.
  MaskCpuFlags(disable_cpu_flags_);
  UYVYToY(src_pixels_y, kSrcStride, dst_pixels_y_c, benchmark_width_,
          benchmark_width_, benchmark_height_);

  // Timed passes with the benchmark CPU flag mask.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    UYVYToY(src_pixels_y, kSrcStride, dst_pixels_y_opt, benchmark_width_,
            benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3777 
3778 #ifdef ENABLE_ROW_TESTS
3779 // TODO(fbarchard): Improve test for more platforms.
3780 #ifdef HAS_CONVERT16TO8ROW_AVX2
// Row-level check: Convert16To8Row_AVX2 (or Convert16To8Row_SSSE3 when AVX2 is
// not reported, or the C row when neither is) must match Convert16To8Row_C on
// 10-bit input with a scale argument of 16384.
TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
  // AVX2 does multiple of 32, so round count up
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kScale = 16384;

  align_buffer_page_end(src_pixels_y, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kPixels * 2);

  // Keep only the low 10 bits of every source sample.
  uint16_t* src16 = reinterpret_cast<uint16_t*>(src_pixels_y);
  for (int n = 0; n < kPixels; ++n) {
    src16[n] &= 1023;
  }

  // Distinct fill values: an output byte that is never written will differ.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  // Reference result from the C row.
  Convert16To8Row_C(src16, dst_pixels_y_c, kScale, kPixels);

  const int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  const int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    if (has_avx2) {
      Convert16To8Row_AVX2(src16, dst_pixels_y_opt, kScale, kPixels);
      continue;
    }
    if (has_ssse3) {
      Convert16To8Row_SSSE3(src16, dst_pixels_y_opt, kScale, kPixels);
      continue;
    }
    Convert16To8Row_C(src16, dst_pixels_y_opt, kScale, kPixels);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3823 #endif  // HAS_CONVERT16TO8ROW_AVX2
3824 
3825 #ifdef HAS_UYVYTOYROW_NEON
// Row-level check: UYVYToYRow_NEON must match UYVYToYRow_C byte for byte on
// random input. The NEON row is only called when TestCpuFlag reports NEON;
// otherwise the C row is run in the timed loop, mirroring the CPU-flag
// fallback used by the other row tests in this file.
TEST_F(LibYUVPlanarTest, UYVYToYRow_Opt) {
  // NEON does multiple of 16, so round count up
  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  align_buffer_page_end(src_pixels_y, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kPixels * 2);
  // Distinct fill values: an output byte that is never written will differ.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  // Reference result from the C row.
  UYVYToYRow_C(src_pixels_y, dst_pixels_y_c, kPixels);

  // Do not execute NEON code on a CPU (or CPU-flag mask) without NEON.
  const int has_neon = TestCpuFlag(kCpuHasNEON);
  for (int i = 0; i < benchmark_iterations_; ++i) {
    if (has_neon) {
      UYVYToYRow_NEON(src_pixels_y, dst_pixels_y_opt, kPixels);
    } else {
      UYVYToYRow_C(src_pixels_y, dst_pixels_y_opt, kPixels);
    }
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3851 #endif  // HAS_UYVYTOYROW_NEON
3852 
3853 #endif  // ENABLE_ROW_TESTS
3854 
// Plane-level check of Convert8To16Plane: the output produced with
// disable_cpu_flags_ must equal the output produced with benchmark_cpu_info_,
// for random 8-bit input and a scale argument of 1024.
TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kDstBytes = kPixels * 2;  // 2 bytes per output sample
  const int kScale = 1024;

  align_buffer_page_end(src_pixels_y, kPixels);
  align_buffer_page_end(dst_pixels_y_opt, kDstBytes);
  align_buffer_page_end(dst_pixels_y_c, kDstBytes);

  MemRandomize(src_pixels_y, kPixels);
  // Distinct fill values: an output byte that is never written will differ.
  memset(dst_pixels_y_opt, 0, kDstBytes);
  memset(dst_pixels_y_c, 1, kDstBytes);

  uint16_t* ref16 = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* opt16 = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  // Reference pass with the test's "disabled" CPU flag mask.
  MaskCpuFlags(disable_cpu_flags_);
  Convert8To16Plane(src_pixels_y, benchmark_width_, ref16, benchmark_width_,
                    kScale, benchmark_width_, benchmark_height_);

  // Timed passes with the benchmark CPU flag mask.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    Convert8To16Plane(src_pixels_y, benchmark_width_, opt16, benchmark_width_,
                      kScale, benchmark_width_, benchmark_height_);
  }

  // Compared byte by byte, hence twice the pixel count.
  for (int i = 0; i < kDstBytes; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3887 
3888 #ifdef ENABLE_ROW_TESTS
3889 // TODO(fbarchard): Improve test for more platforms.
3890 #ifdef HAS_CONVERT8TO16ROW_AVX2
// Row-level check: Convert8To16Row_AVX2 (or Convert8To16Row_SSE2 when AVX2 is
// not reported, or the C row when neither is) must match Convert8To16Row_C on
// random input with a scale argument of 1024.
TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) {
  // Pixel count, rounded up to the next multiple of 32.
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kDstBytes = kPixels * 2;  // 2 bytes per output sample
  const int kScale = 1024;

  align_buffer_page_end(src_pixels_y, kPixels);
  align_buffer_page_end(dst_pixels_y_opt, kDstBytes);
  align_buffer_page_end(dst_pixels_y_c, kDstBytes);

  MemRandomize(src_pixels_y, kPixels);
  // Distinct fill values: an output byte that is never written will differ.
  memset(dst_pixels_y_opt, 0, kDstBytes);
  memset(dst_pixels_y_c, 1, kDstBytes);

  uint16_t* ref16 = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* opt16 = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  // Reference result from the C row.
  Convert8To16Row_C(src_pixels_y, ref16, kScale, kPixels);

  const int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  const int has_sse2 = TestCpuFlag(kCpuHasSSE2);
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    if (has_avx2) {
      Convert8To16Row_AVX2(src_pixels_y, opt16, kScale, kPixels);
      continue;
    }
    if (has_sse2) {
      Convert8To16Row_SSE2(src_pixels_y, opt16, kScale, kPixels);
      continue;
    }
    Convert8To16Row_C(src_pixels_y, opt16, kScale, kPixels);
  }

  // Compared byte by byte, hence twice the pixel count.
  for (int i = 0; i < kDstBytes; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3930 #endif  // HAS_CONVERT8TO16ROW_AVX2
3931 
TestScaleMaxSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)3932 float TestScaleMaxSamples(int benchmark_width,
3933                           int benchmark_height,
3934                           int benchmark_iterations,
3935                           float scale,
3936                           bool opt) {
3937   int i, j;
3938   float max_c, max_opt = 0.f;
3939   // NEON does multiple of 8, so round count up
3940   const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
3941   align_buffer_page_end(orig_y, kPixels * 4 * 3 + 48);
3942   uint8_t* dst_c = orig_y + kPixels * 4 + 16;
3943   uint8_t* dst_opt = orig_y + kPixels * 4 * 2 + 32;
3944 
3945   // Randomize works but may contain some denormals affecting performance.
3946   // MemRandomize(orig_y, kPixels * 4);
3947   // large values are problematic.  audio is really -1 to 1.
3948   for (i = 0; i < kPixels; ++i) {
3949     (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
3950   }
3951   memset(dst_c, 0, kPixels * 4);
3952   memset(dst_opt, 1, kPixels * 4);
3953 
3954   max_c = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
3955                             reinterpret_cast<float*>(dst_c), scale, kPixels);
3956 
3957   for (j = 0; j < benchmark_iterations; j++) {
3958     if (opt) {
3959 #ifdef HAS_SCALESUMSAMPLES_NEON
3960       max_opt = ScaleMaxSamples_NEON(reinterpret_cast<float*>(orig_y),
3961                                      reinterpret_cast<float*>(dst_opt), scale,
3962                                      kPixels);
3963 #else
3964       max_opt =
3965           ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
3966                             reinterpret_cast<float*>(dst_opt), scale, kPixels);
3967 #endif
3968     } else {
3969       max_opt =
3970           ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
3971                             reinterpret_cast<float*>(dst_opt), scale, kPixels);
3972     }
3973   }
3974 
3975   float max_diff = FAbs(max_opt - max_c);
3976   for (i = 0; i < kPixels; ++i) {
3977     float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
3978                           (reinterpret_cast<float*>(dst_opt)[i]));
3979     if (abs_diff > max_diff) {
3980       max_diff = abs_diff;
3981     }
3982   }
3983 
3984   free_aligned_buffer_page_end(orig_y);
3985   return max_diff;
3986 }
3987 
// opt == false: the helper runs ScaleMaxSamples_C for both the reference and
// the timed passes, so the reported difference must be zero.
TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_C) {
  const float diff = TestScaleMaxSamples(
      benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, false);
  EXPECT_EQ(0, diff);
}
3993 
// opt == true: the helper uses the NEON row where it is compiled in; the
// result is required to match the C reference exactly.
TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_Opt) {
  const float diff = TestScaleMaxSamples(
      benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, true);
  EXPECT_EQ(0, diff);
}
3999 
TestScaleSumSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)4000 float TestScaleSumSamples(int benchmark_width,
4001                           int benchmark_height,
4002                           int benchmark_iterations,
4003                           float scale,
4004                           bool opt) {
4005   int i, j;
4006   float sum_c, sum_opt = 0.f;
4007   // NEON does multiple of 8, so round count up
4008   const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
4009   align_buffer_page_end(orig_y, kPixels * 4 * 3);
4010   uint8_t* dst_c = orig_y + kPixels * 4;
4011   uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
4012 
4013   // Randomize works but may contain some denormals affecting performance.
4014   // MemRandomize(orig_y, kPixels * 4);
4015   // large values are problematic.  audio is really -1 to 1.
4016   for (i = 0; i < kPixels; ++i) {
4017     (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
4018   }
4019   memset(dst_c, 0, kPixels * 4);
4020   memset(dst_opt, 1, kPixels * 4);
4021 
4022   sum_c = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
4023                             reinterpret_cast<float*>(dst_c), scale, kPixels);
4024 
4025   for (j = 0; j < benchmark_iterations; j++) {
4026     if (opt) {
4027 #ifdef HAS_SCALESUMSAMPLES_NEON
4028       sum_opt = ScaleSumSamples_NEON(reinterpret_cast<float*>(orig_y),
4029                                      reinterpret_cast<float*>(dst_opt), scale,
4030                                      kPixels);
4031 #else
4032       sum_opt =
4033           ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
4034                             reinterpret_cast<float*>(dst_opt), scale, kPixels);
4035 #endif
4036     } else {
4037       sum_opt =
4038           ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
4039                             reinterpret_cast<float*>(dst_opt), scale, kPixels);
4040     }
4041   }
4042 
4043   float mse_opt = sum_opt / kPixels * 4;
4044   float mse_c = sum_c / kPixels * 4;
4045   float mse_error = FAbs(mse_opt - mse_c) / mse_c;
4046 
4047   // If the sum of a float is more than 4 million, small adds are round down on
4048   // float and produce different results with vectorized sum vs scalar sum.
4049   // Ignore the difference if the sum is large.
4050   float max_diff = 0.f;
4051   if (mse_error > 0.0001 && sum_c < 4000000) {  // allow .01% difference of mse
4052     max_diff = mse_error;
4053   }
4054 
4055   for (i = 0; i < kPixels; ++i) {
4056     float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
4057                           (reinterpret_cast<float*>(dst_opt)[i]));
4058     if (abs_diff > max_diff) {
4059       max_diff = abs_diff;
4060     }
4061   }
4062 
4063   free_aligned_buffer_page_end(orig_y);
4064   return max_diff;
4065 }
4066 
// opt == false: the helper runs ScaleSumSamples_C for both the reference and
// the timed passes, so the reported difference must be zero.
TEST_F(LibYUVPlanarTest, TestScaleSumSamples_C) {
  const float diff = TestScaleSumSamples(
      benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, false);
  EXPECT_EQ(0, diff);
}
4072 
// opt == true: the helper uses the NEON row where it is compiled in. Output
// samples must match exactly; the helper itself tolerates a small relative
// error in the returned sum.
TEST_F(LibYUVPlanarTest, TestScaleSumSamples_Opt) {
  const float diff = TestScaleSumSamples(
      benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, true);
  EXPECT_EQ(0, diff);
}
4078 
TestScaleSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)4079 float TestScaleSamples(int benchmark_width,
4080                        int benchmark_height,
4081                        int benchmark_iterations,
4082                        float scale,
4083                        bool opt) {
4084   int i, j;
4085   // NEON does multiple of 8, so round count up
4086   const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
4087   align_buffer_page_end(orig_y, kPixels * 4 * 3);
4088   uint8_t* dst_c = orig_y + kPixels * 4;
4089   uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
4090 
4091   // Randomize works but may contain some denormals affecting performance.
4092   // MemRandomize(orig_y, kPixels * 4);
4093   // large values are problematic.  audio is really -1 to 1.
4094   for (i = 0; i < kPixels; ++i) {
4095     (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
4096   }
4097   memset(dst_c, 0, kPixels * 4);
4098   memset(dst_opt, 1, kPixels * 4);
4099 
4100   ScaleSamples_C(reinterpret_cast<float*>(orig_y),
4101                  reinterpret_cast<float*>(dst_c), scale, kPixels);
4102 
4103   for (j = 0; j < benchmark_iterations; j++) {
4104     if (opt) {
4105 #ifdef HAS_SCALESUMSAMPLES_NEON
4106       ScaleSamples_NEON(reinterpret_cast<float*>(orig_y),
4107                         reinterpret_cast<float*>(dst_opt), scale, kPixels);
4108 #else
4109       ScaleSamples_C(reinterpret_cast<float*>(orig_y),
4110                      reinterpret_cast<float*>(dst_opt), scale, kPixels);
4111 #endif
4112     } else {
4113       ScaleSamples_C(reinterpret_cast<float*>(orig_y),
4114                      reinterpret_cast<float*>(dst_opt), scale, kPixels);
4115     }
4116   }
4117 
4118   float max_diff = 0.f;
4119   for (i = 0; i < kPixels; ++i) {
4120     float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
4121                           (reinterpret_cast<float*>(dst_opt)[i]));
4122     if (abs_diff > max_diff) {
4123       max_diff = abs_diff;
4124     }
4125   }
4126 
4127   free_aligned_buffer_page_end(orig_y);
4128   return max_diff;
4129 }
4130 
// opt == false: the helper runs ScaleSamples_C for both the reference and the
// timed passes, so the reported difference must be zero.
TEST_F(LibYUVPlanarTest, TestScaleSamples_C) {
  const float diff = TestScaleSamples(
      benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, false);
  EXPECT_EQ(0, diff);
}
4136 
// opt == true: the helper uses the NEON row where it is compiled in; the
// output is required to match the C reference exactly.
TEST_F(LibYUVPlanarTest, TestScaleSamples_Opt) {
  const float diff = TestScaleSamples(
      benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, true);
  EXPECT_EQ(0, diff);
}
4142 
TestCopySamples(int benchmark_width,int benchmark_height,int benchmark_iterations,bool opt)4143 float TestCopySamples(int benchmark_width,
4144                       int benchmark_height,
4145                       int benchmark_iterations,
4146                       bool opt) {
4147   int i, j;
4148   // NEON does multiple of 16 floats, so round count up
4149   const int kPixels = (benchmark_width * benchmark_height + 15) & ~15;
4150   align_buffer_page_end(orig_y, kPixels * 4 * 3);
4151   uint8_t* dst_c = orig_y + kPixels * 4;
4152   uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
4153 
4154   // Randomize works but may contain some denormals affecting performance.
4155   // MemRandomize(orig_y, kPixels * 4);
4156   // large values are problematic.  audio is really -1 to 1.
4157   for (i = 0; i < kPixels; ++i) {
4158     (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
4159   }
4160   memset(dst_c, 0, kPixels * 4);
4161   memset(dst_opt, 1, kPixels * 4);
4162 
4163   memcpy(reinterpret_cast<void*>(dst_c), reinterpret_cast<void*>(orig_y),
4164          kPixels * 4);
4165 
4166   for (j = 0; j < benchmark_iterations; j++) {
4167     if (opt) {
4168 #ifdef HAS_COPYROW_NEON
4169       CopyRow_NEON(orig_y, dst_opt, kPixels * 4);
4170 #else
4171       CopyRow_C(orig_y, dst_opt, kPixels * 4);
4172 #endif
4173     } else {
4174       CopyRow_C(orig_y, dst_opt, kPixels * 4);
4175     }
4176   }
4177 
4178   float max_diff = 0.f;
4179   for (i = 0; i < kPixels; ++i) {
4180     float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
4181                           (reinterpret_cast<float*>(dst_opt)[i]));
4182     if (abs_diff > max_diff) {
4183       max_diff = abs_diff;
4184     }
4185   }
4186 
4187   free_aligned_buffer_page_end(orig_y);
4188   return max_diff;
4189 }
4190 
// opt == false: the helper compares a memcpy copy against CopyRow_C; the two
// copies must be identical.
TEST_F(LibYUVPlanarTest, TestCopySamples_C) {
  const float diff = TestCopySamples(benchmark_width_, benchmark_height_,
                                     benchmark_iterations_, false);
  EXPECT_EQ(0, diff);
}
4196 
// opt == true: the helper compares a memcpy copy against CopyRow_NEON where
// it is compiled in (CopyRow_C otherwise); the two copies must be identical.
TEST_F(LibYUVPlanarTest, TestCopySamples_Opt) {
  const float diff = TestCopySamples(benchmark_width_, benchmark_height_,
                                     benchmark_iterations_, true);
  EXPECT_EQ(0, diff);
}
4202 
// Local declarations of the Gauss row functions called by TestGaussRow_Opt:
// 32-bit source samples in, 16-bit samples out, |width| outputs.
// NOTE(review): presumably declared here because no included header exposes
// them - confirm. GaussRow_NEON is only called from code guarded by the NEON
// preprocessor checks in the test.
extern "C" void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width);
extern "C" void GaussRow_C(const uint32_t* src, uint16_t* dst, int width);
4205 
// Compares GaussRow_NEON (GaussRow_C where NEON is not compiled in or not
// reported by TestCpuFlag) against GaussRow_C on a 1280-wide row, and pins
// two reference outputs of the C row.
TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
  const int kWidth = 1280;
  // Source has 8 extra samples beyond the output width.
  SIMD_ALIGNED(uint32_t orig_pixels[kWidth + 8]);
  SIMD_ALIGNED(uint16_t dst_pixels_c[kWidth]);
  SIMD_ALIGNED(uint16_t dst_pixels_opt[kWidth]);

  memset(orig_pixels, 0, sizeof(orig_pixels));
  // Distinct fill values: an output that is never written will differ.
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  // Ramp input: sample n holds n * 256.
  for (int n = 0; n < kWidth + 8; ++n) {
    orig_pixels[n] = n * 256;
  }

  // Reference result from the C row.
  GaussRow_C(orig_pixels, dst_pixels_c, kWidth);

  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
    if (TestCpuFlag(kCpuHasNEON)) {
      GaussRow_NEON(orig_pixels, dst_pixels_opt, kWidth);
      continue;
    }
#endif
    GaussRow_C(orig_pixels, dst_pixels_opt, kWidth);
  }

  for (int i = 0; i < 1280; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  // The first expected value is spelled out as the 1-4-6-4-1 weighted sum of
  // the indices 0..4.
  EXPECT_EQ(dst_pixels_c[0],
            static_cast<uint16_t>(0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1));
  EXPECT_EQ(dst_pixels_c[639], static_cast<uint16_t>(10256));
}
4241 
// Local declarations of the Gauss column functions called by
// TestGaussCol_Opt: five 16-bit source rows in, one 32-bit row out, |width|
// outputs.
// NOTE(review): presumably declared here because no included header exposes
// them - confirm. GaussCol_NEON is only called from code guarded by the NEON
// preprocessor checks in the test.
extern "C" void GaussCol_NEON(const uint16_t* src0,
                              const uint16_t* src1,
                              const uint16_t* src2,
                              const uint16_t* src3,
                              const uint16_t* src4,
                              uint32_t* dst,
                              int width);

extern "C" void GaussCol_C(const uint16_t* src0,
                           const uint16_t* src1,
                           const uint16_t* src2,
                           const uint16_t* src3,
                           const uint16_t* src4,
                           uint32_t* dst,
                           int width);
4257 
// Compares GaussCol_NEON (GaussCol_C where NEON is not compiled in or not
// reported by TestCpuFlag) against GaussCol_C on five consecutive 1280-wide
// rows of 16-bit ramp data.
TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
  SIMD_ALIGNED(uint16_t orig_pixels[1280 * 5]);
  SIMD_ALIGNED(uint32_t dst_pixels_c[1280]);
  SIMD_ALIGNED(uint32_t dst_pixels_opt[1280]);

  memset(orig_pixels, 0, sizeof(orig_pixels));
  // Distinct fill values: an output that is never written will differ.
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  // Ramp input. The array is uint16_t, so cast straight to the element type
  // (1280 * 5 - 1 fits in 16 bits) instead of going through float.
  for (int i = 0; i < 1280 * 5; ++i) {
    orig_pixels[i] = static_cast<uint16_t>(i);
  }

  // Reference result from the C column function.
  GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
             &orig_pixels[1280 * 3], &orig_pixels[1280 * 4], &dst_pixels_c[0],
             1280);
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
    int has_neon = TestCpuFlag(kCpuHasNEON);
    if (has_neon) {
      GaussCol_NEON(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
                    &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
                    &dst_pixels_opt[0], 1280);
    } else {
      GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
                 &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
                 &dst_pixels_opt[0], 1280);
    }
#else
    GaussCol_C(&orig_pixels[0], &orig_pixels[1280], &orig_pixels[1280 * 2],
               &orig_pixels[1280 * 3], &orig_pixels[1280 * 4],
               &dst_pixels_opt[0], 1280);
#endif
  }

  for (int i = 0; i < 1280; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }
}
4297 
// Compares GaussRow_F32_NEON (GaussRow_F32_C where the aarch64 NEON path is
// not compiled in or NEON is not reported by TestCpuFlag) against
// GaussRow_F32_C on a 1280-wide row of float ramp data.
TEST_F(LibYUVPlanarTest, TestGaussRow_F32_Opt) {
  const int kWidth = 1280;
  // Source has 4 extra samples beyond the output width.
  SIMD_ALIGNED(float orig_pixels[kWidth + 4]);
  SIMD_ALIGNED(float dst_pixels_c[kWidth]);
  SIMD_ALIGNED(float dst_pixels_opt[kWidth]);

  memset(orig_pixels, 0, sizeof(orig_pixels));
  // Distinct fill values: an output that is never written will differ.
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  // Ramp input: sample n holds n.
  for (int n = 0; n < kWidth + 4; ++n) {
    orig_pixels[n] = static_cast<float>(n);
  }

  // Reference result from the C row.
  GaussRow_F32_C(orig_pixels, dst_pixels_c, kWidth);

  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    if (TestCpuFlag(kCpuHasNEON)) {
      GaussRow_F32_NEON(orig_pixels, dst_pixels_opt, kWidth);
      continue;
    }
#endif
    GaussRow_F32_C(orig_pixels, dst_pixels_opt, kWidth);
  }

  // Exact float equality is required between the two implementations.
  for (int i = 0; i < 1280; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }
}
4328 
// Compares GaussCol_F32_NEON (GaussCol_F32_C where the aarch64 NEON path is
// not compiled in or NEON is not reported by TestCpuFlag) against
// GaussCol_F32_C on five consecutive 1280-wide rows of float ramp data.
TEST_F(LibYUVPlanarTest, TestGaussCol_F32_Opt) {
  const int kWidth = 1280;
  const int kRows = 5;

  SIMD_ALIGNED(float dst_pixels_c[kWidth]);
  SIMD_ALIGNED(float dst_pixels_opt[kWidth]);
  align_buffer_page_end(orig_pixels_buf, kWidth * kRows * 4);  // 5 rows
  float* orig_pixels = reinterpret_cast<float*>(orig_pixels_buf);

  memset(orig_pixels, 0, kWidth * kRows * 4);
  // Distinct fill values: an output that is never written will differ.
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  // Ramp input: sample n holds n.
  for (int n = 0; n < kWidth * kRows; ++n) {
    orig_pixels[n] = static_cast<float>(n);
  }

  // The five source rows are laid out back to back in one buffer.
  const float* row0 = orig_pixels;
  const float* row1 = orig_pixels + kWidth;
  const float* row2 = orig_pixels + kWidth * 2;
  const float* row3 = orig_pixels + kWidth * 3;
  const float* row4 = orig_pixels + kWidth * 4;

  // Reference result from the C column function.
  GaussCol_F32_C(row0, row1, row2, row3, row4, dst_pixels_c, kWidth);

  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    if (TestCpuFlag(kCpuHasNEON)) {
      GaussCol_F32_NEON(row0, row1, row2, row3, row4, dst_pixels_opt, kWidth);
      continue;
    }
#endif
    GaussCol_F32_C(row0, row1, row2, row3, row4, dst_pixels_opt, kWidth);
  }

  // Exact float equality is required between the two implementations.
  for (int i = 0; i < 1280; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }
  free_aligned_buffer_page_end(orig_pixels_buf);
}
4369 
// Runs a SwapUVRow row function over benchmark_width_ * benchmark_height_
// two-byte pairs and checks that the two bytes of every pair come out
// exchanged. SwapUVRow_C is used unless HAS_SWAPUVROW_NEON is defined and the
// CPU reports NEON.
TEST_F(LibYUVPlanarTest, SwapUVRow) {
  typedef void (*SwapRowFn)(const uint8_t* src_uv, uint8_t* dst_vu, int width);

  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kBytes = kPixels * 2;  // 2 bytes per pair
  SwapRowFn swap_row = SwapUVRow_C;

  align_buffer_page_end(src_pixels_vu, kBytes);
  align_buffer_page_end(dst_pixels_uv, kBytes);
  MemRandomize(src_pixels_vu, kBytes);
  memset(dst_pixels_uv, 1, kBytes);

#if defined(HAS_SWAPUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    // The plain NEON row is only chosen when the pair count is a multiple of
    // 16; other counts go through the Any wrapper.
    swap_row = IS_ALIGNED(kPixels, 16) ? SwapUVRow_NEON : SwapUVRow_Any_NEON;
  }
#endif

  for (int iter = 0; iter < benchmark_iterations_; iter++) {
    swap_row(src_pixels_vu, dst_pixels_uv, kPixels);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);
    EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]);
  }

  free_aligned_buffer_page_end(src_pixels_vu);
  free_aligned_buffer_page_end(dst_pixels_uv);
}
4400 #endif  // ENABLE_ROW_TESTS
4401 
// Compares GaussPlane_F32 output between one run made with
// disable_cpu_flags_ masked in and benchmark_iterations_ runs made with
// benchmark_cpu_info_. Each output float may differ by at most 1.0.
TEST_F(LibYUVPlanarTest, TestGaussPlane_F32) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSize = kPixels * 4;  // 4 bytes per float sample
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  // Float views of the byte buffers, used for filling and for the calls.
  float* const src_f32 = reinterpret_cast<float*>(orig_pixels);
  float* const out_c_f32 = reinterpret_cast<float*>(dst_pixels_c);
  float* const out_opt_f32 = reinterpret_cast<float*>(dst_pixels_opt);

  // Source pattern repeats every 1024 samples.
  for (int n = 0; n < kPixels; ++n) {
    src_f32[n] = (n & 1023) * 3.14f;
  }
  // Distinct fill bytes so an output that is never written cannot match the
  // other one by accident.
  memset(dst_pixels_opt, 1, kSize);
  memset(dst_pixels_c, 2, kSize);

  // Reference run.
  MaskCpuFlags(disable_cpu_flags_);
  GaussPlane_F32(src_f32, benchmark_width_, out_c_f32, benchmark_width_,
                 benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked runs.
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    GaussPlane_F32(src_f32, benchmark_width_, out_opt_f32, benchmark_width_,
                   benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) {
    EXPECT_NEAR(((float*)(dst_pixels_c))[i], ((float*)(dst_pixels_opt))[i], 1.f)
        << i;
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
4434 
// Compares HalfMergeUVPlane output between one run made with
// disable_cpu_flags_ masked in and benchmark_iterations_ runs made with
// benchmark_cpu_info_. The inputs are separate U and V planes of
// benchmark_width_ x benchmark_height_ bytes; the destination has
// (benchmark_height_ + 1) / 2 rows of (benchmark_width_ + 1) / 2 two-byte
// entries. The two outputs must match byte for byte.
TEST_F(LibYUVPlanarTest, HalfMergeUVPlane_Opt) {
  int dst_width = (benchmark_width_ + 1) / 2;
  int dst_height = (benchmark_height_ + 1) / 2;
  align_buffer_page_end(src_pixels_u, benchmark_width_ * benchmark_height_);
  align_buffer_page_end(src_pixels_v, benchmark_width_ * benchmark_height_);
  align_buffer_page_end(dst_pixels_uv_opt, dst_width * 2 * dst_height);
  align_buffer_page_end(dst_pixels_uv_c, dst_width * 2 * dst_height);

  MemRandomize(src_pixels_u, benchmark_width_ * benchmark_height_);
  MemRandomize(src_pixels_v, benchmark_width_ * benchmark_height_);
  // Destinations start out with unrelated random bytes so that a region left
  // unwritten by either run is unlikely to compare equal.
  MemRandomize(dst_pixels_uv_opt, dst_width * 2 * dst_height);
  MemRandomize(dst_pixels_uv_c, dst_width * 2 * dst_height);

  // Reference run.
  MaskCpuFlags(disable_cpu_flags_);
  HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
                   benchmark_width_, dst_pixels_uv_c, dst_width * 2,
                   benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmarked runs.
  for (int i = 0; i < benchmark_iterations_; ++i) {
    HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
                     benchmark_width_, dst_pixels_uv_opt, dst_width * 2,
                     benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < dst_width * 2 * dst_height; ++i) {
    EXPECT_EQ(dst_pixels_uv_c[i], dst_pixels_uv_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_uv_opt);
  free_aligned_buffer_page_end(dst_pixels_uv_c);
}
4475 
// Copies a Y plane plus a two-bytes-per-entry UV plane with NV12Copy,
// benchmark_iterations_ times, and checks that both destination planes equal
// their sources byte for byte.
TEST_F(LibYUVPlanarTest, NV12Copy) {
  const int halfwidth = (benchmark_width_ + 1) >> 1;
  const int halfheight = (benchmark_height_ + 1) >> 1;
  const int kYBytes = benchmark_width_ * benchmark_height_;
  const int kUVStride = halfwidth * 2;  // 2 bytes per chroma entry
  const int kUVBytes = kUVStride * halfheight;

  align_buffer_page_end(src_y, kYBytes);
  align_buffer_page_end(src_uv, kUVBytes);
  align_buffer_page_end(dst_y, kYBytes);
  align_buffer_page_end(dst_uv, kUVBytes);

  // Sources and destinations both start out random, so a plane that is not
  // copied is unlikely to compare equal.
  MemRandomize(src_y, kYBytes);
  MemRandomize(src_uv, kUVBytes);
  MemRandomize(dst_y, kYBytes);
  MemRandomize(dst_uv, kUVBytes);

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    NV12Copy(src_y, benchmark_width_, src_uv, kUVStride, dst_y,
             benchmark_width_, dst_uv, kUVStride, benchmark_width_,
             benchmark_height_);
  }

  for (int i = 0; i < kYBytes; ++i) {
    EXPECT_EQ(src_y[i], dst_y[i]);
  }
  for (int i = 0; i < kUVBytes; ++i) {
    EXPECT_EQ(src_uv[i], dst_uv[i]);
  }

  free_aligned_buffer_page_end(src_y);
  free_aligned_buffer_page_end(src_uv);
  free_aligned_buffer_page_end(dst_y);
  free_aligned_buffer_page_end(dst_uv);
}
4507 
// Copies a Y plane plus a two-bytes-per-entry VU plane with NV21Copy,
// benchmark_iterations_ times, and checks that both destination planes equal
// their sources byte for byte.
TEST_F(LibYUVPlanarTest, NV21Copy) {
  const int halfwidth = (benchmark_width_ + 1) >> 1;
  const int halfheight = (benchmark_height_ + 1) >> 1;
  const int kYBytes = benchmark_width_ * benchmark_height_;
  const int kVUStride = halfwidth * 2;  // 2 bytes per chroma entry
  const int kVUBytes = kVUStride * halfheight;

  align_buffer_page_end(src_y, kYBytes);
  align_buffer_page_end(src_vu, kVUBytes);
  align_buffer_page_end(dst_y, kYBytes);
  align_buffer_page_end(dst_vu, kVUBytes);

  // Sources and destinations both start out random, so a plane that is not
  // copied is unlikely to compare equal.
  MemRandomize(src_y, kYBytes);
  MemRandomize(src_vu, kVUBytes);
  MemRandomize(dst_y, kYBytes);
  MemRandomize(dst_vu, kVUBytes);

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    NV21Copy(src_y, benchmark_width_, src_vu, kVUStride, dst_y,
             benchmark_width_, dst_vu, kVUStride, benchmark_width_,
             benchmark_height_);
  }

  for (int i = 0; i < kYBytes; ++i) {
    EXPECT_EQ(src_y[i], dst_y[i]);
  }
  for (int i = 0; i < kVUBytes; ++i) {
    EXPECT_EQ(src_vu[i], dst_vu[i]);
  }

  free_aligned_buffer_page_end(src_y);
  free_aligned_buffer_page_end(src_vu);
  free_aligned_buffer_page_end(dst_y);
  free_aligned_buffer_page_end(dst_vu);
}
4539 
4540 #if defined(ENABLE_ROW_TESTS) && !defined(LIBYUV_DISABLE_NEON) && \
4541     defined(__aarch64__)
4542 
// Round-trip check for ConvertFP16ToFP32Row_NEON: float data is converted to
// 16-bit with ConvertFP32ToFP16Row_NEON, expanded back to float
// (benchmark_iterations_ times), converted to 16-bit again, and the two
// 16-bit buffers must be identical. Only NEON row functions are called; no C
// reference is involved.
TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32) {
  const int y_plane_size = benchmark_width_ * benchmark_height_;

  align_buffer_page_end(orig_f, y_plane_size * 4);   // float source
  align_buffer_page_end(orig_y, y_plane_size * 2);   // 16-bit form of source
  align_buffer_page_end(dst_opt, y_plane_size * 4);  // float output under test
  align_buffer_page_end(rec_opt, y_plane_size * 2);  // 16-bit form of output

  // Typed views of the byte buffers for the conversion calls.
  float* const src_f32 = reinterpret_cast<float*>(orig_f);
  uint16_t* const src_f16 = reinterpret_cast<uint16_t*>(orig_y);
  float* const out_f32 = reinterpret_cast<float*>(dst_opt);
  uint16_t* const out_f16 = reinterpret_cast<uint16_t*>(rec_opt);

  // Source pattern repeats every 10000 samples.
  for (int n = 0; n < y_plane_size; ++n) {
    src_f32[n] = static_cast<float>(n % 10000) * 3.14f;
  }
  // Distinct fill bytes per buffer so unwritten output is detectable.
  memset(orig_y, 1, y_plane_size * 2);
  memset(dst_opt, 2, y_plane_size * 4);
  memset(rec_opt, 3, y_plane_size * 2);

  ConvertFP32ToFP16Row_NEON(src_f32, src_f16, y_plane_size);

  for (int iter = 0; iter < benchmark_iterations_; iter++) {
    ConvertFP16ToFP32Row_NEON(src_f16, out_f32, y_plane_size);
  }

  ConvertFP32ToFP16Row_NEON(out_f32, out_f16, y_plane_size);

  for (int i = 0; i < y_plane_size; ++i) {
    EXPECT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]);
  }

  free_aligned_buffer_page_end(orig_f);
  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_opt);
  free_aligned_buffer_page_end(rec_opt);
}
4579 
// Round-trip check for ConvertFP16ToFP32Column_NEON: float data is converted
// to 16-bit with ConvertFP32ToFP16Row_NEON, expanded back to float through
// the column function (benchmark_iterations_ times), converted to 16-bit
// again, and the two 16-bit buffers must be identical. Only NEON functions
// are called; no C reference is involved.
TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32Column) {
  const int y_plane_size = benchmark_width_ * benchmark_height_;

  align_buffer_page_end(orig_f, y_plane_size * 4);   // float source
  align_buffer_page_end(orig_y, y_plane_size * 2);   // 16-bit form of source
  align_buffer_page_end(dst_opt, y_plane_size * 4);  // float output under test
  align_buffer_page_end(rec_opt, y_plane_size * 2);  // 16-bit form of output

  // Typed views of the byte buffers for the conversion calls.
  float* const src_f32 = reinterpret_cast<float*>(orig_f);
  uint16_t* const src_f16 = reinterpret_cast<uint16_t*>(orig_y);
  float* const out_f32 = reinterpret_cast<float*>(dst_opt);
  uint16_t* const out_f16 = reinterpret_cast<uint16_t*>(rec_opt);

  // Source pattern repeats every 10000 samples.
  for (int n = 0; n < y_plane_size; ++n) {
    src_f32[n] = static_cast<float>(n % 10000) * 3.14f;
  }
  // Distinct fill bytes per buffer so unwritten output is detectable.
  memset(orig_y, 1, y_plane_size * 2);
  memset(dst_opt, 2, y_plane_size * 4);
  memset(rec_opt, 3, y_plane_size * 2);

  ConvertFP32ToFP16Row_NEON(src_f32, src_f16, y_plane_size);

  for (int iter = 0; iter < benchmark_iterations_; iter++) {
    // NOTE(review): the literal 1 is presumably the source stride in 16-bit
    // elements, making this a contiguous read - confirm against row.h.
    ConvertFP16ToFP32Column_NEON(src_f16, 1, out_f32, y_plane_size);
  }

  ConvertFP32ToFP16Row_NEON(out_f32, out_f16, y_plane_size);

  for (int i = 0; i < y_plane_size; ++i) {
    EXPECT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]);
  }

  free_aligned_buffer_page_end(orig_f);
  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_opt);
  free_aligned_buffer_page_end(rec_opt);
}
4616 
4617 #endif  // defined(ENABLE_ROW_TESTS) && defined(__aarch64__)
4618 
4619 }  // namespace libyuv
4620