1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <math.h>
12 #include <stdlib.h>
13 #include <time.h>
14
15 #include "../unit_test/unit_test.h"
16 #include "libyuv/compare.h"
17 #include "libyuv/convert.h"
18 #include "libyuv/convert_argb.h"
19 #include "libyuv/convert_from.h"
20 #include "libyuv/convert_from_argb.h"
21 #include "libyuv/cpu_id.h"
22 #include "libyuv/planar_functions.h"
23 #include "libyuv/rotate.h"
24 #include "libyuv/scale.h"
25
26 #ifdef ENABLE_ROW_TESTS
27 // row.h defines SIMD_ALIGNED, overriding unit_test.h
28 // TODO(fbarchard): Remove row.h from unittests. Test public functions.
29 #include "libyuv/row.h" /* For ScaleSumSamples_Neon */
30 #endif
31
32 #if defined(LIBYUV_BIT_EXACT)
33 #define EXPECTED_UNATTENUATE_DIFF 0
34 #else
35 #define EXPECTED_UNATTENUATE_DIFF 2
36 #endif
37
38 namespace libyuv {
39
// Checks ARGBAttenuate and ARGBUnattenuate against hand-computed values,
// against a ramp whose alpha is always 255, and through an
// attenuate -> unattenuate -> attenuate round trip.
TEST_F(LibYUVPlanarTest, TestAttenuate) {
  const int kRowPixels = 1280;
  const int kSize = kRowPixels * 4;
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(atten_pixels, kSize);
  align_buffer_page_end(unatten_pixels, kSize);
  align_buffer_page_end(atten2_pixels, kSize);

  // Five hand-picked input pixels, 4 bytes each.
  static const uint8_t kInput[5][4] = {
      {200u, 129u, 127u, 128u},  // Unattenuation of this pixel must clamp.
      {16u, 64u, 192u, 0u},      // Alpha 0.
      {16u, 64u, 192u, 128u},    // Alpha 128.
      {16u, 64u, 192u, 255u},    // Alpha 255.
      {255u, 255u, 255u, 255u},  // All channels and alpha at 255.
  };
  // Expected ARGBUnattenuate output for kInput.
  static const unsigned int kUnattenuated[5][4] = {
      {255u, 255u, 254u, 128u}, {0u, 0u, 0u, 0u},         {32u, 128u, 255u, 128u},
      {16u, 64u, 192u, 255u},   {255u, 255u, 255u, 255u},
  };
  // Expected ARGBAttenuate output for kInput.
  static const unsigned int kAttenuated[5][4] = {
      {100u, 65u, 64u, 128u}, {0u, 0u, 0u, 0u},         {8u, 32u, 96u, 128u},
      {16u, 64u, 192u, 255u}, {255u, 255u, 255u, 255u},
  };

  for (int p = 0; p < 5; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p * 4 + c] = kInput[p][c];
    }
  }

  ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 5, 1);
  for (int p = 0; p < 5; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kUnattenuated[p][c], unatten_pixels[p * 4 + c]);
    }
  }

  ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 5, 1);
  for (int p = 0; p < 5; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kAttenuated[p][c], atten_pixels[p * 4 + c]);
    }
  }

  // With alpha fixed at 255, attenuation must leave every pixel untouched.
  for (int i = 0; i < 256; ++i) {
    uint8_t* px = orig_pixels + i * 4;
    px[0] = static_cast<uint8_t>(i);
    px[1] = 0;
    px[2] = 0;
    px[3] = 255;
  }
  ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 256, 1);
  for (int i = 0; i < 256; ++i) {
    const uint8_t* src = orig_pixels + i * 4;
    const uint8_t* dst = atten_pixels + i * 4;
    EXPECT_EQ(src[0], dst[0]);
    EXPECT_EQ(0, dst[1]);
    EXPECT_EQ(0, dst[2]);
    EXPECT_EQ(255, dst[3]);
  }

  // Ramp input; values wrap modulo 256 when stored into the byte buffer.
  for (int i = 0; i < kRowPixels; ++i) {
    uint8_t* px = orig_pixels + i * 4;
    px[0] = static_cast<uint8_t>(i);
    px[1] = static_cast<uint8_t>(i / 2);
    px[2] = static_cast<uint8_t>(i / 3);
    px[3] = static_cast<uint8_t>(i);
  }
  ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, kRowPixels, 1);
  ARGBUnattenuate(atten_pixels, 0, unatten_pixels, 0, kRowPixels, 1);
  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, kRowPixels, 1);
  }
  // Re-attenuating the round-tripped data may differ by at most 1 per byte.
  for (int b = 0; b < kSize; ++b) {
    EXPECT_NEAR(atten_pixels[b], atten2_pixels[b], 1);
  }

  // Make sure transparent, 50% and opaque are fully accurate.
  static const int kSpotPixel[3] = {0, 128, 255};
  static const int kSpotExpected[3][4] = {
      {0, 0, 0, 0},
      {64, 32, 21, 128},
      {255, 127, 85, 255},
  };
  for (int s = 0; s < 3; ++s) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kSpotExpected[s][c], atten_pixels[kSpotPixel[s] * 4 + c]);
    }
  }

  free_aligned_buffer_page_end(atten2_pixels);
  free_aligned_buffer_page_end(unatten_pixels);
  free_aligned_buffer_page_end(atten_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
165
TestAttenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)166 static int TestAttenuateI(int width,
167 int height,
168 int benchmark_iterations,
169 int disable_cpu_flags,
170 int benchmark_cpu_info,
171 int invert,
172 int off) {
173 if (width < 1) {
174 width = 1;
175 }
176 const int kBpp = 4;
177 const int kStride = width * kBpp;
178 align_buffer_page_end(src_argb, kStride * height + off);
179 align_buffer_page_end(dst_argb_c, kStride * height);
180 align_buffer_page_end(dst_argb_opt, kStride * height);
181 for (int i = 0; i < kStride * height; ++i) {
182 src_argb[i + off] = (fastrand() & 0xff);
183 }
184 memset(dst_argb_c, 0, kStride * height);
185 memset(dst_argb_opt, 0, kStride * height);
186
187 MaskCpuFlags(disable_cpu_flags);
188 ARGBAttenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
189 invert * height);
190 MaskCpuFlags(benchmark_cpu_info);
191 for (int i = 0; i < benchmark_iterations; ++i) {
192 ARGBAttenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
193 invert * height);
194 }
195 int max_diff = 0;
196 for (int i = 0; i < kStride * height; ++i) {
197 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
198 static_cast<int>(dst_argb_opt[i]));
199 if (abs_diff > max_diff) {
200 max_diff = abs_diff;
201 }
202 }
203 free_aligned_buffer_page_end(src_argb);
204 free_aligned_buffer_page_end(dst_argb_c);
205 free_aligned_buffer_page_end(dst_argb_opt);
206 return max_diff;
207 }
208
// ARGBAttenuate with a width one pixel larger than benchmark_width_; both
// runs inside TestAttenuateI must produce identical bytes.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int diff = TestAttenuateI(benchmark_width_ + 1, benchmark_height_,
                                  benchmark_iterations_, disable_cpu_flags_,
                                  benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(diff, 0);
}
216
// ARGBAttenuate with the source pointer shifted by one byte; both runs
// inside TestAttenuateI must produce identical bytes.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int diff = TestAttenuateI(benchmark_width_, benchmark_height_,
                                  benchmark_iterations_, disable_cpu_flags_,
                                  benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(diff, 0);
}
223
// ARGBAttenuate called with a negated height (invert == -1); both runs
// inside TestAttenuateI must produce identical bytes.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int diff = TestAttenuateI(benchmark_width_, benchmark_height_,
                                  benchmark_iterations_, disable_cpu_flags_,
                                  benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(diff, 0);
}
230
// ARGBAttenuate at the plain benchmark size with no offset and no
// inversion; both runs inside TestAttenuateI must produce identical bytes.
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int diff = TestAttenuateI(benchmark_width_, benchmark_height_,
                                  benchmark_iterations_, disable_cpu_flags_,
                                  benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(diff, 0);
}
237
TestUnattenuateI(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)238 static int TestUnattenuateI(int width,
239 int height,
240 int benchmark_iterations,
241 int disable_cpu_flags,
242 int benchmark_cpu_info,
243 int invert,
244 int off) {
245 if (width < 1) {
246 width = 1;
247 }
248 const int kBpp = 4;
249 const int kStride = width * kBpp;
250 align_buffer_page_end(src_argb, kStride * height + off);
251 align_buffer_page_end(dst_argb_c, kStride * height);
252 align_buffer_page_end(dst_argb_opt, kStride * height);
253 for (int i = 0; i < kStride * height; ++i) {
254 src_argb[i + off] = (fastrand() & 0xff);
255 }
256 ARGBAttenuate(src_argb + off, kStride, src_argb + off, kStride, width,
257 height);
258 memset(dst_argb_c, 0, kStride * height);
259 memset(dst_argb_opt, 0, kStride * height);
260
261 MaskCpuFlags(disable_cpu_flags);
262 ARGBUnattenuate(src_argb + off, kStride, dst_argb_c, kStride, width,
263 invert * height);
264 MaskCpuFlags(benchmark_cpu_info);
265 for (int i = 0; i < benchmark_iterations; ++i) {
266 ARGBUnattenuate(src_argb + off, kStride, dst_argb_opt, kStride, width,
267 invert * height);
268 }
269 int max_diff = 0;
270 for (int i = 0; i < kStride * height; ++i) {
271 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
272 static_cast<int>(dst_argb_opt[i]));
273 if (abs_diff > max_diff) {
274 max_diff = abs_diff;
275 }
276 }
277 free_aligned_buffer_page_end(src_argb);
278 free_aligned_buffer_page_end(dst_argb_c);
279 free_aligned_buffer_page_end(dst_argb_opt);
280 return max_diff;
281 }
282
// ARGBUnattenuate with a width one pixel larger than benchmark_width_; the
// two runs may differ by at most EXPECTED_UNATTENUATE_DIFF per byte.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int diff = TestUnattenuateI(benchmark_width_ + 1, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, EXPECTED_UNATTENUATE_DIFF);
}
289
// ARGBUnattenuate with the source pointer shifted by one byte; the two runs
// may differ by at most EXPECTED_UNATTENUATE_DIFF per byte.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, EXPECTED_UNATTENUATE_DIFF);
}
296
// ARGBUnattenuate called with a negated height (invert == -1); the two runs
// may differ by at most EXPECTED_UNATTENUATE_DIFF per byte.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, EXPECTED_UNATTENUATE_DIFF);
}
303
// ARGBUnattenuate at the plain benchmark size with no offset and no
// inversion; the two runs may differ by at most EXPECTED_UNATTENUATE_DIFF
// per byte.
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
                                    benchmark_iterations_, disable_cpu_flags_,
                                    benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(diff, EXPECTED_UNATTENUATE_DIFF);
}
310
// Feeds a constant 16x16 image to ARGBComputeCumulativeSum and checks that
// each output entry equals the channel value times (x + 1) * (y + 1).
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
  SIMD_ALIGNED(uint8_t orig_pixels[16][16][4]);
  SIMD_ALIGNED(int32_t added_pixels[16][16][4]);

  // Every pixel of the input carries these four channel values.
  static const int kChannelValue[4] = {1, 2, 3, 255};

  for (int row = 0; row < 16; ++row) {
    for (int col = 0; col < 16; ++col) {
      for (int c = 0; c < 4; ++c) {
        orig_pixels[row][col][c] = static_cast<uint8_t>(kChannelValue[c]);
      }
    }
  }

  ARGBComputeCumulativeSum(&orig_pixels[0][0][0], 16 * 4,
                           &added_pixels[0][0][0], 16 * 4, 16, 16);

  for (int row = 0; row < 16; ++row) {
    for (int col = 0; col < 16; ++col) {
      // Number of input pixels in the rectangle from (0, 0) to (col, row).
      const int area = (col + 1) * (row + 1);
      for (int c = 0; c < 4; ++c) {
        EXPECT_EQ(area * kChannelValue[c], added_pixels[row][col][c]);
      }
    }
  }
}
336
337 // near is for legacy platforms.
TEST_F(LibYUVPlanarTest,TestARGBGray)338 TEST_F(LibYUVPlanarTest, TestARGBGray) {
339 SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
340 memset(orig_pixels, 0, sizeof(orig_pixels));
341
342 // Test blue
343 orig_pixels[0][0] = 255u;
344 orig_pixels[0][1] = 0u;
345 orig_pixels[0][2] = 0u;
346 orig_pixels[0][3] = 128u;
347 // Test green
348 orig_pixels[1][0] = 0u;
349 orig_pixels[1][1] = 255u;
350 orig_pixels[1][2] = 0u;
351 orig_pixels[1][3] = 0u;
352 // Test red
353 orig_pixels[2][0] = 0u;
354 orig_pixels[2][1] = 0u;
355 orig_pixels[2][2] = 255u;
356 orig_pixels[2][3] = 255u;
357 // Test black
358 orig_pixels[3][0] = 0u;
359 orig_pixels[3][1] = 0u;
360 orig_pixels[3][2] = 0u;
361 orig_pixels[3][3] = 255u;
362 // Test white
363 orig_pixels[4][0] = 255u;
364 orig_pixels[4][1] = 255u;
365 orig_pixels[4][2] = 255u;
366 orig_pixels[4][3] = 255u;
367 // Test color
368 orig_pixels[5][0] = 16u;
369 orig_pixels[5][1] = 64u;
370 orig_pixels[5][2] = 192u;
371 orig_pixels[5][3] = 224u;
372 // Do 16 to test asm version.
373 ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1);
374 EXPECT_NEAR(29u, orig_pixels[0][0], 1);
375 EXPECT_NEAR(29u, orig_pixels[0][1], 1);
376 EXPECT_NEAR(29u, orig_pixels[0][2], 1);
377 EXPECT_EQ(128u, orig_pixels[0][3]);
378 EXPECT_EQ(149u, orig_pixels[1][0]);
379 EXPECT_EQ(149u, orig_pixels[1][1]);
380 EXPECT_EQ(149u, orig_pixels[1][2]);
381 EXPECT_EQ(0u, orig_pixels[1][3]);
382 EXPECT_NEAR(77u, orig_pixels[2][0], 1);
383 EXPECT_NEAR(77u, orig_pixels[2][1], 1);
384 EXPECT_NEAR(77u, orig_pixels[2][2], 1);
385 EXPECT_EQ(255u, orig_pixels[2][3]);
386 EXPECT_EQ(0u, orig_pixels[3][0]);
387 EXPECT_EQ(0u, orig_pixels[3][1]);
388 EXPECT_EQ(0u, orig_pixels[3][2]);
389 EXPECT_EQ(255u, orig_pixels[3][3]);
390 EXPECT_EQ(255u, orig_pixels[4][0]);
391 EXPECT_EQ(255u, orig_pixels[4][1]);
392 EXPECT_EQ(255u, orig_pixels[4][2]);
393 EXPECT_EQ(255u, orig_pixels[4][3]);
394 EXPECT_NEAR(97u, orig_pixels[5][0], 1);
395 EXPECT_NEAR(97u, orig_pixels[5][1], 1);
396 EXPECT_NEAR(97u, orig_pixels[5][2], 1);
397 EXPECT_EQ(224u, orig_pixels[5][3]);
398 for (int i = 0; i < 1280; ++i) {
399 orig_pixels[i][0] = i;
400 orig_pixels[i][1] = i / 2;
401 orig_pixels[i][2] = i / 3;
402 orig_pixels[i][3] = i;
403 }
404 for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
405 ARGBGray(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
406 }
407 }
408
// ARGBGrayTo (separate destination) on six reference pixels with a
// tolerance of 1, a timing loop over a 1280 pixel ramp, and finally an
// in-place ARGBGray pass over pixels whose four channels are all equal,
// which must come back unchanged.
TEST_F(LibYUVPlanarTest, TestARGBGrayTo) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t gray_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8_t kInput[6][4] = {
      {255u, 0u, 0u, 128u},      // Blue.
      {0u, 255u, 0u, 0u},        // Green.
      {0u, 0u, 255u, 255u},      // Red.
      {0u, 0u, 0u, 255u},        // Black.
      {255u, 255u, 255u, 255u},  // White.
      {16u, 64u, 192u, 224u},    // Color.
  };
  // Expected output per pixel; every byte is checked with tolerance 1.
  static const unsigned int kExpected[6][4] = {
      {30u, 30u, 30u, 128u}, {149u, 149u, 149u, 0u},   {76u, 76u, 76u, 255u},
      {0u, 0u, 0u, 255u},    {255u, 255u, 255u, 255u}, {96u, 96u, 96u, 224u},
  };

  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }

  // Do 16 to test asm version.
  ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1);
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_NEAR(kExpected[p][c], gray_pixels[p][c], 1);
    }
  }

  // Ramp input for the timing loop; values wrap modulo 256.
  for (int i = 0; i < 1280; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = static_cast<uint8_t>(i);
    px[1] = static_cast<uint8_t>(i / 2);
    px[2] = static_cast<uint8_t>(i / 3);
    px[3] = static_cast<uint8_t>(i);
  }
  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 1280, 1);
  }

  // Pixels with four equal channels must be left unchanged by ARGBGray.
  for (int i = 0; i < 256; ++i) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[i][c] = static_cast<uint8_t>(i);
    }
  }
  ARGBGray(&orig_pixels[0][0], 0, 0, 0, 256, 1);
  for (int i = 0; i < 256; ++i) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(i, orig_pixels[i][c]);
    }
  }
}
494
// In-place ARGBSepia on six reference pixels with exact expected bytes,
// followed by a timing loop over a 1280 pixel ramp.
TEST_F(LibYUVPlanarTest, TestARGBSepia) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8_t kInput[6][4] = {
      {255u, 0u, 0u, 128u},      // Blue.
      {0u, 255u, 0u, 0u},        // Green.
      {0u, 0u, 255u, 255u},      // Red.
      {0u, 0u, 0u, 255u},        // Black.
      {255u, 255u, 255u, 255u},  // White.
      {16u, 64u, 192u, 224u},    // Color.
  };
  // Expected output per pixel; channel 3 equals the input value.
  static const unsigned int kExpected[6][4] = {
      {33u, 43u, 47u, 128u}, {135u, 175u, 195u, 0u},   {69u, 89u, 99u, 255u},
      {0u, 0u, 0u, 255u},    {239u, 255u, 255u, 255u}, {88u, 114u, 127u, 224u},
  };

  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }

  // Do 16 to test asm version.
  ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 16, 1);
  for (int p = 0; p < 6; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c]);
    }
  }

  // Ramp input for the timing loop; values wrap modulo 256.
  for (int i = 0; i < 1280; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = static_cast<uint8_t>(i);
    px[1] = static_cast<uint8_t>(i / 2);
    px[2] = static_cast<uint8_t>(i / 3);
    px[3] = static_cast<uint8_t>(i);
  }
  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 1280, 1);
  }
}
566
// ARGBColorMatrix with a sepia matrix: checks four reference pixels against
// exact values, then compares a run made under disable_cpu_flags_ with runs
// made under benchmark_cpu_info_ on a 1280 pixel ramp.
TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);

  // Matrix for Sepia.
  SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
      17 / 2, 68 / 2, 35 / 2, 0, 22 / 2, 88 / 2, 45 / 2, 0,
      24 / 2, 98 / 2, 50 / 2, 0, 0,      0,      0,      64,  // Copy alpha.
  };
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8_t kInput[4][4] = {
      {255u, 0u, 0u, 128u},    // Blue.
      {0u, 255u, 0u, 0u},      // Green.
      {0u, 0u, 255u, 255u},    // Red.
      {16u, 64u, 192u, 224u},  // Color.
  };
  static const unsigned int kExpected[4][4] = {
      {31u, 43u, 47u, 128u},
      {135u, 175u, 195u, 0u},
      {67u, 87u, 99u, 255u},
      {87u, 112u, 127u, 224u},
  };

  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }

  // Do 16 to test asm version.
  ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                  &kRGBToSepia[0], 16, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], dst_pixels_opt[p][c]);
    }
  }

  // Ramp input for the comparison run; values wrap modulo 256.
  for (int i = 0; i < 1280; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = static_cast<uint8_t>(i);
    px[1] = static_cast<uint8_t>(i / 2);
    px[2] = static_cast<uint8_t>(i / 3);
    px[3] = static_cast<uint8_t>(i);
  }

  // One pass under the "disable" CPU mask.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
                  &kRGBToSepia[0], 1280, 1);

  // Repeated passes under the benchmark CPU mask.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                    &kRGBToSepia[0], 1280, 1);
  }

  for (int i = 0; i < 1280; ++i) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(dst_pixels_c[i][c], dst_pixels_opt[i][c]);
    }
  }
}
642
// In-place RGBColorMatrix with a sepia matrix on four reference pixels,
// followed by a timing loop over a 1280 pixel ramp.
TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);

  // Matrix for Sepia.
  SIMD_ALIGNED(static const int8_t kRGBToSepia[]) = {
      17, 68, 35, 0, 22, 88, 45, 0,
      24, 98, 50, 0, 0,  0,  0,  0,  // Unused but makes matrix 16 bytes.
  };
  memset(orig_pixels, 0, sizeof(orig_pixels));

  static const uint8_t kInput[4][4] = {
      {255u, 0u, 0u, 128u},    // Blue.
      {0u, 255u, 0u, 0u},      // Green.
      {0u, 0u, 255u, 255u},    // Red.
      {16u, 64u, 192u, 224u},  // Color.
  };
  // Expected output per pixel; channel 3 equals the input value.
  static const unsigned int kExpected[4][4] = {
      {31u, 43u, 47u, 128u},
      {135u, 175u, 195u, 0u},
      {67u, 87u, 99u, 255u},
      {87u, 112u, 127u, 224u},
  };

  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }

  // Do 16 to test asm version.
  RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 16, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c]);
    }
  }

  // Ramp input for the timing loop; values wrap modulo 256.
  for (int i = 0; i < 1280; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = static_cast<uint8_t>(i);
    px[1] = static_cast<uint8_t>(i / 2);
    px[2] = static_cast<uint8_t>(i / 3);
    px[3] = static_cast<uint8_t>(i);
  }
  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 1280, 1);
  }
}
702
// In-place ARGBColorTable on four reference pixels, followed by a timing
// loop over a 1280 pixel ramp. The expectations below correspond to each
// channel c of a pixel being replaced by kARGBTable[value * 4 + c].
TEST_F(LibYUVPlanarTest, TestARGBColorTable) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Lookup table; only the first 16 entries are non-zero.
  static const uint8_t kARGBTable[256 * 4] = {
      1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
  };

  static const uint8_t kInput[4][4] = {
      {0u, 0u, 0u, 0u},
      {1u, 1u, 1u, 1u},
      {2u, 2u, 2u, 2u},
      {0u, 1u, 2u, 3u},
  };
  static const unsigned int kExpected[4][4] = {
      {1u, 2u, 3u, 4u},
      {5u, 6u, 7u, 8u},
      {9u, 10u, 11u, 12u},
      {1u, 6u, 11u, 16u},
  };

  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      orig_pixels[p][c] = kInput[p][c];
    }
  }

  // Do 16 to test asm version.
  ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], orig_pixels[p][c]);
    }
  }

  // Ramp input for the timing loop; values wrap modulo 256.
  for (int i = 0; i < 1280; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = static_cast<uint8_t>(i);
    px[1] = static_cast<uint8_t>(i / 2);
    px[2] = static_cast<uint8_t>(i / 3);
    px[3] = static_cast<uint8_t>(i);
  }
  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1);
  }
}
757
758 // Same as TestARGBColorTable except alpha does not change.
TEST_F(LibYUVPlanarTest,TestRGBColorTable)759 TEST_F(LibYUVPlanarTest, TestRGBColorTable) {
760 SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
761 memset(orig_pixels, 0, sizeof(orig_pixels));
762
763 // Matrix for Sepia.
764 static const uint8_t kARGBTable[256 * 4] = {
765 1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u, 11u, 12u, 13u, 14u, 15u, 16u,
766 };
767
768 orig_pixels[0][0] = 0u;
769 orig_pixels[0][1] = 0u;
770 orig_pixels[0][2] = 0u;
771 orig_pixels[0][3] = 0u;
772 orig_pixels[1][0] = 1u;
773 orig_pixels[1][1] = 1u;
774 orig_pixels[1][2] = 1u;
775 orig_pixels[1][3] = 1u;
776 orig_pixels[2][0] = 2u;
777 orig_pixels[2][1] = 2u;
778 orig_pixels[2][2] = 2u;
779 orig_pixels[2][3] = 2u;
780 orig_pixels[3][0] = 0u;
781 orig_pixels[3][1] = 1u;
782 orig_pixels[3][2] = 2u;
783 orig_pixels[3][3] = 3u;
784 // Do 16 to test asm version.
785 RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1);
786 EXPECT_EQ(1u, orig_pixels[0][0]);
787 EXPECT_EQ(2u, orig_pixels[0][1]);
788 EXPECT_EQ(3u, orig_pixels[0][2]);
789 EXPECT_EQ(0u, orig_pixels[0][3]); // Alpha unchanged.
790 EXPECT_EQ(5u, orig_pixels[1][0]);
791 EXPECT_EQ(6u, orig_pixels[1][1]);
792 EXPECT_EQ(7u, orig_pixels[1][2]);
793 EXPECT_EQ(1u, orig_pixels[1][3]); // Alpha unchanged.
794 EXPECT_EQ(9u, orig_pixels[2][0]);
795 EXPECT_EQ(10u, orig_pixels[2][1]);
796 EXPECT_EQ(11u, orig_pixels[2][2]);
797 EXPECT_EQ(2u, orig_pixels[2][3]); // Alpha unchanged.
798 EXPECT_EQ(1u, orig_pixels[3][0]);
799 EXPECT_EQ(6u, orig_pixels[3][1]);
800 EXPECT_EQ(11u, orig_pixels[3][2]);
801 EXPECT_EQ(3u, orig_pixels[3][3]); // Alpha unchanged.
802
803 for (int i = 0; i < 1280; ++i) {
804 orig_pixels[i][0] = i;
805 orig_pixels[i][1] = i / 2;
806 orig_pixels[i][2] = i / 3;
807 orig_pixels[i][3] = i;
808 }
809 for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
810 RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 1280, 1);
811 }
812 }
813
// In-place ARGBQuantize with interval 8 and offset 4 on a 1280 pixel ramp.
// Channels 0..2 are expected to become (v / 8 * 8 + 4) & 255 and channel 3
// is expected to keep its value. A timing loop follows the checks.
TEST_F(LibYUVPlanarTest, TestARGBQuantize) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);

  const int kInterval = 8;
  const int kOffset = kInterval / 2;
  const int kScale = (65536 + kOffset) / kInterval;

  // Ramp input; values wrap modulo 256 when stored.
  for (int i = 0; i < 1280; ++i) {
    uint8_t* px = orig_pixels[i];
    px[0] = static_cast<uint8_t>(i);
    px[1] = static_cast<uint8_t>(i / 2);
    px[2] = static_cast<uint8_t>(i / 3);
    px[3] = static_cast<uint8_t>(i);
  }
  ARGBQuantize(&orig_pixels[0][0], 0, kScale, kInterval, kOffset, 0, 0, 1280,
               1);

  for (int i = 0; i < 1280; ++i) {
    // Unwrapped source values for channels 0..2 of pixel i.
    const int source[3] = {i, i / 2, i / 3};
    for (int c = 0; c < 3; ++c) {
      EXPECT_EQ((source[c] / kInterval * kInterval + kOffset) & 255,
                orig_pixels[i][c]);
    }
    EXPECT_EQ(i & 255, orig_pixels[i][3]);
  }
  for (int rep = 0; rep < benchmark_pixels_div1280_; ++rep) {
    ARGBQuantize(&orig_pixels[0][0], 0, kScale, kInterval, kOffset, 0, 0, 1280,
                 1);
  }
}
837
// Runs ARGBMirror on random data once under disable_cpu_flags_ and
// benchmark_iterations_ times under benchmark_cpu_info_, then requires the
// two outputs to match byte for byte.
TEST_F(LibYUVPlanarTest, ARGBMirror_Opt) {
  const int kRowBytes = benchmark_width_ * 4;
  const int kTotalBytes = benchmark_width_ * benchmark_height_ * 4;
  align_buffer_page_end(src_pixels, kTotalBytes);
  align_buffer_page_end(dst_pixels_opt, kTotalBytes);
  align_buffer_page_end(dst_pixels_c, kTotalBytes);

  MemRandomize(src_pixels, kTotalBytes);

  // One pass under the "disable" CPU mask.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBMirror(src_pixels, kRowBytes, dst_pixels_c, kRowBytes, benchmark_width_,
             benchmark_height_);

  // Repeated passes under the benchmark CPU mask.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int rep = 0; rep < benchmark_iterations_; ++rep) {
    ARGBMirror(src_pixels, kRowBytes, dst_pixels_opt, kRowBytes,
               benchmark_width_, benchmark_height_);
  }

  for (int b = 0; b < kTotalBytes; ++b) {
    EXPECT_EQ(dst_pixels_c[b], dst_pixels_opt[b]);
  }
  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
861
// Runs MirrorPlane on random data once under disable_cpu_flags_ and
// benchmark_iterations_ times under benchmark_cpu_info_, then requires the
// two outputs to match byte for byte.
TEST_F(LibYUVPlanarTest, MirrorPlane_Opt) {
  const int kRowBytes = benchmark_width_;
  const int kTotalBytes = benchmark_width_ * benchmark_height_;
  align_buffer_page_end(src_pixels, kTotalBytes);
  align_buffer_page_end(dst_pixels_opt, kTotalBytes);
  align_buffer_page_end(dst_pixels_c, kTotalBytes);

  MemRandomize(src_pixels, kTotalBytes);

  // One pass under the "disable" CPU mask.
  MaskCpuFlags(disable_cpu_flags_);
  MirrorPlane(src_pixels, kRowBytes, dst_pixels_c, kRowBytes, benchmark_width_,
              benchmark_height_);

  // Repeated passes under the benchmark CPU mask.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int rep = 0; rep < benchmark_iterations_; ++rep) {
    MirrorPlane(src_pixels, kRowBytes, dst_pixels_opt, kRowBytes,
                benchmark_width_, benchmark_height_);
  }

  for (int b = 0; b < kTotalBytes; ++b) {
    EXPECT_EQ(dst_pixels_c[b], dst_pixels_opt[b]);
  }
  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
884
// Runs MirrorUVPlane on random data (2 bytes per pixel) once under
// disable_cpu_flags_ and benchmark_iterations_ times under
// benchmark_cpu_info_, then requires the two outputs to match byte for byte.
TEST_F(LibYUVPlanarTest, MirrorUVPlane_Opt) {
  const int kRowBytes = benchmark_width_ * 2;
  const int kTotalBytes = benchmark_width_ * benchmark_height_ * 2;
  align_buffer_page_end(src_pixels, kTotalBytes);
  align_buffer_page_end(dst_pixels_opt, kTotalBytes);
  align_buffer_page_end(dst_pixels_c, kTotalBytes);

  MemRandomize(src_pixels, kTotalBytes);

  // One pass under the "disable" CPU mask.
  MaskCpuFlags(disable_cpu_flags_);
  MirrorUVPlane(src_pixels, kRowBytes, dst_pixels_c, kRowBytes,
                benchmark_width_, benchmark_height_);

  // Repeated passes under the benchmark CPU mask.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int rep = 0; rep < benchmark_iterations_; ++rep) {
    MirrorUVPlane(src_pixels, kRowBytes, dst_pixels_opt, kRowBytes,
                  benchmark_width_, benchmark_height_);
  }

  for (int b = 0; b < kTotalBytes; ++b) {
    EXPECT_EQ(dst_pixels_c[b], dst_pixels_opt[b]);
  }
  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
908
// Spot-checks ARGBShade on four hand-picked pixels with three different shade
// values, then runs it over a 1280 pixel row repeatedly for benchmarking.
TEST_F(LibYUVPlanarTest, TestShade) {
  SIMD_ALIGNED(uint8_t src[1280][4]);
  SIMD_ALIGNED(uint8_t shaded[1280][4]);
  memset(src, 0, sizeof(src));

  // Only the non-zero input bytes need writing; the memset cleared the rest.
  static const uint8_t kPixel0[4] = {10u, 20u, 40u, 80u};
  for (int ch = 0; ch < 4; ++ch) {
    src[0][ch] = kPixel0[ch];
  }
  src[1][3] = 255u;

  // Do 8 pixels to allow opt version to be used.
  // With 0x80ffffff only channel 3 is expected to change (it is halved).
  ARGBShade(&src[0][0], 0, &shaded[0][0], 0, 8, 1, 0x80ffffff);
  static const unsigned kExpectedChannel3Halved[4][4] = {
      {10u, 20u, 40u, 40u},
      {0u, 0u, 0u, 128u},
      {0u, 0u, 0u, 0u},
      {0u, 0u, 0u, 0u},
  };
  for (int px = 0; px < 4; ++px) {
    for (int ch = 0; ch < 4; ++ch) {
      EXPECT_EQ(kExpectedChannel3Halved[px][ch], shaded[px][ch]);
    }
  }

  // 0x80808080: every channel of the first pixel is expected to be halved.
  ARGBShade(&src[0][0], 0, &shaded[0][0], 0, 8, 1, 0x80808080);
  static const unsigned kExpectedAllHalved[4] = {5u, 10u, 20u, 40u};
  for (int ch = 0; ch < 4; ++ch) {
    EXPECT_EQ(kExpectedAllHalved[ch], shaded[0][ch]);
  }

  // 0x10204080: a different scale per channel; each is expected to land on 5.
  ARGBShade(&src[0][0], 0, &shaded[0][0], 0, 8, 1, 0x10204080);
  static const unsigned kExpectedPerChannel[4] = {5u, 5u, 5u, 5u};
  for (int ch = 0; ch < 4; ++ch) {
    EXPECT_EQ(kExpectedPerChannel[ch], shaded[0][ch]);
  }

  // Benchmark loop over the full 1280 pixel row.
  for (int iter = 0; iter < benchmark_pixels_div1280_; ++iter) {
    ARGBShade(&src[0][0], 0, &shaded[0][0], 0, 1280, 1, 0x80808080);
  }
}
966
// Spot-checks ARGBInterpolate between two 4-pixel ARGB rows for interpolation
// values 128, 0 and 192, then runs it over a 1280 pixel row for benchmarking.
TEST_F(LibYUVPlanarTest, TestARGBInterpolate) {
  SIMD_ALIGNED(uint8_t row0[1280][4]);
  SIMD_ALIGNED(uint8_t row1[1280][4]);
  SIMD_ALIGNED(uint8_t blended[1280][4]);
  memset(row0, 0, sizeof(row0));
  memset(row1, 0, sizeof(row1));

  // Only the non-zero input bytes need writing; the memsets cleared the rest.
  static const uint8_t kRow0Pixel0[4] = {16u, 32u, 64u, 128u};
  for (int ch = 0; ch < 4; ++ch) {
    row0[0][ch] = kRow0Pixel0[ch];
    row1[3][ch] = 255u;
  }
  row0[1][3] = 255u;

  // Interpolation value 128: the expected output is the rounded average of
  // the two rows.
  ARGBInterpolate(&row0[0][0], 0, &row1[0][0], 0, &blended[0][0], 0, 4, 1,
                  128);
  static const unsigned kExpectedHalf[4][4] = {
      {8u, 16u, 32u, 64u},
      {0u, 0u, 0u, 128u},
      {0u, 0u, 0u, 0u},
      {128u, 128u, 128u, 128u},
  };
  for (int px = 0; px < 4; ++px) {
    for (int ch = 0; ch < 4; ++ch) {
      EXPECT_EQ(kExpectedHalf[px][ch], blended[px][ch]);
    }
  }

  // Interpolation value 0: the first row is expected to pass through.
  ARGBInterpolate(&row0[0][0], 0, &row1[0][0], 0, &blended[0][0], 0, 4, 1, 0);
  static const unsigned kExpectedRow0[4] = {16u, 32u, 64u, 128u};
  for (int ch = 0; ch < 4; ++ch) {
    EXPECT_EQ(kExpectedRow0[ch], blended[0][ch]);
  }

  // Interpolation value 192: a quarter of the first row is expected to remain.
  ARGBInterpolate(&row0[0][0], 0, &row1[0][0], 0, &blended[0][0], 0, 4, 1,
                  192);
  static const unsigned kExpectedQuarter[4] = {4u, 8u, 16u, 32u};
  for (int ch = 0; ch < 4; ++ch) {
    EXPECT_EQ(kExpectedQuarter[ch], blended[0][ch]);
  }

  // Benchmark loop over the full 1280 pixel row.
  for (int iter = 0; iter < benchmark_pixels_div1280_; ++iter) {
    ARGBInterpolate(&row0[0][0], 0, &row1[0][0], 0, &blended[0][0], 0, 1280, 1,
                    128);
  }
}
1047
// Spot-checks InterpolatePlane between two 16-byte rows for interpolation
// values 128, 0 and 192, then runs it over a 1280 byte row for benchmarking.
TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
  SIMD_ALIGNED(uint8_t row0[1280]);
  SIMD_ALIGNED(uint8_t row1[1280]);
  SIMD_ALIGNED(uint8_t blended[1280]);
  memset(row0, 0, sizeof(row0));
  memset(row1, 0, sizeof(row1));

  // Only the non-zero input bytes need writing; the memsets cleared the rest.
  static const uint8_t kRow0Head[4] = {16u, 32u, 64u, 128u};
  for (int k = 0; k < 4; ++k) {
    row0[k] = kRow0Head[k];
    row1[12 + k] = 255u;
  }
  row0[7] = 255u;

  // Interpolation value 128: the expected output is the rounded average of
  // the two rows.
  InterpolatePlane(&row0[0], 0, &row1[0], 0, &blended[0], 0, 16, 1, 128);
  static const unsigned kExpectedHalf[16] = {
      8u, 16u, 32u, 64u, 0u, 0u, 0u, 128u,
      0u, 0u, 0u, 0u, 128u, 128u, 128u, 128u,
  };
  for (int k = 0; k < 16; ++k) {
    EXPECT_EQ(kExpectedHalf[k], blended[k]);
  }

  // Interpolation value 0: the first row is expected to pass through.
  InterpolatePlane(&row0[0], 0, &row1[0], 0, &blended[0], 0, 16, 1, 0);
  static const unsigned kExpectedRow0[4] = {16u, 32u, 64u, 128u};
  for (int k = 0; k < 4; ++k) {
    EXPECT_EQ(kExpectedRow0[k], blended[k]);
  }

  // Interpolation value 192: a quarter of the first row is expected to remain.
  InterpolatePlane(&row0[0], 0, &row1[0], 0, &blended[0], 0, 16, 1, 192);
  static const unsigned kExpectedQuarter[4] = {4u, 8u, 16u, 32u};
  for (int k = 0; k < 4; ++k) {
    EXPECT_EQ(kExpectedQuarter[k], blended[k]);
  }

  // Benchmark loop over the full 1280 byte row with interpolation value 123.
  for (int iter = 0; iter < benchmark_pixels_div1280_; ++iter) {
    InterpolatePlane(&row0[0], 0, &row1[0], 0, &blended[0], 0, 1280, 1, 123);
  }
}
1128
// Spot-checks InterpolatePlane_16 between two rows of 16 uint16_t samples for
// interpolation values 128, 0 and 192, then runs it over 1280 samples for
// benchmarking.
TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) {
  SIMD_ALIGNED(uint16_t row0[1280]);
  SIMD_ALIGNED(uint16_t row1[1280]);
  SIMD_ALIGNED(uint16_t blended[1280]);
  memset(row0, 0, sizeof(row0));
  memset(row1, 0, sizeof(row1));

  // Only the non-zero input samples need writing; the memsets cleared the
  // rest.
  static const uint16_t kRow0Head[4] = {16u, 32u, 64u, 128u};
  for (int k = 0; k < 4; ++k) {
    row0[k] = kRow0Head[k];
    row1[12 + k] = 255u;
  }
  row0[7] = 255u;

  // Interpolation value 128: the expected output is the rounded average of
  // the two rows.
  InterpolatePlane_16(&row0[0], 0, &row1[0], 0, &blended[0], 0, 16, 1, 128);
  static const unsigned kExpectedHalf[16] = {
      8u, 16u, 32u, 64u, 0u, 0u, 0u, 128u,
      0u, 0u, 0u, 0u, 128u, 128u, 128u, 128u,
  };
  for (int k = 0; k < 16; ++k) {
    EXPECT_EQ(kExpectedHalf[k], blended[k]);
  }

  // Interpolation value 0: the first row is expected to pass through.
  InterpolatePlane_16(&row0[0], 0, &row1[0], 0, &blended[0], 0, 16, 1, 0);
  static const unsigned kExpectedRow0[4] = {16u, 32u, 64u, 128u};
  for (int k = 0; k < 4; ++k) {
    EXPECT_EQ(kExpectedRow0[k], blended[k]);
  }

  // Interpolation value 192: a quarter of the first row is expected to remain.
  InterpolatePlane_16(&row0[0], 0, &row1[0], 0, &blended[0], 0, 16, 1, 192);
  static const unsigned kExpectedQuarter[4] = {4u, 8u, 16u, 32u};
  for (int k = 0; k < 4; ++k) {
    EXPECT_EQ(kExpectedQuarter[k], blended[k]);
  }

  // Benchmark loop over all 1280 samples with interpolation value 123.
  for (int iter = 0; iter < benchmark_pixels_div1280_; ++iter) {
    InterpolatePlane_16(&row0[0], 0, &row1[0], 0, &blended[0], 0, 1280, 1,
                        123);
  }
}
1209
// Defines a test named ARGBInterpolate<TERP><N> that compares ARGBInterpolate
// output with CPU optimizations disabled against the output with them enabled.
//   BPP_A / BPP_B       - bytes per pixel used to size the source / dest rows.
//   STRIDE_A / STRIDE_B - row strides are rounded up to a multiple of these.
//   W1280               - image width expression.
//   TERP                - interpolation value passed to ARGBInterpolate.
//   N                   - suffix appended to the test name.
//   NEG                 - sign token (+ or -) applied to the height argument.
//   OFF                 - byte offset added to both source pointers.
// FMT_A and FMT_B are accepted but not used by the expansion.
// Only /* */ comments may be used inside the body because of line splicing.
#define TESTTERP(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, STRIDE_B, W1280, TERP, \
                 N, NEG, OFF)                                                 \
  TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) {                        \
    const int kWidth = W1280;                                                 \
    const int kHeight = benchmark_height_;                                    \
    const int kStrideA =                                                      \
        (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A;                \
    const int kStrideB =                                                      \
        (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B;                \
    const int kSrcBytes = kStrideA * kHeight;                                 \
    const int kDstBytes = kStrideB * kHeight;                                 \
    align_buffer_page_end(src_first, kSrcBytes + OFF);                        \
    align_buffer_page_end(src_second, kSrcBytes + OFF);                       \
    align_buffer_page_end(dst_ref, kDstBytes);                                \
    align_buffer_page_end(dst_fast, kDstBytes);                               \
    for (int pos = 0; pos < kSrcBytes; ++pos) {                               \
      src_first[pos + OFF] = (fastrand() & 0xff);                             \
      src_second[pos + OFF] = (fastrand() & 0xff);                            \
    }                                                                         \
    /* Reference result with the optimizations masked off. */                 \
    MaskCpuFlags(disable_cpu_flags_);                                         \
    ARGBInterpolate(src_first + OFF, kStrideA, src_second + OFF, kStrideA,    \
                    dst_ref, kStrideB, kWidth, NEG kHeight, TERP);            \
    /* Result with the benchmark CPU flags, repeated for benchmarking. */     \
    MaskCpuFlags(benchmark_cpu_info_);                                        \
    for (int iter = 0; iter < benchmark_iterations_; ++iter) {                \
      ARGBInterpolate(src_first + OFF, kStrideA, src_second + OFF, kStrideA,  \
                      dst_fast, kStrideB, kWidth, NEG kHeight, TERP);         \
    }                                                                         \
    for (int pos = 0; pos < kDstBytes; ++pos) {                               \
      EXPECT_EQ(dst_ref[pos], dst_fast[pos]);                                 \
    }                                                                         \
    free_aligned_buffer_page_end(src_first);                                  \
    free_aligned_buffer_page_end(src_second);                                 \
    free_aligned_buffer_page_end(dst_ref);                                    \
    free_aligned_buffer_page_end(dst_fast);                                   \
  }
1243
// Instantiates four ARGBInterpolate tests for one interpolation value TERP:
//   _Any       - width of benchmark_width_ + 1.
//   _Unaligned - source pointers offset by 1 byte.
//   _Invert    - height passed negated.
//   _Opt       - benchmark_width_ with no offset and positive height.
#define TESTINTERPOLATE(TERP)                                                \
  TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ + 1, TERP, _Any, +, 0)   \
  TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \
  TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0)    \
  TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0)

// One group of tests per interpolation value, from 0 up to 255.
TESTINTERPOLATE(0)
TESTINTERPOLATE(64)
TESTINTERPOLATE(128)
TESTINTERPOLATE(192)
TESTINTERPOLATE(255)
1255
TestBlend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int attenuate)1256 static int TestBlend(int width,
1257 int height,
1258 int benchmark_iterations,
1259 int disable_cpu_flags,
1260 int benchmark_cpu_info,
1261 int invert,
1262 int off,
1263 int attenuate) {
1264 if (width < 1) {
1265 width = 1;
1266 }
1267 const int kBpp = 4;
1268 const int kStride = width * kBpp;
1269 align_buffer_page_end(src_argb_a, kStride * height + off);
1270 align_buffer_page_end(src_argb_b, kStride * height + off);
1271 align_buffer_page_end(dst_argb_c, kStride * height);
1272 align_buffer_page_end(dst_argb_opt, kStride * height);
1273 for (int i = 0; i < kStride * height; ++i) {
1274 src_argb_a[i + off] = (fastrand() & 0xff);
1275 src_argb_b[i + off] = (fastrand() & 0xff);
1276 }
1277 MemRandomize(src_argb_a, kStride * height + off);
1278 MemRandomize(src_argb_b, kStride * height + off);
1279 if (attenuate) {
1280 ARGBAttenuate(src_argb_a + off, kStride, src_argb_a + off, kStride, width,
1281 height);
1282 }
1283 memset(dst_argb_c, 255, kStride * height);
1284 memset(dst_argb_opt, 255, kStride * height);
1285
1286 MaskCpuFlags(disable_cpu_flags);
1287 ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1288 kStride, width, invert * height);
1289 MaskCpuFlags(benchmark_cpu_info);
1290 for (int i = 0; i < benchmark_iterations; ++i) {
1291 ARGBBlend(src_argb_a + off, kStride, src_argb_b + off, kStride,
1292 dst_argb_opt, kStride, width, invert * height);
1293 }
1294 int max_diff = 0;
1295 for (int i = 0; i < kStride * height; ++i) {
1296 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1297 static_cast<int>(dst_argb_opt[i]));
1298 if (abs_diff > max_diff) {
1299 max_diff = abs_diff;
1300 }
1301 }
1302 free_aligned_buffer_page_end(src_argb_a);
1303 free_aligned_buffer_page_end(src_argb_b);
1304 free_aligned_buffer_page_end(dst_argb_c);
1305 free_aligned_buffer_page_end(dst_argb_opt);
1306 return max_diff;
1307 }
1308
// ARGBBlend with a width of benchmark_width_ + 1; outputs may differ by at
// most 1 per byte between the reference and optimized runs.
TEST_F(LibYUVPlanarTest, ARGBBlend_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kAttenuate = 1;
  const int worst_diff = TestBlend(
      benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kAttenuate);
  EXPECT_LE(worst_diff, 1);
}
1315
// ARGBBlend with both source pointers offset by one byte; outputs may differ
// by at most 1 per byte between the reference and optimized runs.
TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int kAttenuate = 1;
  const int worst_diff = TestBlend(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kAttenuate);
  EXPECT_LE(worst_diff, 1);
}
1322
// ARGBBlend with a negated height; outputs may differ by at most 1 per byte
// between the reference and optimized runs.
TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int kAttenuate = 1;
  const int worst_diff = TestBlend(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kAttenuate);
  EXPECT_LE(worst_diff, 1);
}
1329
// ARGBBlend on random sources that are not attenuated first; outputs may
// differ by at most 1 per byte between the reference and optimized runs.
TEST_F(LibYUVPlanarTest, ARGBBlend_Unattenuated) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kAttenuate = 0;
  const int worst_diff = TestBlend(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kAttenuate);
  EXPECT_LE(worst_diff, 1);
}
1336
// ARGBBlend at the benchmark size with no offset and attenuated input;
// outputs may differ by at most 1 per byte between the reference and
// optimized runs.
TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kAttenuate = 1;
  const int worst_diff = TestBlend(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kAttenuate);
  EXPECT_LE(worst_diff, 1);
}
1343
TestBlendPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1344 static void TestBlendPlane(int width,
1345 int height,
1346 int benchmark_iterations,
1347 int disable_cpu_flags,
1348 int benchmark_cpu_info,
1349 int invert,
1350 int off) {
1351 if (width < 1) {
1352 width = 1;
1353 }
1354 const int kBpp = 1;
1355 const int kStride = width * kBpp;
1356 align_buffer_page_end(src_argb_a, kStride * height + off);
1357 align_buffer_page_end(src_argb_b, kStride * height + off);
1358 align_buffer_page_end(src_argb_alpha, kStride * height + off);
1359 align_buffer_page_end(dst_argb_c, kStride * height + off);
1360 align_buffer_page_end(dst_argb_opt, kStride * height + off);
1361 memset(dst_argb_c, 255, kStride * height + off);
1362 memset(dst_argb_opt, 255, kStride * height + off);
1363
1364 // Test source is maintained exactly if alpha is 255.
1365 for (int i = 0; i < width; ++i) {
1366 src_argb_a[i + off] = i & 255;
1367 src_argb_b[i + off] = 255 - (i & 255);
1368 }
1369 memset(src_argb_alpha + off, 255, width);
1370 BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1371 src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1372 for (int i = 0; i < width; ++i) {
1373 EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
1374 }
1375 // Test destination is maintained exactly if alpha is 0.
1376 memset(src_argb_alpha + off, 0, width);
1377 BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1378 src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1);
1379 for (int i = 0; i < width; ++i) {
1380 EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
1381 }
1382 for (int i = 0; i < kStride * height; ++i) {
1383 src_argb_a[i + off] = (fastrand() & 0xff);
1384 src_argb_b[i + off] = (fastrand() & 0xff);
1385 src_argb_alpha[i + off] = (fastrand() & 0xff);
1386 }
1387
1388 MaskCpuFlags(disable_cpu_flags);
1389 BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1390 src_argb_alpha + off, width, dst_argb_c + off, width, width,
1391 invert * height);
1392 MaskCpuFlags(benchmark_cpu_info);
1393 for (int i = 0; i < benchmark_iterations; ++i) {
1394 BlendPlane(src_argb_a + off, width, src_argb_b + off, width,
1395 src_argb_alpha + off, width, dst_argb_opt + off, width, width,
1396 invert * height);
1397 }
1398 for (int i = 0; i < kStride * height; ++i) {
1399 EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
1400 }
1401 free_aligned_buffer_page_end(src_argb_a);
1402 free_aligned_buffer_page_end(src_argb_b);
1403 free_aligned_buffer_page_end(src_argb_alpha);
1404 free_aligned_buffer_page_end(dst_argb_c);
1405 free_aligned_buffer_page_end(dst_argb_opt);
1406 }
1407
// BlendPlane at the benchmark size with no pointer offset.
TEST_F(LibYUVPlanarTest, BlendPlane_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane at the benchmark size with every plane pointer offset by one
// byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane with a width of benchmark_width_ + 1 and every plane pointer
// offset by one byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Any) {
  const int kInvert = +1;
  const int kOffset = 1;
  TestBlendPlane(benchmark_width_ + 1, benchmark_height_,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_, kInvert, kOffset);
}
// BlendPlane with a negated height and every plane pointer offset by one
// byte.
TEST_F(LibYUVPlanarTest, BlendPlane_Invert) {
  const int kInvert = -1;
  const int kOffset = 1;
  TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1424
// Divides v by a, rounding up (for non-negative v and positive a); used to
// size planes that are subsampled by a factor of a.
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
1426
TestI420Blend(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1427 static void TestI420Blend(int width,
1428 int height,
1429 int benchmark_iterations,
1430 int disable_cpu_flags,
1431 int benchmark_cpu_info,
1432 int invert,
1433 int off) {
1434 width = ((width) > 0) ? (width) : 1;
1435 const int kStrideUV = SUBSAMPLE(width, 2);
1436 const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2);
1437 align_buffer_page_end(src_y0, width * height + off);
1438 align_buffer_page_end(src_u0, kSizeUV + off);
1439 align_buffer_page_end(src_v0, kSizeUV + off);
1440 align_buffer_page_end(src_y1, width * height + off);
1441 align_buffer_page_end(src_u1, kSizeUV + off);
1442 align_buffer_page_end(src_v1, kSizeUV + off);
1443 align_buffer_page_end(src_a, width * height + off);
1444 align_buffer_page_end(dst_y_c, width * height + off);
1445 align_buffer_page_end(dst_u_c, kSizeUV + off);
1446 align_buffer_page_end(dst_v_c, kSizeUV + off);
1447 align_buffer_page_end(dst_y_opt, width * height + off);
1448 align_buffer_page_end(dst_u_opt, kSizeUV + off);
1449 align_buffer_page_end(dst_v_opt, kSizeUV + off);
1450
1451 MemRandomize(src_y0, width * height + off);
1452 MemRandomize(src_u0, kSizeUV + off);
1453 MemRandomize(src_v0, kSizeUV + off);
1454 MemRandomize(src_y1, width * height + off);
1455 MemRandomize(src_u1, kSizeUV + off);
1456 MemRandomize(src_v1, kSizeUV + off);
1457 MemRandomize(src_a, width * height + off);
1458 memset(dst_y_c, 255, width * height + off);
1459 memset(dst_u_c, 255, kSizeUV + off);
1460 memset(dst_v_c, 255, kSizeUV + off);
1461 memset(dst_y_opt, 255, width * height + off);
1462 memset(dst_u_opt, 255, kSizeUV + off);
1463 memset(dst_v_opt, 255, kSizeUV + off);
1464
1465 MaskCpuFlags(disable_cpu_flags);
1466 I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1467 kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1468 src_v1 + off, kStrideUV, src_a + off, width, dst_y_c + off, width,
1469 dst_u_c + off, kStrideUV, dst_v_c + off, kStrideUV, width,
1470 invert * height);
1471 MaskCpuFlags(benchmark_cpu_info);
1472 for (int i = 0; i < benchmark_iterations; ++i) {
1473 I420Blend(src_y0 + off, width, src_u0 + off, kStrideUV, src_v0 + off,
1474 kStrideUV, src_y1 + off, width, src_u1 + off, kStrideUV,
1475 src_v1 + off, kStrideUV, src_a + off, width, dst_y_opt + off,
1476 width, dst_u_opt + off, kStrideUV, dst_v_opt + off, kStrideUV,
1477 width, invert * height);
1478 }
1479 for (int i = 0; i < width * height; ++i) {
1480 EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]);
1481 }
1482 for (int i = 0; i < kSizeUV; ++i) {
1483 EXPECT_EQ(dst_u_c[i + off], dst_u_opt[i + off]);
1484 EXPECT_EQ(dst_v_c[i + off], dst_v_opt[i + off]);
1485 }
1486 free_aligned_buffer_page_end(src_y0);
1487 free_aligned_buffer_page_end(src_u0);
1488 free_aligned_buffer_page_end(src_v0);
1489 free_aligned_buffer_page_end(src_y1);
1490 free_aligned_buffer_page_end(src_u1);
1491 free_aligned_buffer_page_end(src_v1);
1492 free_aligned_buffer_page_end(src_a);
1493 free_aligned_buffer_page_end(dst_y_c);
1494 free_aligned_buffer_page_end(dst_u_c);
1495 free_aligned_buffer_page_end(dst_v_c);
1496 free_aligned_buffer_page_end(dst_y_opt);
1497 free_aligned_buffer_page_end(dst_u_opt);
1498 free_aligned_buffer_page_end(dst_v_opt);
1499 }
1500
// I420Blend at the benchmark size with no pointer offset.
TEST_F(LibYUVPlanarTest, I420Blend_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
// I420Blend at the benchmark size with every plane pointer offset by one
// byte.
TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1509
1510 // TODO(fbarchard): DISABLED because _Any uses C. Avoid C and re-enable.
// I420Blend with a width of benchmark_width_ + 1 and no pointer offset.
TEST_F(LibYUVPlanarTest, DISABLED_I420Blend_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  TestI420Blend(benchmark_width_ + 1, benchmark_height_,
                benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_,
                kInvert, kOffset);
}
// I420Blend with a negated height and no pointer offset.
TEST_F(LibYUVPlanarTest, I420Blend_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
}
1519
// Spot-checks ARGBAffineRow_C on a row whose pixel i holds the byte value i
// in every channel, and, where ARGBAffineRow_SSE2 is available, checks that
// it matches the C output and benchmarks it.
TEST_F(LibYUVPlanarTest, TestAffine) {
  SIMD_ALIGNED(uint8_t src_row[1280][4]);
  SIMD_ALIGNED(uint8_t affine_c[1280][4]);

  // Pixel px gets the value px (truncated to 8 bits) in all four channels.
  for (int px = 0; px < 1280; ++px) {
    for (int ch = 0; ch < 4; ++ch) {
      src_row[px][ch] = px;
    }
  }

  // NOTE(review): presumably {u, v, du, dv}; the expectations below match a
  // horizontal step of 0.75 source pixels per output pixel - confirm.
  float uv_step[4] = {0.f, 0.f, 0.75f, 0.f};

  ARGBAffineRow_C(&src_row[0][0], 0, &affine_c[0][0], uv_step, 1280);
  EXPECT_EQ(0u, affine_c[0][0]);
  EXPECT_EQ(96u, affine_c[128][0]);
  EXPECT_EQ(191u, affine_c[255][3]);

#if defined(HAS_ARGBAFFINEROW_SSE2)
  SIMD_ALIGNED(uint8_t affine_opt[1280][4]);
  ARGBAffineRow_SSE2(&src_row[0][0], 0, &affine_opt[0][0], uv_step, 1280);
  EXPECT_EQ(0, memcmp(affine_opt, affine_c, 1280 * 4));

  // Benchmark only when the CPU flag reports SSE2.
  if (TestCpuFlag(kCpuHasSSE2)) {
    for (int iter = 0; iter < benchmark_pixels_div1280_; ++iter) {
      ARGBAffineRow_SSE2(&src_row[0][0], 0, &affine_opt[0][0], uv_step, 1280);
    }
  }
#endif
}
1553
// Copies the interior of a bordered plane with CopyPlane, once with CPU
// optimizations disabled and once with them enabled, and expects the two
// destination buffers (including their untouched regions) to be identical.
TEST_F(LibYUVPlanarTest, TestCopyPlane) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kBorder = 12;
  const int kPaddedWidth = kWidth + kBorder * 2;
  const int kPaddedHeight = kHeight + kBorder * 2;
  const int kPaddedSize = kPaddedWidth * kPaddedHeight;

  align_buffer_page_end(src_plane, kPaddedSize);
  align_buffer_page_end(copy_c, kPaddedSize);
  align_buffer_page_end(copy_opt, kPaddedSize);

  memset(src_plane, 0, kPaddedSize);
  memset(copy_c, 0, kPaddedSize);
  memset(copy_opt, 0, kPaddedSize);

  // Fill image buffers with random data.
  for (int row = kBorder; row < kHeight + kBorder; ++row) {
    uint8_t* const src_row = src_plane + row * kPaddedWidth;
    for (int col = kBorder; col < kWidth + kBorder; ++col) {
      src_row[col] = fastrand() & 0xff;
    }
  }

  // Fill destination buffers with random data.
  for (int pos = 0; pos < kPaddedSize; ++pos) {
    const uint8_t filler = fastrand() & 0x7f;
    copy_c[pos] = filler;
    copy_opt[pos] = copy_c[pos];
  }

  // Offset of the first interior pixel, past the top and left borders.
  const int kInteriorOffset = kBorder * kPaddedWidth + kBorder;
  const int kSrcStride = kPaddedWidth;
  // NOTE(review): the destination stride is a fixed 8 bytes rather than the
  // padded width, so destination rows overlap whenever kWidth > 8 - confirm
  // this is intentional.
  const int kDstStride = 8;

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int iter = 0; iter < benchmark_iterations_; iter++) {
    CopyPlane(src_plane + kInteriorOffset, kSrcStride,
              copy_c + kInteriorOffset, kDstStride, kWidth, kHeight);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; iter++) {
    CopyPlane(src_plane + kInteriorOffset, kSrcStride,
              copy_opt + kInteriorOffset, kDstStride, kWidth, kHeight);
  }

  // Count mismatching bytes so that a failure is reported only once.
  int mismatches = 0;
  for (int pos = 0; pos < kPaddedSize; ++pos) {
    if (copy_c[pos] != copy_opt[pos]) {
      ++mismatches;
    }
  }

  free_aligned_buffer_page_end(src_plane);
  free_aligned_buffer_page_end(copy_c);
  free_aligned_buffer_page_end(copy_opt);

  EXPECT_EQ(0, mismatches);
}
1613
// Copies a tightly packed random plane with CopyPlane, with CPU
// optimizations disabled and then enabled, and expects identical output.
TEST_F(LibYUVPlanarTest, CopyPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPlaneSize = kWidth * kHeight;
  align_buffer_page_end(src_plane, kPlaneSize);
  align_buffer_page_end(copy_c, kPlaneSize);
  align_buffer_page_end(copy_opt, kPlaneSize);

  MemRandomize(src_plane, kPlaneSize);
  // Different fill values, so the buffers only match if both were written.
  memset(copy_c, 1, kPlaneSize);
  memset(copy_opt, 2, kPlaneSize);

  // Disable all optimizations.
  MaskCpuFlags(disable_cpu_flags_);
  for (int iter = 0; iter < benchmark_iterations_; iter++) {
    CopyPlane(src_plane, kWidth, copy_c, kWidth, kWidth, kHeight);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; iter++) {
    CopyPlane(src_plane, kWidth, copy_opt, kWidth, kWidth, kHeight);
  }

  for (int pos = 0; pos < kPlaneSize; ++pos) {
    EXPECT_EQ(copy_c[pos], copy_opt[pos]);
  }

  free_aligned_buffer_page_end(src_plane);
  free_aligned_buffer_page_end(copy_c);
  free_aligned_buffer_page_end(copy_opt);
}
1647
// Test to verify copying a rect with a zero height or width does
// not touch destination memory (or the source), with optimizations
// disabled and then enabled.
TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) {
  uint8_t src = 42;
  uint8_t dst = 0;

  // Each row is {stride used for both planes, width, height}.
  static const int kEmptyRects[3][3] = {
      {0, 0, 0},
      {1, 1, 0},
      {1, 0, 1},
  };
  // First pass disables all optimizations, second pass enables them.
  const int cpu_masks[2] = {disable_cpu_flags_, benchmark_cpu_info_};

  for (int pass = 0; pass < 2; ++pass) {
    MaskCpuFlags(cpu_masks[pass]);
    for (int r = 0; r < 3; ++r) {
      const int stride = kEmptyRects[r][0];
      const int width = kEmptyRects[r][1];
      const int height = kEmptyRects[r][2];
      CopyPlane(&src, stride, &dst, stride, width, height);
      EXPECT_EQ(src, 42);
      EXPECT_EQ(dst, 0);
    }
  }
}
1682
// Runs DetilePlane on random tiled input with CPU optimizations disabled and
// then enabled, and expects identical output.
TEST_F(LibYUVPlanarTest, TestDetilePlane) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;

  // orig is tiled. Allocate enough memory for tiles: both dimensions are
  // rounded up to a multiple of 16.
  const int kTiledWidth = (kWidth + 15) & ~15;
  const int kTiledHeight = (kHeight + 15) & ~15;
  const int kTiledSize = kTiledWidth * kTiledHeight;
  const int kLinearSize = kWidth * kHeight;
  align_buffer_page_end(tiled_src, kTiledSize);
  align_buffer_page_end(linear_c, kLinearSize);
  align_buffer_page_end(linear_opt, kLinearSize);

  MemRandomize(tiled_src, kTiledSize);
  memset(linear_c, 0, kLinearSize);
  memset(linear_opt, 0, kLinearSize);

  // Disable all optimizations.
  // NOTE(review): the trailing 16 is presumably the tile height - confirm.
  MaskCpuFlags(disable_cpu_flags_);
  for (int iter = 0; iter < benchmark_iterations_; iter++) {
    DetilePlane(tiled_src, kTiledWidth, linear_c, kWidth, kWidth, kHeight, 16);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; iter++) {
    DetilePlane(tiled_src, kTiledWidth, linear_opt, kWidth, kWidth, kHeight,
                16);
  }

  for (int pos = 0; pos < kLinearSize; ++pos) {
    EXPECT_EQ(linear_c[pos], linear_opt[pos]);
  }

  free_aligned_buffer_page_end(tiled_src);
  free_aligned_buffer_page_end(linear_c);
  free_aligned_buffer_page_end(linear_opt);
}
1721
// Runs DetilePlane_16 on random tiled 16-bit input with CPU optimizations
// disabled and then enabled, and expects byte-identical output.
TEST_F(LibYUVPlanarTest, TestDetilePlane_16) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;

  // orig is tiled. Allocate enough memory for tiles: both dimensions are
  // rounded up to a multiple of 16, and each sample takes two bytes.
  const int kTiledWidth = (kWidth + 15) & ~15;
  const int kTiledHeight = (kHeight + 15) & ~15;
  const int kTiledBytes = kTiledWidth * kTiledHeight * 2;
  const int kLinearBytes = kWidth * kHeight * 2;
  align_buffer_page_end(tiled_src, kTiledBytes);
  align_buffer_page_end(linear_c, kLinearBytes);
  align_buffer_page_end(linear_opt, kLinearBytes);

  MemRandomize(tiled_src, kTiledBytes);
  memset(linear_c, 0, kLinearBytes);
  memset(linear_opt, 0, kLinearBytes);

  // The byte buffers are handed to the 16-bit API as uint16_t samples.
  const uint16_t* const tiled_src16 =
      reinterpret_cast<const uint16_t*>(tiled_src);

  // Disable all optimizations.
  // NOTE(review): the trailing 16 is presumably the tile height - confirm.
  MaskCpuFlags(disable_cpu_flags_);
  for (int iter = 0; iter < benchmark_iterations_; iter++) {
    DetilePlane_16(tiled_src16, kTiledWidth,
                   reinterpret_cast<uint16_t*>(linear_c), kWidth, kWidth,
                   kHeight, 16);
  }

  // Enable optimizations.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int iter = 0; iter < benchmark_iterations_; iter++) {
    DetilePlane_16(tiled_src16, kTiledWidth,
                   reinterpret_cast<uint16_t*>(linear_opt), kWidth, kWidth,
                   kHeight, 16);
  }

  // Compare byte by byte over the whole output.
  for (int pos = 0; pos < kLinearBytes; ++pos) {
    EXPECT_EQ(linear_c[pos], linear_opt[pos]);
  }

  free_aligned_buffer_page_end(tiled_src);
  free_aligned_buffer_page_end(linear_c);
  free_aligned_buffer_page_end(linear_opt);
}
1760
1761 // Compares DetileSplitUV to 2 step Detile + SplitUV
TEST_F(LibYUVPlanarTest,TestDetileSplitUVPlane_Correctness)1762 TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) {
1763 int i, j;
1764
1765 // orig is tiled. Allocate enough memory for tiles.
1766 int tile_width = (benchmark_width_ + 15) & ~15;
1767 int tile_height = (benchmark_height_ + 15) & ~15;
1768 int tile_plane_size = tile_width * tile_height;
1769 int uv_plane_size = ((benchmark_width_ + 1) / 2) * benchmark_height_;
1770 align_buffer_page_end(tile_uv, tile_plane_size);
1771 align_buffer_page_end(detiled_uv, tile_plane_size);
1772 align_buffer_page_end(dst_u_two_stage, uv_plane_size);
1773 align_buffer_page_end(dst_u_opt, uv_plane_size);
1774 align_buffer_page_end(dst_v_two_stage, uv_plane_size);
1775 align_buffer_page_end(dst_v_opt, uv_plane_size);
1776
1777 MemRandomize(tile_uv, tile_plane_size);
1778 memset(detiled_uv, 0, tile_plane_size);
1779 memset(dst_u_two_stage, 0, uv_plane_size);
1780 memset(dst_u_opt, 0, uv_plane_size);
1781 memset(dst_v_two_stage, 0, uv_plane_size);
1782 memset(dst_v_opt, 0, uv_plane_size);
1783
1784 DetileSplitUVPlane(tile_uv, tile_width, dst_u_opt, (benchmark_width_ + 1) / 2,
1785 dst_v_opt, (benchmark_width_ + 1) / 2, benchmark_width_,
1786 benchmark_height_, 16);
1787
1788 // Benchmark 2 step conversion for comparison.
1789 for (j = 0; j < benchmark_iterations_; j++) {
1790 DetilePlane(tile_uv, tile_width, detiled_uv, benchmark_width_,
1791 benchmark_width_, benchmark_height_, 16);
1792 SplitUVPlane(detiled_uv, tile_width, dst_u_two_stage,
1793 (benchmark_width_ + 1) / 2, dst_v_two_stage,
1794 (benchmark_width_ + 1) / 2, (benchmark_width_ + 1) / 2,
1795 benchmark_height_);
1796 }
1797
1798 for (i = 0; i < uv_plane_size; ++i) {
1799 EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]);
1800 EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]);
1801 }
1802
1803 free_aligned_buffer_page_end(tile_uv);
1804 free_aligned_buffer_page_end(detiled_uv);
1805 free_aligned_buffer_page_end(dst_u_two_stage);
1806 free_aligned_buffer_page_end(dst_u_opt);
1807 free_aligned_buffer_page_end(dst_v_two_stage);
1808 free_aligned_buffer_page_end(dst_v_opt);
1809 }
1810
// Runs DetileSplitUVPlane once with the CPU flags masked by
// disable_cpu_flags_ and benchmark_iterations_ times with
// benchmark_cpu_info_, then expects identical U and V planes.
TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) {
  const int kTileDim = 16;

  // The tiled source is padded so both dimensions are multiples of 16.
  const int tile_width = (benchmark_width_ + 15) & ~15;
  const int tile_height = (benchmark_height_ + 15) & ~15;
  const int tile_plane_size = tile_width * tile_height;
  const int uv_stride = (benchmark_width_ + 1) / 2;
  const int uv_plane_size = uv_stride * benchmark_height_;
  align_buffer_page_end(tile_uv, tile_plane_size);
  align_buffer_page_end(dst_u_c, uv_plane_size);
  align_buffer_page_end(dst_u_opt, uv_plane_size);
  align_buffer_page_end(dst_v_c, uv_plane_size);
  align_buffer_page_end(dst_v_opt, uv_plane_size);

  MemRandomize(tile_uv, tile_plane_size);
  memset(dst_u_c, 0, uv_plane_size);
  memset(dst_u_opt, 0, uv_plane_size);
  memset(dst_v_c, 0, uv_plane_size);
  memset(dst_v_opt, 0, uv_plane_size);

  // Single reference run with all optimizations disabled.
  MaskCpuFlags(disable_cpu_flags_);
  DetileSplitUVPlane(tile_uv, tile_width, dst_u_c, uv_stride, dst_v_c,
                     uv_stride, benchmark_width_, benchmark_height_, kTileDim);

  // Benchmarked runs with optimizations enabled.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_iterations_; ++run) {
    DetileSplitUVPlane(tile_uv, tile_width, dst_u_opt, uv_stride, dst_v_opt,
                       uv_stride, benchmark_width_, benchmark_height_,
                       kTileDim);
  }

  for (int i = 0; i < uv_plane_size; ++i) {
    EXPECT_EQ(dst_u_c[i], dst_u_opt[i]);
    EXPECT_EQ(dst_v_c[i], dst_v_opt[i]);
  }

  free_aligned_buffer_page_end(tile_uv);
  free_aligned_buffer_page_end(dst_u_c);
  free_aligned_buffer_page_end(dst_u_opt);
  free_aligned_buffer_page_end(dst_v_c);
  free_aligned_buffer_page_end(dst_v_opt);
}
1858
TestMultiply(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1859 static int TestMultiply(int width,
1860 int height,
1861 int benchmark_iterations,
1862 int disable_cpu_flags,
1863 int benchmark_cpu_info,
1864 int invert,
1865 int off) {
1866 if (width < 1) {
1867 width = 1;
1868 }
1869 const int kBpp = 4;
1870 const int kStride = width * kBpp;
1871 align_buffer_page_end(src_argb_a, kStride * height + off);
1872 align_buffer_page_end(src_argb_b, kStride * height + off);
1873 align_buffer_page_end(dst_argb_c, kStride * height);
1874 align_buffer_page_end(dst_argb_opt, kStride * height);
1875 for (int i = 0; i < kStride * height; ++i) {
1876 src_argb_a[i + off] = (fastrand() & 0xff);
1877 src_argb_b[i + off] = (fastrand() & 0xff);
1878 }
1879 memset(dst_argb_c, 0, kStride * height);
1880 memset(dst_argb_opt, 0, kStride * height);
1881
1882 MaskCpuFlags(disable_cpu_flags);
1883 ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1884 kStride, width, invert * height);
1885 MaskCpuFlags(benchmark_cpu_info);
1886 for (int i = 0; i < benchmark_iterations; ++i) {
1887 ARGBMultiply(src_argb_a + off, kStride, src_argb_b + off, kStride,
1888 dst_argb_opt, kStride, width, invert * height);
1889 }
1890 int max_diff = 0;
1891 for (int i = 0; i < kStride * height; ++i) {
1892 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1893 static_cast<int>(dst_argb_opt[i]));
1894 if (abs_diff > max_diff) {
1895 max_diff = abs_diff;
1896 }
1897 }
1898 free_aligned_buffer_page_end(src_argb_a);
1899 free_aligned_buffer_page_end(src_argb_b);
1900 free_aligned_buffer_page_end(dst_argb_c);
1901 free_aligned_buffer_page_end(dst_argb_opt);
1902 return max_diff;
1903 }
1904
// ARGBMultiply at width benchmark_width_ + 1, positive height, no source
// offset. The two outputs may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) {
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff =
      TestMultiply(kWidth, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1911
// ARGBMultiply with both source pointers offset by 1 byte. The two outputs
// may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1918
// ARGBMultiply with a negated height (invert = -1). The two outputs may
// differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1925
// ARGBMultiply at the benchmark size, positive height, no source offset. The
// two outputs may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff =
      TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1932
TestAdd(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)1933 static int TestAdd(int width,
1934 int height,
1935 int benchmark_iterations,
1936 int disable_cpu_flags,
1937 int benchmark_cpu_info,
1938 int invert,
1939 int off) {
1940 if (width < 1) {
1941 width = 1;
1942 }
1943 const int kBpp = 4;
1944 const int kStride = width * kBpp;
1945 align_buffer_page_end(src_argb_a, kStride * height + off);
1946 align_buffer_page_end(src_argb_b, kStride * height + off);
1947 align_buffer_page_end(dst_argb_c, kStride * height);
1948 align_buffer_page_end(dst_argb_opt, kStride * height);
1949 for (int i = 0; i < kStride * height; ++i) {
1950 src_argb_a[i + off] = (fastrand() & 0xff);
1951 src_argb_b[i + off] = (fastrand() & 0xff);
1952 }
1953 memset(dst_argb_c, 0, kStride * height);
1954 memset(dst_argb_opt, 0, kStride * height);
1955
1956 MaskCpuFlags(disable_cpu_flags);
1957 ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
1958 kStride, width, invert * height);
1959 MaskCpuFlags(benchmark_cpu_info);
1960 for (int i = 0; i < benchmark_iterations; ++i) {
1961 ARGBAdd(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_opt,
1962 kStride, width, invert * height);
1963 }
1964 int max_diff = 0;
1965 for (int i = 0; i < kStride * height; ++i) {
1966 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
1967 static_cast<int>(dst_argb_opt[i]));
1968 if (abs_diff > max_diff) {
1969 max_diff = abs_diff;
1970 }
1971 }
1972 free_aligned_buffer_page_end(src_argb_a);
1973 free_aligned_buffer_page_end(src_argb_b);
1974 free_aligned_buffer_page_end(dst_argb_c);
1975 free_aligned_buffer_page_end(dst_argb_opt);
1976 return max_diff;
1977 }
1978
// ARGBAdd at width benchmark_width_ + 1, positive height, no source offset.
// The two outputs may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBAdd_Any) {
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff =
      TestAdd(kWidth, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1985
// ARGBAdd with both source pointers offset by 1 byte. The two outputs may
// differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1992
// ARGBAdd with a negated height (invert = -1). The two outputs may differ by
// at most 1.
TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
1999
// ARGBAdd at the benchmark size, positive height, no source offset. The two
// outputs may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff =
      TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_,
              disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
2006
TestSubtract(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)2007 static int TestSubtract(int width,
2008 int height,
2009 int benchmark_iterations,
2010 int disable_cpu_flags,
2011 int benchmark_cpu_info,
2012 int invert,
2013 int off) {
2014 if (width < 1) {
2015 width = 1;
2016 }
2017 const int kBpp = 4;
2018 const int kStride = width * kBpp;
2019 align_buffer_page_end(src_argb_a, kStride * height + off);
2020 align_buffer_page_end(src_argb_b, kStride * height + off);
2021 align_buffer_page_end(dst_argb_c, kStride * height);
2022 align_buffer_page_end(dst_argb_opt, kStride * height);
2023 for (int i = 0; i < kStride * height; ++i) {
2024 src_argb_a[i + off] = (fastrand() & 0xff);
2025 src_argb_b[i + off] = (fastrand() & 0xff);
2026 }
2027 memset(dst_argb_c, 0, kStride * height);
2028 memset(dst_argb_opt, 0, kStride * height);
2029
2030 MaskCpuFlags(disable_cpu_flags);
2031 ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride, dst_argb_c,
2032 kStride, width, invert * height);
2033 MaskCpuFlags(benchmark_cpu_info);
2034 for (int i = 0; i < benchmark_iterations; ++i) {
2035 ARGBSubtract(src_argb_a + off, kStride, src_argb_b + off, kStride,
2036 dst_argb_opt, kStride, width, invert * height);
2037 }
2038 int max_diff = 0;
2039 for (int i = 0; i < kStride * height; ++i) {
2040 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2041 static_cast<int>(dst_argb_opt[i]));
2042 if (abs_diff > max_diff) {
2043 max_diff = abs_diff;
2044 }
2045 }
2046 free_aligned_buffer_page_end(src_argb_a);
2047 free_aligned_buffer_page_end(src_argb_b);
2048 free_aligned_buffer_page_end(dst_argb_c);
2049 free_aligned_buffer_page_end(dst_argb_opt);
2050 return max_diff;
2051 }
2052
// ARGBSubtract at width benchmark_width_ + 1, positive height, no source
// offset. The two outputs may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) {
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff =
      TestSubtract(kWidth, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
2059
// ARGBSubtract with both source pointers offset by 1 byte. The two outputs
// may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff =
      TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
2066
// ARGBSubtract with a negated height (invert = -1). The two outputs may
// differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff =
      TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
2073
// ARGBSubtract at the benchmark size, positive height, no source offset. The
// two outputs may differ by at most 1.
TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff =
      TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_,
                   disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_LE(max_diff, 1);
}
2080
TestSobel(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)2081 static int TestSobel(int width,
2082 int height,
2083 int benchmark_iterations,
2084 int disable_cpu_flags,
2085 int benchmark_cpu_info,
2086 int invert,
2087 int off) {
2088 if (width < 1) {
2089 width = 1;
2090 }
2091 const int kBpp = 4;
2092 const int kStride = width * kBpp;
2093 align_buffer_page_end(src_argb_a, kStride * height + off);
2094 align_buffer_page_end(dst_argb_c, kStride * height);
2095 align_buffer_page_end(dst_argb_opt, kStride * height);
2096 memset(src_argb_a, 0, kStride * height + off);
2097 for (int i = 0; i < kStride * height; ++i) {
2098 src_argb_a[i + off] = (fastrand() & 0xff);
2099 }
2100 memset(dst_argb_c, 0, kStride * height);
2101 memset(dst_argb_opt, 0, kStride * height);
2102
2103 MaskCpuFlags(disable_cpu_flags);
2104 ARGBSobel(src_argb_a + off, kStride, dst_argb_c, kStride, width,
2105 invert * height);
2106 MaskCpuFlags(benchmark_cpu_info);
2107 for (int i = 0; i < benchmark_iterations; ++i) {
2108 ARGBSobel(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
2109 invert * height);
2110 }
2111 int max_diff = 0;
2112 for (int i = 0; i < kStride * height; ++i) {
2113 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2114 static_cast<int>(dst_argb_opt[i]));
2115 if (abs_diff > max_diff) {
2116 max_diff = abs_diff;
2117 }
2118 }
2119 free_aligned_buffer_page_end(src_argb_a);
2120 free_aligned_buffer_page_end(dst_argb_c);
2121 free_aligned_buffer_page_end(dst_argb_opt);
2122 return max_diff;
2123 }
2124
// ARGBSobel at width benchmark_width_ + 1, positive height, no source
// offset. The two outputs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobel_Any) {
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff =
      TestSobel(kWidth, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2131
// ARGBSobel with the source pointer offset by 1 byte. The two outputs must
// match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2138
// ARGBSobel with a negated height (invert = -1). The two outputs must match
// exactly.
TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2145
// ARGBSobel at the benchmark size, positive height, no source offset. The
// two outputs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff =
      TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_,
                disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2152
TestSobelToPlane(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)2153 static int TestSobelToPlane(int width,
2154 int height,
2155 int benchmark_iterations,
2156 int disable_cpu_flags,
2157 int benchmark_cpu_info,
2158 int invert,
2159 int off) {
2160 if (width < 1) {
2161 width = 1;
2162 }
2163 const int kSrcBpp = 4;
2164 const int kDstBpp = 1;
2165 const int kSrcStride = (width * kSrcBpp + 15) & ~15;
2166 const int kDstStride = (width * kDstBpp + 15) & ~15;
2167 align_buffer_page_end(src_argb_a, kSrcStride * height + off);
2168 align_buffer_page_end(dst_argb_c, kDstStride * height);
2169 align_buffer_page_end(dst_argb_opt, kDstStride * height);
2170 memset(src_argb_a, 0, kSrcStride * height + off);
2171 for (int i = 0; i < kSrcStride * height; ++i) {
2172 src_argb_a[i + off] = (fastrand() & 0xff);
2173 }
2174 memset(dst_argb_c, 0, kDstStride * height);
2175 memset(dst_argb_opt, 0, kDstStride * height);
2176
2177 MaskCpuFlags(disable_cpu_flags);
2178 ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_c, kDstStride, width,
2179 invert * height);
2180 MaskCpuFlags(benchmark_cpu_info);
2181 for (int i = 0; i < benchmark_iterations; ++i) {
2182 ARGBSobelToPlane(src_argb_a + off, kSrcStride, dst_argb_opt, kDstStride,
2183 width, invert * height);
2184 }
2185 int max_diff = 0;
2186 for (int i = 0; i < kDstStride * height; ++i) {
2187 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2188 static_cast<int>(dst_argb_opt[i]));
2189 if (abs_diff > max_diff) {
2190 max_diff = abs_diff;
2191 }
2192 }
2193 free_aligned_buffer_page_end(src_argb_a);
2194 free_aligned_buffer_page_end(dst_argb_c);
2195 free_aligned_buffer_page_end(dst_argb_opt);
2196 return max_diff;
2197 }
2198
// ARGBSobelToPlane at width benchmark_width_ + 1, positive height, no source
// offset. The two outputs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) {
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestSobelToPlane(
      kWidth, benchmark_height_, benchmark_iterations_, disable_cpu_flags_,
      benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2205
// ARGBSobelToPlane with the source pointer offset by 1 byte. The two outputs
// must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2212
// ARGBSobelToPlane with a negated height (invert = -1). The two outputs must
// match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2219
// ARGBSobelToPlane at the benchmark size, positive height, no source offset.
// The two outputs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestSobelToPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2226
TestSobelXY(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off)2227 static int TestSobelXY(int width,
2228 int height,
2229 int benchmark_iterations,
2230 int disable_cpu_flags,
2231 int benchmark_cpu_info,
2232 int invert,
2233 int off) {
2234 if (width < 1) {
2235 width = 1;
2236 }
2237 const int kBpp = 4;
2238 const int kStride = width * kBpp;
2239 align_buffer_page_end(src_argb_a, kStride * height + off);
2240 align_buffer_page_end(dst_argb_c, kStride * height);
2241 align_buffer_page_end(dst_argb_opt, kStride * height);
2242 memset(src_argb_a, 0, kStride * height + off);
2243 for (int i = 0; i < kStride * height; ++i) {
2244 src_argb_a[i + off] = (fastrand() & 0xff);
2245 }
2246 memset(dst_argb_c, 0, kStride * height);
2247 memset(dst_argb_opt, 0, kStride * height);
2248
2249 MaskCpuFlags(disable_cpu_flags);
2250 ARGBSobelXY(src_argb_a + off, kStride, dst_argb_c, kStride, width,
2251 invert * height);
2252 MaskCpuFlags(benchmark_cpu_info);
2253 for (int i = 0; i < benchmark_iterations; ++i) {
2254 ARGBSobelXY(src_argb_a + off, kStride, dst_argb_opt, kStride, width,
2255 invert * height);
2256 }
2257 int max_diff = 0;
2258 for (int i = 0; i < kStride * height; ++i) {
2259 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2260 static_cast<int>(dst_argb_opt[i]));
2261 if (abs_diff > max_diff) {
2262 max_diff = abs_diff;
2263 }
2264 }
2265 free_aligned_buffer_page_end(src_argb_a);
2266 free_aligned_buffer_page_end(dst_argb_c);
2267 free_aligned_buffer_page_end(dst_argb_opt);
2268 return max_diff;
2269 }
2270
// ARGBSobelXY at width benchmark_width_ + 1, positive height, no source
// offset. The two outputs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) {
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff =
      TestSobelXY(kWidth, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2277
// ARGBSobelXY with the source pointer offset by 1 byte. The two outputs must
// match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff =
      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2284
// ARGBSobelXY with a negated height (invert = -1). The two outputs must
// match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff =
      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2291
// ARGBSobelXY at the benchmark size, positive height, no source offset. The
// two outputs must match exactly.
TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff =
      TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_,
                  disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset);
  EXPECT_EQ(0, max_diff);
}
2298
TestBlur(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int radius)2299 static int TestBlur(int width,
2300 int height,
2301 int benchmark_iterations,
2302 int disable_cpu_flags,
2303 int benchmark_cpu_info,
2304 int invert,
2305 int off,
2306 int radius) {
2307 if (width < 1) {
2308 width = 1;
2309 }
2310 const int kBpp = 4;
2311 const int kStride = width * kBpp;
2312 align_buffer_page_end(src_argb_a, kStride * height + off);
2313 align_buffer_page_end(dst_cumsum, width * height * 16);
2314 align_buffer_page_end(dst_argb_c, kStride * height);
2315 align_buffer_page_end(dst_argb_opt, kStride * height);
2316 for (int i = 0; i < kStride * height; ++i) {
2317 src_argb_a[i + off] = (fastrand() & 0xff);
2318 }
2319 memset(dst_cumsum, 0, width * height * 16);
2320 memset(dst_argb_c, 0, kStride * height);
2321 memset(dst_argb_opt, 0, kStride * height);
2322
2323 MaskCpuFlags(disable_cpu_flags);
2324 ARGBBlur(src_argb_a + off, kStride, dst_argb_c, kStride,
2325 reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
2326 invert * height, radius);
2327 MaskCpuFlags(benchmark_cpu_info);
2328 for (int i = 0; i < benchmark_iterations; ++i) {
2329 ARGBBlur(src_argb_a + off, kStride, dst_argb_opt, kStride,
2330 reinterpret_cast<int32_t*>(dst_cumsum), width * 4, width,
2331 invert * height, radius);
2332 }
2333 int max_diff = 0;
2334 for (int i = 0; i < kStride * height; ++i) {
2335 int abs_diff = abs(static_cast<int>(dst_argb_c[i]) -
2336 static_cast<int>(dst_argb_opt[i]));
2337 if (abs_diff > max_diff) {
2338 max_diff = abs_diff;
2339 }
2340 }
2341 free_aligned_buffer_page_end(src_argb_a);
2342 free_aligned_buffer_page_end(dst_cumsum);
2343 free_aligned_buffer_page_end(dst_argb_c);
2344 free_aligned_buffer_page_end(dst_argb_opt);
2345 return max_diff;
2346 }
2347
// DISABLED_ARM(name) expands to DISABLED_name only when DISABLE_SLOW_TESTS is
// defined and the target is neither x86_64 nor i386; otherwise it expands to
// name unchanged.
#if defined(DISABLE_SLOW_TESTS) && !defined(__x86_64__) && !defined(__i386__)
#define DISABLED_ARM(name) DISABLED_##name
#else
#define DISABLED_ARM(name) name
#endif
2353
// Radius passed to ARGBBlur by the ARGBBlur_* tests.
static const int kBlurSize = 55;

// ARGBBlur at width benchmark_width_ + 1, positive height, no source offset.
// The two outputs may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Any)) {
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestBlur(
      kWidth, benchmark_height_, benchmark_iterations_, disable_cpu_flags_,
      benchmark_cpu_info_, kInvert, kOffset, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
2361
// ARGBBlur with the source pointer offset by 1 byte. The two outputs may
// differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Unaligned)) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff = TestBlur(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
2368
// ARGBBlur with a negated height (invert = -1). The two outputs may differ
// by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Invert)) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff = TestBlur(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
2375
// ARGBBlur at the benchmark size, positive height, no source offset. The two
// outputs may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Opt)) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestBlur(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBlurSize);
  EXPECT_LE(max_diff, 1);
}
2382
// Radius passed to ARGBBlur by the ARGBBlurSmall_* tests.
static const int kBlurSmallSize = 5;

// Small-radius ARGBBlur at width benchmark_width_ + 1, positive height, no
// source offset. The two outputs may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Any)) {
  const int kWidth = benchmark_width_ + 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestBlur(
      kWidth, benchmark_height_, benchmark_iterations_, disable_cpu_flags_,
      benchmark_cpu_info_, kInvert, kOffset, kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
2390
// Small-radius ARGBBlur with the source pointer offset by 1 byte. The two
// outputs may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Unaligned)) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset,
               kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
2397
// Small-radius ARGBBlur with a negated height (invert = -1). The two outputs
// may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Invert)) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset,
               kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
2404
// Small-radius ARGBBlur at the benchmark size, positive height, no source
// offset. The two outputs may differ by at most 1.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Opt)) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff =
      TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_,
               disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset,
               kBlurSmallSize);
  EXPECT_LE(max_diff, 1);
}
2411
// Checks ARGBPolynomial against fixed expected values for five known pixels,
// then compares the output produced with disable_cpu_flags_ against the
// output produced with benchmark_cpu_info_ over a 1280 pixel ramp.
TEST_F(LibYUVPlanarTest, DISABLED_ARM(TestARGBPolynomial)) {
  SIMD_ALIGNED(uint8_t orig_pixels[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[1280][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[1280][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  SIMD_ALIGNED(static const float kWarmifyPolynomial[16]) = {
      0.94230f,  -3.03300f,    -2.92500f,    0.f,  // C0
      0.584500f, 1.112000f,    1.535000f,    1.f,  // C1 x
      0.001313f, -0.002503f,   -0.004496f,   0.f,  // C2 x * x
      0.0f,      0.000006965f, 0.000008781f, 0.f,  // C3 x * x * x
  };

  // Known input pixels, one row per pixel, channels in memory order.
  static const uint8_t kKnownPixels[5][4] = {
      {255, 0, 0, 128},      // Test blue
      {0, 255, 0, 0},        // Test green
      {0, 0, 255, 255},      // Test red
      {255, 255, 255, 255},  // Test white
      {16, 64, 192, 224},    // Test color
  };
  for (int pixel = 0; pixel < 5; ++pixel) {
    for (int channel = 0; channel < 4; ++channel) {
      orig_pixels[pixel][channel] = kKnownPixels[pixel][channel];
    }
  }

  const uint8_t* const src = &orig_pixels[0][0];
  const float* const poly = &kWarmifyPolynomial[0];

  // Do 16 to test asm version.
  ARGBPolynomial(src, 0, &dst_pixels_opt[0][0], 0, poly, 16, 1);
  EXPECT_EQ(235u, dst_pixels_opt[0][0]);
  EXPECT_EQ(0u, dst_pixels_opt[0][1]);
  EXPECT_EQ(0u, dst_pixels_opt[0][2]);
  EXPECT_EQ(128u, dst_pixels_opt[0][3]);
  EXPECT_EQ(0u, dst_pixels_opt[1][0]);
  EXPECT_EQ(233u, dst_pixels_opt[1][1]);
  EXPECT_EQ(0u, dst_pixels_opt[1][2]);
  EXPECT_EQ(0u, dst_pixels_opt[1][3]);
  EXPECT_EQ(0u, dst_pixels_opt[2][0]);
  EXPECT_EQ(0u, dst_pixels_opt[2][1]);
  EXPECT_EQ(241u, dst_pixels_opt[2][2]);
  EXPECT_EQ(255u, dst_pixels_opt[2][3]);
  EXPECT_EQ(235u, dst_pixels_opt[3][0]);
  EXPECT_EQ(233u, dst_pixels_opt[3][1]);
  EXPECT_EQ(241u, dst_pixels_opt[3][2]);
  EXPECT_EQ(255u, dst_pixels_opt[3][3]);
  EXPECT_EQ(10u, dst_pixels_opt[4][0]);
  EXPECT_EQ(59u, dst_pixels_opt[4][1]);
  EXPECT_EQ(188u, dst_pixels_opt[4][2]);
  EXPECT_EQ(224u, dst_pixels_opt[4][3]);

  // Replace the input with a ramp; values wrap when stored as uint8_t.
  for (int x = 0; x < 1280; ++x) {
    orig_pixels[x][0] = x;
    orig_pixels[x][1] = x / 2;
    orig_pixels[x][2] = x / 3;
    orig_pixels[x][3] = x;
  }

  // Reference output with optimizations disabled.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBPolynomial(src, 0, &dst_pixels_c[0][0], 0, poly, 1280, 1);

  // Benchmarked output with optimizations enabled.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int run = 0; run < benchmark_pixels_div1280_; ++run) {
    ARGBPolynomial(src, 0, &dst_pixels_opt[0][0], 0, poly, 1280, 1);
  }

  for (int i = 0; i < 1280; ++i) {
    EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]);
    EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]);
    EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]);
    EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]);
  }
}
2498
TestHalfFloatPlane(int benchmark_width,int benchmark_height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,float scale,int mask)2499 int TestHalfFloatPlane(int benchmark_width,
2500 int benchmark_height,
2501 int benchmark_iterations,
2502 int disable_cpu_flags,
2503 int benchmark_cpu_info,
2504 float scale,
2505 int mask) {
2506 int i, j;
2507 const int y_plane_size = benchmark_width * benchmark_height * 2;
2508
2509 align_buffer_page_end(orig_y, y_plane_size * 3);
2510 uint8_t* dst_opt = orig_y + y_plane_size;
2511 uint8_t* dst_c = orig_y + y_plane_size * 2;
2512
2513 MemRandomize(orig_y, y_plane_size);
2514 memset(dst_c, 0, y_plane_size);
2515 memset(dst_opt, 1, y_plane_size);
2516
2517 for (i = 0; i < y_plane_size / 2; ++i) {
2518 reinterpret_cast<uint16_t*>(orig_y)[i] &= mask;
2519 }
2520
2521 // Disable all optimizations.
2522 MaskCpuFlags(disable_cpu_flags);
2523 for (j = 0; j < benchmark_iterations; j++) {
2524 HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
2525 reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2,
2526 scale, benchmark_width, benchmark_height);
2527 }
2528
2529 // Enable optimizations.
2530 MaskCpuFlags(benchmark_cpu_info);
2531 for (j = 0; j < benchmark_iterations; j++) {
2532 HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
2533 reinterpret_cast<uint16_t*>(dst_opt), benchmark_width * 2,
2534 scale, benchmark_width, benchmark_height);
2535 }
2536
2537 int max_diff = 0;
2538 for (i = 0; i < y_plane_size / 2; ++i) {
2539 int abs_diff =
2540 abs(static_cast<int>(reinterpret_cast<uint16_t*>(dst_c)[i]) -
2541 static_cast<int>(reinterpret_cast<uint16_t*>(dst_opt)[i]));
2542 if (abs_diff > max_diff) {
2543 max_diff = abs_diff;
2544 }
2545 }
2546
2547 free_aligned_buffer_page_end(orig_y);
2548 return max_diff;
2549 }
2550
#if defined(__arm__)
// Read-modify-write of the FPSCR register that sets bit 24 (0x1000000).
// NOTE(review): Arm documents FPSCR bit 24 as the FZ (flush-to-zero) control,
// which matches this helper's name - confirm against the Arm architecture
// reference. The previous register value is not saved, so the setting stays
// in effect after this function returns. Only compiled when __arm__ is
// defined.
static void EnableFlushDenormalToZero(void) {
  uint32_t cw;  // Receives the FPSCR value while it is being modified.
  __asm__ __volatile__(
      "vmrs %0, fpscr \n"          // cw = FPSCR
      "orr %0, %0, #0x1000000 \n"  // cw |= 1 << 24
      "vmsr fpscr, %0 \n"          // FPSCR = cw
      : "=r"(cw)::"memory");
}
#endif
2561
// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
// exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally
// happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.
2565
// Full 16 bit input (mask 65535) with a scale of 1/65536. The C and optimized
// outputs are required to match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
#if defined(__arm__)
  // 32 bit arm rounding on the denormal case is off by 1 compared to C, so
  // flush-to-zero is switched on before running the comparison.
  EnableFlushDenormalToZero();
#endif
  const float kScale = 1.0f / 65536.0f;
  const int kMask = 65535;
  const int max_abs_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, max_abs_diff);
}
2576
// Full 16 bit input (mask 65535) with a scale of 1.0. The C and optimized
// outputs may differ by at most 1.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
  const float kScale = 1.0f;
  const int kMask = 65535;
  const int max_abs_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_LE(max_abs_diff, 1);
}
2583
// Full 16 bit input (mask 65535) with a scale of 1/4096. The C and optimized
// outputs are required to match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
  const float kScale = 1.0f / 4096.0f;
  const int kMask = 65535;
  const int max_abs_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, max_abs_diff);
}
2590
// 10 bit input (mask 1023) with a scale of 1/1024. The C and optimized
// outputs are required to match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
  const float kScale = 1.0f / 1024.0f;
  const int kMask = 1023;
  const int max_abs_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, max_abs_diff);
}
2597
// 9 bit input (mask 511) with a scale of 1/512. The C and optimized outputs
// are required to match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
  const float kScale = 1.0f / 512.0f;
  const int kMask = 511;
  const int max_abs_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, max_abs_diff);
}
2604
// 12 bit input (mask 4095) with a scale of 1/4096. The C and optimized
// outputs are required to match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
  const float kScale = 1.0f / 4096.0f;
  const int kMask = 4095;
  const int max_abs_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, max_abs_diff);
}
2611
// 12 bit input (mask 4095) with a scale of 1/4095 rather than 1/4096. The C
// and optimized outputs are required to match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
  const float kScale = 1.0f / 4095.0f;
  const int kMask = 4095;
  const int max_abs_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, max_abs_diff);
}
2618
// 11 bit input (mask 2047) with a scale of 1.0. The C and optimized outputs
// are required to match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {
  const float kScale = 1.0f;
  const int kMask = 2047;
  const int max_abs_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_EQ(0, max_abs_diff);
}
2625
// 12 bit input (mask 4095) with a scale of 1.0. The C and optimized outputs
// may differ by at most 1.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) {
  const float kScale = 1.0f;
  const int kMask = 4095;
  const int max_abs_diff = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale, kMask);
  EXPECT_LE(max_abs_diff, 1);
}
2632
TestByteToFloat(int benchmark_width,int benchmark_height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,float scale)2633 float TestByteToFloat(int benchmark_width,
2634 int benchmark_height,
2635 int benchmark_iterations,
2636 int disable_cpu_flags,
2637 int benchmark_cpu_info,
2638 float scale) {
2639 int i, j;
2640 const int y_plane_size = benchmark_width * benchmark_height;
2641
2642 align_buffer_page_end(orig_y, y_plane_size * (1 + 4 + 4));
2643 float* dst_opt = reinterpret_cast<float*>(orig_y + y_plane_size);
2644 float* dst_c = reinterpret_cast<float*>(orig_y + y_plane_size * 5);
2645
2646 MemRandomize(orig_y, y_plane_size);
2647 memset(dst_c, 0, y_plane_size * 4);
2648 memset(dst_opt, 1, y_plane_size * 4);
2649
2650 // Disable all optimizations.
2651 MaskCpuFlags(disable_cpu_flags);
2652 ByteToFloat(orig_y, dst_c, scale, y_plane_size);
2653
2654 // Enable optimizations.
2655 MaskCpuFlags(benchmark_cpu_info);
2656 for (j = 0; j < benchmark_iterations; j++) {
2657 ByteToFloat(orig_y, dst_opt, scale, y_plane_size);
2658 }
2659
2660 float max_diff = 0;
2661 for (i = 0; i < y_plane_size; ++i) {
2662 float abs_diff = fabs(dst_c[i] - dst_opt[i]);
2663 if (abs_diff > max_diff) {
2664 max_diff = abs_diff;
2665 }
2666 }
2667
2668 free_aligned_buffer_page_end(orig_y);
2669 return max_diff;
2670 }
2671
// ByteToFloat with a scale of 1.0 must give identical results with and
// without the optimized code paths.
TEST_F(LibYUVPlanarTest, TestByteToFloat) {
  const float kScale = 1.0f;
  const float max_abs_diff = TestByteToFloat(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kScale);
  EXPECT_EQ(0.f, max_abs_diff);
}
2678
// Checks ARGBLumaColorTable against fixed expected values for four probe
// pixels, then checks that the C and optimized paths agree on a 1280 pixel
// ramp.
TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
  const int kWidth = 1280;
  const int kTableBytes = 32768;
  SIMD_ALIGNED(uint8_t orig_pixels[kWidth][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_opt[kWidth][4]);
  SIMD_ALIGNED(uint8_t dst_pixels_c[kWidth][4]);
  memset(orig_pixels, 0, sizeof(orig_pixels));

  // Table entry i holds the low 8 bits of i * 3.
  align_buffer_page_end(lumacolortable, kTableBytes);
  for (int entry = 0; entry < kTableBytes; ++entry) {
    lumacolortable[entry] = static_cast<uint8_t>(entry * 3);
  }

  // Probe pixels, one row per pixel, channels in memory order.
  static const uint8_t kProbePixels[4][4] = {
      {255u, 0u, 0u, 128u},    // Test blue
      {0u, 255u, 0u, 0u},      // Test green
      {0u, 0u, 255u, 255u},    // Test red
      {16u, 64u, 192u, 224u},  // Test color
  };
  memcpy(orig_pixels, kProbePixels, sizeof(kProbePixels));

  // Do 16 to test asm version.
  ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                     &lumacolortable[0], 16, 1);

  // Expected output for the four probe pixels, same layout as kProbePixels.
  static const unsigned int kExpected[4][4] = {
      {253u, 0u, 0u, 128u},
      {0u, 253u, 0u, 0u},
      {0u, 0u, 253u, 255u},
      {48u, 192u, 64u, 224u},
  };
  for (int p = 0; p < 4; ++p) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(kExpected[p][c], dst_pixels_opt[p][c]);
    }
  }

  // Ramp input; each channel keeps only the low 8 bits of its value.
  for (int x = 0; x < kWidth; ++x) {
    uint8_t* const px = orig_pixels[x];
    px[0] = x;
    px[1] = x / 2;
    px[2] = x / 3;
    px[3] = x;
  }

  // Reference output with the optimizations masked off.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_c[0][0], 0,
                     lumacolortable, kWidth, 1);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0,
                       lumacolortable, kWidth, 1);
  }
  for (int x = 0; x < kWidth; ++x) {
    for (int c = 0; c < 4; ++c) {
      EXPECT_EQ(dst_pixels_c[x][c], dst_pixels_opt[x][c]);
    }
  }

  free_aligned_buffer_page_end(lumacolortable);
}
2756
// ARGBCopyAlpha must produce byte-identical destinations with and without the
// optimized code paths. Both destinations start from the same random content.
TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kStride = kWidth * 4;
  const int kBytes = kWidth * kHeight * 4;
  align_buffer_page_end(orig_pixels, kBytes);
  align_buffer_page_end(dst_pixels_opt, kBytes);
  align_buffer_page_end(dst_pixels_c, kBytes);

  MemRandomize(orig_pixels, kBytes);
  MemRandomize(dst_pixels_opt, kBytes);
  memcpy(dst_pixels_c, dst_pixels_opt, kBytes);

  // Reference output with the optimizations masked off.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyAlpha(orig_pixels, kStride, dst_pixels_c, kStride, kWidth, kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ARGBCopyAlpha(orig_pixels, kStride, dst_pixels_opt, kStride, kWidth,
                  kHeight);
  }
  for (int b = 0; b < kBytes; ++b) {
    EXPECT_EQ(dst_pixels_c[b], dst_pixels_opt[b]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2784
// ARGBExtractAlpha must produce byte-identical output with and without the
// optimized code paths. Also prints the time of one C call and the average
// time of the optimized calls.
TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kAlphaBytes = kWidth * kHeight;
  const int kArgbBytes = kAlphaBytes * 4;
  const int kArgbStride = kWidth * 4;
  align_buffer_page_end(src_pixels, kArgbBytes);
  align_buffer_page_end(dst_pixels_opt, kAlphaBytes);
  align_buffer_page_end(dst_pixels_c, kAlphaBytes);

  MemRandomize(src_pixels, kArgbBytes);
  MemRandomize(dst_pixels_opt, kAlphaBytes);
  memcpy(dst_pixels_c, dst_pixels_opt, kAlphaBytes);

  // C path: one untimed call, then one timed call.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBExtractAlpha(src_pixels, kArgbStride, dst_pixels_c, kWidth, kWidth,
                   kHeight);
  const double c_start = get_time();
  ARGBExtractAlpha(src_pixels, kArgbStride, dst_pixels_c, kWidth, kWidth,
                   kHeight);
  const double c_time = get_time() - c_start;

  // Optimized path: one untimed call, then the timed benchmark loop.
  MaskCpuFlags(benchmark_cpu_info_);
  ARGBExtractAlpha(src_pixels, kArgbStride, dst_pixels_opt, kWidth, kWidth,
                   kHeight);
  const double opt_start = get_time();
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ARGBExtractAlpha(src_pixels, kArgbStride, dst_pixels_opt, kWidth, kWidth,
                     kHeight);
  }
  const double opt_time = (get_time() - opt_start) / benchmark_iterations_;

  // Report performance of C vs OPT
  printf("%8d us C - %8d us OPT\n", static_cast<int>(c_time * 1e6),
         static_cast<int>(opt_time * 1e6));
  for (int b = 0; b < kAlphaBytes; ++b) {
    EXPECT_EQ(dst_pixels_c[b], dst_pixels_opt[b]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(src_pixels);
}
2823
// ARGBCopyYToAlpha must produce byte-identical destinations with and without
// the optimized code paths. Both destinations start from the same random
// content. Also prints the time of one C call and the average time of the
// optimized calls.
TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kYBytes = kWidth * kHeight;
  const int kArgbBytes = kYBytes * 4;
  const int kArgbStride = kWidth * 4;
  align_buffer_page_end(orig_pixels, kYBytes);
  align_buffer_page_end(dst_pixels_opt, kArgbBytes);
  align_buffer_page_end(dst_pixels_c, kArgbBytes);

  MemRandomize(orig_pixels, kYBytes);
  MemRandomize(dst_pixels_opt, kArgbBytes);
  memcpy(dst_pixels_c, dst_pixels_opt, kArgbBytes);

  // C path: one untimed call, then one timed call.
  MaskCpuFlags(disable_cpu_flags_);
  ARGBCopyYToAlpha(orig_pixels, kWidth, dst_pixels_c, kArgbStride, kWidth,
                   kHeight);
  const double c_start = get_time();
  ARGBCopyYToAlpha(orig_pixels, kWidth, dst_pixels_c, kArgbStride, kWidth,
                   kHeight);
  const double c_time = get_time() - c_start;

  // Optimized path: one untimed call, then the timed benchmark loop.
  MaskCpuFlags(benchmark_cpu_info_);
  ARGBCopyYToAlpha(orig_pixels, kWidth, dst_pixels_opt, kArgbStride, kWidth,
                   kHeight);
  const double opt_start = get_time();
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    ARGBCopyYToAlpha(orig_pixels, kWidth, dst_pixels_opt, kArgbStride, kWidth,
                     kHeight);
  }
  const double opt_time = (get_time() - opt_start) / benchmark_iterations_;

  // Report performance of C vs OPT
  printf("%8d us C - %8d us OPT\n", static_cast<int>(c_time * 1e6),
         static_cast<int>(opt_time * 1e6));
  for (int b = 0; b < kArgbBytes; ++b) {
    EXPECT_EQ(dst_pixels_c[b], dst_pixels_opt[b]);
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
2863
TestARGBRect(int width,int height,int benchmark_iterations,int disable_cpu_flags,int benchmark_cpu_info,int invert,int off,int bpp)2864 static int TestARGBRect(int width,
2865 int height,
2866 int benchmark_iterations,
2867 int disable_cpu_flags,
2868 int benchmark_cpu_info,
2869 int invert,
2870 int off,
2871 int bpp) {
2872 if (width < 1) {
2873 width = 1;
2874 }
2875 const int kStride = width * bpp;
2876 const int kSize = kStride * height;
2877 const uint32_t v32 = fastrand() & (bpp == 4 ? 0xffffffff : 0xff);
2878
2879 align_buffer_page_end(dst_argb_c, kSize + off);
2880 align_buffer_page_end(dst_argb_opt, kSize + off);
2881
2882 MemRandomize(dst_argb_c + off, kSize);
2883 memcpy(dst_argb_opt + off, dst_argb_c + off, kSize);
2884
2885 MaskCpuFlags(disable_cpu_flags);
2886 if (bpp == 4) {
2887 ARGBRect(dst_argb_c + off, kStride, 0, 0, width, invert * height, v32);
2888 } else {
2889 SetPlane(dst_argb_c + off, kStride, width, invert * height, v32);
2890 }
2891
2892 MaskCpuFlags(benchmark_cpu_info);
2893 for (int i = 0; i < benchmark_iterations; ++i) {
2894 if (bpp == 4) {
2895 ARGBRect(dst_argb_opt + off, kStride, 0, 0, width, invert * height, v32);
2896 } else {
2897 SetPlane(dst_argb_opt + off, kStride, width, invert * height, v32);
2898 }
2899 }
2900 int max_diff = 0;
2901 for (int i = 0; i < kStride * height; ++i) {
2902 int abs_diff = abs(static_cast<int>(dst_argb_c[i + off]) -
2903 static_cast<int>(dst_argb_opt[i + off]));
2904 if (abs_diff > max_diff) {
2905 max_diff = abs_diff;
2906 }
2907 }
2908 free_aligned_buffer_page_end(dst_argb_c);
2909 free_aligned_buffer_page_end(dst_argb_opt);
2910 return max_diff;
2911 }
2912
// ARGBRect with a width one larger than the benchmark width.
TEST_F(LibYUVPlanarTest, ARGBRect_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kBpp = 4;
  const int worst_diff = TestARGBRect(
      benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBpp);
  EXPECT_EQ(0, worst_diff);
}
2919
// ARGBRect on buffers whose start is shifted by one byte.
TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int kBpp = 4;
  const int worst_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBpp);
  EXPECT_EQ(0, worst_diff);
}
2926
// ARGBRect called with a negated height.
TEST_F(LibYUVPlanarTest, ARGBRect_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int kBpp = 4;
  const int worst_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBpp);
  EXPECT_EQ(0, worst_diff);
}
2933
// ARGBRect at the benchmark size with no offset and a positive height.
TEST_F(LibYUVPlanarTest, ARGBRect_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kBpp = 4;
  const int worst_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBpp);
  EXPECT_EQ(0, worst_diff);
}
2940
// SetPlane (1 byte per pixel) with a width one larger than the benchmark
// width.
TEST_F(LibYUVPlanarTest, SetPlane_Any) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kBpp = 1;
  const int worst_diff = TestARGBRect(
      benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBpp);
  EXPECT_EQ(0, worst_diff);
}
2947
// SetPlane (1 byte per pixel) on buffers whose start is shifted by one byte.
TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int kBpp = 1;
  const int worst_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBpp);
  EXPECT_EQ(0, worst_diff);
}
2954
// SetPlane (1 byte per pixel) called with a negated height.
TEST_F(LibYUVPlanarTest, SetPlane_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int kBpp = 1;
  const int worst_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBpp);
  EXPECT_EQ(0, worst_diff);
}
2961
// SetPlane (1 byte per pixel) at the benchmark size with no offset and a
// positive height.
TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int kBpp = 1;
  const int worst_diff = TestARGBRect(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kInvert, kOffset, kBpp);
  EXPECT_EQ(0, worst_diff);
}
2968
// MergeUVPlane must produce byte-identical output with and without the
// optimized code paths.
TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPlaneBytes = kWidth * kHeight;
  const int kUVBytes = kPlaneBytes * 2;
  const int kUVStride = kWidth * 2;
  align_buffer_page_end(src_pixels_u, kPlaneBytes);
  align_buffer_page_end(src_pixels_v, kPlaneBytes);
  align_buffer_page_end(dst_pixels_opt, kUVBytes);
  align_buffer_page_end(dst_pixels_c, kUVBytes);

  MemRandomize(src_pixels_u, kPlaneBytes);
  MemRandomize(src_pixels_v, kPlaneBytes);
  MemRandomize(dst_pixels_opt, kUVBytes);
  MemRandomize(dst_pixels_c, kUVBytes);

  // Reference output with the optimizations masked off.
  MaskCpuFlags(disable_cpu_flags_);
  MergeUVPlane(src_pixels_u, kWidth, src_pixels_v, kWidth, dst_pixels_c,
               kUVStride, kWidth, kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    MergeUVPlane(src_pixels_u, kWidth, src_pixels_v, kWidth, dst_pixels_opt,
                 kUVStride, kWidth, kHeight);
  }

  for (int b = 0; b < kUVBytes; ++b) {
    EXPECT_EQ(dst_pixels_c[b], dst_pixels_opt[b]);
  }

  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3002
3003 // 16 bit channel split and merge
TEST_F(LibYUVPlanarTest,MergeUVPlane_16_Opt)3004 TEST_F(LibYUVPlanarTest, MergeUVPlane_16_Opt) {
3005 const int kPixels = benchmark_width_ * benchmark_height_;
3006 align_buffer_page_end(src_pixels_u, kPixels * 2);
3007 align_buffer_page_end(src_pixels_v, kPixels * 2);
3008 align_buffer_page_end(dst_pixels_opt, kPixels * 2 * 2);
3009 align_buffer_page_end(dst_pixels_c, kPixels * 2 * 2);
3010 MemRandomize(src_pixels_u, kPixels * 2);
3011 MemRandomize(src_pixels_v, kPixels * 2);
3012 MemRandomize(dst_pixels_opt, kPixels * 2 * 2);
3013 MemRandomize(dst_pixels_c, kPixels * 2 * 2);
3014
3015 MaskCpuFlags(disable_cpu_flags_);
3016 MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_,
3017 (const uint16_t*)src_pixels_v, benchmark_width_,
3018 (uint16_t*)dst_pixels_c, benchmark_width_ * 2,
3019 benchmark_width_, benchmark_height_, 12);
3020 MaskCpuFlags(benchmark_cpu_info_);
3021
3022 for (int i = 0; i < benchmark_iterations_; ++i) {
3023 MergeUVPlane_16((const uint16_t*)src_pixels_u, benchmark_width_,
3024 (const uint16_t*)src_pixels_v, benchmark_width_,
3025 (uint16_t*)dst_pixels_opt, benchmark_width_ * 2,
3026 benchmark_width_, benchmark_height_, 12);
3027 }
3028
3029 for (int i = 0; i < kPixels * 2 * 2; ++i) {
3030 EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
3031 }
3032 free_aligned_buffer_page_end(src_pixels_u);
3033 free_aligned_buffer_page_end(src_pixels_v);
3034 free_aligned_buffer_page_end(dst_pixels_opt);
3035 free_aligned_buffer_page_end(dst_pixels_c);
3036 }
3037
// SplitUVPlane must produce byte-identical U and V planes with and without
// the optimized code paths.
TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPlaneBytes = kWidth * kHeight;
  const int kUVBytes = kPlaneBytes * 2;
  const int kUVStride = kWidth * 2;
  align_buffer_page_end(src_pixels, kUVBytes);
  align_buffer_page_end(dst_pixels_u_c, kPlaneBytes);
  align_buffer_page_end(dst_pixels_v_c, kPlaneBytes);
  align_buffer_page_end(dst_pixels_u_opt, kPlaneBytes);
  align_buffer_page_end(dst_pixels_v_opt, kPlaneBytes);

  MemRandomize(src_pixels, kUVBytes);
  MemRandomize(dst_pixels_u_c, kPlaneBytes);
  MemRandomize(dst_pixels_v_c, kPlaneBytes);
  MemRandomize(dst_pixels_u_opt, kPlaneBytes);
  MemRandomize(dst_pixels_v_opt, kPlaneBytes);

  // Reference output with the optimizations masked off.
  MaskCpuFlags(disable_cpu_flags_);
  SplitUVPlane(src_pixels, kUVStride, dst_pixels_u_c, kWidth, dst_pixels_v_c,
               kWidth, kWidth, kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SplitUVPlane(src_pixels, kUVStride, dst_pixels_u_opt, kWidth,
                 dst_pixels_v_opt, kWidth, kWidth, kHeight);
  }

  for (int b = 0; b < kPlaneBytes; ++b) {
    EXPECT_EQ(dst_pixels_u_c[b], dst_pixels_u_opt[b]);
    EXPECT_EQ(dst_pixels_v_c[b], dst_pixels_v_opt[b]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_u_c);
  free_aligned_buffer_page_end(dst_pixels_v_c);
  free_aligned_buffer_page_end(dst_pixels_u_opt);
  free_aligned_buffer_page_end(dst_pixels_v_opt);
}
3075
3076 // 16 bit channel split
TEST_F(LibYUVPlanarTest,SplitUVPlane_16_Opt)3077 TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) {
3078 const int kPixels = benchmark_width_ * benchmark_height_;
3079 align_buffer_page_end(src_pixels, kPixels * 2 * 2);
3080 align_buffer_page_end(dst_pixels_u_c, kPixels * 2);
3081 align_buffer_page_end(dst_pixels_v_c, kPixels * 2);
3082 align_buffer_page_end(dst_pixels_u_opt, kPixels * 2);
3083 align_buffer_page_end(dst_pixels_v_opt, kPixels * 2);
3084 MemRandomize(src_pixels, kPixels * 2 * 2);
3085 MemRandomize(dst_pixels_u_c, kPixels * 2);
3086 MemRandomize(dst_pixels_v_c, kPixels * 2);
3087 MemRandomize(dst_pixels_u_opt, kPixels * 2);
3088 MemRandomize(dst_pixels_v_opt, kPixels * 2);
3089
3090 MaskCpuFlags(disable_cpu_flags_);
3091 SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
3092 (uint16_t*)dst_pixels_u_c, benchmark_width_,
3093 (uint16_t*)dst_pixels_v_c, benchmark_width_, benchmark_width_,
3094 benchmark_height_, 10);
3095 MaskCpuFlags(benchmark_cpu_info_);
3096
3097 for (int i = 0; i < benchmark_iterations_; ++i) {
3098 SplitUVPlane_16((const uint16_t*)src_pixels, benchmark_width_ * 2,
3099 (uint16_t*)dst_pixels_u_opt, benchmark_width_,
3100 (uint16_t*)dst_pixels_v_opt, benchmark_width_,
3101 benchmark_width_, benchmark_height_, 10);
3102 }
3103
3104 for (int i = 0; i < kPixels * 2; ++i) {
3105 EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]);
3106 EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]);
3107 }
3108 free_aligned_buffer_page_end(src_pixels);
3109 free_aligned_buffer_page_end(dst_pixels_u_c);
3110 free_aligned_buffer_page_end(dst_pixels_v_c);
3111 free_aligned_buffer_page_end(dst_pixels_u_opt);
3112 free_aligned_buffer_page_end(dst_pixels_v_opt);
3113 }
3114
// SwapUVPlane must produce byte-identical output with and without the
// optimized code paths.
TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kUVBytes = kWidth * kHeight * 2;
  const int kUVStride = kWidth * 2;
  align_buffer_page_end(src_pixels, kUVBytes);
  align_buffer_page_end(dst_pixels_opt, kUVBytes);
  align_buffer_page_end(dst_pixels_c, kUVBytes);

  MemRandomize(src_pixels, kUVBytes);
  MemRandomize(dst_pixels_opt, kUVBytes);
  MemRandomize(dst_pixels_c, kUVBytes);

  // Reference output with the optimizations masked off.
  MaskCpuFlags(disable_cpu_flags_);
  SwapUVPlane(src_pixels, kUVStride, dst_pixels_c, kUVStride, kWidth, kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SwapUVPlane(src_pixels, kUVStride, dst_pixels_opt, kUVStride, kWidth,
                kHeight);
  }

  for (int b = 0; b < kUVBytes; ++b) {
    EXPECT_EQ(dst_pixels_c[b], dst_pixels_opt[b]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3144
// Splits a random 3 byte per pixel image into R, G and B planes and merges
// them back, once with the optimizations masked off and once with the
// benchmark CPU flags (merge repeated benchmark_iterations_ times). The two
// merged images must be byte-identical.
TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPlaneBytes = kWidth * kHeight;
  const int kRgbBytes = kPlaneBytes * 3;
  const int kRgbStride = kWidth * 3;
  align_buffer_page_end(src_pixels, kRgbBytes);
  align_buffer_page_end(tmp_pixels_r, kPlaneBytes);
  align_buffer_page_end(tmp_pixels_g, kPlaneBytes);
  align_buffer_page_end(tmp_pixels_b, kPlaneBytes);
  align_buffer_page_end(dst_pixels_opt, kRgbBytes);
  align_buffer_page_end(dst_pixels_c, kRgbBytes);

  MemRandomize(src_pixels, kRgbBytes);
  MemRandomize(tmp_pixels_r, kPlaneBytes);
  MemRandomize(tmp_pixels_g, kPlaneBytes);
  MemRandomize(tmp_pixels_b, kPlaneBytes);
  MemRandomize(dst_pixels_opt, kRgbBytes);
  MemRandomize(dst_pixels_c, kRgbBytes);

  // Reference round trip with the optimizations masked off.
  MaskCpuFlags(disable_cpu_flags_);
  SplitRGBPlane(src_pixels, kRgbStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                kWidth, tmp_pixels_b, kWidth, kWidth, kHeight);
  MergeRGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                kWidth, dst_pixels_c, kRgbStride, kWidth, kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  // Split once more with the benchmark flags, then benchmark the merge.
  SplitRGBPlane(src_pixels, kRgbStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                kWidth, tmp_pixels_b, kWidth, kWidth, kHeight);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    MergeRGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                  kWidth, dst_pixels_opt, kRgbStride, kWidth, kHeight);
  }

  for (int b = 0; b < kRgbBytes; ++b) {
    EXPECT_EQ(dst_pixels_c[b], dst_pixels_opt[b]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3193
// Splits a random 3 byte per pixel image into R, G and B planes and merges
// them back, once with the optimizations masked off and once with the
// benchmark CPU flags (split repeated benchmark_iterations_ times). The two
// merged images must be byte-identical.
TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPlaneBytes = kWidth * kHeight;
  const int kRgbBytes = kPlaneBytes * 3;
  const int kRgbStride = kWidth * 3;
  align_buffer_page_end(src_pixels, kRgbBytes);
  align_buffer_page_end(tmp_pixels_r, kPlaneBytes);
  align_buffer_page_end(tmp_pixels_g, kPlaneBytes);
  align_buffer_page_end(tmp_pixels_b, kPlaneBytes);
  align_buffer_page_end(dst_pixels_opt, kRgbBytes);
  align_buffer_page_end(dst_pixels_c, kRgbBytes);

  MemRandomize(src_pixels, kRgbBytes);
  MemRandomize(tmp_pixels_r, kPlaneBytes);
  MemRandomize(tmp_pixels_g, kPlaneBytes);
  MemRandomize(tmp_pixels_b, kPlaneBytes);
  MemRandomize(dst_pixels_opt, kRgbBytes);
  MemRandomize(dst_pixels_c, kRgbBytes);

  // Reference round trip with the optimizations masked off.
  MaskCpuFlags(disable_cpu_flags_);
  SplitRGBPlane(src_pixels, kRgbStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                kWidth, tmp_pixels_b, kWidth, kWidth, kHeight);
  MergeRGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                kWidth, dst_pixels_c, kRgbStride, kWidth, kHeight);
  MaskCpuFlags(benchmark_cpu_info_);

  // Benchmark the split, then merge once to get a comparable image.
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SplitRGBPlane(src_pixels, kRgbStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                  kWidth, tmp_pixels_b, kWidth, kWidth, kHeight);
  }
  MergeRGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                kWidth, dst_pixels_opt, kRgbStride, kWidth, kHeight);

  for (int b = 0; b < kRgbBytes; ++b) {
    EXPECT_EQ(dst_pixels_c[b], dst_pixels_opt[b]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3241
// Splits a random 4 byte per pixel image into R, G, B and A planes and merges
// them back, once with the optimizations masked off and once with the
// benchmark CPU flags (merge repeated benchmark_iterations_ times). The two
// merged images must be byte-identical.
TEST_F(LibYUVPlanarTest, MergeARGBPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPlaneBytes = kWidth * kHeight;
  const int kArgbBytes = kPlaneBytes * 4;
  const int kArgbStride = kWidth * 4;
  align_buffer_page_end(src_pixels, kArgbBytes);
  align_buffer_page_end(tmp_pixels_r, kPlaneBytes);
  align_buffer_page_end(tmp_pixels_g, kPlaneBytes);
  align_buffer_page_end(tmp_pixels_b, kPlaneBytes);
  align_buffer_page_end(tmp_pixels_a, kPlaneBytes);
  align_buffer_page_end(dst_pixels_opt, kArgbBytes);
  align_buffer_page_end(dst_pixels_c, kArgbBytes);

  MemRandomize(src_pixels, kArgbBytes);
  MemRandomize(tmp_pixels_r, kPlaneBytes);
  MemRandomize(tmp_pixels_g, kPlaneBytes);
  MemRandomize(tmp_pixels_b, kPlaneBytes);
  MemRandomize(tmp_pixels_a, kPlaneBytes);
  MemRandomize(dst_pixels_opt, kArgbBytes);
  MemRandomize(dst_pixels_c, kArgbBytes);

  // Reference round trip with the optimizations masked off.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, kArgbStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                 kWidth, tmp_pixels_b, kWidth, tmp_pixels_a, kWidth, kWidth,
                 kHeight);
  MergeARGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                 kWidth, tmp_pixels_a, kWidth, dst_pixels_c, kArgbStride,
                 kWidth, kHeight);

  // Split once more with the benchmark flags, then benchmark the merge.
  MaskCpuFlags(benchmark_cpu_info_);
  SplitARGBPlane(src_pixels, kArgbStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                 kWidth, tmp_pixels_b, kWidth, tmp_pixels_a, kWidth, kWidth,
                 kHeight);

  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    MergeARGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                   kWidth, tmp_pixels_a, kWidth, dst_pixels_opt, kArgbStride,
                   kWidth, kHeight);
  }

  for (int b = 0; b < kArgbBytes; ++b) {
    EXPECT_EQ(dst_pixels_c[b], dst_pixels_opt[b]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(tmp_pixels_a);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3295
// Splits a random 4 byte per pixel image into R, G, B and A planes and merges
// them back, once with the optimizations masked off and once with the
// benchmark CPU flags (split repeated benchmark_iterations_ times). The two
// merged images must be byte-identical.
TEST_F(LibYUVPlanarTest, SplitARGBPlane_Opt) {
  const int kWidth = benchmark_width_;
  const int kHeight = benchmark_height_;
  const int kPlaneBytes = kWidth * kHeight;
  const int kArgbBytes = kPlaneBytes * 4;
  const int kArgbStride = kWidth * 4;
  align_buffer_page_end(src_pixels, kArgbBytes);
  align_buffer_page_end(tmp_pixels_r, kPlaneBytes);
  align_buffer_page_end(tmp_pixels_g, kPlaneBytes);
  align_buffer_page_end(tmp_pixels_b, kPlaneBytes);
  align_buffer_page_end(tmp_pixels_a, kPlaneBytes);
  align_buffer_page_end(dst_pixels_opt, kArgbBytes);
  align_buffer_page_end(dst_pixels_c, kArgbBytes);

  MemRandomize(src_pixels, kArgbBytes);
  MemRandomize(tmp_pixels_r, kPlaneBytes);
  MemRandomize(tmp_pixels_g, kPlaneBytes);
  MemRandomize(tmp_pixels_b, kPlaneBytes);
  MemRandomize(tmp_pixels_a, kPlaneBytes);
  MemRandomize(dst_pixels_opt, kArgbBytes);
  MemRandomize(dst_pixels_c, kArgbBytes);

  // Reference round trip with the optimizations masked off.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, kArgbStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                 kWidth, tmp_pixels_b, kWidth, tmp_pixels_a, kWidth, kWidth,
                 kHeight);
  MergeARGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                 kWidth, tmp_pixels_a, kWidth, dst_pixels_c, kArgbStride,
                 kWidth, kHeight);

  // Benchmark the split, then merge once to get a comparable image.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int pass = 0; pass < benchmark_iterations_; ++pass) {
    SplitARGBPlane(src_pixels, kArgbStride, tmp_pixels_r, kWidth, tmp_pixels_g,
                   kWidth, tmp_pixels_b, kWidth, tmp_pixels_a, kWidth, kWidth,
                   kHeight);
  }

  MergeARGBPlane(tmp_pixels_r, kWidth, tmp_pixels_g, kWidth, tmp_pixels_b,
                 kWidth, tmp_pixels_a, kWidth, dst_pixels_opt, kArgbStride,
                 kWidth, kHeight);

  for (int b = 0; b < kArgbBytes; ++b) {
    EXPECT_EQ(dst_pixels_c[b], dst_pixels_opt[b]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(tmp_pixels_a);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3349
// Exercises MergeARGBPlane with a NULL alpha plane. The R, G and B planes come
// from SplitARGBPlane applied to random ARGB. The merge runs once with
// disable_cpu_flags_ and benchmark_iterations_ times with benchmark_cpu_info_;
// both merged images must be identical.
TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kPlaneStride = benchmark_width_;
  const int kArgbStride = benchmark_width_ * 4;
  const int kArgbSize = kPixels * 4;
  align_buffer_page_end(argb_in, kArgbSize);
  align_buffer_page_end(plane_r, kPixels);
  align_buffer_page_end(plane_g, kPixels);
  align_buffer_page_end(plane_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kArgbSize);
  align_buffer_page_end(dst_pixels_c, kArgbSize);

  MemRandomize(argb_in, kArgbSize);
  MemRandomize(plane_r, kPixels);
  MemRandomize(plane_g, kPixels);
  MemRandomize(plane_b, kPixels);
  MemRandomize(dst_pixels_opt, kArgbSize);
  MemRandomize(dst_pixels_c, kArgbSize);

  // Reference: split, then merge, with disable_cpu_flags_ applied.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(argb_in, kArgbStride, plane_r, kPlaneStride, plane_g,
                 kPlaneStride, plane_b, kPlaneStride, NULL, 0,
                 benchmark_width_, benchmark_height_);
  MergeARGBPlane(plane_r, kPlaneStride, plane_g, kPlaneStride, plane_b,
                 kPlaneStride, NULL, 0, dst_pixels_c, kArgbStride,
                 benchmark_width_, benchmark_height_);

  // Benchmarked path: the split is redone with benchmark_cpu_info_ applied,
  // then the merge is repeated benchmark_iterations_ times.
  MaskCpuFlags(benchmark_cpu_info_);
  SplitARGBPlane(argb_in, kArgbStride, plane_r, kPlaneStride, plane_g,
                 kPlaneStride, plane_b, kPlaneStride, NULL, 0,
                 benchmark_width_, benchmark_height_);

  for (int rep = 0; rep < benchmark_iterations_; ++rep) {
    MergeARGBPlane(plane_r, kPlaneStride, plane_g, kPlaneStride, plane_b,
                   kPlaneStride, NULL, 0, dst_pixels_opt, kArgbStride,
                   benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels * 4; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(argb_in);
  free_aligned_buffer_page_end(plane_r);
  free_aligned_buffer_page_end(plane_g);
  free_aligned_buffer_page_end(plane_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3399
// Checks SplitARGBPlane called with a NULL alpha plane. The call run with
// benchmark_cpu_info_ is compared against the call run with
// disable_cpu_flags_ by merging each split back with MergeARGBPlane (also with
// a NULL alpha plane) and comparing the merged images byte for byte.
TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  align_buffer_page_end(src_pixels, kPixels * 4);
  align_buffer_page_end(tmp_pixels_r, kPixels);
  align_buffer_page_end(tmp_pixels_g, kPixels);
  align_buffer_page_end(tmp_pixels_b, kPixels);
  align_buffer_page_end(dst_pixels_opt, kPixels * 4);
  align_buffer_page_end(dst_pixels_c, kPixels * 4);

  MemRandomize(src_pixels, kPixels * 4);
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);
  MemRandomize(dst_pixels_opt, kPixels * 4);
  MemRandomize(dst_pixels_c, kPixels * 4);

  // Reference result: split and merge with disable_cpu_flags_ applied.
  MaskCpuFlags(disable_cpu_flags_);
  SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
                 benchmark_width_, tmp_pixels_g, benchmark_width_, tmp_pixels_b,
                 benchmark_width_, NULL, 0, benchmark_width_,
                 benchmark_height_);
  MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
                 tmp_pixels_b, benchmark_width_, NULL, 0, dst_pixels_c,
                 benchmark_width_ * 4, benchmark_width_, benchmark_height_);

  // The temporary planes still hold the reference split at this point.
  // Scramble them so that a split which fails to write (part of) its output
  // shows up as a mismatch instead of silently reusing the reference data.
  MemRandomize(tmp_pixels_r, kPixels);
  MemRandomize(tmp_pixels_g, kPixels);
  MemRandomize(tmp_pixels_b, kPixels);

  // Benchmarked pass: split with benchmark_cpu_info_ applied.
  MaskCpuFlags(benchmark_cpu_info_);
  for (int i = 0; i < benchmark_iterations_; ++i) {
    SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_r,
                   benchmark_width_, tmp_pixels_g, benchmark_width_,
                   tmp_pixels_b, benchmark_width_, NULL, 0, benchmark_width_,
                   benchmark_height_);
  }

  MergeARGBPlane(tmp_pixels_r, benchmark_width_, tmp_pixels_g, benchmark_width_,
                 tmp_pixels_b, benchmark_width_, NULL, 0, dst_pixels_opt,
                 benchmark_width_ * 4, benchmark_width_, benchmark_height_);

  for (int i = 0; i < kPixels * 4; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels);
  free_aligned_buffer_page_end(tmp_pixels_r);
  free_aligned_buffer_page_end(tmp_pixels_g);
  free_aligned_buffer_page_end(tmp_pixels_b);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(dst_pixels_c);
}
3448
// Merge 4 channels
// Generates the test FUNC##Plane_##DEPTH##N. It calls FUNC##Plane on four
// random source planes (R, G, B, A of STYPE) once with disable_cpu_flags_ and
// benchmark_iterations_ times with benchmark_cpu_info_, then expects the two
// DTYPE destinations to match element for element.
//   W1280 - expression used as the width.
//   N     - suffix appended to the test name.
//   NEG   - sign token placed in front of benchmark_height_.
//   OFF   - byte offset added to every source pointer.
// The two destinations are pre-filled with different byte values so that a
// call which leaves its output untouched is reported as a mismatch.
#define TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
  TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \
    const int kWidth = W1280; \
    const int kPixels = kWidth * benchmark_height_; \
    align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
    align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
    align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
    align_buffer_page_end(src_memory_a, kPixels * sizeof(STYPE) + OFF); \
    align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
    align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
    MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
    MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
    MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
    MemRandomize(src_memory_a, kPixels * sizeof(STYPE) + OFF); \
    /* Distinct fill values: an unwritten destination cannot compare equal. */ \
    memset(dst_memory_c, 1, kPixels * 4 * sizeof(DTYPE)); \
    memset(dst_memory_opt, 2, kPixels * 4 * sizeof(DTYPE)); \
    STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
    STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
    STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
    STYPE* src_pixels_a = reinterpret_cast<STYPE*>(src_memory_a + OFF); \
    DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
    DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
    MaskCpuFlags(disable_cpu_flags_); \
    FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
                kWidth, src_pixels_a, kWidth, dst_pixels_c, kWidth * 4, \
                kWidth, NEG benchmark_height_, DEPTH); \
    MaskCpuFlags(benchmark_cpu_info_); \
    for (int i = 0; i < benchmark_iterations_; ++i) { \
      FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
                  kWidth, src_pixels_a, kWidth, dst_pixels_opt, kWidth * 4, \
                  kWidth, NEG benchmark_height_, DEPTH); \
    } \
    for (int i = 0; i < kPixels * 4; ++i) { \
      EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \
    } \
    free_aligned_buffer_page_end(src_memory_r); \
    free_aligned_buffer_page_end(src_memory_g); \
    free_aligned_buffer_page_end(src_memory_b); \
    free_aligned_buffer_page_end(src_memory_a); \
    free_aligned_buffer_page_end(dst_memory_c); \
    free_aligned_buffer_page_end(dst_memory_opt); \
  }
3492
// Merge 3 channel RGB into 4 channel XRGB with opaque alpha
// Generates the test FUNC##Plane_Opaque_##DEPTH##N. It calls FUNC##Plane with
// three random source planes (R, G, B of STYPE) and a NULL alpha plane, once
// with disable_cpu_flags_ and benchmark_iterations_ times with
// benchmark_cpu_info_, then expects the two DTYPE destinations to match
// element for element.
//   W1280 - expression used as the width.
//   N     - suffix appended to the test name.
//   NEG   - sign token placed in front of benchmark_height_.
//   OFF   - byte offset added to every source pointer.
// The two destinations are pre-filled with different byte values so that a
// call which leaves its output untouched is reported as a mismatch.
#define TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
  TEST_F(LibYUVPlanarTest, FUNC##Plane_Opaque_##DEPTH##N) { \
    const int kWidth = W1280; \
    const int kPixels = kWidth * benchmark_height_; \
    align_buffer_page_end(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
    align_buffer_page_end(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
    align_buffer_page_end(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
    align_buffer_page_end(dst_memory_c, kPixels * 4 * sizeof(DTYPE)); \
    align_buffer_page_end(dst_memory_opt, kPixels * 4 * sizeof(DTYPE)); \
    MemRandomize(src_memory_r, kPixels * sizeof(STYPE) + OFF); \
    MemRandomize(src_memory_g, kPixels * sizeof(STYPE) + OFF); \
    MemRandomize(src_memory_b, kPixels * sizeof(STYPE) + OFF); \
    /* Distinct fill values: an unwritten destination cannot compare equal. */ \
    memset(dst_memory_c, 1, kPixels * 4 * sizeof(DTYPE)); \
    memset(dst_memory_opt, 2, kPixels * 4 * sizeof(DTYPE)); \
    STYPE* src_pixels_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
    STYPE* src_pixels_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
    STYPE* src_pixels_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
    DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
    DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
    MaskCpuFlags(disable_cpu_flags_); \
    FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
                kWidth, NULL, 0, dst_pixels_c, kWidth * 4, kWidth, \
                NEG benchmark_height_, DEPTH); \
    MaskCpuFlags(benchmark_cpu_info_); \
    for (int i = 0; i < benchmark_iterations_; ++i) { \
      FUNC##Plane(src_pixels_r, kWidth, src_pixels_g, kWidth, src_pixels_b, \
                  kWidth, NULL, 0, dst_pixels_opt, kWidth * 4, kWidth, \
                  NEG benchmark_height_, DEPTH); \
    } \
    for (int i = 0; i < kPixels * 4; ++i) { \
      EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \
    } \
    free_aligned_buffer_page_end(src_memory_r); \
    free_aligned_buffer_page_end(src_memory_g); \
    free_aligned_buffer_page_end(src_memory_b); \
    free_aligned_buffer_page_end(dst_memory_c); \
    free_aligned_buffer_page_end(dst_memory_opt); \
  }
3532
3533 #define TESTQPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
3534 TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \
3535 TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
3536 2) \
3537 TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
3538 TESTQPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0) \
3539 TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, \
3540 0) \
3541 TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
3542 2) \
3543 TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
3544 TESTQPLANAROTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)
3545
3546 TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 10)
3547 TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 12)
3548 TESTQPLANARTOP(MergeAR64, uint16_t, uint16_t, 16)
3549 TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 10)
3550 TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 12)
3551 TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 16)
3552
// Generates the test FUNC##Plane_##DEPTH##N for a 3-plane merge. FUNC##Plane
// is called on three random source planes (R, G, B of STYPE) once with
// disable_cpu_flags_ and benchmark_iterations_ times with benchmark_cpu_info_;
// the two destinations must then match element for element.
//   W1280 - expression used as the width.
//   N     - suffix appended to the test name.
//   NEG   - sign token placed in front of benchmark_height_.
//   OFF   - byte offset added to every source pointer.
// The destinations start out filled with 1 and 2 respectively.
#define TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, W1280, N, NEG, OFF) \
  TEST_F(LibYUVPlanarTest, FUNC##Plane_##DEPTH##N) { \
    const int kWidth = W1280; \
    const int kPixels = kWidth * benchmark_height_; \
    const size_t kSrcBytes = kPixels * sizeof(STYPE) + OFF; \
    const size_t kDstBytes = kPixels * 4 * sizeof(DTYPE); \
    align_buffer_page_end(src_memory_r, kSrcBytes); \
    align_buffer_page_end(src_memory_g, kSrcBytes); \
    align_buffer_page_end(src_memory_b, kSrcBytes); \
    align_buffer_page_end(dst_memory_c, kDstBytes); \
    align_buffer_page_end(dst_memory_opt, kDstBytes); \
    MemRandomize(src_memory_r, kSrcBytes); \
    MemRandomize(src_memory_g, kSrcBytes); \
    MemRandomize(src_memory_b, kSrcBytes); \
    STYPE* plane_r = reinterpret_cast<STYPE*>(src_memory_r + OFF); \
    STYPE* plane_g = reinterpret_cast<STYPE*>(src_memory_g + OFF); \
    STYPE* plane_b = reinterpret_cast<STYPE*>(src_memory_b + OFF); \
    DTYPE* dst_pixels_c = reinterpret_cast<DTYPE*>(dst_memory_c); \
    DTYPE* dst_pixels_opt = reinterpret_cast<DTYPE*>(dst_memory_opt); \
    memset(dst_pixels_c, 1, kDstBytes); \
    memset(dst_pixels_opt, 2, kDstBytes); \
    /* Reference call. */ \
    MaskCpuFlags(disable_cpu_flags_); \
    FUNC##Plane(plane_r, kWidth, plane_g, kWidth, plane_b, kWidth, \
                dst_pixels_c, kWidth * 4, kWidth, NEG benchmark_height_, \
                DEPTH); \
    /* Benchmarked calls. */ \
    MaskCpuFlags(benchmark_cpu_info_); \
    for (int rep = 0; rep < benchmark_iterations_; ++rep) { \
      FUNC##Plane(plane_r, kWidth, plane_g, kWidth, plane_b, kWidth, \
                  dst_pixels_opt, kWidth * 4, kWidth, NEG benchmark_height_, \
                  DEPTH); \
    } \
    for (int i = 0; i < kPixels * 4; ++i) { \
      EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \
    } \
    free_aligned_buffer_page_end(src_memory_r); \
    free_aligned_buffer_page_end(src_memory_g); \
    free_aligned_buffer_page_end(src_memory_b); \
    free_aligned_buffer_page_end(dst_memory_c); \
    free_aligned_buffer_page_end(dst_memory_opt); \
  }
3591
3592 #define TESTTPLANARTOP(FUNC, STYPE, DTYPE, DEPTH) \
3593 TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_ + 1, _Any, +, 0) \
3594 TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Unaligned, +, \
3595 2) \
3596 TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Invert, -, 0) \
3597 TESTTPLANARTOPI(FUNC, STYPE, DTYPE, DEPTH, benchmark_width_, _Opt, +, 0)
3598
3599 TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 10)
3600 TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 12)
3601 TESTTPLANARTOP(MergeXR30, uint16_t, uint8_t, 16)
3602
3603 // TODO(fbarchard): improve test for platforms and cpu detect
3604 #ifdef HAS_MERGEUVROW_16_AVX2
// Row-level comparison for MergeUVRow_16. MergeUVRow_16_C provides the
// reference output; the benchmarked call is MergeUVRow_16_AVX2 when
// TestCpuFlag reports AVX2 and MergeUVRow_16_C otherwise. Both outputs must
// match byte for byte.
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
  // Round count up to multiple of 8
  const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7;
  const int kPlaneBytes = kPixels * 2;
  const int kUVBytes = kPixels * 2 * 2;

  align_buffer_page_end(src_pixels_u, kPlaneBytes);
  align_buffer_page_end(src_pixels_v, kPlaneBytes);
  align_buffer_page_end(dst_pixels_uv_opt, kUVBytes);
  align_buffer_page_end(dst_pixels_uv_c, kUVBytes);

  MemRandomize(src_pixels_u, kPlaneBytes);
  MemRandomize(src_pixels_v, kPlaneBytes);
  memset(dst_pixels_uv_opt, 0, kUVBytes);
  memset(dst_pixels_uv_c, 1, kUVBytes);

  // 16-bit views of the byte buffers, as the row functions expect.
  const uint16_t* u16 = reinterpret_cast<const uint16_t*>(src_pixels_u);
  const uint16_t* v16 = reinterpret_cast<const uint16_t*>(src_pixels_v);
  uint16_t* uv16_c = reinterpret_cast<uint16_t*>(dst_pixels_uv_c);
  uint16_t* uv16_opt = reinterpret_cast<uint16_t*>(dst_pixels_uv_opt);

  MergeUVRow_16_C(u16, v16, uv16_c, 16, kPixels);

  const int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  for (int rep = 0; rep < benchmark_iterations_; ++rep) {
    if (!has_avx2) {
      MergeUVRow_16_C(u16, v16, uv16_opt, 16, kPixels);
    } else {
      MergeUVRow_16_AVX2(u16, v16, uv16_opt, 16, kPixels);
    }
  }

  for (int i = 0; i < kPixels * 2 * 2; ++i) {
    EXPECT_EQ(dst_pixels_uv_opt[i], dst_pixels_uv_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_uv_opt);
  free_aligned_buffer_page_end(dst_pixels_uv_c);
}
3647 #endif
3648
3649 // TODO(fbarchard): Improve test for more platforms.
3650 #ifdef HAS_MULTIPLYROW_16_AVX2
// Row-level comparison for MultiplyRow_16. MultiplyRow_16_C provides the
// reference output; the benchmarked call is MultiplyRow_16_AVX2 when
// TestCpuFlag reports AVX2 and MultiplyRow_16_C otherwise. Both are called
// with 64 as the third argument and must produce identical bytes.
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
  // Round count up to multiple of 32
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kBytes = kPixels * 2;

  align_buffer_page_end(src_pixels_y, kBytes);
  align_buffer_page_end(dst_pixels_y_opt, kBytes);
  align_buffer_page_end(dst_pixels_y_c, kBytes);

  MemRandomize(src_pixels_y, kBytes);
  memset(dst_pixels_y_opt, 0, kBytes);
  memset(dst_pixels_y_c, 1, kBytes);

  // 16-bit views of the byte buffers.
  const uint16_t* in16 = reinterpret_cast<const uint16_t*>(src_pixels_y);
  uint16_t* out16_c = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* out16_opt = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  MultiplyRow_16_C(in16, out16_c, 64, kPixels);

  const int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  for (int rep = 0; rep < benchmark_iterations_; ++rep) {
    if (!has_avx2) {
      MultiplyRow_16_C(in16, out16_opt, 64, kPixels);
    } else {
      MultiplyRow_16_AVX2(in16, out16_opt, 64, kPixels);
    }
  }

  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3687 #endif // HAS_MULTIPLYROW_16_AVX2
3688
// Compares Convert16To8Plane run with disable_cpu_flags_ against the same
// call run benchmark_iterations_ times with benchmark_cpu_info_. The source is
// random 16-bit data and both calls pass 16384 as the scale argument.
TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcBytes = kPixels * 2;
  align_buffer_page_end(src_pixels_y, kSrcBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kSrcBytes);
  // Different fill values so that an unwritten destination cannot match.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  const uint16_t* src_y16 = reinterpret_cast<const uint16_t*>(src_pixels_y);

  MaskCpuFlags(disable_cpu_flags_);
  Convert16To8Plane(src_y16, benchmark_width_, dst_pixels_y_c,
                    benchmark_width_, 16384, benchmark_width_,
                    benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int rep = 0; rep < benchmark_iterations_; ++rep) {
    Convert16To8Plane(src_y16, benchmark_width_, dst_pixels_y_opt,
                      benchmark_width_, 16384, benchmark_width_,
                      benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3719
// Compares YUY2ToY run with disable_cpu_flags_ against the same call run
// benchmark_iterations_ times with benchmark_cpu_info_. The source is random
// data with a stride of two bytes per pixel.
TEST_F(LibYUVPlanarTest, YUY2ToY) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcStride = benchmark_width_ * 2;
  const int kSrcBytes = kPixels * 2;
  align_buffer_page_end(src_pixels_y, kSrcBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kSrcBytes);
  // Different fill values so that an unwritten destination cannot match.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  MaskCpuFlags(disable_cpu_flags_);
  YUY2ToY(src_pixels_y, kSrcStride, dst_pixels_y_c, benchmark_width_,
          benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int rep = 0; rep < benchmark_iterations_; ++rep) {
    YUY2ToY(src_pixels_y, kSrcStride, dst_pixels_y_opt, benchmark_width_,
            benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3748
// Compares UYVYToY run with disable_cpu_flags_ against the same call run
// benchmark_iterations_ times with benchmark_cpu_info_. The source is random
// data with a stride of two bytes per pixel.
TEST_F(LibYUVPlanarTest, UYVYToY) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kSrcStride = benchmark_width_ * 2;
  const int kSrcBytes = kPixels * 2;
  align_buffer_page_end(src_pixels_y, kSrcBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kSrcBytes);
  // Different fill values so that an unwritten destination cannot match.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  MaskCpuFlags(disable_cpu_flags_);
  UYVYToY(src_pixels_y, kSrcStride, dst_pixels_y_c, benchmark_width_,
          benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int rep = 0; rep < benchmark_iterations_; ++rep) {
    UYVYToY(src_pixels_y, kSrcStride, dst_pixels_y_opt, benchmark_width_,
            benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3777
3778 #ifdef ENABLE_ROW_TESTS
3779 // TODO(fbarchard): Improve test for more platforms.
3780 #ifdef HAS_CONVERT16TO8ROW_AVX2
// Row-level comparison for Convert16To8Row. Convert16To8Row_C provides the
// reference output. The benchmarked call is the AVX2 row when TestCpuFlag
// reports AVX2, else the SSSE3 row when it reports SSSE3, else the C row.
// All calls pass 16384 as the scale argument.
TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
  // AVX2 does multiple of 32, so round count up
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kSrcBytes = kPixels * 2;
  align_buffer_page_end(src_pixels_y, kSrcBytes);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kSrcBytes);
  // clamp source range to 10 bits.
  uint16_t* samples = reinterpret_cast<uint16_t*>(src_pixels_y);
  for (int n = 0; n < kPixels; ++n) {
    samples[n] &= 1023;
  }

  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  const uint16_t* src_y16 = reinterpret_cast<const uint16_t*>(src_pixels_y);
  Convert16To8Row_C(src_y16, dst_pixels_y_c, 16384, kPixels);

  const int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  const int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
  for (int rep = 0; rep < benchmark_iterations_; ++rep) {
    if (has_avx2) {
      Convert16To8Row_AVX2(src_y16, dst_pixels_y_opt, 16384, kPixels);
    } else if (has_ssse3) {
      Convert16To8Row_SSSE3(src_y16, dst_pixels_y_opt, 16384, kPixels);
    } else {
      Convert16To8Row_C(src_y16, dst_pixels_y_opt, 16384, kPixels);
    }
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3823 #endif // HAS_CONVERT16TO8ROW_AVX2
3824
3825 #ifdef HAS_UYVYTOYROW_NEON
// Row-level comparison for UYVYToYRow. UYVYToYRow_C provides the reference
// output; the benchmarked call is UYVYToYRow_NEON when TestCpuFlag reports
// NEON and UYVYToYRow_C otherwise. Both outputs must match byte for byte.
TEST_F(LibYUVPlanarTest, UYVYToYRow_Opt) {
  // NEON does multiple of 16, so round count up
  const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
  align_buffer_page_end(src_pixels_y, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);

  MemRandomize(src_pixels_y, kPixels * 2);
  // Different fill values so that an unwritten destination cannot match.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);

  UYVYToYRow_C(src_pixels_y, dst_pixels_y_c, kPixels);

  // HAS_UYVYTOYROW_NEON only says the NEON row was compiled in. Check the
  // runtime CPU flags before calling it, the same way the AVX2 / SSSE3 / SSE2
  // row tests in this file do, and fall back to the C row otherwise.
  int has_neon = TestCpuFlag(kCpuHasNEON);
  for (int i = 0; i < benchmark_iterations_; ++i) {
    if (has_neon) {
      UYVYToYRow_NEON(src_pixels_y, dst_pixels_y_opt, kPixels);
    } else {
      UYVYToYRow_C(src_pixels_y, dst_pixels_y_opt, kPixels);
    }
  }

  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3851 #endif // HAS_UYVYTOYROW_NEON
3852
3853 #endif // ENABLE_ROW_TESTS
3854
// Compares Convert8To16Plane run with disable_cpu_flags_ against the same
// call run benchmark_iterations_ times with benchmark_cpu_info_. The source is
// random 8-bit data, both calls pass 1024 as the scale argument, and the
// 16-bit outputs are compared byte by byte.
TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kDstBytes = kPixels * 2;
  align_buffer_page_end(src_pixels_y, kPixels);
  align_buffer_page_end(dst_pixels_y_opt, kDstBytes);
  align_buffer_page_end(dst_pixels_y_c, kDstBytes);

  MemRandomize(src_pixels_y, kPixels);
  // Different fill values so that an unwritten destination cannot match.
  memset(dst_pixels_y_opt, 0, kDstBytes);
  memset(dst_pixels_y_c, 1, kDstBytes);

  uint16_t* dst_y16_c = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* dst_y16_opt = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  MaskCpuFlags(disable_cpu_flags_);
  Convert8To16Plane(src_pixels_y, benchmark_width_, dst_y16_c,
                    benchmark_width_, 1024, benchmark_width_,
                    benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  for (int rep = 0; rep < benchmark_iterations_; ++rep) {
    Convert8To16Plane(src_pixels_y, benchmark_width_, dst_y16_opt,
                      benchmark_width_, 1024, benchmark_width_,
                      benchmark_height_);
  }

  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3887
3888 #ifdef ENABLE_ROW_TESTS
3889 // TODO(fbarchard): Improve test for more platforms.
3890 #ifdef HAS_CONVERT8TO16ROW_AVX2
// Row-level comparison for Convert8To16Row. Convert8To16Row_C provides the
// reference output. The benchmarked call is the AVX2 row when TestCpuFlag
// reports AVX2, else the SSE2 row when it reports SSE2, else the C row.
// All calls pass 1024 as the scale argument.
TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) {
  // Pixel count rounded up to a multiple of 32.
  const int kPixels = (benchmark_width_ * benchmark_height_ + 31) & ~31;
  const int kDstBytes = kPixels * 2;
  align_buffer_page_end(src_pixels_y, kPixels);
  align_buffer_page_end(dst_pixels_y_opt, kDstBytes);
  align_buffer_page_end(dst_pixels_y_c, kDstBytes);

  MemRandomize(src_pixels_y, kPixels);
  memset(dst_pixels_y_opt, 0, kDstBytes);
  memset(dst_pixels_y_c, 1, kDstBytes);

  uint16_t* dst_y16_c = reinterpret_cast<uint16_t*>(dst_pixels_y_c);
  uint16_t* dst_y16_opt = reinterpret_cast<uint16_t*>(dst_pixels_y_opt);

  Convert8To16Row_C(src_pixels_y, dst_y16_c, 1024, kPixels);

  const int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  const int has_sse2 = TestCpuFlag(kCpuHasSSE2);
  for (int rep = 0; rep < benchmark_iterations_; ++rep) {
    if (has_avx2) {
      Convert8To16Row_AVX2(src_pixels_y, dst_y16_opt, 1024, kPixels);
    } else if (has_sse2) {
      Convert8To16Row_SSE2(src_pixels_y, dst_y16_opt, 1024, kPixels);
    } else {
      Convert8To16Row_C(src_pixels_y, dst_y16_opt, 1024, kPixels);
    }
  }

  for (int i = 0; i < kPixels * 2; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }

  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
3930 #endif // HAS_CONVERT8TO16ROW_AVX2
3931
TestScaleMaxSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)3932 float TestScaleMaxSamples(int benchmark_width,
3933 int benchmark_height,
3934 int benchmark_iterations,
3935 float scale,
3936 bool opt) {
3937 int i, j;
3938 float max_c, max_opt = 0.f;
3939 // NEON does multiple of 8, so round count up
3940 const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
3941 align_buffer_page_end(orig_y, kPixels * 4 * 3 + 48);
3942 uint8_t* dst_c = orig_y + kPixels * 4 + 16;
3943 uint8_t* dst_opt = orig_y + kPixels * 4 * 2 + 32;
3944
3945 // Randomize works but may contain some denormals affecting performance.
3946 // MemRandomize(orig_y, kPixels * 4);
3947 // large values are problematic. audio is really -1 to 1.
3948 for (i = 0; i < kPixels; ++i) {
3949 (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
3950 }
3951 memset(dst_c, 0, kPixels * 4);
3952 memset(dst_opt, 1, kPixels * 4);
3953
3954 max_c = ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
3955 reinterpret_cast<float*>(dst_c), scale, kPixels);
3956
3957 for (j = 0; j < benchmark_iterations; j++) {
3958 if (opt) {
3959 #ifdef HAS_SCALESUMSAMPLES_NEON
3960 max_opt = ScaleMaxSamples_NEON(reinterpret_cast<float*>(orig_y),
3961 reinterpret_cast<float*>(dst_opt), scale,
3962 kPixels);
3963 #else
3964 max_opt =
3965 ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
3966 reinterpret_cast<float*>(dst_opt), scale, kPixels);
3967 #endif
3968 } else {
3969 max_opt =
3970 ScaleMaxSamples_C(reinterpret_cast<float*>(orig_y),
3971 reinterpret_cast<float*>(dst_opt), scale, kPixels);
3972 }
3973 }
3974
3975 float max_diff = FAbs(max_opt - max_c);
3976 for (i = 0; i < kPixels; ++i) {
3977 float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
3978 (reinterpret_cast<float*>(dst_opt)[i]));
3979 if (abs_diff > max_diff) {
3980 max_diff = abs_diff;
3981 }
3982 }
3983
3984 free_aligned_buffer_page_end(orig_y);
3985 return max_diff;
3986 }
3987
// Runs TestScaleMaxSamples with opt = false and a scale of 1.2, so the
// benchmarked call is ScaleMaxSamples_C; the reported difference must be zero.
TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_C) {
  const float kScale = 1.2f;
  const bool kUseOpt = false;
  float diff = TestScaleMaxSamples(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
3993
// Runs TestScaleMaxSamples with opt = true and a scale of 1.2; the reported
// difference between the reference and benchmarked runs must be zero.
TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_Opt) {
  const float kScale = 1.2f;
  const bool kUseOpt = true;
  float diff = TestScaleMaxSamples(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
3999
TestScaleSumSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)4000 float TestScaleSumSamples(int benchmark_width,
4001 int benchmark_height,
4002 int benchmark_iterations,
4003 float scale,
4004 bool opt) {
4005 int i, j;
4006 float sum_c, sum_opt = 0.f;
4007 // NEON does multiple of 8, so round count up
4008 const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
4009 align_buffer_page_end(orig_y, kPixels * 4 * 3);
4010 uint8_t* dst_c = orig_y + kPixels * 4;
4011 uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
4012
4013 // Randomize works but may contain some denormals affecting performance.
4014 // MemRandomize(orig_y, kPixels * 4);
4015 // large values are problematic. audio is really -1 to 1.
4016 for (i = 0; i < kPixels; ++i) {
4017 (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
4018 }
4019 memset(dst_c, 0, kPixels * 4);
4020 memset(dst_opt, 1, kPixels * 4);
4021
4022 sum_c = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
4023 reinterpret_cast<float*>(dst_c), scale, kPixels);
4024
4025 for (j = 0; j < benchmark_iterations; j++) {
4026 if (opt) {
4027 #ifdef HAS_SCALESUMSAMPLES_NEON
4028 sum_opt = ScaleSumSamples_NEON(reinterpret_cast<float*>(orig_y),
4029 reinterpret_cast<float*>(dst_opt), scale,
4030 kPixels);
4031 #else
4032 sum_opt =
4033 ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
4034 reinterpret_cast<float*>(dst_opt), scale, kPixels);
4035 #endif
4036 } else {
4037 sum_opt =
4038 ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
4039 reinterpret_cast<float*>(dst_opt), scale, kPixels);
4040 }
4041 }
4042
4043 float mse_opt = sum_opt / kPixels * 4;
4044 float mse_c = sum_c / kPixels * 4;
4045 float mse_error = FAbs(mse_opt - mse_c) / mse_c;
4046
4047 // If the sum of a float is more than 4 million, small adds are round down on
4048 // float and produce different results with vectorized sum vs scalar sum.
4049 // Ignore the difference if the sum is large.
4050 float max_diff = 0.f;
4051 if (mse_error > 0.0001 && sum_c < 4000000) { // allow .01% difference of mse
4052 max_diff = mse_error;
4053 }
4054
4055 for (i = 0; i < kPixels; ++i) {
4056 float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
4057 (reinterpret_cast<float*>(dst_opt)[i]));
4058 if (abs_diff > max_diff) {
4059 max_diff = abs_diff;
4060 }
4061 }
4062
4063 free_aligned_buffer_page_end(orig_y);
4064 return max_diff;
4065 }
4066
// Runs TestScaleSumSamples with opt = false and a scale of 1.2, so the
// benchmarked call is ScaleSumSamples_C; the reported difference must be zero.
TEST_F(LibYUVPlanarTest, TestScaleSumSamples_C) {
  const float kScale = 1.2f;
  const bool kUseOpt = false;
  float diff = TestScaleSumSamples(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
4072
// Runs TestScaleSumSamples with opt = true and a scale of 1.2; the reported
// difference between the reference and benchmarked runs must be zero.
TEST_F(LibYUVPlanarTest, TestScaleSumSamples_Opt) {
  const float kScale = 1.2f;
  const bool kUseOpt = true;
  float diff = TestScaleSumSamples(benchmark_width_, benchmark_height_,
                                   benchmark_iterations_, kScale, kUseOpt);
  EXPECT_EQ(0, diff);
}
4078
TestScaleSamples(int benchmark_width,int benchmark_height,int benchmark_iterations,float scale,bool opt)4079 float TestScaleSamples(int benchmark_width,
4080 int benchmark_height,
4081 int benchmark_iterations,
4082 float scale,
4083 bool opt) {
4084 int i, j;
4085 // NEON does multiple of 8, so round count up
4086 const int kPixels = (benchmark_width * benchmark_height + 7) & ~7;
4087 align_buffer_page_end(orig_y, kPixels * 4 * 3);
4088 uint8_t* dst_c = orig_y + kPixels * 4;
4089 uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
4090
4091 // Randomize works but may contain some denormals affecting performance.
4092 // MemRandomize(orig_y, kPixels * 4);
4093 // large values are problematic. audio is really -1 to 1.
4094 for (i = 0; i < kPixels; ++i) {
4095 (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
4096 }
4097 memset(dst_c, 0, kPixels * 4);
4098 memset(dst_opt, 1, kPixels * 4);
4099
4100 ScaleSamples_C(reinterpret_cast<float*>(orig_y),
4101 reinterpret_cast<float*>(dst_c), scale, kPixels);
4102
4103 for (j = 0; j < benchmark_iterations; j++) {
4104 if (opt) {
4105 #ifdef HAS_SCALESUMSAMPLES_NEON
4106 ScaleSamples_NEON(reinterpret_cast<float*>(orig_y),
4107 reinterpret_cast<float*>(dst_opt), scale, kPixels);
4108 #else
4109 ScaleSamples_C(reinterpret_cast<float*>(orig_y),
4110 reinterpret_cast<float*>(dst_opt), scale, kPixels);
4111 #endif
4112 } else {
4113 ScaleSamples_C(reinterpret_cast<float*>(orig_y),
4114 reinterpret_cast<float*>(dst_opt), scale, kPixels);
4115 }
4116 }
4117
4118 float max_diff = 0.f;
4119 for (i = 0; i < kPixels; ++i) {
4120 float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
4121 (reinterpret_cast<float*>(dst_opt)[i]));
4122 if (abs_diff > max_diff) {
4123 max_diff = abs_diff;
4124 }
4125 }
4126
4127 free_aligned_buffer_page_end(orig_y);
4128 return max_diff;
4129 }
4130
// Runs TestScaleSamples with opt disabled, so both the reference and the
// timed loop use ScaleSamples_C; the outputs must match exactly.
TEST_F(LibYUVPlanarTest, TestScaleSamples_C) {
  const float kScale = 1.2f;
  const float max_diff =
      TestScaleSamples(benchmark_width_, benchmark_height_,
                       benchmark_iterations_, kScale, /*opt=*/false);
  EXPECT_EQ(0, max_diff);
}
4136
// Runs TestScaleSamples with opt enabled and requires the optimized output
// to be identical to the ScaleSamples_C reference.
TEST_F(LibYUVPlanarTest, TestScaleSamples_Opt) {
  const float kScale = 1.2f;
  const float max_diff =
      TestScaleSamples(benchmark_width_, benchmark_height_,
                       benchmark_iterations_, kScale, /*opt=*/true);
  EXPECT_EQ(0, max_diff);
}
4142
TestCopySamples(int benchmark_width,int benchmark_height,int benchmark_iterations,bool opt)4143 float TestCopySamples(int benchmark_width,
4144 int benchmark_height,
4145 int benchmark_iterations,
4146 bool opt) {
4147 int i, j;
4148 // NEON does multiple of 16 floats, so round count up
4149 const int kPixels = (benchmark_width * benchmark_height + 15) & ~15;
4150 align_buffer_page_end(orig_y, kPixels * 4 * 3);
4151 uint8_t* dst_c = orig_y + kPixels * 4;
4152 uint8_t* dst_opt = orig_y + kPixels * 4 * 2;
4153
4154 // Randomize works but may contain some denormals affecting performance.
4155 // MemRandomize(orig_y, kPixels * 4);
4156 // large values are problematic. audio is really -1 to 1.
4157 for (i = 0; i < kPixels; ++i) {
4158 (reinterpret_cast<float*>(orig_y))[i] = sinf(static_cast<float>(i) * 0.1f);
4159 }
4160 memset(dst_c, 0, kPixels * 4);
4161 memset(dst_opt, 1, kPixels * 4);
4162
4163 memcpy(reinterpret_cast<void*>(dst_c), reinterpret_cast<void*>(orig_y),
4164 kPixels * 4);
4165
4166 for (j = 0; j < benchmark_iterations; j++) {
4167 if (opt) {
4168 #ifdef HAS_COPYROW_NEON
4169 CopyRow_NEON(orig_y, dst_opt, kPixels * 4);
4170 #else
4171 CopyRow_C(orig_y, dst_opt, kPixels * 4);
4172 #endif
4173 } else {
4174 CopyRow_C(orig_y, dst_opt, kPixels * 4);
4175 }
4176 }
4177
4178 float max_diff = 0.f;
4179 for (i = 0; i < kPixels; ++i) {
4180 float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
4181 (reinterpret_cast<float*>(dst_opt)[i]));
4182 if (abs_diff > max_diff) {
4183 max_diff = abs_diff;
4184 }
4185 }
4186
4187 free_aligned_buffer_page_end(orig_y);
4188 return max_diff;
4189 }
4190
// Runs TestCopySamples with opt disabled (CopyRow_C in the timed loop) and
// requires the copy to match the memcpy reference exactly.
TEST_F(LibYUVPlanarTest, TestCopySamples_C) {
  const float max_diff =
      TestCopySamples(benchmark_width_, benchmark_height_,
                      benchmark_iterations_, /*opt=*/false);
  EXPECT_EQ(0, max_diff);
}
4196
// Runs TestCopySamples with opt enabled (CopyRow_NEON when HAS_COPYROW_NEON
// is defined) and requires the copy to match the memcpy reference exactly.
TEST_F(LibYUVPlanarTest, TestCopySamples_Opt) {
  const float max_diff =
      TestCopySamples(benchmark_width_, benchmark_height_,
                      benchmark_iterations_, /*opt=*/true);
  EXPECT_EQ(0, max_diff);
}
4202
// C-linkage declarations of the GaussRow functions called by
// TestGaussRow_Opt: 32-bit source samples in, 16-bit samples out.
extern "C" {
void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width);
void GaussRow_C(const uint32_t* src, uint16_t* dst, int width);
}  // extern "C"
4205
// Compares GaussRow_C against the NEON row function (when compiled in and
// reported by TestCpuFlag) on a 1280-wide ramp, and spot-checks two outputs
// of the C version against hand-computed values.
TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
  const int kWidth = 1280;
  const int kSrcCount = kWidth + 8;  // Source row is 8 samples wider.
  SIMD_ALIGNED(uint32_t src_row[kSrcCount]);
  SIMD_ALIGNED(uint16_t out_c[kWidth]);
  SIMD_ALIGNED(uint16_t out_opt[kWidth]);

  // Different fill bytes so an output that is never written is detected.
  memset(src_row, 0, sizeof(src_row));
  memset(out_c, 1, sizeof(out_c));
  memset(out_opt, 2, sizeof(out_opt));

  // Ramp input: sample n holds n * 256.
  for (int n = 0; n < kSrcCount; ++n) {
    src_row[n] = n * 256;
  }

  GaussRow_C(src_row, out_c, kWidth);
  for (int iter = 0; iter < benchmark_pixels_div1280_; ++iter) {
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
    if (TestCpuFlag(kCpuHasNEON)) {
      GaussRow_NEON(src_row, out_opt, kWidth);
      continue;
    }
#endif
    // No NEON available (at compile time or at run time): time the C path.
    GaussRow_C(src_row, out_opt, kWidth);
  }

  for (int n = 0; n < kWidth; ++n) {
    EXPECT_EQ(out_c[n], out_opt[n]);
  }

  // First output, spelled out as a 1-4-6-4-1 weighting of indices 0..4.
  EXPECT_EQ(out_c[0],
            static_cast<uint16_t>(0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1));
  EXPECT_EQ(out_c[639], static_cast<uint16_t>(10256));
}
4241
// C-linkage declarations of the GaussCol functions called by
// TestGaussCol_Opt: five 16-bit source rows in, one 32-bit row out.
extern "C" {
void GaussCol_NEON(const uint16_t* src0,
                   const uint16_t* src1,
                   const uint16_t* src2,
                   const uint16_t* src3,
                   const uint16_t* src4,
                   uint32_t* dst,
                   int width);

void GaussCol_C(const uint16_t* src0,
                const uint16_t* src1,
                const uint16_t* src2,
                const uint16_t* src3,
                const uint16_t* src4,
                uint32_t* dst,
                int width);
}  // extern "C"
4257
// Compares GaussCol_C against the NEON column function (when compiled in and
// reported by TestCpuFlag). The input is five consecutive 1280-sample rows
// of uint16_t holding a simple ramp; both outputs must match exactly.
TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
  SIMD_ALIGNED(uint16_t orig_pixels[1280 * 5]);
  SIMD_ALIGNED(uint32_t dst_pixels_c[1280]);
  SIMD_ALIGNED(uint32_t dst_pixels_opt[1280]);

  // Different fill bytes so an output that is never written is detected.
  memset(orig_pixels, 0, sizeof(orig_pixels));
  memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));

  for (int i = 0; i < 1280 * 5; ++i) {
    // The samples are uint16_t, so cast to the element type directly rather
    // than going through float. The largest index (6399) fits in 16 bits.
    orig_pixels[i] = static_cast<uint16_t>(i);
  }

  // The five input rows are laid out back to back in orig_pixels.
  const uint16_t* const row0 = &orig_pixels[0];
  const uint16_t* const row1 = &orig_pixels[1280];
  const uint16_t* const row2 = &orig_pixels[1280 * 2];
  const uint16_t* const row3 = &orig_pixels[1280 * 3];
  const uint16_t* const row4 = &orig_pixels[1280 * 4];

  GaussCol_C(row0, row1, row2, row3, row4, &dst_pixels_c[0], 1280);
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
    int has_neon = TestCpuFlag(kCpuHasNEON);
    if (has_neon) {
      GaussCol_NEON(row0, row1, row2, row3, row4, &dst_pixels_opt[0], 1280);
    } else {
      GaussCol_C(row0, row1, row2, row3, row4, &dst_pixels_opt[0], 1280);
    }
#else
    GaussCol_C(row0, row1, row2, row3, row4, &dst_pixels_opt[0], 1280);
#endif
  }

  for (int i = 0; i < 1280; ++i) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }
}
4297
// Compares GaussRow_F32_C against GaussRow_F32_NEON (aarch64 builds with
// NEON reported by TestCpuFlag) on a 1280-wide float ramp; outputs must be
// bit-for-bit equal.
TEST_F(LibYUVPlanarTest, TestGaussRow_F32_Opt) {
  const int kWidth = 1280;
  const int kSrcCount = kWidth + 4;  // Source row is 4 samples wider.
  SIMD_ALIGNED(float src_row[kSrcCount]);
  SIMD_ALIGNED(float out_c[kWidth]);
  SIMD_ALIGNED(float out_opt[kWidth]);

  // Different fill bytes so an output that is never written is detected.
  memset(src_row, 0, sizeof(src_row));
  memset(out_c, 1, sizeof(out_c));
  memset(out_opt, 2, sizeof(out_opt));

  // Ramp input: sample n holds the value n.
  for (int n = 0; n < kSrcCount; ++n) {
    src_row[n] = static_cast<float>(n);
  }

  GaussRow_F32_C(src_row, out_c, kWidth);
  for (int iter = 0; iter < benchmark_pixels_div1280_; ++iter) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    if (TestCpuFlag(kCpuHasNEON)) {
      GaussRow_F32_NEON(src_row, out_opt, kWidth);
      continue;
    }
#endif
    // No NEON available (at compile time or at run time): time the C path.
    GaussRow_F32_C(src_row, out_opt, kWidth);
  }

  for (int n = 0; n < kWidth; ++n) {
    EXPECT_EQ(out_c[n], out_opt[n]);
  }
}
4328
// Compares GaussCol_F32_C against GaussCol_F32_NEON (aarch64 builds with
// NEON reported by TestCpuFlag). The input is five consecutive 1280-sample
// float rows holding a ramp; outputs must be bit-for-bit equal.
TEST_F(LibYUVPlanarTest, TestGaussCol_F32_Opt) {
  const int kWidth = 1280;
  SIMD_ALIGNED(float out_c[kWidth]);
  SIMD_ALIGNED(float out_opt[kWidth]);
  align_buffer_page_end(orig_pixels_buf, 1280 * 5 * 4);  // 5 rows
  float* const src = reinterpret_cast<float*>(orig_pixels_buf);

  // Different fill bytes so an output that is never written is detected.
  memset(src, 0, 1280 * 5 * 4);
  memset(out_c, 1, sizeof(out_c));
  memset(out_opt, 2, sizeof(out_opt));

  for (int n = 0; n < kWidth * 5; ++n) {
    src[n] = static_cast<float>(n);
  }

  // The five input rows are laid out back to back in the source buffer.
  const float* const row0 = src;
  const float* const row1 = src + kWidth;
  const float* const row2 = src + kWidth * 2;
  const float* const row3 = src + kWidth * 3;
  const float* const row4 = src + kWidth * 4;

  GaussCol_F32_C(row0, row1, row2, row3, row4, out_c, kWidth);
  for (int iter = 0; iter < benchmark_pixels_div1280_; ++iter) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    if (TestCpuFlag(kCpuHasNEON)) {
      GaussCol_F32_NEON(row0, row1, row2, row3, row4, out_opt, kWidth);
      continue;
    }
#endif
    // No NEON available (at compile time or at run time): time the C path.
    GaussCol_F32_C(row0, row1, row2, row3, row4, out_opt, kWidth);
  }

  for (int n = 0; n < kWidth; ++n) {
    EXPECT_EQ(out_c[n], out_opt[n]);
  }
  free_aligned_buffer_page_end(orig_pixels_buf);
}
4369
// Runs the SwapUVRow row function (C, or a NEON variant when available) over
// random two-byte pairs and checks that every output pair is the input pair
// with its two bytes exchanged.
TEST_F(LibYUVPlanarTest, SwapUVRow) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  const int kBytes = kPixels * 2;  // Two bytes per pixel.

  // Start from the C implementation and upgrade to NEON when possible.
  void (*swap_row)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
      SwapUVRow_C;

  align_buffer_page_end(src_pixels_vu, kBytes);
  align_buffer_page_end(dst_pixels_uv, kBytes);
  MemRandomize(src_pixels_vu, kBytes);
  memset(dst_pixels_uv, 1, kBytes);

#if defined(HAS_SWAPUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    // The plain NEON version is only selected for widths that are a multiple
    // of 16; every other width goes through the _Any_ variant.
    swap_row = IS_ALIGNED(kPixels, 16) ? SwapUVRow_NEON : SwapUVRow_Any_NEON;
  }
#endif

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    swap_row(src_pixels_vu, dst_pixels_uv, kPixels);
  }

  for (int p = 0; p < kPixels; ++p) {
    const int first = p * 2;
    const int second = first + 1;
    EXPECT_EQ(dst_pixels_uv[first], src_pixels_vu[second]);
    EXPECT_EQ(dst_pixels_uv[second], src_pixels_vu[first]);
  }

  free_aligned_buffer_page_end(src_pixels_vu);
  free_aligned_buffer_page_end(dst_pixels_uv);
}
4400 #endif // ENABLE_ROW_TESTS
4401
// Runs GaussPlane_F32 once with CPU features masked by disable_cpu_flags_
// and benchmark_iterations_ times with benchmark_cpu_info_, then requires
// every output sample of the two runs to agree within 1.0.
TEST_F(LibYUVPlanarTest, TestGaussPlane_F32) {
  const int kSamples = benchmark_width_ * benchmark_height_;
  const int kSize = kSamples * 4;  // 4 bytes per float sample.
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dst_pixels_opt, kSize);
  align_buffer_page_end(dst_pixels_c, kSize);

  float* const src = reinterpret_cast<float*>(orig_pixels);
  float* const out_opt = reinterpret_cast<float*>(dst_pixels_opt);
  float* const out_c = reinterpret_cast<float*>(dst_pixels_c);

  // Input pattern: multiples of 3.14 that wrap every 1024 samples.
  for (int n = 0; n < kSamples; ++n) {
    src[n] = (n & 1023) * 3.14f;
  }
  // Different fill bytes so an output that is never written is detected.
  memset(dst_pixels_opt, 1, kSize);
  memset(dst_pixels_c, 2, kSize);

  // Reference pass with the "disabled" CPU feature mask.
  MaskCpuFlags(disable_cpu_flags_);
  GaussPlane_F32(src, benchmark_width_, out_c, benchmark_width_,
                 benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Timed passes with the benchmark CPU feature mask.
  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    GaussPlane_F32(src, benchmark_width_, out_opt, benchmark_width_,
                   benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kSamples; ++i) {
    EXPECT_NEAR(out_c[i], out_opt[i], 1.f) << i;
  }

  free_aligned_buffer_page_end(dst_pixels_c);
  free_aligned_buffer_page_end(dst_pixels_opt);
  free_aligned_buffer_page_end(orig_pixels);
}
4434
// Runs HalfMergeUVPlane once with CPU features masked by disable_cpu_flags_
// and benchmark_iterations_ times with benchmark_cpu_info_, then requires
// both destination buffers to be byte-identical.
//
// Sources are separate full-size U and V planes; the destination has
// (width + 1) / 2 two-byte pairs per row and (height + 1) / 2 rows.
TEST_F(LibYUVPlanarTest, HalfMergeUVPlane_Opt) {
  const int dst_width = (benchmark_width_ + 1) / 2;
  const int dst_height = (benchmark_height_ + 1) / 2;
  const int kSrcPlaneSize = benchmark_width_ * benchmark_height_;
  const int kDstStride = dst_width * 2;  // Two bytes per output pair.
  const int kDstSize = kDstStride * dst_height;

  // The unused tmp_pixels_u / tmp_pixels_v scratch buffers of the previous
  // version were dropped: nothing ever read them.
  align_buffer_page_end(src_pixels_u, kSrcPlaneSize);
  align_buffer_page_end(src_pixels_v, kSrcPlaneSize);
  align_buffer_page_end(dst_pixels_uv_opt, kDstSize);
  align_buffer_page_end(dst_pixels_uv_c, kDstSize);

  // Destinations are randomized too, so stale bytes cannot match by luck.
  MemRandomize(src_pixels_u, kSrcPlaneSize);
  MemRandomize(src_pixels_v, kSrcPlaneSize);
  MemRandomize(dst_pixels_uv_opt, kDstSize);
  MemRandomize(dst_pixels_uv_c, kDstSize);

  // Reference pass with the "disabled" CPU feature mask.
  MaskCpuFlags(disable_cpu_flags_);
  HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
                   benchmark_width_, dst_pixels_uv_c, kDstStride,
                   benchmark_width_, benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);

  // Timed passes with the benchmark CPU feature mask.
  for (int i = 0; i < benchmark_iterations_; ++i) {
    HalfMergeUVPlane(src_pixels_u, benchmark_width_, src_pixels_v,
                     benchmark_width_, dst_pixels_uv_opt, kDstStride,
                     benchmark_width_, benchmark_height_);
  }

  for (int i = 0; i < kDstSize; ++i) {
    EXPECT_EQ(dst_pixels_uv_c[i], dst_pixels_uv_opt[i]);
  }

  free_aligned_buffer_page_end(src_pixels_u);
  free_aligned_buffer_page_end(src_pixels_v);
  free_aligned_buffer_page_end(dst_pixels_uv_opt);
  free_aligned_buffer_page_end(dst_pixels_uv_c);
}
4475
// Copies a random image with NV12Copy (a Y plane plus a half-resolution
// two-byte-per-sample UV plane) benchmark_iterations_ times and checks that
// both destination planes are byte-identical to the sources.
TEST_F(LibYUVPlanarTest, NV12Copy) {
  const int halfwidth = (benchmark_width_ + 1) >> 1;
  const int halfheight = (benchmark_height_ + 1) >> 1;
  const int kYSize = benchmark_width_ * benchmark_height_;
  const int kUVStride = halfwidth * 2;  // Two bytes per chroma sample pair.
  const int kUVSize = kUVStride * halfheight;

  align_buffer_page_end(src_y, kYSize);
  align_buffer_page_end(src_uv, kUVSize);
  align_buffer_page_end(dst_y, kYSize);
  align_buffer_page_end(dst_uv, kUVSize);

  // Destinations are randomized too, so stale bytes cannot match by luck.
  MemRandomize(src_y, kYSize);
  MemRandomize(src_uv, kUVSize);
  MemRandomize(dst_y, kYSize);
  MemRandomize(dst_uv, kUVSize);

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    NV12Copy(src_y, benchmark_width_, src_uv, kUVStride, dst_y,
             benchmark_width_, dst_uv, kUVStride, benchmark_width_,
             benchmark_height_);
  }

  for (int n = 0; n < kYSize; ++n) {
    EXPECT_EQ(src_y[n], dst_y[n]);
  }
  for (int n = 0; n < kUVSize; ++n) {
    EXPECT_EQ(src_uv[n], dst_uv[n]);
  }

  free_aligned_buffer_page_end(src_y);
  free_aligned_buffer_page_end(src_uv);
  free_aligned_buffer_page_end(dst_y);
  free_aligned_buffer_page_end(dst_uv);
}
4507
// Copies a random image with NV21Copy (a Y plane plus a half-resolution
// two-byte-per-sample VU plane) benchmark_iterations_ times and checks that
// both destination planes are byte-identical to the sources.
TEST_F(LibYUVPlanarTest, NV21Copy) {
  const int halfwidth = (benchmark_width_ + 1) >> 1;
  const int halfheight = (benchmark_height_ + 1) >> 1;
  const int kYSize = benchmark_width_ * benchmark_height_;
  const int kVUStride = halfwidth * 2;  // Two bytes per chroma sample pair.
  const int kVUSize = kVUStride * halfheight;

  align_buffer_page_end(src_y, kYSize);
  align_buffer_page_end(src_vu, kVUSize);
  align_buffer_page_end(dst_y, kYSize);
  align_buffer_page_end(dst_vu, kVUSize);

  // Destinations are randomized too, so stale bytes cannot match by luck.
  MemRandomize(src_y, kYSize);
  MemRandomize(src_vu, kVUSize);
  MemRandomize(dst_y, kYSize);
  MemRandomize(dst_vu, kVUSize);

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    NV21Copy(src_y, benchmark_width_, src_vu, kVUStride, dst_y,
             benchmark_width_, dst_vu, kVUStride, benchmark_width_,
             benchmark_height_);
  }

  for (int n = 0; n < kYSize; ++n) {
    EXPECT_EQ(src_y[n], dst_y[n]);
  }
  for (int n = 0; n < kVUSize; ++n) {
    EXPECT_EQ(src_vu[n], dst_vu[n]);
  }

  free_aligned_buffer_page_end(src_y);
  free_aligned_buffer_page_end(src_vu);
  free_aligned_buffer_page_end(dst_y);
  free_aligned_buffer_page_end(dst_vu);
}
4539
4540 #if defined(ENABLE_ROW_TESTS) && !defined(LIBYUV_DISABLE_NEON) && \
4541 defined(__aarch64__)
4542
// Round-trip check for ConvertFP16ToFP32Row_NEON: float input is narrowed to
// 16-bit with ConvertFP32ToFP16Row_NEON, widened back with the function
// under test, narrowed again, and the two 16-bit buffers must be identical.
TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32) {
  const int y_plane_size = benchmark_width_ * benchmark_height_;
  const int kHalfBytes = y_plane_size * 2;   // 2 bytes per 16-bit sample.
  const int kFloatBytes = y_plane_size * 4;  // 4 bytes per float sample.

  align_buffer_page_end(orig_f, kFloatBytes);
  align_buffer_page_end(orig_y, kHalfBytes);
  align_buffer_page_end(dst_opt, kFloatBytes);
  align_buffer_page_end(rec_opt, kHalfBytes);

  float* const src_f32 = reinterpret_cast<float*>(orig_f);
  uint16_t* const half = reinterpret_cast<uint16_t*>(orig_y);
  float* const widened = reinterpret_cast<float*>(dst_opt);
  uint16_t* const half_again = reinterpret_cast<uint16_t*>(rec_opt);

  // Input pattern: multiples of 3.14 that wrap every 10000 samples.
  for (int n = 0; n < y_plane_size; ++n) {
    src_f32[n] = static_cast<float>(n % 10000) * 3.14f;
  }
  // Distinct fill bytes so a buffer that is never written is detected.
  memset(orig_y, 1, kHalfBytes);
  memset(dst_opt, 2, kFloatBytes);
  memset(rec_opt, 3, kHalfBytes);

  ConvertFP32ToFP16Row_NEON(src_f32, half, y_plane_size);

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    ConvertFP16ToFP32Row_NEON(half, widened, y_plane_size);
  }

  ConvertFP32ToFP16Row_NEON(widened, half_again, y_plane_size);

  for (int n = 0; n < y_plane_size; ++n) {
    EXPECT_EQ(half[n], half_again[n]);
  }

  free_aligned_buffer_page_end(orig_f);
  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_opt);
  free_aligned_buffer_page_end(rec_opt);
}
4579
// Round-trip check for ConvertFP16ToFP32Column_NEON: float input is narrowed
// to 16-bit with ConvertFP32ToFP16Row_NEON, widened back with the function
// under test, narrowed again, and the two 16-bit buffers must be identical.
TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32Column) {
  const int y_plane_size = benchmark_width_ * benchmark_height_;
  const int kHalfBytes = y_plane_size * 2;   // 2 bytes per 16-bit sample.
  const int kFloatBytes = y_plane_size * 4;  // 4 bytes per float sample.

  align_buffer_page_end(orig_f, kFloatBytes);
  align_buffer_page_end(orig_y, kHalfBytes);
  align_buffer_page_end(dst_opt, kFloatBytes);
  align_buffer_page_end(rec_opt, kHalfBytes);

  float* const src_f32 = reinterpret_cast<float*>(orig_f);
  uint16_t* const half = reinterpret_cast<uint16_t*>(orig_y);
  float* const widened = reinterpret_cast<float*>(dst_opt);
  uint16_t* const half_again = reinterpret_cast<uint16_t*>(rec_opt);

  // Input pattern: multiples of 3.14 that wrap every 10000 samples.
  for (int n = 0; n < y_plane_size; ++n) {
    src_f32[n] = static_cast<float>(n % 10000) * 3.14f;
  }
  // Distinct fill bytes so a buffer that is never written is detected.
  memset(orig_y, 1, kHalfBytes);
  memset(dst_opt, 2, kFloatBytes);
  memset(rec_opt, 3, kHalfBytes);

  ConvertFP32ToFP16Row_NEON(src_f32, half, y_plane_size);

  for (int iter = 0; iter < benchmark_iterations_; ++iter) {
    // NOTE(review): the second argument (1) is presumably the source stride
    // in elements, which makes the column walk contiguous - confirm against
    // the function's declaration.
    ConvertFP16ToFP32Column_NEON(half, 1, widened, y_plane_size);
  }

  ConvertFP32ToFP16Row_NEON(widened, half_again, y_plane_size);

  for (int n = 0; n < y_plane_size; ++n) {
    EXPECT_EQ(half[n], half_again[n]);
  }

  free_aligned_buffer_page_end(orig_f);
  free_aligned_buffer_page_end(orig_y);
  free_aligned_buffer_page_end(dst_opt);
  free_aligned_buffer_page_end(rec_opt);
}
4616
4617 #endif // defined(ENABLE_ROW_TESTS) && defined(__aarch64__)
4618
4619 } // namespace libyuv
4620