// xref: /aosp_15_r20/external/webp/sharpyuv/sharpyuv.c (revision b2055c353e87c8814eb2b6b1b11112a1562253bd)
// Copyright 2022 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Sharp RGB to YUV conversion.
//
// Author: Skal ([email protected])

#include "sharpyuv/sharpyuv.h"

#include <assert.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "src/webp/types.h"
#include "sharpyuv/sharpyuv_cpu.h"
#include "sharpyuv/sharpyuv_dsp.h"
#include "sharpyuv/sharpyuv_gamma.h"

27 //------------------------------------------------------------------------------
28 
SharpYuvGetVersion(void)29 int SharpYuvGetVersion(void) {
30   return SHARPYUV_VERSION;
31 }
32 
//------------------------------------------------------------------------------
// Sharp RGB->YUV conversion

// Upper bound on the number of refinement passes run by DoSharpArgbToYuv().
static const int kNumIterations = 4;

#define YUV_FIX 16  // fixed-point precision for RGB->YUV
// Rounding term: one half expressed in YUV_FIX fixed-point.
static const int kYuvHalf = 1 << (YUV_FIX - 1);

// Max bit depth so that intermediate calculations fit in 16 bits.
static const int kMaxBitDepth = 14;

// Returns the precision shift to use based on the input rgb_bit_depth.
// The result is +2 when two extra bits of precision still fit within
// kMaxBitDepth; otherwise it is (kMaxBitDepth - rgb_bit_depth), which may be
// positive, zero, or negative (negative meaning bits are removed).
static int GetPrecisionShift(int rgb_bit_depth) {
  // Try to add 2 bits of precision if it fits in kMaxBitDepth. Otherwise remove
  // bits if needed.
  return ((rgb_bit_depth + 2) <= kMaxBitDepth) ? 2
                                               : (kMaxBitDepth - rgb_bit_depth);
}

typedef int16_t fixed_t;      // signed type with extra precision for UV
typedef uint16_t fixed_y_t;   // unsigned type with extra precision for W

//------------------------------------------------------------------------------

// Clamps 'v' to [0, 255] and returns it as a byte.
// The parameter is a plain int (callers pass int values) so that an input
// outside the int16_t range is clamped to the correct end instead of being
// wrapped by an implicit narrowing conversion before the range test.
static uint8_t clip_8b(int v) {
  return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
}

// Clamps 'v' to [0, max] and returns it as a 16-bit unsigned sample.
// The parameter is a plain int (callers pass int values) so that an input
// outside the int16_t range is clamped rather than wrapped by an implicit
// narrowing conversion.
static uint16_t clip(int v, int max) {
  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
}

clip_bit_depth(int y,int bit_depth)65 static fixed_y_t clip_bit_depth(int y, int bit_depth) {
66   const int max = (1 << bit_depth) - 1;
67   return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max;
68 }
69 
70 //------------------------------------------------------------------------------
71 
RGBToGray(int64_t r,int64_t g,int64_t b)72 static int RGBToGray(int64_t r, int64_t g, int64_t b) {
73   const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf;
74   return (int)(luma >> YUV_FIX);
75 }
76 
// Averages four samples in linear space: each of a, b, c, d is converted with
// SharpYuvGammaToLinear(), the rounded mean of the four is taken, and the
// result is converted back with SharpYuvLinearToGamma(). The working bit depth
// is rgb_bit_depth plus the precision shift.
static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
                          int rgb_bit_depth,
                          SharpYuvTransferFunctionType transfer_type) {
  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
  const uint32_t A = SharpYuvGammaToLinear(a, bit_depth, transfer_type);
  const uint32_t B = SharpYuvGammaToLinear(b, bit_depth, transfer_type);
  const uint32_t C = SharpYuvGammaToLinear(c, bit_depth, transfer_type);
  const uint32_t D = SharpYuvGammaToLinear(d, bit_depth, transfer_type);
  return SharpYuvLinearToGamma((A + B + C + D + 2) >> 2, bit_depth,
                               transfer_type);
}

UpdateW(const fixed_y_t * src,fixed_y_t * dst,int w,int rgb_bit_depth,SharpYuvTransferFunctionType transfer_type)89 static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w,
90                                 int rgb_bit_depth,
91                                 SharpYuvTransferFunctionType transfer_type) {
92   const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
93   int i = 0;
94   do {
95     const uint32_t R =
96         SharpYuvGammaToLinear(src[0 * w + i], bit_depth, transfer_type);
97     const uint32_t G =
98         SharpYuvGammaToLinear(src[1 * w + i], bit_depth, transfer_type);
99     const uint32_t B =
100         SharpYuvGammaToLinear(src[2 * w + i], bit_depth, transfer_type);
101     const uint32_t Y = RGBToGray(R, G, B);
102     dst[i] = (fixed_y_t)SharpYuvLinearToGamma(Y, bit_depth, transfer_type);
103   } while (++i < w);
104 }
105 
// Computes one row of down-sampled chroma differences from two rows of planar
// RGB. 'src1' and 'src2' each hold three planes of 2 * uv_w samples (R, G, B).
// For every 2x2 block the channels are averaged with ScaleDown(), the gray
// value W of the averaged pixel is computed, and (r - W), (g - W), (b - W) are
// written to the three planes of 'dst' (uv_w samples per plane).
// 'uv_w' must be positive: the loop body always runs at least once.
static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
                         fixed_t* dst, int uv_w, int rgb_bit_depth,
                         SharpYuvTransferFunctionType transfer_type) {
  int i = 0;
  assert(uv_w > 0);
  do {
    const int r =
        ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], src2[0 * uv_w + 0],
                  src2[0 * uv_w + 1], rgb_bit_depth, transfer_type);
    const int g =
        ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], src2[2 * uv_w + 0],
                  src2[2 * uv_w + 1], rgb_bit_depth, transfer_type);
    const int b =
        ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], src2[4 * uv_w + 0],
                  src2[4 * uv_w + 1], rgb_bit_depth, transfer_type);
    const int W = RGBToGray(r, g, b);
    dst[0 * uv_w] = (fixed_t)(r - W);
    dst[1 * uv_w] = (fixed_t)(g - W);
    dst[2 * uv_w] = (fixed_t)(b - W);
    // One output sample consumes two input samples per source row.
    dst  += 1;
    src1 += 2;
    src2 += 2;
  } while (++i < uv_w);
}

// Stores the gray value of each pixel of a planar RGB row into y[0..w-1].
// 'rgb' holds three consecutive planes of 'w' samples (R, G, B); no gamma
// conversion is applied here.
static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
  int i = 0;
  assert(w > 0);
  do {
    y[i] = (fixed_y_t)RGBToGray(rgb[0 * w + i], rgb[1 * w + i],
                                rgb[2 * w + i]);
  } while (++i < w);
}

138 //------------------------------------------------------------------------------
139 
Filter2(int A,int B,int W0,int bit_depth)140 static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) {
141   const int v0 = (A * 3 + B + 2) >> 2;
142   return clip_bit_depth(v0 + W0, bit_depth);
143 }
144 
145 //------------------------------------------------------------------------------
146 
Shift(int v,int shift)147 static WEBP_INLINE int Shift(int v, int shift) {
148   return (shift >= 0) ? (v << shift) : (v >> -shift);
149 }
150 
// Imports one row of RGB samples into the planar working representation.
//   r_ptr, g_ptr, b_ptr: first sample of each channel for this row. Samples
//                        are read as uint8_t when rgb_bit_depth == 8 and as
//                        uint16_t otherwise.
//   rgb_step:            distance between consecutive pixels, in bytes.
//   pic_width:           number of pixels to import; must be positive.
//   dst:                 receives three planes (R, G, B) of 'w' samples each,
//                        where 'w' is pic_width rounded up to an even number.
// Every sample is adjusted by the precision shift for rgb_bit_depth. When
// pic_width is odd, the rightmost pixel is duplicated to fill the padding.
static void ImportOneRow(const uint8_t* const r_ptr,
                         const uint8_t* const g_ptr,
                         const uint8_t* const b_ptr,
                         int rgb_step,
                         int rgb_bit_depth,
                         int pic_width,
                         fixed_y_t* const dst) {
  // Convert the rgb_step from a number of bytes to a number of uint8_t or
  // uint16_t values depending the bit depth.
  const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step;
  const int shift = GetPrecisionShift(rgb_bit_depth);  // loop invariant
  const int w = (pic_width + 1) & ~1;
  int i = 0;
  assert(pic_width > 0);
  do {
    const int off = i * step;
    if (rgb_bit_depth == 8) {
      dst[i + 0 * w] = Shift(r_ptr[off], shift);
      dst[i + 1 * w] = Shift(g_ptr[off], shift);
      dst[i + 2 * w] = Shift(b_ptr[off], shift);
    } else {
      dst[i + 0 * w] = Shift(((const uint16_t*)r_ptr)[off], shift);
      dst[i + 1 * w] = Shift(((const uint16_t*)g_ptr)[off], shift);
      dst[i + 2 * w] = Shift(((const uint16_t*)b_ptr)[off], shift);
    }
  } while (++i < pic_width);
  if (pic_width & 1) {  // replicate rightmost pixel
    dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
    dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];
    dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];
  }
}

// Reconstructs two full-resolution rows of planar RGB from two rows of gray
// values and three neighbouring rows of chroma differences.
//   best_y:   two consecutive rows of 'w' gray values (rows at +0 and +w).
//   prev_uv, cur_uv, next_uv: chroma rows, each made of three planes of
//             (w >> 1) samples.
//   out1:     upper output row, blended from cur_uv and prev_uv.
//   out2:     lower output row, blended from cur_uv and next_uv.
// Both outputs hold three planes of 'w' samples. The same gray rows are
// reused for all three channels, so best_y is not advanced inside the loop.
// The first sample (and the last one when 'w' is even) is handled here with
// Filter2(); the remaining samples are delegated to SharpYuvFilterRow().
static void InterpolateTwoRows(const fixed_y_t* const best_y,
                               const fixed_t* prev_uv,
                               const fixed_t* cur_uv,
                               const fixed_t* next_uv,
                               int w,
                               fixed_y_t* out1,
                               fixed_y_t* out2,
                               int rgb_bit_depth) {
  const int uv_w = w >> 1;
  const int len = (w - 1) >> 1;   // length to filter
  int k = 3;
  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
  while (k-- > 0) {   // process each R/G/B segments in turn
    // special boundary case for i==0
    out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth);
    out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth);

    SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1,
                      bit_depth);
    SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1,
                      bit_depth);

    // special boundary case for i == w - 1 when w is even
    if (!(w & 1)) {
      out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
                            best_y[w - 1 + 0], bit_depth);
      out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
                            best_y[w - 1 + w], bit_depth);
    }
    // Move on to the next channel plane.
    out1 += w;
    out2 += w;
    prev_uv += uv_w;
    cur_uv  += uv_w;
    next_uv += uv_w;
  }
}

RGBToYUVComponent(int r,int g,int b,const int coeffs[4],int sfix)220 static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b,
221                                          const int coeffs[4], int sfix) {
222   const int srounder = 1 << (YUV_FIX + sfix - 1);
223   const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b +
224                    coeffs[3] + srounder;
225   return (luma >> (YUV_FIX + sfix));
226 }
227 
ConvertWRGBToYUV(const fixed_y_t * best_y,const fixed_t * best_uv,uint8_t * y_ptr,int y_stride,uint8_t * u_ptr,int u_stride,uint8_t * v_ptr,int v_stride,int rgb_bit_depth,int yuv_bit_depth,int width,int height,const SharpYuvConversionMatrix * yuv_matrix)228 static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
229                             uint8_t* y_ptr, int y_stride, uint8_t* u_ptr,
230                             int u_stride, uint8_t* v_ptr, int v_stride,
231                             int rgb_bit_depth,
232                             int yuv_bit_depth, int width, int height,
233                             const SharpYuvConversionMatrix* yuv_matrix) {
234   int i, j;
235   const fixed_t* const best_uv_base = best_uv;
236   const int w = (width + 1) & ~1;
237   const int h = (height + 1) & ~1;
238   const int uv_w = w >> 1;
239   const int uv_h = h >> 1;
240   const int sfix = GetPrecisionShift(rgb_bit_depth);
241   const int yuv_max = (1 << yuv_bit_depth) - 1;
242 
243   best_uv = best_uv_base;
244   j = 0;
245   do {
246     i = 0;
247     do {
248       const int off = (i >> 1);
249       const int W = best_y[i];
250       const int r = best_uv[off + 0 * uv_w] + W;
251       const int g = best_uv[off + 1 * uv_w] + W;
252       const int b = best_uv[off + 2 * uv_w] + W;
253       const int y = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y, sfix);
254       if (yuv_bit_depth <= 8) {
255         y_ptr[i] = clip_8b(y);
256       } else {
257         ((uint16_t*)y_ptr)[i] = clip(y, yuv_max);
258       }
259     } while (++i < width);
260     best_y += w;
261     best_uv += (j & 1) * 3 * uv_w;
262     y_ptr += y_stride;
263   } while (++j < height);
264 
265   best_uv = best_uv_base;
266   j = 0;
267   do {
268     i = 0;
269     do {
270       // Note r, g and b values here are off by W, but a constant offset on all
271       // 3 components doesn't change the value of u and v with a YCbCr matrix.
272       const int r = best_uv[i + 0 * uv_w];
273       const int g = best_uv[i + 1 * uv_w];
274       const int b = best_uv[i + 2 * uv_w];
275       const int u = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u, sfix);
276       const int v = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v, sfix);
277       if (yuv_bit_depth <= 8) {
278         u_ptr[i] = clip_8b(u);
279         v_ptr[i] = clip_8b(v);
280       } else {
281         ((uint16_t*)u_ptr)[i] = clip(u, yuv_max);
282         ((uint16_t*)v_ptr)[i] = clip(v, yuv_max);
283       }
284     } while (++i < uv_w);
285     best_uv += 3 * uv_w;
286     u_ptr += u_stride;
287     v_ptr += v_stride;
288   } while (++j < uv_h);
289   return 1;
290 }
291 
//------------------------------------------------------------------------------
// Main function

// Allocates nmemb * size bytes with malloc().
// Returns NULL when the product overflows 64 bits, when it does not fit in
// size_t, or when malloc() itself fails. The caller releases the memory with
// free().
static void* SafeMalloc(uint64_t nmemb, size_t size) {
  uint64_t total_size;
  // Reject products that would wrap around in 64-bit arithmetic; a wrapped
  // value could otherwise pass the size_t check below and yield a buffer
  // smaller than requested.
  if (size != 0 && nmemb > UINT64_MAX / size) return NULL;
  total_size = nmemb * (uint64_t)size;
  if (total_size != (size_t)total_size) return NULL;
  return malloc((size_t)total_size);
}

301 #define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((uint64_t)(W) * (H), sizeof(T)))
302 
DoSharpArgbToYuv(const uint8_t * r_ptr,const uint8_t * g_ptr,const uint8_t * b_ptr,int rgb_step,int rgb_stride,int rgb_bit_depth,uint8_t * y_ptr,int y_stride,uint8_t * u_ptr,int u_stride,uint8_t * v_ptr,int v_stride,int yuv_bit_depth,int width,int height,const SharpYuvConversionMatrix * yuv_matrix,SharpYuvTransferFunctionType transfer_type)303 static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
304                             const uint8_t* b_ptr, int rgb_step, int rgb_stride,
305                             int rgb_bit_depth, uint8_t* y_ptr, int y_stride,
306                             uint8_t* u_ptr, int u_stride, uint8_t* v_ptr,
307                             int v_stride, int yuv_bit_depth, int width,
308                             int height,
309                             const SharpYuvConversionMatrix* yuv_matrix,
310                             SharpYuvTransferFunctionType transfer_type) {
311   // we expand the right/bottom border if needed
312   const int w = (width + 1) & ~1;
313   const int h = (height + 1) & ~1;
314   const int uv_w = w >> 1;
315   const int uv_h = h >> 1;
316   const int y_bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
317   uint64_t prev_diff_y_sum = ~0;
318   int j, iter;
319 
320   // TODO(skal): allocate one big memory chunk. But for now, it's easier
321   // for valgrind debugging to have several chunks.
322   fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch
323   fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
324   fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
325   fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
326   fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
327   fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
328   fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
329   fixed_y_t* best_y = best_y_base;
330   fixed_y_t* target_y = target_y_base;
331   fixed_t* best_uv = best_uv_base;
332   fixed_t* target_uv = target_uv_base;
333   const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
334   int ok;
335   assert(w > 0);
336   assert(h > 0);
337 
338   if (best_y_base == NULL || best_uv_base == NULL ||
339       target_y_base == NULL || target_uv_base == NULL ||
340       best_rgb_y == NULL || best_rgb_uv == NULL ||
341       tmp_buffer == NULL) {
342     ok = 0;
343     goto End;
344   }
345 
346   // Import RGB samples to W/RGB representation.
347   for (j = 0; j < height; j += 2) {
348     const int is_last_row = (j == height - 1);
349     fixed_y_t* const src1 = tmp_buffer + 0 * w;
350     fixed_y_t* const src2 = tmp_buffer + 3 * w;
351 
352     // prepare two rows of input
353     ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width,
354                  src1);
355     if (!is_last_row) {
356       ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
357                    rgb_step, rgb_bit_depth, width, src2);
358     } else {
359       memcpy(src2, src1, 3 * w * sizeof(*src2));
360     }
361     StoreGray(src1, best_y + 0, w);
362     StoreGray(src2, best_y + w, w);
363 
364     UpdateW(src1, target_y, w, rgb_bit_depth, transfer_type);
365     UpdateW(src2, target_y + w, w, rgb_bit_depth, transfer_type);
366     UpdateChroma(src1, src2, target_uv, uv_w, rgb_bit_depth, transfer_type);
367     memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
368     best_y += 2 * w;
369     best_uv += 3 * uv_w;
370     target_y += 2 * w;
371     target_uv += 3 * uv_w;
372     r_ptr += 2 * rgb_stride;
373     g_ptr += 2 * rgb_stride;
374     b_ptr += 2 * rgb_stride;
375   }
376 
377   // Iterate and resolve clipping conflicts.
378   for (iter = 0; iter < kNumIterations; ++iter) {
379     const fixed_t* cur_uv = best_uv_base;
380     const fixed_t* prev_uv = best_uv_base;
381     uint64_t diff_y_sum = 0;
382 
383     best_y = best_y_base;
384     best_uv = best_uv_base;
385     target_y = target_y_base;
386     target_uv = target_uv_base;
387     j = 0;
388     do {
389       fixed_y_t* const src1 = tmp_buffer + 0 * w;
390       fixed_y_t* const src2 = tmp_buffer + 3 * w;
391       {
392         const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
393         InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w,
394                            src1, src2, rgb_bit_depth);
395         prev_uv = cur_uv;
396         cur_uv = next_uv;
397       }
398 
399       UpdateW(src1, best_rgb_y + 0 * w, w, rgb_bit_depth, transfer_type);
400       UpdateW(src2, best_rgb_y + 1 * w, w, rgb_bit_depth, transfer_type);
401       UpdateChroma(src1, src2, best_rgb_uv, uv_w, rgb_bit_depth, transfer_type);
402 
403       // update two rows of Y and one row of RGB
404       diff_y_sum +=
405           SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w, y_bit_depth);
406       SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);
407 
408       best_y += 2 * w;
409       best_uv += 3 * uv_w;
410       target_y += 2 * w;
411       target_uv += 3 * uv_w;
412       j += 2;
413     } while (j < h);
414     // test exit condition
415     if (iter > 0) {
416       if (diff_y_sum < diff_y_threshold) break;
417       if (diff_y_sum > prev_diff_y_sum) break;
418     }
419     prev_diff_y_sum = diff_y_sum;
420   }
421 
422   // final reconstruction
423   ok = ConvertWRGBToYUV(best_y_base, best_uv_base, y_ptr, y_stride, u_ptr,
424                         u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth,
425                         width, height, yuv_matrix);
426 
427  End:
428   free(best_y_base);
429   free(best_uv_base);
430   free(target_y_base);
431   free(target_uv_base);
432   free(best_rgb_y);
433   free(best_rgb_uv);
434   free(tmp_buffer);
435   return ok;
436 }
437 
438 #undef SAFE_ALLOC
439 
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
#include <pthread.h>  // NOLINT

// LOCK_ACCESS declares a function-local static mutex named 'sharpyuv_lock' and
// locks it; if locking fails, the enclosing (void) function returns at once.
// UNLOCK_ACCESS_AND_RETURN unlocks that mutex and returns from the function.
// Both macros must be used within the same function.
#define LOCK_ACCESS \
    static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \
    if (pthread_mutex_lock(&sharpyuv_lock)) return
#define UNLOCK_ACCESS_AND_RETURN                  \
    do {                                          \
      (void)pthread_mutex_unlock(&sharpyuv_lock); \
      return;                                     \
    } while (0)
#else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
// Without pthread support the macros perform no locking.
#define LOCK_ACCESS do {} while (0)
#define UNLOCK_ACCESS_AND_RETURN return
#endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)

456 // Hidden exported init function.
457 // By default SharpYuvConvert calls it with SharpYuvGetCPUInfo. If needed,
458 // users can declare it as extern and call it with an alternate VP8CPUInfo
459 // function.
460 extern VP8CPUInfo SharpYuvGetCPUInfo;
461 SHARPYUV_EXTERN void SharpYuvInit(VP8CPUInfo cpu_info_func);
SharpYuvInit(VP8CPUInfo cpu_info_func)462 void SharpYuvInit(VP8CPUInfo cpu_info_func) {
463   static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used =
464       (VP8CPUInfo)&sharpyuv_last_cpuinfo_used;
465   LOCK_ACCESS;
466   // Only update SharpYuvGetCPUInfo when called from external code to avoid a
467   // race on reading the value in SharpYuvConvert().
468   if (cpu_info_func != (VP8CPUInfo)&SharpYuvGetCPUInfo) {
469     SharpYuvGetCPUInfo = cpu_info_func;
470   }
471   if (sharpyuv_last_cpuinfo_used == SharpYuvGetCPUInfo) {
472     UNLOCK_ACCESS_AND_RETURN;
473   }
474 
475   SharpYuvInitDsp();
476   SharpYuvInitGammaTables();
477 
478   sharpyuv_last_cpuinfo_used = SharpYuvGetCPUInfo;
479   UNLOCK_ACCESS_AND_RETURN;
480 }
481 
SharpYuvConvert(const void * r_ptr,const void * g_ptr,const void * b_ptr,int rgb_step,int rgb_stride,int rgb_bit_depth,void * y_ptr,int y_stride,void * u_ptr,int u_stride,void * v_ptr,int v_stride,int yuv_bit_depth,int width,int height,const SharpYuvConversionMatrix * yuv_matrix)482 int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr,
483                     int rgb_step, int rgb_stride, int rgb_bit_depth,
484                     void* y_ptr, int y_stride, void* u_ptr, int u_stride,
485                     void* v_ptr, int v_stride, int yuv_bit_depth, int width,
486                     int height, const SharpYuvConversionMatrix* yuv_matrix) {
487   SharpYuvOptions options;
488   options.yuv_matrix = yuv_matrix;
489   options.transfer_type = kSharpYuvTransferFunctionSrgb;
490   return SharpYuvConvertWithOptions(
491       r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride, rgb_bit_depth, y_ptr, y_stride,
492       u_ptr, u_stride, v_ptr, v_stride, yuv_bit_depth, width, height, &options);
493 }
494 
// Initializes 'options' with 'yuv_matrix' and the sRGB transfer function.
// 'version' is the caller's packed version: the major number is taken from
// bits 24 and up, the minor number from bits 16..23.
// Returns 0 without touching 'options' when 'options' or 'yuv_matrix' is NULL,
// when the major version differs from SHARPYUV_VERSION_MAJOR, or when the
// major version is 0 and the minor version differs from
// SHARPYUV_VERSION_MINOR. Returns 1 on success.
int SharpYuvOptionsInitInternal(const SharpYuvConversionMatrix* yuv_matrix,
                                SharpYuvOptions* options, int version) {
  const int major = (version >> 24);
  const int minor = (version >> 16) & 0xff;
  if (options == NULL || yuv_matrix == NULL) return 0;
  if (major != SHARPYUV_VERSION_MAJOR) return 0;
  // For major version 0 the minor version must match as well.
  if (major == 0 && minor != SHARPYUV_VERSION_MINOR) return 0;
  options->yuv_matrix = yuv_matrix;
  options->transfer_type = kSharpYuvTransferFunctionSrgb;
  return 1;
}

SharpYuvConvertWithOptions(const void * r_ptr,const void * g_ptr,const void * b_ptr,int rgb_step,int rgb_stride,int rgb_bit_depth,void * y_ptr,int y_stride,void * u_ptr,int u_stride,void * v_ptr,int v_stride,int yuv_bit_depth,int width,int height,const SharpYuvOptions * options)510 int SharpYuvConvertWithOptions(const void* r_ptr, const void* g_ptr,
511                                const void* b_ptr, int rgb_step, int rgb_stride,
512                                int rgb_bit_depth, void* y_ptr, int y_stride,
513                                void* u_ptr, int u_stride, void* v_ptr,
514                                int v_stride, int yuv_bit_depth, int width,
515                                int height, const SharpYuvOptions* options) {
516   const SharpYuvConversionMatrix* yuv_matrix = options->yuv_matrix;
517   SharpYuvTransferFunctionType transfer_type = options->transfer_type;
518   SharpYuvConversionMatrix scaled_matrix;
519   const int rgb_max = (1 << rgb_bit_depth) - 1;
520   const int rgb_round = 1 << (rgb_bit_depth - 1);
521   const int yuv_max = (1 << yuv_bit_depth) - 1;
522   const int sfix = GetPrecisionShift(rgb_bit_depth);
523 
524   if (width < 1 || height < 1 || width == INT_MAX || height == INT_MAX ||
525       r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL ||
526       u_ptr == NULL || v_ptr == NULL) {
527     return 0;
528   }
529   if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 &&
530       rgb_bit_depth != 16) {
531     return 0;
532   }
533   if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) {
534     return 0;
535   }
536   if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride % 2 != 0)) {
537     // Step/stride should be even for uint16_t buffers.
538     return 0;
539   }
540   if (yuv_bit_depth > 8 &&
541       (y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) {
542     // Stride should be even for uint16_t buffers.
543     return 0;
544   }
545   // The address of the function pointer is used to avoid a read race.
546   SharpYuvInit((VP8CPUInfo)&SharpYuvGetCPUInfo);
547 
548   // Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the
549   // rgb->yuv conversion matrix.
550   if (rgb_bit_depth == yuv_bit_depth) {
551     memcpy(&scaled_matrix, yuv_matrix, sizeof(scaled_matrix));
552   } else {
553     int i;
554     for (i = 0; i < 3; ++i) {
555       scaled_matrix.rgb_to_y[i] =
556           (yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max;
557       scaled_matrix.rgb_to_u[i] =
558           (yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max;
559       scaled_matrix.rgb_to_v[i] =
560           (yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max;
561     }
562   }
563   // Also incorporate precision change scaling.
564   scaled_matrix.rgb_to_y[3] = Shift(yuv_matrix->rgb_to_y[3], sfix);
565   scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix);
566   scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix);
567 
568   return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride,
569                           rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride,
570                           v_ptr, v_stride, yuv_bit_depth, width, height,
571                           &scaled_matrix, transfer_type);
572 }
573 
574 //------------------------------------------------------------------------------
575