xref: /aosp_15_r20/external/webp/src/dsp/lossless.c (revision b2055c353e87c8814eb2b6b1b11112a1562253bd)
1*b2055c35SXin Li // Copyright 2012 Google Inc. All Rights Reserved.
2*b2055c35SXin Li //
3*b2055c35SXin Li // Use of this source code is governed by a BSD-style license
4*b2055c35SXin Li // that can be found in the COPYING file in the root of the source
5*b2055c35SXin Li // tree. An additional intellectual property rights grant can be found
6*b2055c35SXin Li // in the file PATENTS. All contributing project authors may
7*b2055c35SXin Li // be found in the AUTHORS file in the root of the source tree.
8*b2055c35SXin Li // -----------------------------------------------------------------------------
9*b2055c35SXin Li //
10*b2055c35SXin Li // Image transforms and color space conversion methods for lossless decoder.
11*b2055c35SXin Li //
12*b2055c35SXin Li // Authors: Vikas Arora ([email protected])
13*b2055c35SXin Li //          Jyrki Alakuijala ([email protected])
14*b2055c35SXin Li //          Urvang Joshi ([email protected])
15*b2055c35SXin Li 
16*b2055c35SXin Li #include "src/dsp/dsp.h"
17*b2055c35SXin Li 
18*b2055c35SXin Li #include <assert.h>
19*b2055c35SXin Li #include <math.h>
20*b2055c35SXin Li #include <stdlib.h>
21*b2055c35SXin Li #include "src/dec/vp8li_dec.h"
22*b2055c35SXin Li #include "src/utils/endian_inl_utils.h"
23*b2055c35SXin Li #include "src/dsp/lossless.h"
24*b2055c35SXin Li #include "src/dsp/lossless_common.h"
25*b2055c35SXin Li 
26*b2055c35SXin Li //------------------------------------------------------------------------------
27*b2055c35SXin Li // Image transforms.
28*b2055c35SXin Li 
Average2(uint32_t a0,uint32_t a1)29*b2055c35SXin Li static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
30*b2055c35SXin Li   return (((a0 ^ a1) & 0xfefefefeu) >> 1) + (a0 & a1);
31*b2055c35SXin Li }
32*b2055c35SXin Li 
Average3(uint32_t a0,uint32_t a1,uint32_t a2)33*b2055c35SXin Li static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
34*b2055c35SXin Li   return Average2(Average2(a0, a2), a1);
35*b2055c35SXin Li }
36*b2055c35SXin Li 
Average4(uint32_t a0,uint32_t a1,uint32_t a2,uint32_t a3)37*b2055c35SXin Li static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
38*b2055c35SXin Li                                      uint32_t a2, uint32_t a3) {
39*b2055c35SXin Li   return Average2(Average2(a0, a1), Average2(a2, a3));
40*b2055c35SXin Li }
41*b2055c35SXin Li 
Clip255(uint32_t a)42*b2055c35SXin Li static WEBP_INLINE uint32_t Clip255(uint32_t a) {
43*b2055c35SXin Li   if (a < 256) {
44*b2055c35SXin Li     return a;
45*b2055c35SXin Li   }
46*b2055c35SXin Li   // return 0, when a is a negative integer.
47*b2055c35SXin Li   // return 255, when a is positive.
48*b2055c35SXin Li   return ~a >> 24;
49*b2055c35SXin Li }
50*b2055c35SXin Li 
AddSubtractComponentFull(int a,int b,int c)51*b2055c35SXin Li static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
52*b2055c35SXin Li   return Clip255((uint32_t)(a + b - c));
53*b2055c35SXin Li }
54*b2055c35SXin Li 
ClampedAddSubtractFull(uint32_t c0,uint32_t c1,uint32_t c2)55*b2055c35SXin Li static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
56*b2055c35SXin Li                                                    uint32_t c2) {
57*b2055c35SXin Li   const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
58*b2055c35SXin Li   const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
59*b2055c35SXin Li                                          (c1 >> 16) & 0xff,
60*b2055c35SXin Li                                          (c2 >> 16) & 0xff);
61*b2055c35SXin Li   const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
62*b2055c35SXin Li                                          (c1 >> 8) & 0xff,
63*b2055c35SXin Li                                          (c2 >> 8) & 0xff);
64*b2055c35SXin Li   const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
65*b2055c35SXin Li   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
66*b2055c35SXin Li }
67*b2055c35SXin Li 
AddSubtractComponentHalf(int a,int b)68*b2055c35SXin Li static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
69*b2055c35SXin Li   return Clip255((uint32_t)(a + (a - b) / 2));
70*b2055c35SXin Li }
71*b2055c35SXin Li 
ClampedAddSubtractHalf(uint32_t c0,uint32_t c1,uint32_t c2)72*b2055c35SXin Li static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
73*b2055c35SXin Li                                                    uint32_t c2) {
74*b2055c35SXin Li   const uint32_t ave = Average2(c0, c1);
75*b2055c35SXin Li   const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
76*b2055c35SXin Li   const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
77*b2055c35SXin Li   const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
78*b2055c35SXin Li   const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
79*b2055c35SXin Li   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
80*b2055c35SXin Li }
81*b2055c35SXin Li 
82*b2055c35SXin Li // gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is
83*b2055c35SXin Li // inlined.
84*b2055c35SXin Li #if defined(__arm__) && defined(__GNUC__) && LOCAL_GCC_VERSION <= 0x409
85*b2055c35SXin Li # define LOCAL_INLINE __attribute__ ((noinline))
86*b2055c35SXin Li #else
87*b2055c35SXin Li # define LOCAL_INLINE WEBP_INLINE
88*b2055c35SXin Li #endif
89*b2055c35SXin Li 
Sub3(int a,int b,int c)90*b2055c35SXin Li static LOCAL_INLINE int Sub3(int a, int b, int c) {
91*b2055c35SXin Li   const int pb = b - c;
92*b2055c35SXin Li   const int pa = a - c;
93*b2055c35SXin Li   return abs(pb) - abs(pa);
94*b2055c35SXin Li }
95*b2055c35SXin Li 
96*b2055c35SXin Li #undef LOCAL_INLINE
97*b2055c35SXin Li 
Select(uint32_t a,uint32_t b,uint32_t c)98*b2055c35SXin Li static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
99*b2055c35SXin Li   const int pa_minus_pb =
100*b2055c35SXin Li       Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
101*b2055c35SXin Li       Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
102*b2055c35SXin Li       Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
103*b2055c35SXin Li       Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
104*b2055c35SXin Li   return (pa_minus_pb <= 0) ? a : b;
105*b2055c35SXin Li }
106*b2055c35SXin Li 
107*b2055c35SXin Li //------------------------------------------------------------------------------
108*b2055c35SXin Li // Predictors
109*b2055c35SXin Li 
VP8LPredictor0_C(const uint32_t * const left,const uint32_t * const top)110*b2055c35SXin Li uint32_t VP8LPredictor0_C(const uint32_t* const left,
111*b2055c35SXin Li                           const uint32_t* const top) {
112*b2055c35SXin Li   (void)top;
113*b2055c35SXin Li   (void)left;
114*b2055c35SXin Li   return ARGB_BLACK;
115*b2055c35SXin Li }
// Predictor mode 1: predicts the pixel pointed to by 'left'. 'top' is unused.
uint32_t VP8LPredictor1_C(const uint32_t* const left,
                          const uint32_t* const top) {
  (void)top;
  return *left;
}
// Predictor mode 2: predicts top[0], the pixel directly addressed by 'top'.
// 'left' is unused.
uint32_t VP8LPredictor2_C(const uint32_t* const left,
                          const uint32_t* const top) {
  (void)left;
  return top[0];
}
// Predictor mode 3: predicts top[1], the pixel one position after 'top'.
// 'left' is unused.
uint32_t VP8LPredictor3_C(const uint32_t* const left,
                          const uint32_t* const top) {
  (void)left;
  return top[1];
}
// Predictor mode 4: predicts top[-1], the pixel one position before 'top'.
// 'left' is unused. The caller must make sure top[-1] is readable.
uint32_t VP8LPredictor4_C(const uint32_t* const left,
                          const uint32_t* const top) {
  (void)left;
  return top[-1];
}
// Predictor mode 5: Average3() of *left, top[0] and top[1].
uint32_t VP8LPredictor5_C(const uint32_t* const left,
                          const uint32_t* const top) {
  const uint32_t pred = Average3(*left, top[0], top[1]);
  return pred;
}
// Predictor mode 6: Average2() of *left and top[-1].
uint32_t VP8LPredictor6_C(const uint32_t* const left,
                          const uint32_t* const top) {
  const uint32_t pred = Average2(*left, top[-1]);
  return pred;
}
// Predictor mode 7: Average2() of *left and top[0].
uint32_t VP8LPredictor7_C(const uint32_t* const left,
                          const uint32_t* const top) {
  const uint32_t pred = Average2(*left, top[0]);
  return pred;
}
// Predictor mode 8: Average2() of top[-1] and top[0]. 'left' is unused.
uint32_t VP8LPredictor8_C(const uint32_t* const left,
                          const uint32_t* const top) {
  const uint32_t pred = Average2(top[-1], top[0]);
  (void)left;
  return pred;
}
// Predictor mode 9: Average2() of top[0] and top[1]. 'left' is unused.
uint32_t VP8LPredictor9_C(const uint32_t* const left,
                          const uint32_t* const top) {
  const uint32_t pred = Average2(top[0], top[1]);
  (void)left;
  return pred;
}
// Predictor mode 10: Average4() of *left, top[-1], top[0] and top[1].
uint32_t VP8LPredictor10_C(const uint32_t* const left,
                           const uint32_t* const top) {
  const uint32_t pred = Average4(*left, top[-1], top[0], top[1]);
  return pred;
}
// Predictor mode 11: Select() between top[0] and *left, with top[-1] as the
// reference pixel.
uint32_t VP8LPredictor11_C(const uint32_t* const left,
                           const uint32_t* const top) {
  const uint32_t pred = Select(top[0], *left, top[-1]);
  return pred;
}
// Predictor mode 12: per-channel clamp(*left + top[0] - top[-1]) through
// ClampedAddSubtractFull().
uint32_t VP8LPredictor12_C(const uint32_t* const left,
                           const uint32_t* const top) {
  const uint32_t pred = ClampedAddSubtractFull(*left, top[0], top[-1]);
  return pred;
}
// Predictor mode 13: ClampedAddSubtractHalf() applied to *left, top[0] and
// top[-1] (average of the first two, pushed away from the third by half the
// difference, clamped per channel).
uint32_t VP8LPredictor13_C(const uint32_t* const left,
                           const uint32_t* const top) {
  const uint32_t pred = ClampedAddSubtractHalf(*left, top[0], top[-1]);
  return pred;
}
183*b2055c35SXin Li 
PredictorAdd0_C(const uint32_t * in,const uint32_t * upper,int num_pixels,uint32_t * out)184*b2055c35SXin Li static void PredictorAdd0_C(const uint32_t* in, const uint32_t* upper,
185*b2055c35SXin Li                             int num_pixels, uint32_t* out) {
186*b2055c35SXin Li   int x;
187*b2055c35SXin Li   (void)upper;
188*b2055c35SXin Li   for (x = 0; x < num_pixels; ++x) out[x] = VP8LAddPixels(in[x], ARGB_BLACK);
189*b2055c35SXin Li }
// Mode-1 row helper: each output pixel is VP8LAddPixels(in[i], left), where
// 'left' is the previously written output pixel. The chain is seeded from
// out[-1], so the caller must guarantee that out[-1] is valid and already
// decoded. 'upper' is unused.
static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
                            int num_pixels, uint32_t* out) {
  int i;
  uint32_t left = out[-1];
  (void)upper;
  for (i = 0; i < num_pixels; ++i) {
    out[i] = left = VP8LAddPixels(in[i], left);
  }
}
GENERATE_PREDICTOR_ADD(VP8LPredictor2_C,PredictorAdd2_C)199*b2055c35SXin Li GENERATE_PREDICTOR_ADD(VP8LPredictor2_C, PredictorAdd2_C)
200*b2055c35SXin Li GENERATE_PREDICTOR_ADD(VP8LPredictor3_C, PredictorAdd3_C)
201*b2055c35SXin Li GENERATE_PREDICTOR_ADD(VP8LPredictor4_C, PredictorAdd4_C)
202*b2055c35SXin Li GENERATE_PREDICTOR_ADD(VP8LPredictor5_C, PredictorAdd5_C)
203*b2055c35SXin Li GENERATE_PREDICTOR_ADD(VP8LPredictor6_C, PredictorAdd6_C)
204*b2055c35SXin Li GENERATE_PREDICTOR_ADD(VP8LPredictor7_C, PredictorAdd7_C)
205*b2055c35SXin Li GENERATE_PREDICTOR_ADD(VP8LPredictor8_C, PredictorAdd8_C)
206*b2055c35SXin Li GENERATE_PREDICTOR_ADD(VP8LPredictor9_C, PredictorAdd9_C)
207*b2055c35SXin Li GENERATE_PREDICTOR_ADD(VP8LPredictor10_C, PredictorAdd10_C)
208*b2055c35SXin Li GENERATE_PREDICTOR_ADD(VP8LPredictor11_C, PredictorAdd11_C)
209*b2055c35SXin Li GENERATE_PREDICTOR_ADD(VP8LPredictor12_C, PredictorAdd12_C)
210*b2055c35SXin Li GENERATE_PREDICTOR_ADD(VP8LPredictor13_C, PredictorAdd13_C)
211*b2055c35SXin Li 
212*b2055c35SXin Li //------------------------------------------------------------------------------
213*b2055c35SXin Li 
214*b2055c35SXin Li // Inverse prediction.
215*b2055c35SXin Li static void PredictorInverseTransform_C(const VP8LTransform* const transform,
216*b2055c35SXin Li                                         int y_start, int y_end,
217*b2055c35SXin Li                                         const uint32_t* in, uint32_t* out) {
218*b2055c35SXin Li   const int width = transform->xsize_;
219*b2055c35SXin Li   if (y_start == 0) {  // First Row follows the L (mode=1) mode.
220*b2055c35SXin Li     PredictorAdd0_C(in, NULL, 1, out);
221*b2055c35SXin Li     PredictorAdd1_C(in + 1, NULL, width - 1, out + 1);
222*b2055c35SXin Li     in += width;
223*b2055c35SXin Li     out += width;
224*b2055c35SXin Li     ++y_start;
225*b2055c35SXin Li   }
226*b2055c35SXin Li 
227*b2055c35SXin Li   {
228*b2055c35SXin Li     int y = y_start;
229*b2055c35SXin Li     const int tile_width = 1 << transform->bits_;
230*b2055c35SXin Li     const int mask = tile_width - 1;
231*b2055c35SXin Li     const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
232*b2055c35SXin Li     const uint32_t* pred_mode_base =
233*b2055c35SXin Li         transform->data_ + (y >> transform->bits_) * tiles_per_row;
234*b2055c35SXin Li 
235*b2055c35SXin Li     while (y < y_end) {
236*b2055c35SXin Li       const uint32_t* pred_mode_src = pred_mode_base;
237*b2055c35SXin Li       int x = 1;
238*b2055c35SXin Li       // First pixel follows the T (mode=2) mode.
239*b2055c35SXin Li       PredictorAdd2_C(in, out - width, 1, out);
240*b2055c35SXin Li       // .. the rest:
241*b2055c35SXin Li       while (x < width) {
242*b2055c35SXin Li         const VP8LPredictorAddSubFunc pred_func =
243*b2055c35SXin Li             VP8LPredictorsAdd[((*pred_mode_src++) >> 8) & 0xf];
244*b2055c35SXin Li         int x_end = (x & ~mask) + tile_width;
245*b2055c35SXin Li         if (x_end > width) x_end = width;
246*b2055c35SXin Li         pred_func(in + x, out + x - width, x_end - x, out + x);
247*b2055c35SXin Li         x = x_end;
248*b2055c35SXin Li       }
249*b2055c35SXin Li       in += width;
250*b2055c35SXin Li       out += width;
251*b2055c35SXin Li       ++y;
252*b2055c35SXin Li       if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
253*b2055c35SXin Li         pred_mode_base += tiles_per_row;
254*b2055c35SXin Li       }
255*b2055c35SXin Li     }
256*b2055c35SXin Li   }
257*b2055c35SXin Li }
258*b2055c35SXin Li 
// Add green to blue and red channels (i.e. perform the inverse transform of
// 'subtract green').
// For each of the 'num_pixels' packed pixels in 'src', the green byte
// (bits 8..15) is added modulo 256 to the bytes in bits 16..23 and 0..7;
// the bytes in bits 24..31 and 8..15 are copied unchanged. The result is
// written to the same index in 'dst'.
void VP8LAddGreenToBlueAndRed_C(const uint32_t* src, int num_pixels,
                                uint32_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
    const uint32_t argb = src[i];
    const uint32_t green = ((argb >> 8) & 0xff);
    uint32_t red_blue = (argb & 0x00ff00ffu);
    // Both additions are done in one 32-bit add; the mask afterwards discards
    // the carries, which land in the (cleared) bytes above each channel.
    red_blue += (green << 16) | green;
    red_blue &= 0x00ff00ffu;
    dst[i] = (argb & 0xff00ff00u) | red_blue;
  }
}
273*b2055c35SXin Li 
ColorTransformDelta(int8_t color_pred,int8_t color)274*b2055c35SXin Li static WEBP_INLINE int ColorTransformDelta(int8_t color_pred,
275*b2055c35SXin Li                                            int8_t color) {
276*b2055c35SXin Li   return ((int)color_pred * color) >> 5;
277*b2055c35SXin Li }
278*b2055c35SXin Li 
ColorCodeToMultipliers(uint32_t color_code,VP8LMultipliers * const m)279*b2055c35SXin Li static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
280*b2055c35SXin Li                                                VP8LMultipliers* const m) {
281*b2055c35SXin Li   m->green_to_red_  = (color_code >>  0) & 0xff;
282*b2055c35SXin Li   m->green_to_blue_ = (color_code >>  8) & 0xff;
283*b2055c35SXin Li   m->red_to_blue_   = (color_code >> 16) & 0xff;
284*b2055c35SXin Li }
285*b2055c35SXin Li 
VP8LTransformColorInverse_C(const VP8LMultipliers * const m,const uint32_t * src,int num_pixels,uint32_t * dst)286*b2055c35SXin Li void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
287*b2055c35SXin Li                                  const uint32_t* src, int num_pixels,
288*b2055c35SXin Li                                  uint32_t* dst) {
289*b2055c35SXin Li   int i;
290*b2055c35SXin Li   for (i = 0; i < num_pixels; ++i) {
291*b2055c35SXin Li     const uint32_t argb = src[i];
292*b2055c35SXin Li     const int8_t green = (int8_t)(argb >> 8);
293*b2055c35SXin Li     const uint32_t red = argb >> 16;
294*b2055c35SXin Li     int new_red = red & 0xff;
295*b2055c35SXin Li     int new_blue = argb & 0xff;
296*b2055c35SXin Li     new_red += ColorTransformDelta((int8_t)m->green_to_red_, green);
297*b2055c35SXin Li     new_red &= 0xff;
298*b2055c35SXin Li     new_blue += ColorTransformDelta((int8_t)m->green_to_blue_, green);
299*b2055c35SXin Li     new_blue += ColorTransformDelta((int8_t)m->red_to_blue_, (int8_t)new_red);
300*b2055c35SXin Li     new_blue &= 0xff;
301*b2055c35SXin Li     dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
302*b2055c35SXin Li   }
303*b2055c35SXin Li }
304*b2055c35SXin Li 
305*b2055c35SXin Li // Color space inverse transform.
ColorSpaceInverseTransform_C(const VP8LTransform * const transform,int y_start,int y_end,const uint32_t * src,uint32_t * dst)306*b2055c35SXin Li static void ColorSpaceInverseTransform_C(const VP8LTransform* const transform,
307*b2055c35SXin Li                                          int y_start, int y_end,
308*b2055c35SXin Li                                          const uint32_t* src, uint32_t* dst) {
309*b2055c35SXin Li   const int width = transform->xsize_;
310*b2055c35SXin Li   const int tile_width = 1 << transform->bits_;
311*b2055c35SXin Li   const int mask = tile_width - 1;
312*b2055c35SXin Li   const int safe_width = width & ~mask;
313*b2055c35SXin Li   const int remaining_width = width - safe_width;
314*b2055c35SXin Li   const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
315*b2055c35SXin Li   int y = y_start;
316*b2055c35SXin Li   const uint32_t* pred_row =
317*b2055c35SXin Li       transform->data_ + (y >> transform->bits_) * tiles_per_row;
318*b2055c35SXin Li 
319*b2055c35SXin Li   while (y < y_end) {
320*b2055c35SXin Li     const uint32_t* pred = pred_row;
321*b2055c35SXin Li     VP8LMultipliers m = { 0, 0, 0 };
322*b2055c35SXin Li     const uint32_t* const src_safe_end = src + safe_width;
323*b2055c35SXin Li     const uint32_t* const src_end = src + width;
324*b2055c35SXin Li     while (src < src_safe_end) {
325*b2055c35SXin Li       ColorCodeToMultipliers(*pred++, &m);
326*b2055c35SXin Li       VP8LTransformColorInverse(&m, src, tile_width, dst);
327*b2055c35SXin Li       src += tile_width;
328*b2055c35SXin Li       dst += tile_width;
329*b2055c35SXin Li     }
330*b2055c35SXin Li     if (src < src_end) {  // Left-overs using C-version.
331*b2055c35SXin Li       ColorCodeToMultipliers(*pred++, &m);
332*b2055c35SXin Li       VP8LTransformColorInverse(&m, src, remaining_width, dst);
333*b2055c35SXin Li       src += remaining_width;
334*b2055c35SXin Li       dst += remaining_width;
335*b2055c35SXin Li     }
336*b2055c35SXin Li     ++y;
337*b2055c35SXin Li     if ((y & mask) == 0) pred_row += tiles_per_row;
338*b2055c35SXin Li   }
339*b2055c35SXin Li }
340*b2055c35SXin Li 
341*b2055c35SXin Li // Separate out pixels packed together using pixel-bundling.
342*b2055c35SXin Li // We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
343*b2055c35SXin Li #define COLOR_INDEX_INVERSE(FUNC_NAME, F_NAME, STATIC_DECL, TYPE, BIT_SUFFIX,  \
344*b2055c35SXin Li                             GET_INDEX, GET_VALUE)                              \
345*b2055c35SXin Li static void F_NAME(const TYPE* src, const uint32_t* const color_map,           \
346*b2055c35SXin Li                    TYPE* dst, int y_start, int y_end, int width) {             \
347*b2055c35SXin Li   int y;                                                                       \
348*b2055c35SXin Li   for (y = y_start; y < y_end; ++y) {                                          \
349*b2055c35SXin Li     int x;                                                                     \
350*b2055c35SXin Li     for (x = 0; x < width; ++x) {                                              \
351*b2055c35SXin Li       *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
352*b2055c35SXin Li     }                                                                          \
353*b2055c35SXin Li   }                                                                            \
354*b2055c35SXin Li }                                                                              \
355*b2055c35SXin Li STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform,               \
356*b2055c35SXin Li                            int y_start, int y_end, const TYPE* src,            \
357*b2055c35SXin Li                            TYPE* dst) {                                        \
358*b2055c35SXin Li   int y;                                                                       \
359*b2055c35SXin Li   const int bits_per_pixel = 8 >> transform->bits_;                            \
360*b2055c35SXin Li   const int width = transform->xsize_;                                         \
361*b2055c35SXin Li   const uint32_t* const color_map = transform->data_;                          \
362*b2055c35SXin Li   if (bits_per_pixel < 8) {                                                    \
363*b2055c35SXin Li     const int pixels_per_byte = 1 << transform->bits_;                         \
364*b2055c35SXin Li     const int count_mask = pixels_per_byte - 1;                                \
365*b2055c35SXin Li     const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
366*b2055c35SXin Li     for (y = y_start; y < y_end; ++y) {                                        \
367*b2055c35SXin Li       uint32_t packed_pixels = 0;                                              \
368*b2055c35SXin Li       int x;                                                                   \
369*b2055c35SXin Li       for (x = 0; x < width; ++x) {                                            \
370*b2055c35SXin Li         /* We need to load fresh 'packed_pixels' once every                */  \
371*b2055c35SXin Li         /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */  \
372*b2055c35SXin Li         /* is a power of 2, so can just use a mask for that, instead of    */  \
373*b2055c35SXin Li         /* decrementing a counter.                                         */  \
374*b2055c35SXin Li         if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);          \
375*b2055c35SXin Li         *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);               \
376*b2055c35SXin Li         packed_pixels >>= bits_per_pixel;                                      \
377*b2055c35SXin Li       }                                                                        \
378*b2055c35SXin Li     }                                                                          \
379*b2055c35SXin Li   } else {                                                                     \
380*b2055c35SXin Li     VP8LMapColor##BIT_SUFFIX(src, color_map, dst, y_start, y_end, width);      \
381*b2055c35SXin Li   }                                                                            \
382*b2055c35SXin Li }
383*b2055c35SXin Li 
384*b2055c35SXin Li COLOR_INDEX_INVERSE(ColorIndexInverseTransform_C, MapARGB_C, static,
385*b2055c35SXin Li                     uint32_t, 32b, VP8GetARGBIndex, VP8GetARGBValue)
386*b2055c35SXin Li COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha_C, ,
387*b2055c35SXin Li                     uint8_t, 8b, VP8GetAlphaIndex, VP8GetAlphaValue)
388*b2055c35SXin Li 
389*b2055c35SXin Li #undef COLOR_INDEX_INVERSE
390*b2055c35SXin Li 
VP8LInverseTransform(const VP8LTransform * const transform,int row_start,int row_end,const uint32_t * const in,uint32_t * const out)391*b2055c35SXin Li void VP8LInverseTransform(const VP8LTransform* const transform,
392*b2055c35SXin Li                           int row_start, int row_end,
393*b2055c35SXin Li                           const uint32_t* const in, uint32_t* const out) {
394*b2055c35SXin Li   const int width = transform->xsize_;
395*b2055c35SXin Li   assert(row_start < row_end);
396*b2055c35SXin Li   assert(row_end <= transform->ysize_);
397*b2055c35SXin Li   switch (transform->type_) {
398*b2055c35SXin Li     case SUBTRACT_GREEN_TRANSFORM:
399*b2055c35SXin Li       VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
400*b2055c35SXin Li       break;
401*b2055c35SXin Li     case PREDICTOR_TRANSFORM:
402*b2055c35SXin Li       PredictorInverseTransform_C(transform, row_start, row_end, in, out);
403*b2055c35SXin Li       if (row_end != transform->ysize_) {
404*b2055c35SXin Li         // The last predicted row in this iteration will be the top-pred row
405*b2055c35SXin Li         // for the first row in next iteration.
406*b2055c35SXin Li         memcpy(out - width, out + (row_end - row_start - 1) * width,
407*b2055c35SXin Li                width * sizeof(*out));
408*b2055c35SXin Li       }
409*b2055c35SXin Li       break;
410*b2055c35SXin Li     case CROSS_COLOR_TRANSFORM:
411*b2055c35SXin Li       ColorSpaceInverseTransform_C(transform, row_start, row_end, in, out);
412*b2055c35SXin Li       break;
413*b2055c35SXin Li     case COLOR_INDEXING_TRANSFORM:
414*b2055c35SXin Li       if (in == out && transform->bits_ > 0) {
415*b2055c35SXin Li         // Move packed pixels to the end of unpacked region, so that unpacking
416*b2055c35SXin Li         // can occur seamlessly.
417*b2055c35SXin Li         // Also, note that this is the only transform that applies on
418*b2055c35SXin Li         // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
419*b2055c35SXin Li         // transforms work on effective width of xsize_.
420*b2055c35SXin Li         const int out_stride = (row_end - row_start) * width;
421*b2055c35SXin Li         const int in_stride = (row_end - row_start) *
422*b2055c35SXin Li             VP8LSubSampleSize(transform->xsize_, transform->bits_);
423*b2055c35SXin Li         uint32_t* const src = out + out_stride - in_stride;
424*b2055c35SXin Li         memmove(src, out, in_stride * sizeof(*src));
425*b2055c35SXin Li         ColorIndexInverseTransform_C(transform, row_start, row_end, src, out);
426*b2055c35SXin Li       } else {
427*b2055c35SXin Li         ColorIndexInverseTransform_C(transform, row_start, row_end, in, out);
428*b2055c35SXin Li       }
429*b2055c35SXin Li       break;
430*b2055c35SXin Li   }
431*b2055c35SXin Li }
432*b2055c35SXin Li 
433*b2055c35SXin Li //------------------------------------------------------------------------------
434*b2055c35SXin Li // Color space conversion.
435*b2055c35SXin Li 
// Runtime endianness probe: returns non-zero when the first byte in memory of
// a 16-bit value equal to 1 is not 1, i.e. on a big-endian host.
static int is_big_endian(void) {
  static const union {
    uint16_t w;
    uint8_t b[2];
  } tmp = { 1 };
  return (tmp.b[0] != 1);
}
443*b2055c35SXin Li 
// Converts 'num_pixels' packed 32-bit pixels to 3 bytes each. For every pixel
// the bytes at bits 16..23, 8..15 and 0..7 are written to 'dst' in that
// order; bits 24..31 are dropped. 'dst' must hold 3 * num_pixels bytes.
void VP8LConvertBGRAToRGB_C(const uint32_t* src,
                            int num_pixels, uint8_t* dst) {
  const uint32_t* const src_end = src + num_pixels;
  while (src < src_end) {
    const uint32_t argb = *src++;
    *dst++ = (argb >> 16) & 0xff;
    *dst++ = (argb >>  8) & 0xff;
    *dst++ = (argb >>  0) & 0xff;
  }
}
454*b2055c35SXin Li 
// Converts 'num_pixels' packed 32-bit pixels to 4 bytes each. For every pixel
// the bytes at bits 16..23, 8..15, 0..7 and 24..31 are written to 'dst' in
// that order. 'dst' must hold 4 * num_pixels bytes.
void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
                             int num_pixels, uint8_t* dst) {
  const uint32_t* const src_end = src + num_pixels;
  while (src < src_end) {
    const uint32_t argb = *src++;
    *dst++ = (argb >> 16) & 0xff;
    *dst++ = (argb >>  8) & 0xff;
    *dst++ = (argb >>  0) & 0xff;
    *dst++ = (argb >> 24) & 0xff;
  }
}
466*b2055c35SXin Li 
// Converts 'num_pixels' packed 32-bit pixels to 2 bytes each, keeping only
// the upper 4 bits of every channel:
//   rg = high nibble of bits 16..23 | high nibble of bits 8..15
//   ba = high nibble of bits 0..7   | high nibble of bits 24..31
// The bytes are stored as (rg, ba), or as (ba, rg) when
// WEBP_SWAP_16BIT_CSP == 1. 'dst' must hold 2 * num_pixels bytes.
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
                                 int num_pixels, uint8_t* dst) {
  const uint32_t* const src_end = src + num_pixels;
  while (src < src_end) {
    const uint32_t argb = *src++;
    const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
    const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
#if (WEBP_SWAP_16BIT_CSP == 1)
    *dst++ = ba;
    *dst++ = rg;
#else
    *dst++ = rg;
    *dst++ = ba;
#endif
  }
}
483*b2055c35SXin Li 
// Converts 'num_pixels' packed 32-bit pixels to 2 bytes each in 5-6-5 form:
//   rg = top 5 bits of bits 16..23 | top 3 bits of bits 8..15
//   gb = bits 10..12 (next 3 bits of the same byte) | top 5 bits of bits 0..7
// Bits 24..31 are dropped. The bytes are stored as (rg, gb), or as (gb, rg)
// when WEBP_SWAP_16BIT_CSP == 1. 'dst' must hold 2 * num_pixels bytes.
void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
                               int num_pixels, uint8_t* dst) {
  const uint32_t* const src_end = src + num_pixels;
  while (src < src_end) {
    const uint32_t argb = *src++;
    const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
    const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
#if (WEBP_SWAP_16BIT_CSP == 1)
    *dst++ = gb;
    *dst++ = rg;
#else
    *dst++ = rg;
    *dst++ = gb;
#endif
  }
}
500*b2055c35SXin Li 
// Expands each 32-bit ARGB word of 'src' into three bytes in 'dst', ordered
// blue, green, red; the alpha byte is discarded.
// Reads 'num_pixels' words and writes 3 * num_pixels bytes.
void VP8LConvertBGRAToBGR_C(const uint32_t* src,
                            int num_pixels, uint8_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
    const uint32_t pixel = src[i];
    dst[0] = (pixel >>  0) & 0xff;   // blue
    dst[1] = (pixel >>  8) & 0xff;   // green
    dst[2] = (pixel >> 16) & 0xff;   // red
    dst += 3;
  }
}
511*b2055c35SXin Li 
// Transfers 'num_pixels' 32-bit words from 'src' to the byte buffer 'dst'.
// When is_big_endian() equals 'swap_on_big_endian', every word is byte-swapped
// with BSwap32() and stored through WebPUint32ToMem(); otherwise the words are
// copied verbatim with a single memcpy().
static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
                       int swap_on_big_endian) {
  int i;
  if (is_big_endian() != swap_on_big_endian) {
    // No swap needed here: plain bulk copy.
    memcpy(dst, src, num_pixels * sizeof(*src));
    return;
  }
  for (i = 0; i < num_pixels; ++i) {
    const uint32_t pixel = src[i];
    WebPUint32ToMem(dst, BSwap32(pixel));
    dst += sizeof(pixel);
  }
}
525*b2055c35SXin Li 
// Converts 'num_pixels' 32-bit pixels from 'in_data' into the byte layout
// selected by 'out_colorspace', writing the result to 'rgba'.
// Each straight/premultiplied mode pair (e.g. MODE_RGBA / MODE_rgbA) shares
// the same conversion; the premultiplied variant additionally runs the
// alpha-multiply helper over the freshly written output.
// An unsupported colorspace trips assert(0).
void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
                         WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
  switch (out_colorspace) {
    // Modes without an alpha channel.
    case MODE_RGB:
      VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
      return;
    case MODE_BGR:
      VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
      return;
    case MODE_RGB_565:
      VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
      return;

    // Modes with alpha: convert, then premultiply if requested.
    // NOTE(review): the second argument of WebPApplyAlphaMultiply() presumably
    // flags an alpha-first layout, and (num_pixels, 1, 0) presumably describes
    // a single row -- confirm against the helper's declaration.
    case MODE_RGBA:
    case MODE_rgbA:
      VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
      if (out_colorspace == MODE_rgbA) {
        WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
      }
      return;
    case MODE_BGRA:
    case MODE_bgrA:
      CopyOrSwap(in_data, num_pixels, rgba, 1);
      if (out_colorspace == MODE_bgrA) {
        WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
      }
      return;
    case MODE_ARGB:
    case MODE_Argb:
      CopyOrSwap(in_data, num_pixels, rgba, 0);
      if (out_colorspace == MODE_Argb) {
        WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
      }
      return;
    case MODE_RGBA_4444:
    case MODE_rgbA_4444:
      VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
      if (out_colorspace == MODE_rgbA_4444) {
        WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
      }
      return;

    default:
      assert(0);          // Code flow should not reach here.
      return;
  }
}
570*b2055c35SXin Li 
571*b2055c35SXin Li //------------------------------------------------------------------------------
572*b2055c35SXin Li 
573*b2055c35SXin Li VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
574*b2055c35SXin Li VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
575*b2055c35SXin Li VP8LPredictorFunc VP8LPredictors[16];
576*b2055c35SXin Li 
577*b2055c35SXin Li // exposed plain-C implementations
578*b2055c35SXin Li VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
579*b2055c35SXin Li 
580*b2055c35SXin Li VP8LTransformColorInverseFunc VP8LTransformColorInverse;
581*b2055c35SXin Li 
582*b2055c35SXin Li VP8LConvertFunc VP8LConvertBGRAToRGB;
583*b2055c35SXin Li VP8LConvertFunc VP8LConvertBGRAToRGBA;
584*b2055c35SXin Li VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
585*b2055c35SXin Li VP8LConvertFunc VP8LConvertBGRAToRGB565;
586*b2055c35SXin Li VP8LConvertFunc VP8LConvertBGRAToBGR;
587*b2055c35SXin Li 
588*b2055c35SXin Li VP8LMapARGBFunc VP8LMapColor32b;
589*b2055c35SXin Li VP8LMapAlphaFunc VP8LMapColor8b;
590*b2055c35SXin Li 
591*b2055c35SXin Li extern VP8CPUInfo VP8GetCPUInfo;
592*b2055c35SXin Li extern void VP8LDspInitSSE2(void);
593*b2055c35SXin Li extern void VP8LDspInitSSE41(void);
594*b2055c35SXin Li extern void VP8LDspInitNEON(void);
595*b2055c35SXin Li extern void VP8LDspInitMIPSdspR2(void);
596*b2055c35SXin Li extern void VP8LDspInitMSA(void);
597*b2055c35SXin Li 
598*b2055c35SXin Li #define COPY_PREDICTOR_ARRAY(IN, OUT) do {                \
599*b2055c35SXin Li   (OUT)[0] = IN##0_C;                                     \
600*b2055c35SXin Li   (OUT)[1] = IN##1_C;                                     \
601*b2055c35SXin Li   (OUT)[2] = IN##2_C;                                     \
602*b2055c35SXin Li   (OUT)[3] = IN##3_C;                                     \
603*b2055c35SXin Li   (OUT)[4] = IN##4_C;                                     \
604*b2055c35SXin Li   (OUT)[5] = IN##5_C;                                     \
605*b2055c35SXin Li   (OUT)[6] = IN##6_C;                                     \
606*b2055c35SXin Li   (OUT)[7] = IN##7_C;                                     \
607*b2055c35SXin Li   (OUT)[8] = IN##8_C;                                     \
608*b2055c35SXin Li   (OUT)[9] = IN##9_C;                                     \
609*b2055c35SXin Li   (OUT)[10] = IN##10_C;                                   \
610*b2055c35SXin Li   (OUT)[11] = IN##11_C;                                   \
611*b2055c35SXin Li   (OUT)[12] = IN##12_C;                                   \
612*b2055c35SXin Li   (OUT)[13] = IN##13_C;                                   \
613*b2055c35SXin Li   (OUT)[14] = IN##0_C; /* <- padding security sentinels*/ \
614*b2055c35SXin Li   (OUT)[15] = IN##0_C;                                    \
615*b2055c35SXin Li } while (0);
616*b2055c35SXin Li 
WEBP_DSP_INIT_FUNC(VP8LDspInit)617*b2055c35SXin Li WEBP_DSP_INIT_FUNC(VP8LDspInit) {
618*b2055c35SXin Li   COPY_PREDICTOR_ARRAY(VP8LPredictor, VP8LPredictors)
619*b2055c35SXin Li   COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd)
620*b2055c35SXin Li   COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C)
621*b2055c35SXin Li 
622*b2055c35SXin Li #if !WEBP_NEON_OMIT_C_CODE
623*b2055c35SXin Li   VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
624*b2055c35SXin Li 
625*b2055c35SXin Li   VP8LTransformColorInverse = VP8LTransformColorInverse_C;
626*b2055c35SXin Li 
627*b2055c35SXin Li   VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
628*b2055c35SXin Li   VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
629*b2055c35SXin Li   VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
630*b2055c35SXin Li #endif
631*b2055c35SXin Li 
632*b2055c35SXin Li   VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
633*b2055c35SXin Li   VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
634*b2055c35SXin Li 
635*b2055c35SXin Li   VP8LMapColor32b = MapARGB_C;
636*b2055c35SXin Li   VP8LMapColor8b = MapAlpha_C;
637*b2055c35SXin Li 
638*b2055c35SXin Li   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
639*b2055c35SXin Li   if (VP8GetCPUInfo != NULL) {
640*b2055c35SXin Li #if defined(WEBP_HAVE_SSE2)
641*b2055c35SXin Li     if (VP8GetCPUInfo(kSSE2)) {
642*b2055c35SXin Li       VP8LDspInitSSE2();
643*b2055c35SXin Li #if defined(WEBP_HAVE_SSE41)
644*b2055c35SXin Li       if (VP8GetCPUInfo(kSSE4_1)) {
645*b2055c35SXin Li         VP8LDspInitSSE41();
646*b2055c35SXin Li       }
647*b2055c35SXin Li #endif
648*b2055c35SXin Li     }
649*b2055c35SXin Li #endif
650*b2055c35SXin Li #if defined(WEBP_USE_MIPS_DSP_R2)
651*b2055c35SXin Li     if (VP8GetCPUInfo(kMIPSdspR2)) {
652*b2055c35SXin Li       VP8LDspInitMIPSdspR2();
653*b2055c35SXin Li     }
654*b2055c35SXin Li #endif
655*b2055c35SXin Li #if defined(WEBP_USE_MSA)
656*b2055c35SXin Li     if (VP8GetCPUInfo(kMSA)) {
657*b2055c35SXin Li       VP8LDspInitMSA();
658*b2055c35SXin Li     }
659*b2055c35SXin Li #endif
660*b2055c35SXin Li   }
661*b2055c35SXin Li 
662*b2055c35SXin Li #if defined(WEBP_HAVE_NEON)
663*b2055c35SXin Li   if (WEBP_NEON_OMIT_C_CODE ||
664*b2055c35SXin Li       (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
665*b2055c35SXin Li     VP8LDspInitNEON();
666*b2055c35SXin Li   }
667*b2055c35SXin Li #endif
668*b2055c35SXin Li 
669*b2055c35SXin Li   assert(VP8LAddGreenToBlueAndRed != NULL);
670*b2055c35SXin Li   assert(VP8LTransformColorInverse != NULL);
671*b2055c35SXin Li   assert(VP8LConvertBGRAToRGBA != NULL);
672*b2055c35SXin Li   assert(VP8LConvertBGRAToRGB != NULL);
673*b2055c35SXin Li   assert(VP8LConvertBGRAToBGR != NULL);
674*b2055c35SXin Li   assert(VP8LConvertBGRAToRGBA4444 != NULL);
675*b2055c35SXin Li   assert(VP8LConvertBGRAToRGB565 != NULL);
676*b2055c35SXin Li   assert(VP8LMapColor32b != NULL);
677*b2055c35SXin Li   assert(VP8LMapColor8b != NULL);
678*b2055c35SXin Li }
679*b2055c35SXin Li #undef COPY_PREDICTOR_ARRAY
680*b2055c35SXin Li 
681*b2055c35SXin Li //------------------------------------------------------------------------------
682