xref: /aosp_15_r20/external/webp/src/enc/analysis_enc.c (revision b2055c353e87c8814eb2b6b1b11112a1562253bd)
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Macroblock analysis
//
// Author: Skal ([email protected])
13*b2055c35SXin Li 
14*b2055c35SXin Li #include <stdlib.h>
15*b2055c35SXin Li #include <string.h>
16*b2055c35SXin Li #include <assert.h>
17*b2055c35SXin Li 
18*b2055c35SXin Li #include "src/enc/vp8i_enc.h"
19*b2055c35SXin Li #include "src/enc/cost_enc.h"
20*b2055c35SXin Li #include "src/utils/utils.h"
21*b2055c35SXin Li 
// Upper bound on the number of k-means refinement passes in AssignSegments().
#define MAX_ITERS_K_MEANS  6
23*b2055c35SXin Li 
24*b2055c35SXin Li //------------------------------------------------------------------------------
25*b2055c35SXin Li // Smooth the segment map by replacing isolated block by the majority of its
26*b2055c35SXin Li // neighbours.
27*b2055c35SXin Li 
SmoothSegmentMap(VP8Encoder * const enc)28*b2055c35SXin Li static void SmoothSegmentMap(VP8Encoder* const enc) {
29*b2055c35SXin Li   int n, x, y;
30*b2055c35SXin Li   const int w = enc->mb_w_;
31*b2055c35SXin Li   const int h = enc->mb_h_;
32*b2055c35SXin Li   const int majority_cnt_3_x_3_grid = 5;
33*b2055c35SXin Li   uint8_t* const tmp = (uint8_t*)WebPSafeMalloc(w * h, sizeof(*tmp));
34*b2055c35SXin Li   assert((uint64_t)(w * h) == (uint64_t)w * h);   // no overflow, as per spec
35*b2055c35SXin Li 
36*b2055c35SXin Li   if (tmp == NULL) return;
37*b2055c35SXin Li   for (y = 1; y < h - 1; ++y) {
38*b2055c35SXin Li     for (x = 1; x < w - 1; ++x) {
39*b2055c35SXin Li       int cnt[NUM_MB_SEGMENTS] = { 0 };
40*b2055c35SXin Li       const VP8MBInfo* const mb = &enc->mb_info_[x + w * y];
41*b2055c35SXin Li       int majority_seg = mb->segment_;
42*b2055c35SXin Li       // Check the 8 neighbouring segment values.
43*b2055c35SXin Li       cnt[mb[-w - 1].segment_]++;  // top-left
44*b2055c35SXin Li       cnt[mb[-w + 0].segment_]++;  // top
45*b2055c35SXin Li       cnt[mb[-w + 1].segment_]++;  // top-right
46*b2055c35SXin Li       cnt[mb[   - 1].segment_]++;  // left
47*b2055c35SXin Li       cnt[mb[   + 1].segment_]++;  // right
48*b2055c35SXin Li       cnt[mb[ w - 1].segment_]++;  // bottom-left
49*b2055c35SXin Li       cnt[mb[ w + 0].segment_]++;  // bottom
50*b2055c35SXin Li       cnt[mb[ w + 1].segment_]++;  // bottom-right
51*b2055c35SXin Li       for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
52*b2055c35SXin Li         if (cnt[n] >= majority_cnt_3_x_3_grid) {
53*b2055c35SXin Li           majority_seg = n;
54*b2055c35SXin Li           break;
55*b2055c35SXin Li         }
56*b2055c35SXin Li       }
57*b2055c35SXin Li       tmp[x + y * w] = majority_seg;
58*b2055c35SXin Li     }
59*b2055c35SXin Li   }
60*b2055c35SXin Li   for (y = 1; y < h - 1; ++y) {
61*b2055c35SXin Li     for (x = 1; x < w - 1; ++x) {
62*b2055c35SXin Li       VP8MBInfo* const mb = &enc->mb_info_[x + w * y];
63*b2055c35SXin Li       mb->segment_ = tmp[x + y * w];
64*b2055c35SXin Li     }
65*b2055c35SXin Li   }
66*b2055c35SXin Li   WebPSafeFree(tmp);
67*b2055c35SXin Li }
68*b2055c35SXin Li 
69*b2055c35SXin Li //------------------------------------------------------------------------------
70*b2055c35SXin Li // set segment susceptibility alpha_ / beta_
71*b2055c35SXin Li 
clip(int v,int m,int M)72*b2055c35SXin Li static WEBP_INLINE int clip(int v, int m, int M) {
73*b2055c35SXin Li   return (v < m) ? m : (v > M) ? M : v;
74*b2055c35SXin Li }
75*b2055c35SXin Li 
// Derives the per-segment alpha_ / beta_ values stored in enc->dqm_[] from
// the segment centers:
//   alpha_ = 255 * (center - mid) / (max - min), clipped to [-127, 127]
//   beta_  = 255 * (center - min) / (max - min), clipped to [0, 255]
// where min / max are the extreme centers among the segments in use.
// 'mid' is expected to lie within [min, max] (checked by assert).
static void SetSegmentAlphas(VP8Encoder* const enc,
                             const int centers[NUM_MB_SEGMENTS],
                             int mid) {
  const int num_segments = enc->segment_hdr_.num_segments_;
  int lo = centers[0];
  int hi = centers[0];
  int range;
  int i;

  // centers[0] already seeds both bounds, so the scan can start at index 1
  // (and is skipped altogether for a single segment).
  if (num_segments > 1) {
    for (i = 1; i < num_segments; ++i) {
      const int c = centers[i];
      if (c < lo) lo = c;
      if (c > hi) hi = c;
    }
  }
  if (hi == lo) hi = lo + 1;   // avoid a null range in the divisions below
  assert(mid <= hi && mid >= lo);
  range = hi - lo;

  for (i = 0; i < num_segments; ++i) {
    const int alpha = 255 * (centers[i] - mid) / range;
    const int beta = 255 * (centers[i] - lo) / range;
    enc->dqm_[i].alpha_ = clip(alpha, -127, 127);
    enc->dqm_[i].beta_ = clip(beta, 0, 255);
  }
}
98*b2055c35SXin Li 
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.

// 8b of precision for susceptibilities.
#define MAX_ALPHA 255
// Scaling factor applied to the raw alpha ratio in GetAlpha().
#define ALPHA_SCALE (2 * MAX_ALPHA)
// Starting value for the 'best alpha' search, before any mode is evaluated.
#define DEFAULT_ALPHA (-1)
// An alpha is 'better' when it is strictly larger.
#define IS_BETTER_ALPHA(alpha, best_alpha) ((alpha) > (best_alpha))
107*b2055c35SXin Li 
FinalAlphaValue(int alpha)108*b2055c35SXin Li static int FinalAlphaValue(int alpha) {
109*b2055c35SXin Li   alpha = MAX_ALPHA - alpha;
110*b2055c35SXin Li   return clip(alpha, 0, MAX_ALPHA);
111*b2055c35SXin Li }
112*b2055c35SXin Li 
GetAlpha(const VP8Histogram * const histo)113*b2055c35SXin Li static int GetAlpha(const VP8Histogram* const histo) {
114*b2055c35SXin Li   // 'alpha' will later be clipped to [0..MAX_ALPHA] range, clamping outer
115*b2055c35SXin Li   // values which happen to be mostly noise. This leaves the maximum precision
116*b2055c35SXin Li   // for handling the useful small values which contribute most.
117*b2055c35SXin Li   const int max_value = histo->max_value;
118*b2055c35SXin Li   const int last_non_zero = histo->last_non_zero;
119*b2055c35SXin Li   const int alpha =
120*b2055c35SXin Li       (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0;
121*b2055c35SXin Li   return alpha;
122*b2055c35SXin Li }
123*b2055c35SXin Li 
// Resets 'histo' to its starting state before a histogram collection:
// last_non_zero = 1 and max_value = 0.
static void InitHistogram(VP8Histogram* const histo) {
  histo->last_non_zero = 1;
  histo->max_value = 0;
}
128*b2055c35SXin Li 
129*b2055c35SXin Li //------------------------------------------------------------------------------
130*b2055c35SXin Li // Simplified k-Means, to assign Nb segments based on alpha-histogram
131*b2055c35SXin Li 
AssignSegments(VP8Encoder * const enc,const int alphas[MAX_ALPHA+1])132*b2055c35SXin Li static void AssignSegments(VP8Encoder* const enc,
133*b2055c35SXin Li                            const int alphas[MAX_ALPHA + 1]) {
134*b2055c35SXin Li   // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an
135*b2055c35SXin Li   // explicit check is needed to avoid spurious warning about 'n + 1' exceeding
136*b2055c35SXin Li   // array bounds of 'centers' with some compilers (noticed with gcc-4.9).
137*b2055c35SXin Li   const int nb = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) ?
138*b2055c35SXin Li                  enc->segment_hdr_.num_segments_ : NUM_MB_SEGMENTS;
139*b2055c35SXin Li   int centers[NUM_MB_SEGMENTS];
140*b2055c35SXin Li   int weighted_average = 0;
141*b2055c35SXin Li   int map[MAX_ALPHA + 1];
142*b2055c35SXin Li   int a, n, k;
143*b2055c35SXin Li   int min_a = 0, max_a = MAX_ALPHA, range_a;
144*b2055c35SXin Li   // 'int' type is ok for histo, and won't overflow
145*b2055c35SXin Li   int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS];
146*b2055c35SXin Li 
147*b2055c35SXin Li   assert(nb >= 1);
148*b2055c35SXin Li   assert(nb <= NUM_MB_SEGMENTS);
149*b2055c35SXin Li 
150*b2055c35SXin Li   // bracket the input
151*b2055c35SXin Li   for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {}
152*b2055c35SXin Li   min_a = n;
153*b2055c35SXin Li   for (n = MAX_ALPHA; n > min_a && alphas[n] == 0; --n) {}
154*b2055c35SXin Li   max_a = n;
155*b2055c35SXin Li   range_a = max_a - min_a;
156*b2055c35SXin Li 
157*b2055c35SXin Li   // Spread initial centers evenly
158*b2055c35SXin Li   for (k = 0, n = 1; k < nb; ++k, n += 2) {
159*b2055c35SXin Li     assert(n < 2 * nb);
160*b2055c35SXin Li     centers[k] = min_a + (n * range_a) / (2 * nb);
161*b2055c35SXin Li   }
162*b2055c35SXin Li 
163*b2055c35SXin Li   for (k = 0; k < MAX_ITERS_K_MEANS; ++k) {     // few iters are enough
164*b2055c35SXin Li     int total_weight;
165*b2055c35SXin Li     int displaced;
166*b2055c35SXin Li     // Reset stats
167*b2055c35SXin Li     for (n = 0; n < nb; ++n) {
168*b2055c35SXin Li       accum[n] = 0;
169*b2055c35SXin Li       dist_accum[n] = 0;
170*b2055c35SXin Li     }
171*b2055c35SXin Li     // Assign nearest center for each 'a'
172*b2055c35SXin Li     n = 0;    // track the nearest center for current 'a'
173*b2055c35SXin Li     for (a = min_a; a <= max_a; ++a) {
174*b2055c35SXin Li       if (alphas[a]) {
175*b2055c35SXin Li         while (n + 1 < nb && abs(a - centers[n + 1]) < abs(a - centers[n])) {
176*b2055c35SXin Li           n++;
177*b2055c35SXin Li         }
178*b2055c35SXin Li         map[a] = n;
179*b2055c35SXin Li         // accumulate contribution into best centroid
180*b2055c35SXin Li         dist_accum[n] += a * alphas[a];
181*b2055c35SXin Li         accum[n] += alphas[a];
182*b2055c35SXin Li       }
183*b2055c35SXin Li     }
184*b2055c35SXin Li     // All point are classified. Move the centroids to the
185*b2055c35SXin Li     // center of their respective cloud.
186*b2055c35SXin Li     displaced = 0;
187*b2055c35SXin Li     weighted_average = 0;
188*b2055c35SXin Li     total_weight = 0;
189*b2055c35SXin Li     for (n = 0; n < nb; ++n) {
190*b2055c35SXin Li       if (accum[n]) {
191*b2055c35SXin Li         const int new_center = (dist_accum[n] + accum[n] / 2) / accum[n];
192*b2055c35SXin Li         displaced += abs(centers[n] - new_center);
193*b2055c35SXin Li         centers[n] = new_center;
194*b2055c35SXin Li         weighted_average += new_center * accum[n];
195*b2055c35SXin Li         total_weight += accum[n];
196*b2055c35SXin Li       }
197*b2055c35SXin Li     }
198*b2055c35SXin Li     weighted_average = (weighted_average + total_weight / 2) / total_weight;
199*b2055c35SXin Li     if (displaced < 5) break;   // no need to keep on looping...
200*b2055c35SXin Li   }
201*b2055c35SXin Li 
202*b2055c35SXin Li   // Map each original value to the closest centroid
203*b2055c35SXin Li   for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
204*b2055c35SXin Li     VP8MBInfo* const mb = &enc->mb_info_[n];
205*b2055c35SXin Li     const int alpha = mb->alpha_;
206*b2055c35SXin Li     mb->segment_ = map[alpha];
207*b2055c35SXin Li     mb->alpha_ = centers[map[alpha]];  // for the record.
208*b2055c35SXin Li   }
209*b2055c35SXin Li 
210*b2055c35SXin Li   if (nb > 1) {
211*b2055c35SXin Li     const int smooth = (enc->config_->preprocessing & 1);
212*b2055c35SXin Li     if (smooth) SmoothSegmentMap(enc);
213*b2055c35SXin Li   }
214*b2055c35SXin Li 
215*b2055c35SXin Li   SetSegmentAlphas(enc, centers, weighted_average);  // pick some alphas.
216*b2055c35SXin Li }
217*b2055c35SXin Li 
//------------------------------------------------------------------------------
// Macroblock analysis: collect histogram for each mode, deduce the maximal
// susceptibility and set best modes for this macroblock.
// Segment assignment is done later.

// Number of modes to inspect for alpha_ evaluation. We don't need to test all
// the possible modes during the analysis phase: we risk falling into a local
// optimum, or being subject to boundary effects.
#define MAX_INTRA16_MODE 2
#define MAX_INTRA4_MODE  2
#define MAX_UV_MODE      2
229*b2055c35SXin Li 
// Evaluates the first MAX_INTRA16_MODE intra16 prediction modes on the
// current macroblock, records the one with the best (largest) alpha as the
// macroblock's intra16 mode and returns that alpha.
static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
  int best_alpha = DEFAULT_ALPHA;
  int best_mode = 0;
  int mode;

  VP8MakeLuma16Preds(it);
  for (mode = 0; mode < MAX_INTRA16_MODE; ++mode) {
    VP8Histogram histo;
    int alpha;

    InitHistogram(&histo);
    VP8CollectHistogram(it->yuv_in_ + Y_OFF_ENC,
                        it->yuv_p_ + VP8I16ModeOffsets[mode],
                        0, 16, &histo);
    alpha = GetAlpha(&histo);
    if (!IS_BETTER_ALPHA(alpha, best_alpha)) continue;
    best_alpha = alpha;
    best_mode = mode;
  }
  VP8SetIntra16Mode(it, best_mode);
  return best_alpha;
}
254*b2055c35SXin Li 
// Quick intra16 / intra4 decision based on the 16 values produced by
// VP8Mean16x4(): with S = sum(dc[]) and S2 = sum(dc[]^2), DC16 is selected
// when kThreshold * S2 < S * S, and DC4 (for all sub-blocks) otherwise.
// Always returns 0 as alpha.
static int FastMBAnalyze(VP8EncIterator* const it) {
  // Empirical cut-off value, should be around 16 (~=block size). We use the
  // [8-17] range and favor intra4 at high quality, intra16 for low quality.
  const int q = (int)it->enc_->config_->quality;
  const uint32_t kThreshold = 8 + (17 - 8) * q / 100;
  uint32_t dc[16];
  uint32_t sum = 0, sum_sq = 0;
  int k;

  // Each call handles a band of 4 rows and is expected to fill dc[k..k+3].
  // NOTE(review): presumably one value per 4x4 sub-block -- confirm against
  // the VP8Mean16x4() definition.
  for (k = 0; k < 16; k += 4) {
    VP8Mean16x4(it->yuv_in_ + Y_OFF_ENC + k * BPS, &dc[k]);
  }
  for (k = 0; k < 16; ++k) {
    sum += dc[k];
    sum_sq += dc[k] * dc[k];
  }
  // NOTE(review): the products below are computed in uint32_t; this assumes
  // the dc[] magnitudes keep them in range -- TODO confirm.
  if (kThreshold * sum_sq >= sum * sum) {
    const uint8_t modes[16] = { 0 };  // DC4
    VP8SetIntra4Mode(it, modes);
  } else {
    VP8SetIntra16Mode(it, 0);   // DC16
  }
  return 0;
}
277*b2055c35SXin Li 
// Evaluates the first MAX_UV_MODE chroma prediction modes on the current
// macroblock. The mode with the *smallest* alpha is recorded as the
// macroblock's UV mode, while the *largest* alpha seen is returned.
static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
  int largest_alpha = DEFAULT_ALPHA;
  int smallest_alpha = 0;
  int chosen_mode = 0;
  int mode;

  VP8MakeChroma8Preds(it);
  for (mode = 0; mode < MAX_UV_MODE; ++mode) {
    VP8Histogram histo;
    int alpha;

    InitHistogram(&histo);
    VP8CollectHistogram(it->yuv_in_ + U_OFF_ENC,
                        it->yuv_p_ + VP8UVModeOffsets[mode],
                        16, 16 + 4 + 4, &histo);
    alpha = GetAlpha(&histo);
    if (IS_BETTER_ALPHA(alpha, largest_alpha)) largest_alpha = alpha;
    // The best prediction mode tends to be the one with the smallest alpha.
    // Mode 0 unconditionally seeds the search.
    if (mode == 0 || alpha < smallest_alpha) {
      smallest_alpha = alpha;
      chosen_mode = mode;
    }
  }
  VP8SetIntraUVMode(it, chosen_mode);
  return largest_alpha;
}
306*b2055c35SXin Li 
// Analyzes the macroblock currently pointed to by 'it':
//  - resets it to the defaults (intra16 DC, not skipped, segment 0),
//  - picks prediction modes and gets the luma / chroma alphas (the fast luma
//    path is used for method_ <= 1),
//  - mixes them as (3 * luma + chroma + 2) >> 2 and finalizes the value,
//  - bumps 'alphas[]' at the mixed value and stores it in the macroblock,
//  - accumulates the mixed value into '*alpha' and the chroma alpha into
//    '*uv_alpha'.
static void MBAnalyze(VP8EncIterator* const it,
                      int alphas[MAX_ALPHA + 1],
                      int* const alpha, int* const uv_alpha) {
  int luma_alpha, chroma_alpha, mixed_alpha;

  VP8SetIntra16Mode(it, 0);  // default: Intra16, DC_PRED
  VP8SetSkip(it, 0);         // not skipped
  VP8SetSegment(it, 0);      // default segment, spec-wise.

  luma_alpha = (it->enc_->method_ <= 1) ? FastMBAnalyze(it)
                                        : MBAnalyzeBestIntra16Mode(it);
  chroma_alpha = MBAnalyzeBestUVMode(it);

  // Final susceptibility mix
  mixed_alpha = FinalAlphaValue((3 * luma_alpha + chroma_alpha + 2) >> 2);
  alphas[mixed_alpha]++;
  it->mb_->alpha_ = mixed_alpha;   // for later remapping.

  // Accumulate for later complexity analysis.
  *alpha += mixed_alpha;   // mixed susceptibility (not just luma)
  *uv_alpha += chroma_alpha;
}
334*b2055c35SXin Li 
// Puts 'mb' in its default state: I16x16 type, UV mode 0, not skipped,
// segment 0 and a null alpha.
static void DefaultMBInfo(VP8MBInfo* const mb) {
  mb->alpha_ = 0;
  mb->segment_ = 0;  // default segment
  mb->skip_ = 0;     // not skipped
  mb->uv_mode_ = 0;
  mb->type_ = 1;     // I16x16
}
342*b2055c35SXin Li 
343*b2055c35SXin Li //------------------------------------------------------------------------------
344*b2055c35SXin Li // Main analysis loop:
345*b2055c35SXin Li // Collect all susceptibilities for each macroblock and record their
346*b2055c35SXin Li // distribution in alphas[]. Segments is assigned a-posteriori, based on
347*b2055c35SXin Li // this histogram.
348*b2055c35SXin Li // We also pick an intra16 prediction mode, which shouldn't be considered
349*b2055c35SXin Li // final except for fast-encode settings. We can also pick some intra4 modes
350*b2055c35SXin Li // and decide intra4/intra16, but that's usually almost always a bad choice at
351*b2055c35SXin Li // this stage.
352*b2055c35SXin Li 
ResetAllMBInfo(VP8Encoder * const enc)353*b2055c35SXin Li static void ResetAllMBInfo(VP8Encoder* const enc) {
354*b2055c35SXin Li   int n;
355*b2055c35SXin Li   for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
356*b2055c35SXin Li     DefaultMBInfo(&enc->mb_info_[n]);
357*b2055c35SXin Li   }
358*b2055c35SXin Li   // Default susceptibilities.
359*b2055c35SXin Li   enc->dqm_[0].alpha_ = 0;
360*b2055c35SXin Li   enc->dqm_[0].beta_ = 0;
361*b2055c35SXin Li   // Note: we can't compute this alpha_ / uv_alpha_ -> set to default value.
362*b2055c35SXin Li   enc->alpha_ = 0;
363*b2055c35SXin Li   enc->uv_alpha_ = 0;
364*b2055c35SXin Li   WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
365*b2055c35SXin Li }
366*b2055c35SXin Li 
367*b2055c35SXin Li // struct used to collect job result
368*b2055c35SXin Li typedef struct {
369*b2055c35SXin Li   WebPWorker worker;
370*b2055c35SXin Li   int alphas[MAX_ALPHA + 1];
371*b2055c35SXin Li   int alpha, uv_alpha;
372*b2055c35SXin Li   VP8EncIterator it;
373*b2055c35SXin Li   int delta_progress;
374*b2055c35SXin Li } SegmentJob;
375*b2055c35SXin Li 
376*b2055c35SXin Li // main work call
DoSegmentsJob(void * arg1,void * arg2)377*b2055c35SXin Li static int DoSegmentsJob(void* arg1, void* arg2) {
378*b2055c35SXin Li   SegmentJob* const job = (SegmentJob*)arg1;
379*b2055c35SXin Li   VP8EncIterator* const it = (VP8EncIterator*)arg2;
380*b2055c35SXin Li   int ok = 1;
381*b2055c35SXin Li   if (!VP8IteratorIsDone(it)) {
382*b2055c35SXin Li     uint8_t tmp[32 + WEBP_ALIGN_CST];
383*b2055c35SXin Li     uint8_t* const scratch = (uint8_t*)WEBP_ALIGN(tmp);
384*b2055c35SXin Li     do {
385*b2055c35SXin Li       // Let's pretend we have perfect lossless reconstruction.
386*b2055c35SXin Li       VP8IteratorImport(it, scratch);
387*b2055c35SXin Li       MBAnalyze(it, job->alphas, &job->alpha, &job->uv_alpha);
388*b2055c35SXin Li       ok = VP8IteratorProgress(it, job->delta_progress);
389*b2055c35SXin Li     } while (ok && VP8IteratorNext(it));
390*b2055c35SXin Li   }
391*b2055c35SXin Li   return ok;
392*b2055c35SXin Li }
393*b2055c35SXin Li 
#ifdef WEBP_USE_THREAD
// Adds the statistics collected by 'src' (alpha sums and alpha histogram)
// into 'dst'.
static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) {
  int a;
  dst->alpha += src->alpha;
  dst->uv_alpha += src->uv_alpha;
  for (a = 0; a <= MAX_ALPHA; ++a) {
    dst->alphas[a] += src->alphas[a];
  }
}
#endif
402*b2055c35SXin Li 
403*b2055c35SXin Li // initialize the job struct with some tasks to perform
InitSegmentJob(VP8Encoder * const enc,SegmentJob * const job,int start_row,int end_row)404*b2055c35SXin Li static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job,
405*b2055c35SXin Li                            int start_row, int end_row) {
406*b2055c35SXin Li   WebPGetWorkerInterface()->Init(&job->worker);
407*b2055c35SXin Li   job->worker.data1 = job;
408*b2055c35SXin Li   job->worker.data2 = &job->it;
409*b2055c35SXin Li   job->worker.hook = DoSegmentsJob;
410*b2055c35SXin Li   VP8IteratorInit(enc, &job->it);
411*b2055c35SXin Li   VP8IteratorSetRow(&job->it, start_row);
412*b2055c35SXin Li   VP8IteratorSetCountDown(&job->it, (end_row - start_row) * enc->mb_w_);
413*b2055c35SXin Li   memset(job->alphas, 0, sizeof(job->alphas));
414*b2055c35SXin Li   job->alpha = 0;
415*b2055c35SXin Li   job->uv_alpha = 0;
416*b2055c35SXin Li   // only one of both jobs can record the progress, since we don't
417*b2055c35SXin Li   // expect the user's hook to be multi-thread safe
418*b2055c35SXin Li   job->delta_progress = (start_row == 0) ? 20 : 0;
419*b2055c35SXin Li }
420*b2055c35SXin Li 
421*b2055c35SXin Li // main entry point
VP8EncAnalyze(VP8Encoder * const enc)422*b2055c35SXin Li int VP8EncAnalyze(VP8Encoder* const enc) {
423*b2055c35SXin Li   int ok = 1;
424*b2055c35SXin Li   const int do_segments =
425*b2055c35SXin Li       enc->config_->emulate_jpeg_size ||   // We need the complexity evaluation.
426*b2055c35SXin Li       (enc->segment_hdr_.num_segments_ > 1) ||
427*b2055c35SXin Li       (enc->method_ <= 1);  // for method 0 - 1, we need preds_[] to be filled.
428*b2055c35SXin Li   if (do_segments) {
429*b2055c35SXin Li     const int last_row = enc->mb_h_;
430*b2055c35SXin Li     const int total_mb = last_row * enc->mb_w_;
431*b2055c35SXin Li #ifdef WEBP_USE_THREAD
432*b2055c35SXin Li     // We give a little more than a half work to the main thread.
433*b2055c35SXin Li     const int split_row = (9 * last_row + 15) >> 4;
434*b2055c35SXin Li     const int kMinSplitRow = 2;  // minimal rows needed for mt to be worth it
435*b2055c35SXin Li     const int do_mt = (enc->thread_level_ > 0) && (split_row >= kMinSplitRow);
436*b2055c35SXin Li #else
437*b2055c35SXin Li     const int do_mt = 0;
438*b2055c35SXin Li #endif
439*b2055c35SXin Li     const WebPWorkerInterface* const worker_interface =
440*b2055c35SXin Li         WebPGetWorkerInterface();
441*b2055c35SXin Li     SegmentJob main_job;
442*b2055c35SXin Li     if (do_mt) {
443*b2055c35SXin Li #ifdef WEBP_USE_THREAD
444*b2055c35SXin Li       SegmentJob side_job;
445*b2055c35SXin Li       // Note the use of '&' instead of '&&' because we must call the functions
446*b2055c35SXin Li       // no matter what.
447*b2055c35SXin Li       InitSegmentJob(enc, &main_job, 0, split_row);
448*b2055c35SXin Li       InitSegmentJob(enc, &side_job, split_row, last_row);
449*b2055c35SXin Li       // we don't need to call Reset() on main_job.worker, since we're calling
450*b2055c35SXin Li       // WebPWorkerExecute() on it
451*b2055c35SXin Li       ok &= worker_interface->Reset(&side_job.worker);
452*b2055c35SXin Li       // launch the two jobs in parallel
453*b2055c35SXin Li       if (ok) {
454*b2055c35SXin Li         worker_interface->Launch(&side_job.worker);
455*b2055c35SXin Li         worker_interface->Execute(&main_job.worker);
456*b2055c35SXin Li         ok &= worker_interface->Sync(&side_job.worker);
457*b2055c35SXin Li         ok &= worker_interface->Sync(&main_job.worker);
458*b2055c35SXin Li       }
459*b2055c35SXin Li       worker_interface->End(&side_job.worker);
460*b2055c35SXin Li       if (ok) MergeJobs(&side_job, &main_job);  // merge results together
461*b2055c35SXin Li #endif  // WEBP_USE_THREAD
462*b2055c35SXin Li     } else {
463*b2055c35SXin Li       // Even for single-thread case, we use the generic Worker tools.
464*b2055c35SXin Li       InitSegmentJob(enc, &main_job, 0, last_row);
465*b2055c35SXin Li       worker_interface->Execute(&main_job.worker);
466*b2055c35SXin Li       ok &= worker_interface->Sync(&main_job.worker);
467*b2055c35SXin Li     }
468*b2055c35SXin Li     worker_interface->End(&main_job.worker);
469*b2055c35SXin Li     if (ok) {
470*b2055c35SXin Li       enc->alpha_ = main_job.alpha / total_mb;
471*b2055c35SXin Li       enc->uv_alpha_ = main_job.uv_alpha / total_mb;
472*b2055c35SXin Li       AssignSegments(enc, main_job.alphas);
473*b2055c35SXin Li     }
474*b2055c35SXin Li   } else {   // Use only one default segment.
475*b2055c35SXin Li     ResetAllMBInfo(enc);
476*b2055c35SXin Li   }
477*b2055c35SXin Li   if (!ok) {
478*b2055c35SXin Li     return WebPEncodingSetError(enc->pic_,
479*b2055c35SXin Li                                 VP8_ENC_ERROR_OUT_OF_MEMORY);  // imprecise
480*b2055c35SXin Li   }
481*b2055c35SXin Li   return ok;
482*b2055c35SXin Li }
483*b2055c35SXin Li 
484