// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Macroblock analysis
//
// Author: Skal ([email protected])

#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "src/enc/vp8i_enc.h"
#include "src/enc/cost_enc.h"
#include "src/utils/utils.h"

#define MAX_ITERS_K_MEANS 6

//------------------------------------------------------------------------------
// Smooth the segment map by replacing isolated blocks by the majority of
// their neighbours.
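// Concretely, in the code below a block's segment_ is overwritten only when
// at least 5 of its 8 neighbours (a majority of the surrounding 3x3 grid)
// agree on a single segment value; border blocks are left untouched since
// they lack a full neighbourhood.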

static void SmoothSegmentMap(VP8Encoder* const enc) {
  int n, x, y;
  const int w = enc->mb_w_;
  const int h = enc->mb_h_;
  const int majority_cnt_3_x_3_grid = 5;
  uint8_t* const tmp = (uint8_t*)WebPSafeMalloc(w * h, sizeof(*tmp));
  assert((uint64_t)(w * h) == (uint64_t)w * h);  // no overflow, as per spec

  if (tmp == NULL) return;
  for (y = 1; y < h - 1; ++y) {
    for (x = 1; x < w - 1; ++x) {
      int cnt[NUM_MB_SEGMENTS] = { 0 };
      const VP8MBInfo* const mb = &enc->mb_info_[x + w * y];
      int majority_seg = mb->segment_;
      // Check the 8 neighbouring segment values.
      cnt[mb[-w - 1].segment_]++;  // top-left
      cnt[mb[-w + 0].segment_]++;  // top
      cnt[mb[-w + 1].segment_]++;  // top-right
      cnt[mb[   - 1].segment_]++;  // left
      cnt[mb[   + 1].segment_]++;  // right
      cnt[mb[ w - 1].segment_]++;  // bottom-left
      cnt[mb[ w + 0].segment_]++;  // bottom
      cnt[mb[ w + 1].segment_]++;  // bottom-right
      for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
        if (cnt[n] >= majority_cnt_3_x_3_grid) {
          majority_seg = n;
          break;
        }
      }
      tmp[x + y * w] = majority_seg;
    }
  }
  for (y = 1; y < h - 1; ++y) {
    for (x = 1; x < w - 1; ++x) {
      VP8MBInfo* const mb = &enc->mb_info_[x + w * y];
      mb->segment_ = tmp[x + y * w];
    }
  }
  WebPSafeFree(tmp);
}

//------------------------------------------------------------------------------
// set segment susceptibility alpha_ / beta_
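//
// In SetSegmentAlphas() below, alpha_ ends up in [-127, 127] and measures how
// far a segment's center lies from the overall weighted average, while beta_
// in [0, 255] is the center's relative position inside the [min, max] range
// of centers.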

static WEBP_INLINE int clip(int v, int m, int M) {
  return (v < m) ? m : (v > M) ? M : v;
}

static void SetSegmentAlphas(VP8Encoder* const enc,
                             const int centers[NUM_MB_SEGMENTS],
                             int mid) {
  const int nb = enc->segment_hdr_.num_segments_;
  int min = centers[0], max = centers[0];
  int n;

  if (nb > 1) {
    for (n = 0; n < nb; ++n) {
      if (min > centers[n]) min = centers[n];
      if (max < centers[n]) max = centers[n];
    }
  }
  if (max == min) max = min + 1;
  assert(mid <= max && mid >= min);
  for (n = 0; n < nb; ++n) {
    const int alpha = 255 * (centers[n] - mid) / (max - min);
    const int beta = 255 * (centers[n] - min) / (max - min);
    enc->dqm_[n].alpha_ = clip(alpha, -127, 127);
    enc->dqm_[n].beta_ = clip(beta, 0, 255);
  }
}

//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.

#define MAX_ALPHA 255                // 8b of precision for susceptibilities.
#define ALPHA_SCALE (2 * MAX_ALPHA)  // scaling factor for alpha.
#define DEFAULT_ALPHA (-1)
#define IS_BETTER_ALPHA(alpha, best_alpha) ((alpha) > (best_alpha))

static int FinalAlphaValue(int alpha) {
  alpha = MAX_ALPHA - alpha;
  return clip(alpha, 0, MAX_ALPHA);
}

static int GetAlpha(const VP8Histogram* const histo) {
  // 'alpha' will later be clipped to [0..MAX_ALPHA] range, clamping outer
  // values which happen to be mostly noise. This leaves the maximum precision
  // for handling the useful small values which contribute most.
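  // For instance, a histogram with max_value == 4 and last_non_zero == 12
  // yields alpha = ALPHA_SCALE * 12 / 4 = 1530 (ALPHA_SCALE being 510); such
  // out-of-range values are brought back into [0..MAX_ALPHA] later on, in
  // FinalAlphaValue().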
  const int max_value = histo->max_value;
  const int last_non_zero = histo->last_non_zero;
  const int alpha =
      (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0;
  return alpha;
}

static void InitHistogram(VP8Histogram* const histo) {
  histo->max_value = 0;
  histo->last_non_zero = 1;
}

//------------------------------------------------------------------------------
// Simplified k-Means, to assign Nb segments based on alpha-histogram
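//
// Outline of AssignSegments() below:
//   1. bracket the alpha values actually present into [min_a, max_a],
//   2. spread 'nb' initial centers evenly over that range,
//   3. for at most MAX_ITERS_K_MEANS iterations, assign each populated alpha
//      bin to its nearest center and move each center to the weighted mean of
//      its bins, stopping early once the total center displacement falls
//      below 5,
//   4. remap every macroblock's alpha_ to its center and derive the segment
//      susceptibilities from the final centers.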

static void AssignSegments(VP8Encoder* const enc,
                           const int alphas[MAX_ALPHA + 1]) {
  // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an
  // explicit check is needed to avoid spurious warning about 'n + 1' exceeding
  // array bounds of 'centers' with some compilers (noticed with gcc-4.9).
  const int nb = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) ?
                 enc->segment_hdr_.num_segments_ : NUM_MB_SEGMENTS;
  int centers[NUM_MB_SEGMENTS];
  int weighted_average = 0;
  int map[MAX_ALPHA + 1];
  int a, n, k;
  int min_a = 0, max_a = MAX_ALPHA, range_a;
  // 'int' type is ok for histo, and won't overflow
  int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS];

  assert(nb >= 1);
  assert(nb <= NUM_MB_SEGMENTS);

  // bracket the input
  for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {}
  min_a = n;
  for (n = MAX_ALPHA; n > min_a && alphas[n] == 0; --n) {}
  max_a = n;
  range_a = max_a - min_a;

  // Spread initial centers evenly
  for (k = 0, n = 1; k < nb; ++k, n += 2) {
    assert(n < 2 * nb);
    centers[k] = min_a + (n * range_a) / (2 * nb);
  }
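  // E.g. for nb == 4 the initial centers sit at min_a + range_a / 8,
  // min_a + 3 * range_a / 8, min_a + 5 * range_a / 8 and
  // min_a + 7 * range_a / 8: the midpoints of four equal sub-ranges.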

  for (k = 0; k < MAX_ITERS_K_MEANS; ++k) {  // few iters are enough
    int total_weight;
    int displaced;
    // Reset stats
    for (n = 0; n < nb; ++n) {
      accum[n] = 0;
      dist_accum[n] = 0;
    }
    // Assign nearest center for each 'a'
    n = 0;  // track the nearest center for current 'a'
    for (a = min_a; a <= max_a; ++a) {
      if (alphas[a]) {
        while (n + 1 < nb && abs(a - centers[n + 1]) < abs(a - centers[n])) {
          n++;
        }
        map[a] = n;
        // accumulate contribution into best centroid
        dist_accum[n] += a * alphas[a];
        accum[n] += alphas[a];
      }
    }
    // All points are classified. Move the centroids to the
    // center of their respective cloud.
    displaced = 0;
    weighted_average = 0;
    total_weight = 0;
    for (n = 0; n < nb; ++n) {
      if (accum[n]) {
        const int new_center = (dist_accum[n] + accum[n] / 2) / accum[n];
        displaced += abs(centers[n] - new_center);
        centers[n] = new_center;
        weighted_average += new_center * accum[n];
        total_weight += accum[n];
      }
    }
    weighted_average = (weighted_average + total_weight / 2) / total_weight;
    if (displaced < 5) break;  // no need to keep on looping...
  }

  // Map each original value to the closest centroid
  for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
    VP8MBInfo* const mb = &enc->mb_info_[n];
    const int alpha = mb->alpha_;
    mb->segment_ = map[alpha];
    mb->alpha_ = centers[map[alpha]];  // for the record.
  }

  if (nb > 1) {
    const int smooth = (enc->config_->preprocessing & 1);
    if (smooth) SmoothSegmentMap(enc);
  }

  SetSegmentAlphas(enc, centers, weighted_average);  // pick some alphas.
}

//------------------------------------------------------------------------------
// Macroblock analysis: collect histogram for each mode, deduce the maximal
// susceptibility and set best modes for this macroblock.
// Segment assignment is done later.

// Number of modes to inspect for alpha_ evaluation. We don't need to test all
// the possible modes during the analysis phase: we risk falling into a local
// optimum, or being subject to boundary effects.
#define MAX_INTRA16_MODE 2
#define MAX_INTRA4_MODE 2
#define MAX_UV_MODE 2
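
// With these limits only the first two candidate prediction modes are tried
// in the analysis loops below; as the 'Main analysis loop' comment further
// down notes, the modes picked here are not considered final except for
// fast-encode settings.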

static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
  const int max_mode = MAX_INTRA16_MODE;
  int mode;
  int best_alpha = DEFAULT_ALPHA;
  int best_mode = 0;

  VP8MakeLuma16Preds(it);
  for (mode = 0; mode < max_mode; ++mode) {
    VP8Histogram histo;
    int alpha;

    InitHistogram(&histo);
    VP8CollectHistogram(it->yuv_in_ + Y_OFF_ENC,
                        it->yuv_p_ + VP8I16ModeOffsets[mode],
                        0, 16, &histo);
    alpha = GetAlpha(&histo);
    if (IS_BETTER_ALPHA(alpha, best_alpha)) {
      best_alpha = alpha;
      best_mode = mode;
    }
  }
  VP8SetIntra16Mode(it, best_mode);
  return best_alpha;
}

static int FastMBAnalyze(VP8EncIterator* const it) {
  // Empirical cut-off value, should be around 16 (~= block size). We use the
  // [8-17] range and favor intra4 at high quality, intra16 for low quality.
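  // The test below compares the squared sum of the 16 sub-block DC values
  // with their sum of squares: m * m / m2 is 16 when all sub-blocks share the
  // same DC (a flat macroblock) and shrinks as they diverge, so
  // 'kThreshold * m2 < m * m' picks DC16 for sufficiently flat blocks.
  // For instance quality == 75 gives kThreshold = 8 + 9 * 75 / 100 = 14.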
  const int q = (int)it->enc_->config_->quality;
  const uint32_t kThreshold = 8 + (17 - 8) * q / 100;
  int k;
  uint32_t dc[16], m, m2;
  for (k = 0; k < 16; k += 4) {
    VP8Mean16x4(it->yuv_in_ + Y_OFF_ENC + k * BPS, &dc[k]);
  }
  for (m = 0, m2 = 0, k = 0; k < 16; ++k) {
    m += dc[k];
    m2 += dc[k] * dc[k];
  }
  if (kThreshold * m2 < m * m) {
    VP8SetIntra16Mode(it, 0);   // DC16
  } else {
    const uint8_t modes[16] = { 0 };  // DC4
    VP8SetIntra4Mode(it, modes);
  }
  return 0;
}

static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
  int best_alpha = DEFAULT_ALPHA;
  int smallest_alpha = 0;
  int best_mode = 0;
  const int max_mode = MAX_UV_MODE;
  int mode;

  VP8MakeChroma8Preds(it);
  for (mode = 0; mode < max_mode; ++mode) {
    VP8Histogram histo;
    int alpha;
    InitHistogram(&histo);
    VP8CollectHistogram(it->yuv_in_ + U_OFF_ENC,
                        it->yuv_p_ + VP8UVModeOffsets[mode],
                        16, 16 + 4 + 4, &histo);
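    // (block indices [16, 16 + 4 + 4) cover the U and V 4x4 sub-blocks,
    //  whereas the [0, 16) range used above covered the luma ones.)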
    alpha = GetAlpha(&histo);
    if (IS_BETTER_ALPHA(alpha, best_alpha)) {
      best_alpha = alpha;
    }
    // The best prediction mode tends to be the one with the smallest alpha.
    if (mode == 0 || alpha < smallest_alpha) {
      smallest_alpha = alpha;
      best_mode = mode;
    }
  }
  VP8SetIntraUVMode(it, best_mode);
  return best_alpha;
}

static void MBAnalyze(VP8EncIterator* const it,
                      int alphas[MAX_ALPHA + 1],
                      int* const alpha, int* const uv_alpha) {
  const VP8Encoder* const enc = it->enc_;
  int best_alpha, best_uv_alpha;

  VP8SetIntra16Mode(it, 0);  // default: Intra16, DC_PRED
  VP8SetSkip(it, 0);         // not skipped
  VP8SetSegment(it, 0);      // default segment, spec-wise.

  if (enc->method_ <= 1) {
    best_alpha = FastMBAnalyze(it);
  } else {
    best_alpha = MBAnalyzeBestIntra16Mode(it);
  }
  best_uv_alpha = MBAnalyzeBestUVMode(it);

  // Final susceptibility mix
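  // (a 3:1 luma/chroma weighted average with rounding; e.g. best_alpha == 100
  //  and best_uv_alpha == 40 give (300 + 40 + 2) >> 2 == 85, which
  //  FinalAlphaValue() then turns into 255 - 85 = 170.)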
  best_alpha = (3 * best_alpha + best_uv_alpha + 2) >> 2;
  best_alpha = FinalAlphaValue(best_alpha);
  alphas[best_alpha]++;
  it->mb_->alpha_ = best_alpha;   // for later remapping.

  // Accumulate for later complexity analysis.
  *alpha += best_alpha;   // mixed susceptibility (not just luma)
  *uv_alpha += best_uv_alpha;
}

static void DefaultMBInfo(VP8MBInfo* const mb) {
  mb->type_ = 1;     // I16x16
  mb->uv_mode_ = 0;
  mb->skip_ = 0;     // not skipped
  mb->segment_ = 0;  // default segment
  mb->alpha_ = 0;
}

//------------------------------------------------------------------------------
// Main analysis loop:
// Collect all susceptibilities for each macroblock and record their
// distribution in alphas[]. Segments are assigned a posteriori, based on
// this histogram.
// We also pick an intra16 prediction mode, which shouldn't be considered
// final except for fast-encode settings. We can also pick some intra4 modes
// and decide intra4/intra16, but that's almost always a bad choice at this
// stage.

static void ResetAllMBInfo(VP8Encoder* const enc) {
  int n;
  for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
    DefaultMBInfo(&enc->mb_info_[n]);
  }
  // Default susceptibilities.
  enc->dqm_[0].alpha_ = 0;
  enc->dqm_[0].beta_ = 0;
  // Note: we can't compute this alpha_ / uv_alpha_ -> set to default value.
  enc->alpha_ = 0;
  enc->uv_alpha_ = 0;
  WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
}

// struct used to collect job results
typedef struct {
  WebPWorker worker;
  int alphas[MAX_ALPHA + 1];
  int alpha, uv_alpha;
  VP8EncIterator it;
  int delta_progress;
} SegmentJob;

// main work call
static int DoSegmentsJob(void* arg1, void* arg2) {
  SegmentJob* const job = (SegmentJob*)arg1;
  VP8EncIterator* const it = (VP8EncIterator*)arg2;
  int ok = 1;
  if (!VP8IteratorIsDone(it)) {
    uint8_t tmp[32 + WEBP_ALIGN_CST];
    uint8_t* const scratch = (uint8_t*)WEBP_ALIGN(tmp);
    do {
      // Let's pretend we have perfect lossless reconstruction.
      VP8IteratorImport(it, scratch);
      MBAnalyze(it, job->alphas, &job->alpha, &job->uv_alpha);
      ok = VP8IteratorProgress(it, job->delta_progress);
    } while (ok && VP8IteratorNext(it));
  }
  return ok;
}

#ifdef WEBP_USE_THREAD
static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) {
  int i;
  for (i = 0; i <= MAX_ALPHA; ++i) dst->alphas[i] += src->alphas[i];
  dst->alpha += src->alpha;
  dst->uv_alpha += src->uv_alpha;
}
#endif

// initialize the job struct with some tasks to perform
static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job,
                           int start_row, int end_row) {
  WebPGetWorkerInterface()->Init(&job->worker);
  job->worker.data1 = job;
  job->worker.data2 = &job->it;
  job->worker.hook = DoSegmentsJob;
  VP8IteratorInit(enc, &job->it);
  VP8IteratorSetRow(&job->it, start_row);
  VP8IteratorSetCountDown(&job->it, (end_row - start_row) * enc->mb_w_);
  memset(job->alphas, 0, sizeof(job->alphas));
  job->alpha = 0;
  job->uv_alpha = 0;
  // Only one of the two jobs can record the progress, since we don't
  // expect the user's hook to be multi-thread safe.
  job->delta_progress = (start_row == 0) ? 20 : 0;
}

// main entry point
int VP8EncAnalyze(VP8Encoder* const enc) {
  int ok = 1;
  const int do_segments =
      enc->config_->emulate_jpeg_size ||   // We need the complexity evaluation.
      (enc->segment_hdr_.num_segments_ > 1) ||
      (enc->method_ <= 1);  // for methods 0 - 1, we need preds_[] to be filled.
  if (do_segments) {
    const int last_row = enc->mb_h_;
    const int total_mb = last_row * enc->mb_w_;
#ifdef WEBP_USE_THREAD
    // We give a little more than half of the work to the main thread.
    const int split_row = (9 * last_row + 15) >> 4;
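    // (e.g. last_row == 40 gives split_row == (360 + 15) >> 4 == 23: the main
    //  thread handles rows [0, 23) and the side job rows [23, 40).)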
    const int kMinSplitRow = 2;  // minimal rows needed for mt to be worth it
    const int do_mt = (enc->thread_level_ > 0) && (split_row >= kMinSplitRow);
#else
    const int do_mt = 0;
#endif
    const WebPWorkerInterface* const worker_interface =
        WebPGetWorkerInterface();
    SegmentJob main_job;
    if (do_mt) {
#ifdef WEBP_USE_THREAD
      SegmentJob side_job;
      // Note the use of '&=' (rather than a short-circuiting '&&'): the calls
      // below must be made no matter what.
      InitSegmentJob(enc, &main_job, 0, split_row);
      InitSegmentJob(enc, &side_job, split_row, last_row);
      // we don't need to call Reset() on main_job.worker, since we're calling
      // WebPWorkerExecute() on it
      ok &= worker_interface->Reset(&side_job.worker);
      // launch the two jobs in parallel
      if (ok) {
        worker_interface->Launch(&side_job.worker);
        worker_interface->Execute(&main_job.worker);
        ok &= worker_interface->Sync(&side_job.worker);
        ok &= worker_interface->Sync(&main_job.worker);
      }
      worker_interface->End(&side_job.worker);
      if (ok) MergeJobs(&side_job, &main_job);  // merge results together
#endif  // WEBP_USE_THREAD
    } else {
      // Even in the single-thread case, we use the generic Worker tools.
      InitSegmentJob(enc, &main_job, 0, last_row);
      worker_interface->Execute(&main_job.worker);
      ok &= worker_interface->Sync(&main_job.worker);
    }
    worker_interface->End(&main_job.worker);
    if (ok) {
      enc->alpha_ = main_job.alpha / total_mb;
      enc->uv_alpha_ = main_job.uv_alpha / total_mb;
      AssignSegments(enc, main_job.alphas);
    }
  } else {   // Use only one default segment.
    ResetAllMBInfo(enc);
  }
  if (!ok) {
    return WebPEncodingSetError(enc->pic_,
                                VP8_ENC_ERROR_OUT_OF_MEMORY);  // imprecise
  }
  return ok;
}