/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <math.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"
#include "vp9/common/vp9_seg_common.h"

#if !CONFIG_REALTIME_ONLY
#include "vp9/encoder/vp9_aq_variance.h"
#endif
#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_rdopt.h"

#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))

#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)

#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8

typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

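// Scratch state threaded through block_rd_txfm() by txfm_rd_in_plane():
// running rate/distortion/sse/rd totals for the plane being searched, plus
// the entropy contexts used for coefficient costing.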
struct rdcost_block_args {
  const VP9_COMP *cpi;
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
  const ScanOrder *so;
  uint8_t skippable;
  struct buf_2d *this_recon;
};

#define LAST_NEW_MV_INDEX 6

#if !CONFIG_REALTIME_ONLY
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  { NEARESTMV, { LAST_FRAME, NO_REF_FRAME } },
  { NEARESTMV, { ALTREF_FRAME, NO_REF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, NO_REF_FRAME } },

  { DC_PRED, { INTRA_FRAME, NO_REF_FRAME } },

  { NEWMV, { LAST_FRAME, NO_REF_FRAME } },
  { NEWMV, { ALTREF_FRAME, NO_REF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, NO_REF_FRAME } },

  { NEARMV, { LAST_FRAME, NO_REF_FRAME } },
  { NEARMV, { ALTREF_FRAME, NO_REF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, NO_REF_FRAME } },

  { ZEROMV, { LAST_FRAME, NO_REF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, NO_REF_FRAME } },
  { ZEROMV, { ALTREF_FRAME, NO_REF_FRAME } },

  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  { TM_PRED, { INTRA_FRAME, NO_REF_FRAME } },

  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  { H_PRED, { INTRA_FRAME, NO_REF_FRAME } },
  { V_PRED, { INTRA_FRAME, NO_REF_FRAME } },
  { D135_PRED, { INTRA_FRAME, NO_REF_FRAME } },
  { D207_PRED, { INTRA_FRAME, NO_REF_FRAME } },
  { D153_PRED, { INTRA_FRAME, NO_REF_FRAME } },
  { D63_PRED, { INTRA_FRAME, NO_REF_FRAME } },
  { D117_PRED, { INTRA_FRAME, NO_REF_FRAME } },
  { D45_PRED, { INTRA_FRAME, NO_REF_FRAME } },
};

static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  { { LAST_FRAME, NO_REF_FRAME } },   { { GOLDEN_FRAME, NO_REF_FRAME } },
  { { ALTREF_FRAME, NO_REF_FRAME } }, { { LAST_FRAME, ALTREF_FRAME } },
  { { GOLDEN_FRAME, ALTREF_FRAME } }, { { INTRA_FRAME, NO_REF_FRAME } },
};
#endif  // !CONFIG_REALTIME_ONLY

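// Swap the coefficient buffer pointers of context slots m and n for planes
// min_plane..max_plane-1; the macroblock plane pointers are left referencing
// the buffers that were previously in slot m.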
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int m, int n,
                           int min_plane, int max_plane) {
  int i;

  for (i = min_plane; i < max_plane; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &x->e_mbd.plane[i];

    p->coeff = ctx->coeff_pbuf[i][m];
    p->qcoeff = ctx->qcoeff_pbuf[i][m];
    pd->dqcoeff = ctx->dqcoeff_pbuf[i][m];
    p->eobs = ctx->eobs_pbuf[i][m];

    ctx->coeff_pbuf[i][m] = ctx->coeff_pbuf[i][n];
    ctx->qcoeff_pbuf[i][m] = ctx->qcoeff_pbuf[i][n];
    ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n];
    ctx->eobs_pbuf[i][m] = ctx->eobs_pbuf[i][n];

    ctx->coeff_pbuf[i][n] = p->coeff;
    ctx->qcoeff_pbuf[i][n] = p->qcoeff;
    ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff;
    ctx->eobs_pbuf[i][n] = p->eobs;
  }
}

#if !CONFIG_REALTIME_ONLY
// Build the inter prediction plane by plane and compute the model rd cost,
// with an optional early-termination check against best_rd.
static int build_inter_pred_model_rd_earlyterm(
    VP9_COMP *cpi, int mi_row, int mi_col, BLOCK_SIZE bsize, MACROBLOCK *x,
    MACROBLOCKD *xd, int *out_rate_sum, int64_t *out_dist_sum,
    int *skip_txfm_sb, int64_t *skip_sse_sb, int do_earlyterm,
    int64_t best_rd) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
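  // E.g. with 8-bit data the dequant table carries that x8 scaling, so the
  // effective quantizer below is pd->dequant[1] >> 3; at higher bit depths
  // coefficients gain a further (bd - 8) bits, hence the xd->bd - 5 shift.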
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  const int ref = xd->mi[0]->ref_frame[0];
  unsigned int sse;
  unsigned int var = 0;
  int64_t total_sse = 0;
  int skip_flag = 1;
  const int shift = 6;
  const int dequant_shift =
#if CONFIG_VP9_HIGHBITDEPTH
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
#endif  // CONFIG_VP9_HIGHBITDEPTH
                                                    3;

  x->pred_sse[ref] = 0;

  // Build prediction signal, compute stats and RD cost on per-plane basis
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    unsigned int sum_sse = 0;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
    const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;
    unsigned int qstep;
    unsigned int nlog2;
    int64_t dist = 0;

    // Build inter predictor
    vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, i);

    // Compute useful stats
    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
        int block_idx = (idy << 1) + idx;
        int low_err_skip = 0;

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride, dst, pd->dst.stride,
                                        &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

        x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
        if (!x->select_tx_size) {
          // Check if all ac coefficients can be quantized to zero.
          if (var < ac_thr || var == 0) {
            x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;

            // Check if dc coefficient can be quantized to zero.
            if (sse - var < dc_thr || sse == var) {
              x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
          }
        }

        if (skip_flag && !low_err_skip) skip_flag = 0;

        if (i == 0) x->pred_sse[ref] += sse;
      }
    }

    total_sse += sum_sse;
    qstep = pd->dequant[1] >> dequant_shift;
    nlog2 = num_pels_log2_lookup[bs];

    // Fast approximation of the modelling function.
    if (cpi->sf.simple_model_rd_from_var) {
      int64_t rate;
      if (qstep < 120)
        rate = ((int64_t)sum_sse * (280 - qstep)) >> (16 - VP9_PROB_COST_SHIFT);
      else
        rate = 0;
      dist = ((int64_t)sum_sse * qstep) >> 8;
      rate_sum += rate;
    } else {
      int rate;
      vp9_model_rd_from_var_lapndz(sum_sse, nlog2, qstep, &rate, &dist);
      rate_sum += rate;
    }
    dist_sum += dist;
    if (do_earlyterm) {
      if (RDCOST(x->rdmult, x->rddiv, rate_sum,
                 dist_sum << VP9_DIST_SCALE_LOG2) >= best_rd)
        return 1;
    }
  }
  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << VP9_DIST_SCALE_LOG2;
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << VP9_DIST_SCALE_LOG2;

  return 0;
}
#endif  // !CONFIG_REALTIME_ONLY

#if CONFIG_VP9_HIGHBITDEPTH
int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
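  // Round-shift by 2 * (bd - 8) to bring the squared errors back to the
  // 8-bit scale, so rd costs stay comparable across bit depths.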
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}

static int64_t vp9_highbd_block_error_dispatch(const tran_low_t *coeff,
                                               const tran_low_t *dqcoeff,
                                               intptr_t block_size,
                                               int64_t *ssz, int bd) {
  if (bd == 8) {
    return vp9_block_error(coeff, dqcoeff, block_size, ssz);
  } else {
    return vp9_highbd_block_error(coeff, dqcoeff, block_size, ssz, bd);
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

int64_t vp9_block_error_fp_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                             int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}

/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include the cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * is non-zero). */
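/* Each row, including its terminator, sums to the coefficient count of the
 * corresponding transform size: 16, 64, 256 and 1024 respectively. */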
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4, 3, 16 - 13, 0 },
  { 1, 2, 3, 4, 11, 64 - 21, 0 },
  { 1, 2, 3, 4, 11, 256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
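// Returns the token cost (rate) of the quantized coefficients of one
// transform block, given the entropy context 'pt' of its first coefficient.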
static int cost_coeffs(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
                       int pt, const int16_t *scan, const int16_t *nb,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  const struct macroblock_plane *p = &x->plane[plane];
  const PLANE_TYPE type = get_plane_type(plane);
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mi)];
  uint8_t token_cache[32 * 32];
  int cost;
#if CONFIG_VP9_HIGHBITDEPTH
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif

  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y
             ? mi->tx_size == tx_size
             : get_uv_tx_size(mi, &xd->plane[plane]) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
  } else {
    if (use_fast_coef_costing) {
      int band_left = *band_count++;
      int c;

      // dc token
      int v = qcoeff[0];
      int16_t prev_t;
      cost = vp9_get_token_cost(v, &prev_t, cat6_high_cost);
      cost += (*token_costs)[0][pt][prev_t];

      token_cache[0] = vp9_pt_energy_class[prev_t];
      ++token_costs;

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];
        int16_t t;

        v = qcoeff[rc];
        cost += vp9_get_token_cost(v, &t, cat6_high_cost);
        cost += (*token_costs)[!prev_t][!prev_t][t];
        prev_t = t;
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
      }

      // eob token
      if (band_left) cost += (*token_costs)[0][!prev_t][EOB_TOKEN];

    } else {  // !use_fast_coef_costing
      int band_left = *band_count++;
      int c;

      // dc token
      int v = qcoeff[0];
      int16_t tok;
      unsigned int(*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
      cost = vp9_get_token_cost(v, &tok, cat6_high_cost);
      cost += (*token_costs)[0][pt][tok];

      token_cache[0] = vp9_pt_energy_class[tok];
      ++token_costs;

      tok_cost_ptr = &((*token_costs)[!tok]);

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];

        v = qcoeff[rc];
        cost += vp9_get_token_cost(v, &tok, cat6_high_cost);
        pt = get_coef_context(nb, token_cache, c);
        cost += (*tok_cost_ptr)[pt][tok];
        token_cache[rc] = vp9_pt_energy_class[tok];
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
        tok_cost_ptr = &((*token_costs)[!tok]);
      }

      // eob token
      if (band_left) {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  return cost;
}

// Copy all visible 4x4s in the transform block.
static void copy_block_visible(const MACROBLOCKD *xd,
                               const struct macroblockd_plane *const pd,
                               const uint8_t *src, const int src_stride,
                               uint8_t *dst, const int dst_stride, int blk_row,
                               int blk_col, const BLOCK_SIZE plane_bsize,
                               const BLOCK_SIZE tx_bsize) {
  const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize];
  const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize];
  int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge,
                                            pd->subsampling_x, blk_col);
  int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge,
                                             pd->subsampling_y, blk_row);
  const int is_highbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
  if (tx_bsize == BLOCK_4X4 ||
      (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) {
    const int w = tx_4x4_w << 2;
    const int h = tx_4x4_h << 2;
#if CONFIG_VP9_HIGHBITDEPTH
    if (is_highbd) {
      vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(src), src_stride,
                               CONVERT_TO_SHORTPTR(dst), dst_stride, NULL, 0, 0,
                               0, 0, w, h, xd->bd);
    } else {
#endif
      vpx_convolve_copy(src, src_stride, dst, dst_stride, NULL, 0, 0, 0, 0, w,
                        h);
#if CONFIG_VP9_HIGHBITDEPTH
    }
#endif
  } else {
    int r, c;
    int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h);
    int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w);
    // The transform block extends into the unrestricted motion vector (UMV)
    // border; copy only the visible 4x4 sub-blocks.
    for (r = 0; r < max_r; ++r) {
      // Sub-blocks lying wholly within the UMV border are skipped.
      for (c = 0; c < max_c; ++c) {
        const uint8_t *src_ptr = src + r * src_stride * 4 + c * 4;
        uint8_t *dst_ptr = dst + r * dst_stride * 4 + c * 4;
#if CONFIG_VP9_HIGHBITDEPTH
        if (is_highbd) {
          vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
                                   CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
                                   NULL, 0, 0, 0, 0, 4, 4, xd->bd);
        } else {
#endif
          vpx_convolve_copy(src_ptr, src_stride, dst_ptr, dst_stride, NULL, 0,
                            0, 0, 0, 4, 4);
#if CONFIG_VP9_HIGHBITDEPTH
        }
#endif
      }
    }
  }
  (void)is_highbd;
}

// Compute the pixel domain sum square error on all visible 4x4s in the
// transform block.
static unsigned pixel_sse(const VP9_COMP *const cpi, const MACROBLOCKD *xd,
                          const struct macroblockd_plane *const pd,
                          const uint8_t *src, const int src_stride,
                          const uint8_t *dst, const int dst_stride, int blk_row,
                          int blk_col, const BLOCK_SIZE plane_bsize,
                          const BLOCK_SIZE tx_bsize) {
  unsigned int sse = 0;
  const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize];
  const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize];
  int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge,
                                            pd->subsampling_x, blk_col);
  int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge,
                                             pd->subsampling_y, blk_row);
  if (tx_bsize == BLOCK_4X4 ||
      (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) {
    cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
  } else {
    const vpx_variance_fn_t vf_4x4 = cpi->fn_ptr[BLOCK_4X4].vf;
    int r, c;
    unsigned this_sse = 0;
    int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h);
    int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w);
    sse = 0;
    // The transform block extends into the unrestricted motion vector (UMV)
    // border; measure only the visible 4x4 sub-blocks.
    for (r = 0; r < max_r; ++r) {
      // Sub-blocks lying wholly within the UMV border are skipped.
      for (c = 0; c < max_c; ++c) {
        vf_4x4(src + r * src_stride * 4 + c * 4, src_stride,
               dst + r * dst_stride * 4 + c * 4, dst_stride, &this_sse);
        sse += this_sse;
      }
    }
  }
  return sse;
}

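// Compute the distortion (and sse) of one transform block, either in the
// transform domain from the coefficient error, or in the pixel domain by
// reconstructing the block and comparing against the source. The
// reconstruction is optionally copied out through out_recon.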
static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
                       BLOCK_SIZE plane_bsize, int block, int blk_row,
                       int blk_col, TX_SIZE tx_size, int64_t *out_dist,
                       int64_t *out_sse, struct buf_2d *out_recon,
                       int sse_calc_done) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int eob = p->eobs[block];

  if (!out_recon && x->block_tx_domain && eob) {
    const int ss_txfrm_size = tx_size << 1;
    int64_t this_sse;
    const int shift = tx_size == TX_32X32 ? 0 : 2;
    const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
    const tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
    const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
    *out_dist = vp9_highbd_block_error_dispatch(
                    coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse, bd) >>
                shift;
#else
    *out_dist =
        vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse) >>
        shift;
#endif  // CONFIG_VP9_HIGHBITDEPTH
    *out_sse = this_sse >> shift;

    if (x->skip_encode && !is_inter_block(xd->mi[0])) {
      // TODO(jingning): tune the model to better capture the distortion.
      const int64_t mean_quant_error =
          (pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >>
#if CONFIG_VP9_HIGHBITDEPTH
          (shift + 2 + (bd - 8) * 2);
#else
          (shift + 2);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      *out_dist += (mean_quant_error >> 4);
      *out_sse += mean_quant_error;
    }
  } else {
    const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
    const int bs = 4 * num_4x4_blocks_wide_lookup[tx_bsize];
    const int src_stride = p->src.stride;
    const int dst_stride = pd->dst.stride;
    const int src_idx = 4 * (blk_row * src_stride + blk_col);
    const int dst_idx = 4 * (blk_row * dst_stride + blk_col);
    const uint8_t *src = &p->src.buf[src_idx];
    const uint8_t *dst = &pd->dst.buf[dst_idx];
    uint8_t *out_recon_ptr = 0;

    const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    unsigned int tmp;

    if (sse_calc_done) {
      tmp = (unsigned int)(*out_sse);
    } else {
      tmp = pixel_sse(cpi, xd, pd, src, src_stride, dst, dst_stride, blk_row,
                      blk_col, plane_bsize, tx_bsize);
    }
    *out_sse = (int64_t)tmp * 16;
    if (out_recon) {
      const int out_recon_idx = 4 * (blk_row * out_recon->stride + blk_col);
      out_recon_ptr = &out_recon->buf[out_recon_idx];
      copy_block_visible(xd, pd, dst, dst_stride, out_recon_ptr,
                         out_recon->stride, blk_row, blk_col, plane_bsize,
                         tx_bsize);
    }

    if (eob) {
#if CONFIG_VP9_HIGHBITDEPTH
      DECLARE_ALIGNED(16, uint16_t, recon16[1024]);
      uint8_t *recon = (uint8_t *)recon16;
#else
      DECLARE_ALIGNED(16, uint8_t, recon[1024]);
#endif  // CONFIG_VP9_HIGHBITDEPTH

#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16,
                                 32, NULL, 0, 0, 0, 0, bs, bs, xd->bd);
        if (xd->lossless) {
          vp9_highbd_iwht4x4_add(dqcoeff, recon16, 32, eob, xd->bd);
        } else {
          switch (tx_size) {
            case TX_4X4:
              vp9_highbd_idct4x4_add(dqcoeff, recon16, 32, eob, xd->bd);
              break;
            case TX_8X8:
              vp9_highbd_idct8x8_add(dqcoeff, recon16, 32, eob, xd->bd);
              break;
            case TX_16X16:
              vp9_highbd_idct16x16_add(dqcoeff, recon16, 32, eob, xd->bd);
              break;
            default:
              assert(tx_size == TX_32X32);
              vp9_highbd_idct32x32_add(dqcoeff, recon16, 32, eob, xd->bd);
              break;
          }
        }
        recon = CONVERT_TO_BYTEPTR(recon16);
      } else {
#endif  // CONFIG_VP9_HIGHBITDEPTH
        vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, 0, 0, 0, bs, bs);
        switch (tx_size) {
          case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, eob); break;
          case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, eob); break;
          case TX_8X8: vp9_idct8x8_add(dqcoeff, recon, 32, eob); break;
          default:
            assert(tx_size == TX_4X4);
            // this is like vp9_short_idct4x4 but has a special case around
            // eob<=1, which is significant (not just an optimization) for
            // the lossless case.
            x->inv_txfm_add(dqcoeff, recon, 32, eob);
            break;
        }
#if CONFIG_VP9_HIGHBITDEPTH
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH

      tmp = pixel_sse(cpi, xd, pd, src, src_stride, recon, 32, blk_row, blk_col,
                      plane_bsize, tx_bsize);
      if (out_recon) {
        copy_block_visible(xd, pd, recon, 32, out_recon_ptr, out_recon->stride,
                           blk_row, blk_col, plane_bsize, tx_bsize);
      }
    }

    *out_dist = (int64_t)tmp * 16;
  }
}

static int rate_block(int plane, int block, TX_SIZE tx_size, int coeff_ctx,
                      struct rdcost_block_args *args) {
  return cost_coeffs(args->x, plane, block, tx_size, coeff_ctx, args->so->scan,
                     args->so->neighbors, args->use_fast_coef_costing);
}

static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  int64_t rd1, rd2, rd;
  int rate;
  int64_t dist = INT64_MAX;
  int64_t sse = INT64_MAX;
  const int coeff_ctx =
      combine_entropy_contexts(args->t_left[blk_row], args->t_above[blk_col]);
  struct buf_2d *recon = args->this_recon;
  const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int dst_stride = pd->dst.stride;
  const uint8_t *dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
  const int enable_trellis_opt = args->cpi->sf.trellis_opt_tx_rd.method;
  const double trellis_opt_thresh = args->cpi->sf.trellis_opt_tx_rd.thresh;
  int sse_calc_done = 0;
#if CONFIG_MISMATCH_DEBUG
  struct encode_b_args encode_b_arg = {
    x, enable_trellis_opt, trellis_opt_thresh, &sse_calc_done,
    &sse, args->t_above, args->t_left, &mi->skip,
    0,  // mi_row
    0,  // mi_col
    0   // output_enabled
  };
#else
  struct encode_b_args encode_b_arg = {
    x, enable_trellis_opt, trellis_opt_thresh, &sse_calc_done,
    &sse, args->t_above, args->t_left, &mi->skip
  };
#endif

  if (args->exit_early) return;

  if (!is_inter_block(mi)) {
    vp9_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
                           &encode_b_arg);
    if (recon) {
      uint8_t *rec_ptr = &recon->buf[4 * (blk_row * recon->stride + blk_col)];
      copy_block_visible(xd, pd, dst, dst_stride, rec_ptr, recon->stride,
                         blk_row, blk_col, plane_bsize, tx_bsize);
    }
    if (x->block_tx_domain) {
      dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                 tx_size, &dist, &sse, /*out_recon=*/NULL, sse_calc_done);
    } else {
      const struct macroblock_plane *const p = &x->plane[plane];
      const int src_stride = p->src.stride;
      const uint8_t *src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
      unsigned int tmp;
      if (!sse_calc_done) {
        const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
        const int16_t *diff =
            &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
        int visible_width, visible_height;
        sse = sum_squares_visible(xd, pd, diff, diff_stride, blk_row, blk_col,
                                  plane_bsize, tx_bsize, &visible_width,
                                  &visible_height);
      }
#if CONFIG_VP9_HIGHBITDEPTH
      if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && (xd->bd > 8))
        sse = ROUND64_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      sse = sse * 16;
      tmp = pixel_sse(args->cpi, xd, pd, src, src_stride, dst, dst_stride,
                      blk_row, blk_col, plane_bsize, tx_bsize);
      dist = (int64_t)tmp * 16;
    }
  } else {
    int skip_txfm_flag = SKIP_TXFM_NONE;
    if (max_txsize_lookup[plane_bsize] == tx_size)
      skip_txfm_flag = x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))];

    // This reduces the risk of bad perceptual quality due to bad prediction.
    // We always force the encoder to perform transform and quantization.
    if (!args->cpi->sf.allow_skip_txfm_ac_dc &&
        skip_txfm_flag == SKIP_TXFM_AC_DC) {
      skip_txfm_flag = SKIP_TXFM_NONE;
    }

    if (skip_txfm_flag == SKIP_TXFM_NONE ||
        (recon && skip_txfm_flag == SKIP_TXFM_AC_ONLY)) {
      const struct macroblock_plane *const p = &x->plane[plane];
      const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
      const int16_t *const diff =
          &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
      const int use_trellis_opt =
          do_trellis_opt(pd, diff, diff_stride, blk_row, blk_col, plane_bsize,
                         tx_size, &encode_b_arg);
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
      if (use_trellis_opt) vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
      dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                 tx_size, &dist, &sse, recon, sse_calc_done);
    } else if (skip_txfm_flag == SKIP_TXFM_AC_ONLY) {
      // compute DC coefficient
      tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp9_xform_quant_dc(x, plane, block, blk_row, blk_col, plane_bsize,
                         tx_size);
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
      if (x->plane[plane].eobs[block]) {
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd_sse = coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd_sse * resd_sse;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32) dc_correct >>= 2;

        dist = VPXMAX(0, sse - dc_correct);
      }
    } else {
      assert(0 && "allow_skip_txfm_ac_dc does not allow SKIP_TXFM_AC_DC.");
    }
  }

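  // rd with zero rate is a lower bound on this block's final cost; if even
  // that already exceeds best_rd, skip computing the token rate.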
  rd = RDCOST(x->rdmult, x->rddiv, 0, dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  rate = rate_block(plane, block, tx_size, coeff_ctx, args);
  args->t_above[blk_col] = (x->plane[plane].eobs[block] > 0) ? 1 : 0;
  args->t_left[blk_row] = (x->plane[plane].eobs[block] > 0) ? 1 : 0;
  rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = VPXMIN(rd1, rd2);
  if (plane == 0) {
    x->zcoeff_blk[tx_size][block] =
        !x->plane[plane].eobs[block] ||
        (x->sharpness == 0 && rd1 > rd2 && !xd->lossless);
    x->sum_y_eobs[tx_size] += x->plane[plane].eobs[block];
  }

  args->this_rate += rate;
  args->this_dist += dist;
  args->this_sse += sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  args->skippable &= !x->plane[plane].eobs[block];
}

static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                             int64_t *distortion, int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane, BLOCK_SIZE bsize,
                             TX_SIZE tx_size, int use_fast_coef_costing,
                             struct buf_2d *recon) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  struct rdcost_block_args args;
  vp9_zero(args);
  args.cpi = cpi;
  args.x = x;
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_costing;
  args.skippable = 1;
  args.this_recon = recon;

  if (plane == 0) xd->mi[0]->tx_size = tx_size;

  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  args.so = get_scan(xd, tx_size, get_plane_type(plane), 0);

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
                                         &args);
  if (args.exit_early) {
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  } else {
    *distortion = args.this_dist;
    *rate = args.this_rate;
    *sse = args.this_sse;
    *skippable = args.skippable;
  }
}

static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                   int64_t *distortion, int *skip, int64_t *sse,
                                   int64_t ref_best_rd, BLOCK_SIZE bs,
                                   struct buf_2d *recon) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];

  mi->tx_size = VPXMIN(max_tx_size, largest_tx_size);

  txfm_rd_in_plane(cpi, x, rate, distortion, skip, sse, ref_best_rd, 0, bs,
                   mi->tx_size, cpi->sf.use_fast_coef_costing, recon);
}

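// Search transform sizes from the largest allowed downwards and keep the one
// with the lowest rd cost. rd[n][0]/rd[n][1] hold the cost of tx size n
// excluding/including the rate of signalling that size.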
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                   int64_t *distortion, int *skip,
                                   int64_t *psse, int64_t ref_best_rd,
                                   BLOCK_SIZE bs, struct buf_2d *recon) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  int64_t rd[TX_SIZES][2] = { { INT64_MAX, INT64_MAX },
                              { INT64_MAX, INT64_MAX },
                              { INT64_MAX, INT64_MAX },
                              { INT64_MAX, INT64_MAX } };
  int n;
  int s0, s1;
  int64_t best_rd = ref_best_rd;
  TX_SIZE best_tx = max_tx_size;
  int start_tx, end_tx;
  const int tx_size_ctx = get_tx_size_context(xd);
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, recon_buf16[TX_SIZES][64 * 64]);
  uint8_t *recon_buf[TX_SIZES];
  for (n = 0; n < TX_SIZES; ++n) {
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      recon_buf[n] = CONVERT_TO_BYTEPTR(recon_buf16[n]);
    } else {
      recon_buf[n] = (uint8_t *)recon_buf16[n];
    }
  }
#else
  DECLARE_ALIGNED(16, uint8_t, recon_buf[TX_SIZES][64 * 64]);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  if (cm->tx_mode == TX_MODE_SELECT) {
    start_tx = max_tx_size;
    end_tx = VPXMAX(start_tx - cpi->sf.tx_size_search_depth, 0);
    if (bs > BLOCK_32X32) end_tx = VPXMIN(end_tx + 1, start_tx);
  } else {
    TX_SIZE chosen_tx_size =
        VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
    start_tx = chosen_tx_size;
    end_tx = chosen_tx_size;
  }

  for (n = start_tx; n >= end_tx; n--) {
    const int r_tx_size = cpi->tx_size_cost[max_tx_size - 1][tx_size_ctx][n];
    if (recon) {
      struct buf_2d this_recon;
      this_recon.buf = recon_buf[n];
      this_recon.stride = recon->stride;
      txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs,
                       n, cpi->sf.use_fast_coef_costing, &this_recon);
    } else {
      txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs,
                       n, cpi->sf.use_fast_coef_costing, 0);
    }
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      r[n][1] += r_tx_size;
    }
    if (d[n] == INT64_MAX || r[n][0] == INT_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
    } else if (s[n]) {
      if (is_inter_block(mi)) {
        rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
        r[n][1] -= r_tx_size;
      } else {
        rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
        rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]);
      }
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }

    if (is_inter_block(mi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) {
      rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
      rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
    }

    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
         (n < (int)max_tx_size && rd[n][1] > rd[n + 1][1]) || s[n] == 1))
      break;

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
  }
  mi->tx_size = best_tx;

  *distortion = d[mi->tx_size];
  *rate = r[mi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip = s[mi->tx_size];
  *psse = sse[mi->tx_size];
  if (recon) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      memcpy(CONVERT_TO_SHORTPTR(recon->buf),
             CONVERT_TO_SHORTPTR(recon_buf[mi->tx_size]),
             64 * 64 * sizeof(uint16_t));
    } else {
#endif
      memcpy(recon->buf, recon_buf[mi->tx_size], 64 * 64);
#if CONFIG_VP9_HIGHBITDEPTH
    }
#endif
  }
}

static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip, int64_t *psse,
                            BLOCK_SIZE bs, int64_t ref_best_rd,
                            struct buf_2d *recon) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t sse;
  int64_t *ret_sse = psse ? psse : &sse;

  assert(bs == xd->mi[0]->sb_type);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
    choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
                           bs, recon);
  } else {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
                           bs, recon);
  }
}

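// Prune an oblique intra mode unless the best mode found so far is one of
// its two neighboring directional modes (e.g. D117 is only evaluated when
// V_PRED or D135_PRED is the current best).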
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  if (mode == D117_PRED && best_intra_mode != V_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  if (mode == D63_PRED && best_intra_mode != V_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D207_PRED && best_intra_mode != H_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D153_PRED && best_intra_mode != H_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  return 0;
}

static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int row,
                                     int col, PREDICTION_MODE *best_mode,
                                     const int *bmode_costs, ENTROPY_CONTEXT *a,
                                     ENTROPY_CONTEXT *l, int *bestrate,
                                     int *bestratey, int64_t *bestdistortion,
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  PREDICTION_MODE mode;
  MACROBLOCKD *const xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
  uint8_t *dst_init = &pd->dst.buf[row * 4 * dst_stride + col * 4];
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  uint8_t best_dst[8 * 8];
#if CONFIG_VP9_HIGHBITDEPTH
  uint16_t best_dst16[8 * 8];
#endif
  memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
  memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));

  xd->mi[0]->tx_size = TX_4X4;

  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mode, *best_mode)) continue;
      }

      memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
      memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = (row + idy) * 2 + (col + idx);
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
          uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
          int16_t *const src_diff =
              vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
          tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
          tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
          tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
          uint16_t *const eob = &p->eobs[block];
          xd->mi[0]->bmi[block].as_mode = mode;
          vp9_predict_intra_block(xd, 1, TX_4X4, mode,
                                  x->skip_encode ? src : dst,
                                  x->skip_encode ? src_stride : dst_stride, dst,
                                  dst_stride, col + idx, row + idy, 0);
          vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride, dst,
                                    dst_stride, xd->bd);
          if (xd->lossless) {
            const ScanOrder *so = &vp9_default_scan_orders[TX_4X4];
            const int coeff_ctx =
                combine_entropy_contexts(tempa[idx], templ[idy]);
            vp9_highbd_fwht4x4(src_diff, coeff, 8);
            vpx_highbd_quantize_b(coeff, 4 * 4, p, qcoeff, dqcoeff, pd->dequant,
                                  eob, so);
            ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
                                 so->neighbors, cpi->sf.use_fast_coef_costing);
            tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 1 : 0);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst16,
                                   dst_stride, p->eobs[block], xd->bd);
          } else {
            int64_t unused;
            const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
            const ScanOrder *so = &vp9_scan_orders[TX_4X4][tx_type];
            const int coeff_ctx =
                combine_entropy_contexts(tempa[idx], templ[idy]);
            if (tx_type == DCT_DCT)
              vpx_highbd_fdct4x4(src_diff, coeff, 8);
            else
              vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
            vpx_highbd_quantize_b(coeff, 4 * 4, p, qcoeff, dqcoeff, pd->dequant,
                                  eob, so);
            ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
                                 so->neighbors, cpi->sf.use_fast_coef_costing);
            distortion += vp9_highbd_block_error_dispatch(
                              coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16,
                              &unused, xd->bd) >>
                          2;
            tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 1 : 0);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                                  dst16, dst_stride, p->eobs[block], xd->bd);
          }
        }
      }

      rate += ratey;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
        memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          memcpy(best_dst16 + idy * 8,
                 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
    next_highbd : {}
    }
    if (best_rd >= rd_thresh || x->skip_encode) return best_rd;

    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
             best_dst16 + idy * 8, num_4x4_blocks_wide * 4 * sizeof(uint16_t));
    }

    return best_rd;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;
    int64_t distortion = 0;
    int rate = bmode_costs[mode];

    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode)) continue;
    }

    memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
    memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        const int block = (row + idy) * 2 + (col + idx);
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff =
            vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
        tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
        tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
        tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
        uint16_t *const eob = &p->eobs[block];
        xd->mi[0]->bmi[block].as_mode = mode;
        vp9_predict_intra_block(xd, 1, TX_4X4, mode, x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride, dst,
                                dst_stride, col + idx, row + idy, 0);
        vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          const ScanOrder *so = &vp9_default_scan_orders[TX_4X4];
          const int coeff_ctx =
              combine_entropy_contexts(tempa[idx], templ[idy]);
          vp9_fwht4x4(src_diff, coeff, 8);
          vpx_quantize_b(coeff, 4 * 4, p, qcoeff, dqcoeff, pd->dequant, eob,
                         so);
          ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
                               so->neighbors, cpi->sf.use_fast_coef_costing);
          tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0) ? 1 : 0;
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
                          p->eobs[block]);
        } else {
          int64_t unused;
          const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
          const ScanOrder *so = &vp9_scan_orders[TX_4X4][tx_type];
          const int coeff_ctx =
              combine_entropy_contexts(tempa[idx], templ[idy]);
          vp9_fht4x4(src_diff, coeff, 8, tx_type);
          vpx_quantize_b(coeff, 4 * 4, p, qcoeff, dqcoeff, pd->dequant, eob,
                         so);
          ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
                               so->neighbors, cpi->sf.use_fast_coef_costing);
          tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0) ? 1 : 0;
          distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused) >>
                        2;
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block), dst,
                         dst_stride, p->eobs[block]);
        }
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
      memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
               num_4x4_blocks_wide * 4);
    }
  next : {}
  }

  if (best_rd >= rd_thresh || x->skip_encode) return best_rd;

  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
           num_4x4_blocks_wide * 4);

  return best_rd;
}

rd_pick_intra_sub_8x8_y_mode(VP9_COMP * cpi,MACROBLOCK * mb,int * rate,int * rate_y,int64_t * distortion,int64_t best_rd)1299 static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
1300 int *rate, int *rate_y,
1301 int64_t *distortion,
1302 int64_t best_rd) {
1303 int i, j;
1304 const MACROBLOCKD *const xd = &mb->e_mbd;
1305 MODE_INFO *const mic = xd->mi[0];
1306 const MODE_INFO *above_mi = xd->above_mi;
1307 const MODE_INFO *left_mi = xd->left_mi;
1308 const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
1309 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1310 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1311 int idx, idy;
1312 int cost = 0;
1313 int64_t total_distortion = 0;
1314 int tot_rate_y = 0;
1315 int64_t total_rd = 0;
1316 const int *bmode_costs = cpi->mbmode_cost;
1317
1318 // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
1319 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1320 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1321 PREDICTION_MODE best_mode = DC_PRED;
1322 int r = INT_MAX, ry = INT_MAX;
1323 int64_t d = INT64_MAX, this_rd = INT64_MAX;
1324 i = idy * 2 + idx;
1325 if (cpi->common.frame_type == KEY_FRAME) {
1326 const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
1327 const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);
1328
1329 bmode_costs = cpi->y_mode_costs[A][L];
1330 }
1331
1332 this_rd = rd_pick_intra4x4block(
1333 cpi, mb, idy, idx, &best_mode, bmode_costs,
1334 xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r,
1335 &ry, &d, bsize, best_rd - total_rd);
1336
1337 if (this_rd >= best_rd - total_rd) return INT64_MAX;
1338
1339 total_rd += this_rd;
1340 cost += r;
1341 total_distortion += d;
1342 tot_rate_y += ry;
1343
1344 mic->bmi[i].as_mode = best_mode;
1345 for (j = 1; j < num_4x4_blocks_high; ++j)
1346 mic->bmi[i + j * 2].as_mode = best_mode;
1347 for (j = 1; j < num_4x4_blocks_wide; ++j)
1348 mic->bmi[i + j].as_mode = best_mode;
1349
1350 if (total_rd >= best_rd) return INT64_MAX;
1351 }
1352 }
1353
1354 *rate = cost;
1355 *rate_y = tot_rate_y;
1356 *distortion = total_distortion;
1357 mic->mode = mic->bmi[3].as_mode;
1358
1359 return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
1360 }
1361
1362 // This function is used only for intra_only frames
rd_pick_intra_sby_mode(VP9_COMP * cpi,MACROBLOCK * x,int * rate,int * rate_tokenonly,int64_t * distortion,int * skippable,BLOCK_SIZE bsize,int64_t best_rd)1363 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
1364 int *rate_tokenonly, int64_t *distortion,
1365 int *skippable, BLOCK_SIZE bsize,
1366 int64_t best_rd) {
1367 PREDICTION_MODE mode;
1368 PREDICTION_MODE mode_selected = DC_PRED;
1369 MACROBLOCKD *const xd = &x->e_mbd;
1370 MODE_INFO *const mic = xd->mi[0];
1371 int this_rate, this_rate_tokenonly, s;
1372 int64_t this_distortion, this_rd;
1373 TX_SIZE best_tx = TX_4X4;
1374 int *bmode_costs;
1375 const MODE_INFO *above_mi = xd->above_mi;
1376 const MODE_INFO *left_mi = xd->left_mi;
1377 const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
1378 const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
1379 bmode_costs = cpi->y_mode_costs[A][L];
1380
1381 memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
1382 /* Y Search for intra prediction mode */
1383 for (mode = DC_PRED; mode <= TM_PRED; mode++) {
1384 if (cpi->sf.use_nonrd_pick_mode) {
1385 // These speed features are turned on in hybrid non-RD and RD mode
1386 // for key frame coding in the context of real-time setting.
1387 if (conditional_skipintra(mode, mode_selected)) continue;
1388 if (*skippable) break;
1389 }
1390
1391 mic->mode = mode;
1392
1393 super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
1394 bsize, best_rd, /*recon=*/NULL);
1395
1396 if (this_rate_tokenonly == INT_MAX) continue;
1397
1398 this_rate = this_rate_tokenonly + bmode_costs[mode];
1399 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1400
1401 if (this_rd < best_rd) {
1402 mode_selected = mode;
1403 best_rd = this_rd;
1404 best_tx = mic->tx_size;
1405 *rate = this_rate;
1406 *rate_tokenonly = this_rate_tokenonly;
1407 *distortion = this_distortion;
1408 *skippable = s;
1409 }
1410 }
1411
1412 mic->mode = mode_selected;
1413 mic->tx_size = best_tx;
1414
1415 return best_rd;
1416 }
1417
1418 // Return value 0: early termination triggered, no valid rd cost available;
1419 // 1: rd cost values are valid.
super_block_uvrd(const VP9_COMP * cpi,MACROBLOCK * x,int * rate,int64_t * distortion,int * skippable,int64_t * sse,BLOCK_SIZE bsize,int64_t ref_best_rd)1420 static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
1421 int64_t *distortion, int *skippable, int64_t *sse,
1422 BLOCK_SIZE bsize, int64_t ref_best_rd) {
1423 MACROBLOCKD *const xd = &x->e_mbd;
1424 MODE_INFO *const mi = xd->mi[0];
1425 const TX_SIZE uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]);
1426 int plane;
1427 int pnrate = 0, pnskip = 1;
1428 int64_t pndist = 0, pnsse = 0;
1429 int is_cost_valid = 1;
1430
1431 if (ref_best_rd < 0) is_cost_valid = 0;
1432
1433 if (is_inter_block(mi) && is_cost_valid) {
1434 for (plane = 1; plane < MAX_MB_PLANE; ++plane)
1435 vp9_subtract_plane(x, bsize, plane);
1436 }
1437
1438 *rate = 0;
1439 *distortion = 0;
1440 *sse = 0;
1441 *skippable = 1;
1442
1443 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
1444 txfm_rd_in_plane(cpi, x, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd,
1445 plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing,
1446 /*recon=*/NULL);
1447 if (pnrate == INT_MAX) {
1448 is_cost_valid = 0;
1449 break;
1450 }
1451 *rate += pnrate;
1452 *distortion += pndist;
1453 *sse += pnsse;
1454 *skippable &= pnskip;
1455 }
1456
1457 if (!is_cost_valid) {
1458 // reset cost value
1459 *rate = INT_MAX;
1460 *distortion = INT64_MAX;
1461 *sse = INT64_MAX;
1462 *skippable = 0;
1463 }
1464
1465 return is_cost_valid;
1466 }
1467
rd_pick_intra_sbuv_mode(VP9_COMP * cpi,MACROBLOCK * x,PICK_MODE_CONTEXT * ctx,int * rate,int * rate_tokenonly,int64_t * distortion,int * skippable,BLOCK_SIZE bsize,TX_SIZE max_tx_size)1468 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
1469 PICK_MODE_CONTEXT *ctx, int *rate,
1470 int *rate_tokenonly, int64_t *distortion,
1471 int *skippable, BLOCK_SIZE bsize,
1472 TX_SIZE max_tx_size) {
1473 MACROBLOCKD *xd = &x->e_mbd;
1474 PREDICTION_MODE mode;
1475 PREDICTION_MODE mode_selected = DC_PRED;
1476 int64_t best_rd = INT64_MAX, this_rd;
1477 int this_rate_tokenonly, this_rate, s;
1478 int64_t this_distortion, this_sse;
1479
1480 memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
1481 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1482 if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue;
1483 #if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
1484 if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
1485 (xd->above_mi == NULL || xd->left_mi == NULL) && need_top_left[mode])
1486 continue;
1487 #endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
1488
1489 xd->mi[0]->uv_mode = mode;
1490
1491 if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
1492 &this_sse, bsize, best_rd))
1493 continue;
1494 this_rate =
1495 this_rate_tokenonly +
1496 cpi->intra_uv_mode_cost[cpi->common.frame_type][xd->mi[0]->mode][mode];
1497 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1498
1499 if (this_rd < best_rd) {
1500 mode_selected = mode;
1501 best_rd = this_rd;
1502 *rate = this_rate;
1503 *rate_tokenonly = this_rate_tokenonly;
1504 *distortion = this_distortion;
1505 *skippable = s;
1506 if (!x->select_tx_size) swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
1507 }
1508 }
1509
1510 xd->mi[0]->uv_mode = mode_selected;
1511 return best_rd;
1512 }
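// Worked example (added; mask value assumed): if
// intra_uv_mode_mask[TX_8X8] == (1 << DC_PRED) | (1 << V_PRED) |
//                               (1 << H_PRED) | (1 << TM_PRED),
// the loop above evaluates only those four chroma modes; any mode with
// (mask & (1 << mode)) == 0 is skipped before touching the RD machinery.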
1513
1514 #if !CONFIG_REALTIME_ONLY
1515 static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
1516 int *rate_tokenonly, int64_t *distortion,
1517 int *skippable, BLOCK_SIZE bsize) {
1518 const VP9_COMMON *cm = &cpi->common;
1519 int64_t unused;
1520
1521 x->e_mbd.mi[0]->uv_mode = DC_PRED;
1522 memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
1523 super_block_uvrd(cpi, x, rate_tokenonly, distortion, skippable, &unused,
1524 bsize, INT64_MAX);
1525 *rate =
1526 *rate_tokenonly +
1527 cpi->intra_uv_mode_cost[cm->frame_type][x->e_mbd.mi[0]->mode][DC_PRED];
1528 return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
1529 }
1530
1531 static void choose_intra_uv_mode(VP9_COMP *cpi, MACROBLOCK *const x,
1532 PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
1533 TX_SIZE max_tx_size, int *rate_uv,
1534 int *rate_uv_tokenonly, int64_t *dist_uv,
1535 int *skip_uv, PREDICTION_MODE *mode_uv) {
1536 // Use an estimated rd for uv_intra based on DC_PRED if the
1537 // appropriate speed flag is set.
1538 if (cpi->sf.use_uv_intra_rd_estimate) {
1539 rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1540 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1541 // Else do a proper rd search for each possible transform size that may
1542 // be considered in the main rd loop.
1543 } else {
1544 rd_pick_intra_sbuv_mode(cpi, x, ctx, rate_uv, rate_uv_tokenonly, dist_uv,
1545 skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
1546 max_tx_size);
1547 }
1548 *mode_uv = x->e_mbd.mi[0]->uv_mode;
1549 }
1550
1551 static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
1552 int mode_context) {
1553 assert(is_inter_mode(mode));
1554 return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
1555 }
1556
1557 static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
1558 int i, PREDICTION_MODE mode, int_mv this_mv[2],
1559 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1560 int_mv seg_mvs[MAX_REF_FRAMES],
1561 int_mv *best_ref_mv[2], const int *mvjcost,
1562 int *mvcost[2]) {
1563 MODE_INFO *const mi = xd->mi[0];
1564 const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
1565 int thismvcost = 0;
1566 int idx, idy;
1567 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mi->sb_type];
1568 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mi->sb_type];
1569 const int is_compound = has_second_ref(mi);
1570
1571 switch (mode) {
1572 case NEWMV:
1573 this_mv[0].as_int = seg_mvs[mi->ref_frame[0]].as_int;
1574 thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
1575 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1576 if (is_compound) {
1577 this_mv[1].as_int = seg_mvs[mi->ref_frame[1]].as_int;
1578 thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
1579 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1580 }
1581 break;
1582 case NEARMV:
1583 case NEARESTMV:
1584 this_mv[0].as_int = frame_mv[mode][mi->ref_frame[0]].as_int;
1585 if (is_compound)
1586 this_mv[1].as_int = frame_mv[mode][mi->ref_frame[1]].as_int;
1587 break;
1588 default:
1589 assert(mode == ZEROMV);
1590 this_mv[0].as_int = 0;
1591 if (is_compound) this_mv[1].as_int = 0;
1592 break;
1593 }
1594
1595 mi->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
1596 if (is_compound) mi->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
1597
1598 mi->bmi[i].as_mode = mode;
1599
1600 for (idy = 0; idy < num_4x4_blocks_high; ++idy)
1601 for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
1602 memmove(&mi->bmi[i + idy * 2 + idx], &mi->bmi[i], sizeof(mi->bmi[i]));
1603
1604 return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mi->ref_frame[0]]) +
1605 thismvcost;
1606 }
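// Layout note (added): mi->bmi[] is a 2x2 raster of 4x4 units inside the
// 8x8 block, indexed  | 0 1 |
//                     | 2 3 |.
// For BLOCK_8X4 (num_4x4_blocks_wide == 2, high == 1) the memmove above
// copies bmi[i] into bmi[i + 1]; for BLOCK_4X8 it copies into bmi[i + 2],
// so every 4x4 unit covered by one prediction unit shares the same mv/mode.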
1607
1608 static int64_t encode_inter_mb_segment(VP9_COMP *cpi, MACROBLOCK *x,
1609 int64_t best_yrd, int i, int *labelyrate,
1610 int64_t *distortion, int64_t *sse,
1611 ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
1612 int mi_row, int mi_col) {
1613 int k;
1614 MACROBLOCKD *xd = &x->e_mbd;
1615 struct macroblockd_plane *const pd = &xd->plane[0];
1616 struct macroblock_plane *const p = &x->plane[0];
1617 MODE_INFO *const mi = xd->mi[0];
1618 const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->sb_type, pd);
1619 const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
1620 const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
1621 int idx, idy;
1622
1623 const uint8_t *const src =
1624 &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
1625 uint8_t *const dst =
1626 &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->dst.stride)];
1627 int64_t thisdistortion = 0, thissse = 0;
1628 int thisrate = 0, ref;
1629 const ScanOrder *so = &vp9_default_scan_orders[TX_4X4];
1630 const int is_compound = has_second_ref(mi);
1631 const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter];
1632
1633 assert(!x->skip_block);
1634
1635 for (ref = 0; ref < 1 + is_compound; ++ref) {
1636 const int bw = b_width_log2_lookup[BLOCK_8X8];
1637 const int h = 4 * (i >> bw);
1638 const int w = 4 * (i & ((1 << bw) - 1));
1639 const struct scale_factors *sf = &xd->block_refs[ref]->sf;
1640 int y_stride = pd->pre[ref].stride;
1641 uint8_t *pre = pd->pre[ref].buf + (h * pd->pre[ref].stride + w);
1642
1643 if (vp9_is_scaled(sf)) {
1644 const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
1645 const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
1646
1647 y_stride = xd->block_refs[ref]->buf->y_stride;
1648 pre = xd->block_refs[ref]->buf->y_buffer;
1649 pre += scaled_buffer_offset(x_start + w, y_start + h, y_stride, sf);
1650 }
1651 #if CONFIG_VP9_HIGHBITDEPTH
1652 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1653 vp9_highbd_build_inter_predictor(
1654 CONVERT_TO_SHORTPTR(pre), y_stride, CONVERT_TO_SHORTPTR(dst),
1655 pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv,
1656 &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3,
1657 mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2),
1658 xd->bd);
1659 } else {
1660 vp9_build_inter_predictor(
1661 pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv,
1662 &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3,
1663 mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2));
1664 }
1665 #else
1666 vp9_build_inter_predictor(
1667 pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv,
1668 &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3,
1669 mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2));
1670 #endif // CONFIG_VP9_HIGHBITDEPTH
1671 }
1672
1673 #if CONFIG_VP9_HIGHBITDEPTH
1674 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1675 vpx_highbd_subtract_block(
1676 height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
1677 8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
1678 } else {
1679 vpx_subtract_block(height, width,
1680 vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
1681 8, src, p->src.stride, dst, pd->dst.stride);
1682 }
1683 #else
1684 vpx_subtract_block(height, width,
1685 vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
1686 8, src, p->src.stride, dst, pd->dst.stride);
1687 #endif // CONFIG_VP9_HIGHBITDEPTH
1688
1689 k = i;
1690 for (idy = 0; idy < height / 4; ++idy) {
1691 for (idx = 0; idx < width / 4; ++idx) {
1692 #if CONFIG_VP9_HIGHBITDEPTH
1693 const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
1694 #endif
1695 int64_t ssz, rd, rd1, rd2;
1696 tran_low_t *coeff, *qcoeff, *dqcoeff;
1697 uint16_t *eob;
1698 int coeff_ctx;
1699 k += (idy * 2 + idx);
1700 coeff_ctx = combine_entropy_contexts(ta[k & 1], tl[k >> 1]);
1701 coeff = BLOCK_OFFSET(p->coeff, k);
1702 qcoeff = BLOCK_OFFSET(p->qcoeff, k);
1703 dqcoeff = BLOCK_OFFSET(pd->dqcoeff, k);
1704 eob = &p->eobs[k];
1705
1706 x->fwd_txfm4x4(vp9_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
1707 coeff, 8);
1708 #if CONFIG_VP9_HIGHBITDEPTH
1709 vpx_highbd_quantize_b(coeff, 4 * 4, p, qcoeff, dqcoeff, pd->dequant, eob,
1710 so);
1711 thisdistortion += vp9_highbd_block_error_dispatch(
1712 coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz, bd);
1713 #else
1714 vpx_quantize_b(coeff, 4 * 4, p, qcoeff, dqcoeff, pd->dequant, eob, so);
1715 thisdistortion +=
1716 vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz);
1717 #endif // CONFIG_VP9_HIGHBITDEPTH
1718 thissse += ssz;
1719 thisrate += cost_coeffs(x, 0, k, TX_4X4, coeff_ctx, so->scan,
1720 so->neighbors, cpi->sf.use_fast_coef_costing);
1721 ta[k & 1] = tl[k >> 1] = (x->plane[0].eobs[k] > 0) ? 1 : 0;
1722 rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
1723 rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
1724 rd = VPXMIN(rd1, rd2);
1725 if (rd >= best_yrd) return INT64_MAX;
1726 }
1727 }
1728
1729 *distortion = thisdistortion >> 2;
1730 *labelyrate = thisrate;
1731 *sse = thissse >> 2;
1732
1733 return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
1734 }
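// Early-exit rationale (added note): rd1 is the cost of coding the residual
// accumulated so far, rd2 the cost of coding nothing and keeping the full
// SSE; VPXMIN(rd1, rd2) is therefore a lower bound on the final cost of the
// segment, and once it exceeds best_yrd no outcome of the remaining loop
// iterations can win, so INT64_MAX is returned. The >> 2 appears to undo the
// fixed-point scaling of the transform-domain error (distortion and sse get
// the same shift at the function exit).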
1735 #endif // !CONFIG_REALTIME_ONLY
1736
1737 typedef struct {
1738 int eobs;
1739 int brate;
1740 int byrate;
1741 int64_t bdist;
1742 int64_t bsse;
1743 int64_t brdcost;
1744 int_mv mvs[2];
1745 ENTROPY_CONTEXT ta[2];
1746 ENTROPY_CONTEXT tl[2];
1747 } SEG_RDSTAT;
1748
1749 typedef struct {
1750 int_mv *ref_mv[2];
1751 int_mv mvp;
1752
1753 int64_t segment_rd;
1754 int r;
1755 int64_t d;
1756 int64_t sse;
1757 int segment_yrate;
1758 PREDICTION_MODE modes[4];
1759 SEG_RDSTAT rdstat[4][INTER_MODES];
1760 int mvthresh;
1761 } BEST_SEG_INFO;
1762
1763 #if !CONFIG_REALTIME_ONLY
1764 static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
1765 return (mv->row >> 3) < mv_limits->row_min ||
1766 (mv->row >> 3) > mv_limits->row_max ||
1767 (mv->col >> 3) < mv_limits->col_min ||
1768 (mv->col >> 3) > mv_limits->col_max;
1769 }
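// Units note (added): mv->row/col are in 1/8-pel units while mv_limits are
// in full pels, hence the >> 3. For example, row == -37 eighth-pels shifts
// to -5 full pels (arithmetic shift floors toward -infinity) before the
// row_min/row_max comparison.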
1770
1771 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1772 MODE_INFO *const mi = x->e_mbd.mi[0];
1773 struct macroblock_plane *const p = &x->plane[0];
1774 struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
1775
1776 p->src.buf =
1777 &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
1778 assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
1779 pd->pre[0].buf =
1780 &pd->pre[0].buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[0].stride)];
1781 if (has_second_ref(mi))
1782 pd->pre[1].buf =
1783 &pd->pre[1]
1784 .buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[1].stride)];
1785 }
1786
1787 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1788 struct buf_2d orig_pre[2]) {
1789 MODE_INFO *mi = x->e_mbd.mi[0];
1790 x->plane[0].src = orig_src;
1791 x->e_mbd.plane[0].pre[0] = orig_pre[0];
1792 if (has_second_ref(mi)) x->e_mbd.plane[0].pre[1] = orig_pre[1];
1793 }
1794
1795 static INLINE int mv_has_subpel(const MV *mv) {
1796 return (mv->row & 0x0F) || (mv->col & 0x0F);
1797 }
1798
1799 // Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
1800 // TODO(aconverse): Find out if this is still productive, then clean up or remove.
1801 static int check_best_zero_mv(const VP9_COMP *cpi,
1802 const uint8_t mode_context[MAX_REF_FRAMES],
1803 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1804 int this_mode,
1805 const MV_REFERENCE_FRAME ref_frames[2]) {
1806 if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
1807 frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
1808 (ref_frames[1] == NO_REF_FRAME ||
1809 frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
1810 int rfc = mode_context[ref_frames[0]];
1811 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
1812 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
1813 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
1814
1815 if (this_mode == NEARMV) {
1816 if (c1 > c3) return 0;
1817 } else if (this_mode == NEARESTMV) {
1818 if (c2 > c3) return 0;
1819 } else {
1820 assert(this_mode == ZEROMV);
1821 if (ref_frames[1] == NO_REF_FRAME) {
1822 if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
1823 (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
1824 return 0;
1825 } else {
1826 if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
1827 frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
1828 (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
1829 frame_mv[NEARMV][ref_frames[1]].as_int == 0))
1830 return 0;
1831 }
1832 }
1833 }
1834 return 1;
1835 }
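// Worked example (added): suppose the mode context makes NEARMV's mode bits
// dearer than ZEROMV's (c1 > c3) and NEARMV's candidate vector is (0,0).
// Coding NEARMV would then spend more rate than ZEROMV for the identical
// zero-motion prediction, so the function returns 0 and the caller skips
// NEARMV. The compound branch applies the same reasoning, requiring zero
// vectors on both references.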
1836
1837 static INLINE int skip_iters(int_mv iter_mvs[][2], int ite, int id) {
1838 if (ite >= 2 && iter_mvs[ite - 2][!id].as_int == iter_mvs[ite][!id].as_int) {
1839 int_mv cur_fullpel_mv, prev_fullpel_mv;
1840 cur_fullpel_mv.as_mv.row = iter_mvs[ite][id].as_mv.row >> 3;
1841 cur_fullpel_mv.as_mv.col = iter_mvs[ite][id].as_mv.col >> 3;
1842 prev_fullpel_mv.as_mv.row = iter_mvs[ite - 2][id].as_mv.row >> 3;
1843 prev_fullpel_mv.as_mv.col = iter_mvs[ite - 2][id].as_mv.col >> 3;
1844 if (cur_fullpel_mv.as_int == prev_fullpel_mv.as_int) return 1;
1845 }
1846 return 0;
1847 }
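// Convergence note (added): iteration ite refines reference `id` while the
// other reference is held fixed. If the other reference's mv is bit-exact
// what it was two iterations ago and this reference's full-pel position is
// also unchanged, the alternating search has cycled, so the caller breaks
// out rather than repeating the same refinement.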
1848
1849 // Compares motion vector and mode rate of current mode and given mode.
1850 static INLINE int compare_mv_mode_rate(MV this_mv, MV mode_mv,
1851 int this_mode_rate, int mode_rate,
1852 int mv_thresh) {
1853 const int mv_diff =
1854 abs(mode_mv.col - this_mv.col) + abs(mode_mv.row - this_mv.row);
1855 if (mv_diff <= mv_thresh && mode_rate < this_mode_rate) return 1;
1856 return 0;
1857 }
1858
1859 // Skips single reference inter modes NEARMV and ZEROMV based on motion vector
1860 // difference and mode rate.
1861 static INLINE int skip_single_mode_based_on_mode_rate(
1862 int_mv (*mode_mv)[MAX_REF_FRAMES], int *single_mode_rate, int this_mode,
1863 int ref0, int this_mode_rate, int best_mode_index) {
1864 MV this_mv = mode_mv[this_mode][ref0].as_mv;
1865 const int mv_thresh = 3;
1866
1867 // Pruning is not applicable for NEARESTMV or NEWMV modes.
1868 if (this_mode == NEARESTMV || this_mode == NEWMV) return 0;
1869 // Pruning is not done when the reference frame of the mode is the same
1870 // as the best reference so far.
1871 if (best_mode_index > 0 &&
1872 ref0 == vp9_mode_order[best_mode_index].ref_frame[0])
1873 return 0;
1874
1875 // Check absolute mv difference and mode rate of current mode w.r.t NEARESTMV
1876 if (compare_mv_mode_rate(
1877 this_mv, mode_mv[NEARESTMV][ref0].as_mv, this_mode_rate,
1878 single_mode_rate[INTER_OFFSET(NEARESTMV)], mv_thresh))
1879 return 1;
1880
1881 // Check absolute mv difference and mode rate of current mode w.r.t NEWMV
1882 if (compare_mv_mode_rate(this_mv, mode_mv[NEWMV][ref0].as_mv, this_mode_rate,
1883 single_mode_rate[INTER_OFFSET(NEWMV)], mv_thresh))
1884 return 1;
1885
1886 // Pruning w.r.t NEARMV is applicable only for ZEROMV mode
1887 if (this_mode == NEARMV) return 0;
1888 // Check absolute mv difference and mode rate of current mode w.r.t NEARMV
1889 if (compare_mv_mode_rate(this_mv, mode_mv[NEARMV][ref0].as_mv, this_mode_rate,
1890 single_mode_rate[INTER_OFFSET(NEARMV)], mv_thresh))
1891 return 1;
1892 return 0;
1893 }
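// Pruning intuition (added note): if NEARMV's (or ZEROMV's) vector lies
// within mv_thresh (3, as a 1/8-pel sum of absolute differences) of an
// already-evaluated NEARESTMV or NEWMV vector yet needs more mode-rate
// bits, it can only give a near-identical prediction at higher rate, so
// the mode is skipped.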
1894
1895 #define MAX_JOINT_MV_SEARCH_ITERS 4
1896 static INLINE int get_joint_search_iters(int sf_level, BLOCK_SIZE bsize) {
1897 int num_iters = MAX_JOINT_MV_SEARCH_ITERS; // sf_level = 0
1898 if (sf_level >= 2)
1899 num_iters = 0;
1900 else if (sf_level >= 1)
1901 num_iters = bsize < BLOCK_8X8
1902 ? 0
1903 : (bsize <= BLOCK_16X16 ? 2 : MAX_JOINT_MV_SEARCH_ITERS);
1904 return num_iters;
1905 }
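// Decision table (added for reference):
//   sf_level >= 2                  -> 0 iterations (joint search disabled)
//   sf_level == 1, bsize <  8x8    -> 0
//   sf_level == 1, bsize <= 16x16  -> 2
//   sf_level == 1, bsize >  16x16  -> 4 (MAX_JOINT_MV_SEARCH_ITERS)
//   sf_level == 0                  -> 4 (MAX_JOINT_MV_SEARCH_ITERS)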
1906
1907 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
1908 int_mv *frame_mv, int mi_row, int mi_col,
1909 int_mv single_newmv[MAX_REF_FRAMES],
1910 int *rate_mv, int num_iters) {
1911 const VP9_COMMON *const cm = &cpi->common;
1912 const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
1913 const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
1914 MACROBLOCKD *xd = &x->e_mbd;
1915 MODE_INFO *mi = xd->mi[0];
1916 const int refs[2] = { mi->ref_frame[0],
1917 mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1] };
1918 int_mv ref_mv[2];
1919 int_mv iter_mvs[MAX_JOINT_MV_SEARCH_ITERS][2];
1920 int ite, ref;
1921 const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter];
1922 struct scale_factors sf;
1923
1924 // Do joint motion search in compound mode to get more accurate mv.
1925 struct buf_2d backup_yv12[2][MAX_MB_PLANE];
1926 uint32_t last_besterr[2] = { UINT_MAX, UINT_MAX };
1927 const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
1928 vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]),
1929 vp9_get_scaled_ref_frame(cpi, mi->ref_frame[1])
1930 };
1931
1932 // Prediction buffer from second frame.
1933 #if CONFIG_VP9_HIGHBITDEPTH
1934 DECLARE_ALIGNED(32, uint16_t, second_pred_alloc_16[64 * 64]);
1935 uint8_t *second_pred;
1936 #else
1937 DECLARE_ALIGNED(32, uint8_t, second_pred[64 * 64]);
1938 #endif // CONFIG_VP9_HIGHBITDEPTH
1939
1940 // Check that the number of iterations does not exceed the max
1941 assert(num_iters <= MAX_JOINT_MV_SEARCH_ITERS);
1942
1943 for (ref = 0; ref < 2; ++ref) {
1944 ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];
1945
1946 if (scaled_ref_frame[ref]) {
1947 int i;
1948 // Swap out the reference frame for a version that's been scaled to
1949 // match the resolution of the current frame, allowing the existing
1950 // motion search code to be used without additional modifications.
1951 for (i = 0; i < MAX_MB_PLANE; i++)
1952 backup_yv12[ref][i] = xd->plane[i].pre[ref];
1953 vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
1954 NULL);
1955 }
1956
1957 frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
1958 iter_mvs[0][ref].as_int = single_newmv[refs[ref]].as_int;
1959 }
1960
1961 // Since we have scaled the reference frames to match the size of the current
1962 // frame we must use a unit scaling factor during mode selection.
1963 #if CONFIG_VP9_HIGHBITDEPTH
1964 vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
1965 cm->height, cm->use_highbitdepth);
1966 #else
1967 vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
1968 cm->height);
1969 #endif // CONFIG_VP9_HIGHBITDEPTH
1970
1971 // Run the joint search iteratively, alternating between reference frames,
1972 // and break out of the search loop when it can't find a better mv.
1973 for (ite = 0; ite < num_iters; ite++) {
1974 struct buf_2d ref_yv12[2];
1975 uint32_t bestsme = UINT_MAX;
1976 int sadpb = x->sadperbit16;
1977 MV tmp_mv;
1978 int search_range = 3;
1979
1980 const MvLimits tmp_mv_limits = x->mv_limits;
1981 int id = ite % 2; // Even iterations search in the first reference frame,
1982 // odd iterations search in the second. The predictor
1983 // found for the 'other' reference frame is factored in.
1984
1985 // Skip further iterations of search if in the previous iteration, the
1986 // motion vector of the searched ref frame is unchanged, and the other ref
1987 // frame's full-pixel mv is unchanged.
1988 if (skip_iters(iter_mvs, ite, id)) break;
1989
1990 // Initialized here because of a compiler problem in Visual Studio.
1991 ref_yv12[0] = xd->plane[0].pre[0];
1992 ref_yv12[1] = xd->plane[0].pre[1];
1993
1994 // Get the prediction block from the 'other' reference frame.
1995 #if CONFIG_VP9_HIGHBITDEPTH
1996 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1997 second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
1998 vp9_highbd_build_inter_predictor(
1999 CONVERT_TO_SHORTPTR(ref_yv12[!id].buf), ref_yv12[!id].stride,
2000 second_pred_alloc_16, pw, &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0,
2001 kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd->bd);
2002 } else {
2003 second_pred = (uint8_t *)second_pred_alloc_16;
2004 vp9_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride,
2005 second_pred, pw, &frame_mv[refs[!id]].as_mv,
2006 &sf, pw, ph, 0, kernel, MV_PRECISION_Q3,
2007 mi_col * MI_SIZE, mi_row * MI_SIZE);
2008 }
2009 #else
2010 vp9_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride,
2011 second_pred, pw, &frame_mv[refs[!id]].as_mv, &sf,
2012 pw, ph, 0, kernel, MV_PRECISION_Q3,
2013 mi_col * MI_SIZE, mi_row * MI_SIZE);
2014 #endif // CONFIG_VP9_HIGHBITDEPTH
2015
2016 // Do compound motion search on the current reference frame.
2017 if (id) xd->plane[0].pre[0] = ref_yv12[id];
2018 vp9_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv);
2019
2020 // Use the mv result from the single mode as mv predictor.
2021 tmp_mv = frame_mv[refs[id]].as_mv;
2022
2023 tmp_mv.col >>= 3;
2024 tmp_mv.row >>= 3;
2025
2026 // Small-range full-pixel motion search.
2027 bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, search_range,
2028 &cpi->fn_ptr[bsize], &ref_mv[id].as_mv,
2029 second_pred);
2030 if (bestsme < UINT_MAX)
2031 bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
2032 second_pred, &cpi->fn_ptr[bsize], 1);
2033
2034 x->mv_limits = tmp_mv_limits;
2035
2036 if (bestsme < UINT_MAX) {
2037 uint32_t dis; /* TODO: use dis in distortion calculation later. */
2038 uint32_t sse;
2039 bestsme = cpi->find_fractional_mv_step(
2040 x, &tmp_mv, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
2041 x->errorperbit, &cpi->fn_ptr[bsize], 0,
2042 cpi->sf.mv.subpel_search_level, NULL, x->nmvjointcost, x->mvcost,
2043 &dis, &sse, second_pred, pw, ph, cpi->sf.use_accurate_subpel_search);
2044 }
2045
2046 // Restore the pointer to the first (possibly scaled) prediction buffer.
2047 if (id) xd->plane[0].pre[0] = ref_yv12[0];
2048
2049 if (bestsme < last_besterr[id]) {
2050 frame_mv[refs[id]].as_mv = tmp_mv;
2051 last_besterr[id] = bestsme;
2052 } else {
2053 break;
2054 }
2055 if (ite < num_iters - 1) {
2056 iter_mvs[ite + 1][0].as_int = frame_mv[refs[0]].as_int;
2057 iter_mvs[ite + 1][1].as_int = frame_mv[refs[1]].as_int;
2058 }
2059 }
2060
2061 *rate_mv = 0;
2062
2063 for (ref = 0; ref < 2; ++ref) {
2064 if (scaled_ref_frame[ref]) {
2065 // Restore the prediction frame pointers to their unscaled versions.
2066 int i;
2067 for (i = 0; i < MAX_MB_PLANE; i++)
2068 xd->plane[i].pre[ref] = backup_yv12[ref][i];
2069 }
2070
2071 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
2072 &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
2073 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2074 }
2075 }
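// Alternation summary (added note): even iterations refine ref 0 against a
// fixed compound predictor built from ref 1's current mv; odd iterations do
// the reverse. last_besterr[] keeps each reference's error monotone, so a
// pass that fails to beat the previous error for the same reference ends
// the search early.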
2076
2077 static int64_t rd_pick_best_sub8x8_mode(
2078 VP9_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv,
2079 int_mv *second_best_ref_mv, int64_t best_rd_so_far, int *returntotrate,
2080 int *returnyrate, int64_t *returndistortion, int *skippable, int64_t *psse,
2081 int mvthresh, int_mv seg_mvs[4][MAX_REF_FRAMES], BEST_SEG_INFO *bsi_buf,
2082 int filter_idx, int mi_row, int mi_col) {
2083 int i;
2084 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
2085 MACROBLOCKD *xd = &x->e_mbd;
2086 MODE_INFO *mi = xd->mi[0];
2087 int mode_idx;
2088 int k, br = 0, idx, idy;
2089 int64_t bd = 0, block_sse = 0;
2090 PREDICTION_MODE this_mode;
2091 VP9_COMMON *cm = &cpi->common;
2092 struct macroblock_plane *const p = &x->plane[0];
2093 struct macroblockd_plane *const pd = &xd->plane[0];
2094 const int label_count = 4;
2095 int64_t this_segment_rd = 0;
2096 int label_mv_thresh;
2097 int segmentyrate = 0;
2098 const BLOCK_SIZE bsize = mi->sb_type;
2099 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
2100 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
2101 const int pw = num_4x4_blocks_wide << 2;
2102 const int ph = num_4x4_blocks_high << 2;
2103 ENTROPY_CONTEXT t_above[2], t_left[2];
2104 int subpelmv = 1, have_ref = 0;
2105 SPEED_FEATURES *const sf = &cpi->sf;
2106 const int has_second_rf = has_second_ref(mi);
2107 const int inter_mode_mask = sf->inter_mode_mask[bsize];
2108 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
2109
2110 vp9_zero(*bsi);
2111
2112 bsi->segment_rd = best_rd_so_far;
2113 bsi->ref_mv[0] = best_ref_mv;
2114 bsi->ref_mv[1] = second_best_ref_mv;
2115 bsi->mvp.as_int = best_ref_mv->as_int;
2116 bsi->mvthresh = mvthresh;
2117
2118 for (i = 0; i < 4; i++) bsi->modes[i] = ZEROMV;
2119
2120 memcpy(t_above, pd->above_context, sizeof(t_above));
2121 memcpy(t_left, pd->left_context, sizeof(t_left));
2122
2123 // A multiplier of 64 would make this threshold very large, effectively
2124 // meaning that mvs on segments are almost never checked; the multiplier
2125 // of 1 used here makes the per-label mv threshold roughly equal to the
2126 // macroblock-level one.
2127 label_mv_thresh = 1 * bsi->mvthresh / label_count;
2128
2129 // Segmentation method overheads
2130 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
2131 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
2132 // TODO(jingning,rbultje): rewrite the rate-distortion optimization
2133 // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
2134 int_mv mode_mv[MB_MODE_COUNT][2];
2135 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
2136 PREDICTION_MODE mode_selected = ZEROMV;
2137 int64_t best_rd = INT64_MAX;
2138 const int block = idy * 2 + idx;
2139 int ref;
2140
2141 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
2142 const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
2143 frame_mv[ZEROMV][frame].as_int = 0;
2144 vp9_append_sub8x8_mvs_for_idx(
2145 cm, xd, block, ref, mi_row, mi_col, &frame_mv[NEARESTMV][frame],
2146 &frame_mv[NEARMV][frame], mbmi_ext->mode_context);
2147 }
2148
2149 // search for the best motion vector on this segment
2150 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
2151 const struct buf_2d orig_src = x->plane[0].src;
2152 struct buf_2d orig_pre[2];
2153
2154 mode_idx = INTER_OFFSET(this_mode);
2155 bsi->rdstat[block][mode_idx].brdcost = INT64_MAX;
2156 if (!(inter_mode_mask & (1 << this_mode))) continue;
2157
2158 if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
2159 this_mode, mi->ref_frame))
2160 continue;
2161
2162 memcpy(orig_pre, pd->pre, sizeof(orig_pre));
2163 memcpy(bsi->rdstat[block][mode_idx].ta, t_above,
2164 sizeof(bsi->rdstat[block][mode_idx].ta));
2165 memcpy(bsi->rdstat[block][mode_idx].tl, t_left,
2166 sizeof(bsi->rdstat[block][mode_idx].tl));
2167
2168 // motion search for newmv (single predictor case only)
2169 if (!has_second_rf && this_mode == NEWMV &&
2170 seg_mvs[block][mi->ref_frame[0]].as_int == INVALID_MV) {
2171 MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
2172 int step_param = 0;
2173 uint32_t bestsme = UINT_MAX;
2174 int sadpb = x->sadperbit4;
2175 MV mvp_full;
2176 int max_mv;
2177 int cost_list[5];
2178 const MvLimits tmp_mv_limits = x->mv_limits;
2179
2180 /* Is the best so far sufficiently good that we can't justify doing
2181 * a new motion search? */
2182 if (best_rd < label_mv_thresh) break;
2183
2184 if (cpi->oxcf.mode != BEST) {
2185 // use previous block's result as next block's MV predictor.
2186 if (block > 0) {
2187 bsi->mvp.as_int = mi->bmi[block - 1].as_mv[0].as_int;
2188 if (block == 2)
2189 bsi->mvp.as_int = mi->bmi[block - 2].as_mv[0].as_int;
2190 }
2191 }
2192 if (block == 0)
2193 max_mv = x->max_mv_context[mi->ref_frame[0]];
2194 else
2195 max_mv =
2196 VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
2197
2198 if (sf->mv.auto_mv_step_size && cm->show_frame) {
2199 // Take a weighted average of the step_params based on the last frame's
2200 // max mv magnitude and the best ref mvs of the current block for
2201 // the given reference.
2202 step_param =
2203 (vp9_init_search_range(max_mv) + cpi->mv_step_param) / 2;
2204 } else {
2205 step_param = cpi->mv_step_param;
2206 }
2207
2208 mvp_full.row = bsi->mvp.as_mv.row >> 3;
2209 mvp_full.col = bsi->mvp.as_mv.col >> 3;
2210
2211 if (sf->adaptive_motion_search) {
2212 if (x->pred_mv[mi->ref_frame[0]].row != INT16_MAX &&
2213 x->pred_mv[mi->ref_frame[0]].col != INT16_MAX) {
2214 mvp_full.row = x->pred_mv[mi->ref_frame[0]].row >> 3;
2215 mvp_full.col = x->pred_mv[mi->ref_frame[0]].col >> 3;
2216 }
2217 step_param = VPXMAX(step_param, 8);
2218 }
2219
2220 // adjust src pointer for this block
2221 mi_buf_shift(x, block);
2222
2223 vp9_set_mv_search_range(&x->mv_limits, &bsi->ref_mv[0]->as_mv);
2224
2225 bestsme = vp9_full_pixel_search(
2226 cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method,
2227 sadpb,
2228 sf->mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
2229 &bsi->ref_mv[0]->as_mv, new_mv, INT_MAX, 1);
2230
2231 x->mv_limits = tmp_mv_limits;
2232
2233 if (bestsme < UINT_MAX) {
2234 uint32_t distortion;
2235 cpi->find_fractional_mv_step(
2236 x, new_mv, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv,
2237 x->errorperbit, &cpi->fn_ptr[bsize], sf->mv.subpel_force_stop,
2238 sf->mv.subpel_search_level, cond_cost_list(cpi, cost_list),
2239 x->nmvjointcost, x->mvcost, &distortion,
2240 &x->pred_sse[mi->ref_frame[0]], NULL, pw, ph,
2241 cpi->sf.use_accurate_subpel_search);
2242
2243 // save motion search result for use in compound prediction
2244 seg_mvs[block][mi->ref_frame[0]].as_mv = *new_mv;
2245 }
2246
2247 x->pred_mv[mi->ref_frame[0]] = *new_mv;
2248
2249 // restore src pointers
2250 mi_buf_restore(x, orig_src, orig_pre);
2251 }
2252
2253 if (has_second_rf) {
2254 if (seg_mvs[block][mi->ref_frame[1]].as_int == INVALID_MV ||
2255 seg_mvs[block][mi->ref_frame[0]].as_int == INVALID_MV)
2256 continue;
2257 }
2258
2259 if (has_second_rf && this_mode == NEWMV &&
2260 mi->interp_filter == EIGHTTAP) {
2261 // Decide number of joint motion search iterations
2262 const int num_joint_search_iters = get_joint_search_iters(
2263 cpi->sf.comp_inter_joint_search_iter_level, bsize);
2264 // adjust src pointers
2265 mi_buf_shift(x, block);
2266 if (num_joint_search_iters) {
2267 int rate_mv;
2268 joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row,
2269 mi_col, seg_mvs[block], &rate_mv,
2270 num_joint_search_iters);
2271 seg_mvs[block][mi->ref_frame[0]].as_int =
2272 frame_mv[this_mode][mi->ref_frame[0]].as_int;
2273 seg_mvs[block][mi->ref_frame[1]].as_int =
2274 frame_mv[this_mode][mi->ref_frame[1]].as_int;
2275 }
2276 // restore src pointers
2277 mi_buf_restore(x, orig_src, orig_pre);
2278 }
2279
2280 bsi->rdstat[block][mode_idx].brate = set_and_cost_bmi_mvs(
2281 cpi, x, xd, block, this_mode, mode_mv[this_mode], frame_mv,
2282 seg_mvs[block], bsi->ref_mv, x->nmvjointcost, x->mvcost);
2283
2284 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
2285 bsi->rdstat[block][mode_idx].mvs[ref].as_int =
2286 mode_mv[this_mode][ref].as_int;
2287 if (num_4x4_blocks_wide > 1)
2288 bsi->rdstat[block + 1][mode_idx].mvs[ref].as_int =
2289 mode_mv[this_mode][ref].as_int;
2290 if (num_4x4_blocks_high > 1)
2291 bsi->rdstat[block + 2][mode_idx].mvs[ref].as_int =
2292 mode_mv[this_mode][ref].as_int;
2293 }
2294
2295 // Trap vectors that reach beyond the UMV borders
2296 if (mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][0].as_mv) ||
2297 (has_second_rf &&
2298 mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][1].as_mv)))
2299 continue;
2300
2301 if (filter_idx > 0) {
2302 BEST_SEG_INFO *ref_bsi = bsi_buf;
2303 subpelmv = 0;
2304 have_ref = 1;
2305
2306 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
2307 subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
2308 have_ref &= mode_mv[this_mode][ref].as_int ==
2309 ref_bsi->rdstat[block][mode_idx].mvs[ref].as_int;
2310 }
2311
2312 if (filter_idx > 1 && !subpelmv && !have_ref) {
2313 ref_bsi = bsi_buf + 1;
2314 have_ref = 1;
2315 for (ref = 0; ref < 1 + has_second_rf; ++ref)
2316 have_ref &= mode_mv[this_mode][ref].as_int ==
2317 ref_bsi->rdstat[block][mode_idx].mvs[ref].as_int;
2318 }
2319
2320 if (!subpelmv && have_ref &&
2321 ref_bsi->rdstat[block][mode_idx].brdcost < INT64_MAX) {
2322 memcpy(&bsi->rdstat[block][mode_idx],
2323 &ref_bsi->rdstat[block][mode_idx], sizeof(SEG_RDSTAT));
2324 if (num_4x4_blocks_wide > 1)
2325 bsi->rdstat[block + 1][mode_idx].eobs =
2326 ref_bsi->rdstat[block + 1][mode_idx].eobs;
2327 if (num_4x4_blocks_high > 1)
2328 bsi->rdstat[block + 2][mode_idx].eobs =
2329 ref_bsi->rdstat[block + 2][mode_idx].eobs;
2330
2331 if (bsi->rdstat[block][mode_idx].brdcost < best_rd) {
2332 mode_selected = this_mode;
2333 best_rd = bsi->rdstat[block][mode_idx].brdcost;
2334 }
2335 continue;
2336 }
2337 }
2338
2339 bsi->rdstat[block][mode_idx].brdcost = encode_inter_mb_segment(
2340 cpi, x, bsi->segment_rd - this_segment_rd, block,
2341 &bsi->rdstat[block][mode_idx].byrate,
2342 &bsi->rdstat[block][mode_idx].bdist,
2343 &bsi->rdstat[block][mode_idx].bsse, bsi->rdstat[block][mode_idx].ta,
2344 bsi->rdstat[block][mode_idx].tl, mi_row, mi_col);
2345 if (bsi->rdstat[block][mode_idx].brdcost < INT64_MAX) {
2346 bsi->rdstat[block][mode_idx].brdcost += RDCOST(
2347 x->rdmult, x->rddiv, bsi->rdstat[block][mode_idx].brate, 0);
2348 bsi->rdstat[block][mode_idx].brate +=
2349 bsi->rdstat[block][mode_idx].byrate;
2350 bsi->rdstat[block][mode_idx].eobs = p->eobs[block];
2351 if (num_4x4_blocks_wide > 1)
2352 bsi->rdstat[block + 1][mode_idx].eobs = p->eobs[block + 1];
2353 if (num_4x4_blocks_high > 1)
2354 bsi->rdstat[block + 2][mode_idx].eobs = p->eobs[block + 2];
2355 }
2356
2357 if (bsi->rdstat[block][mode_idx].brdcost < best_rd) {
2358 mode_selected = this_mode;
2359 best_rd = bsi->rdstat[block][mode_idx].brdcost;
2360 }
2361 } /*for each 4x4 mode*/
2362
2363 if (best_rd == INT64_MAX) {
2364 int iy, midx;
2365 for (iy = block + 1; iy < 4; ++iy)
2366 for (midx = 0; midx < INTER_MODES; ++midx)
2367 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2368 bsi->segment_rd = INT64_MAX;
2369 return INT64_MAX;
2370 }
2371
2372 mode_idx = INTER_OFFSET(mode_selected);
2373 memcpy(t_above, bsi->rdstat[block][mode_idx].ta, sizeof(t_above));
2374 memcpy(t_left, bsi->rdstat[block][mode_idx].tl, sizeof(t_left));
2375
2376 set_and_cost_bmi_mvs(cpi, x, xd, block, mode_selected,
2377 mode_mv[mode_selected], frame_mv, seg_mvs[block],
2378 bsi->ref_mv, x->nmvjointcost, x->mvcost);
2379
2380 br += bsi->rdstat[block][mode_idx].brate;
2381 bd += bsi->rdstat[block][mode_idx].bdist;
2382 block_sse += bsi->rdstat[block][mode_idx].bsse;
2383 segmentyrate += bsi->rdstat[block][mode_idx].byrate;
2384 this_segment_rd += bsi->rdstat[block][mode_idx].brdcost;
2385
2386 if (this_segment_rd > bsi->segment_rd) {
2387 int iy, midx;
2388 for (iy = block + 1; iy < 4; ++iy)
2389 for (midx = 0; midx < INTER_MODES; ++midx)
2390 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2391 bsi->segment_rd = INT64_MAX;
2392 return INT64_MAX;
2393 }
2394 }
2395 } /* for each label */
2396
2397 bsi->r = br;
2398 bsi->d = bd;
2399 bsi->segment_yrate = segmentyrate;
2400 bsi->segment_rd = this_segment_rd;
2401 bsi->sse = block_sse;
2402
2403 // update the coding decisions
2404 for (k = 0; k < 4; ++k) bsi->modes[k] = mi->bmi[k].as_mode;
2405
2406 if (bsi->segment_rd > best_rd_so_far) return INT64_MAX;
2407 /* set it to the best */
2408 for (i = 0; i < 4; i++) {
2409 mode_idx = INTER_OFFSET(bsi->modes[i]);
2410 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2411 if (has_second_ref(mi))
2412 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2413 x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2414 mi->bmi[i].as_mode = bsi->modes[i];
2415 }
2416
2417 /*
2418 * used to set mbmi->mv.as_int
2419 */
2420 *returntotrate = bsi->r;
2421 *returndistortion = bsi->d;
2422 *returnyrate = bsi->segment_yrate;
2423 *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
2424 *psse = bsi->sse;
2425 mi->mode = bsi->modes[3];
2426
2427 return bsi->segment_rd;
2428 }
2429
2430 static void estimate_ref_frame_costs(const VP9_COMMON *cm,
2431 const MACROBLOCKD *xd, int segment_id,
2432 unsigned int *ref_costs_single,
2433 unsigned int *ref_costs_comp,
2434 vpx_prob *comp_mode_p) {
2435 int seg_ref_active =
2436 segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
2437 if (seg_ref_active) {
2438 memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
2439 memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
2440 *comp_mode_p = 128;
2441 } else {
2442 vpx_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
2443 vpx_prob comp_inter_p = 128;
2444
2445 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
2446 comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
2447 *comp_mode_p = comp_inter_p;
2448 } else {
2449 *comp_mode_p = 128;
2450 }
2451
2452 ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
2453
2454 if (cm->reference_mode != COMPOUND_REFERENCE) {
2455 vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
2456 vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
2457 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2458
2459 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2460 base_cost += vp9_cost_bit(comp_inter_p, 0);
2461
2462 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
2463 ref_costs_single[ALTREF_FRAME] = base_cost;
2464 ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
2465 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2466 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2467 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
2468 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
2469 } else {
2470 ref_costs_single[LAST_FRAME] = 512;
2471 ref_costs_single[GOLDEN_FRAME] = 512;
2472 ref_costs_single[ALTREF_FRAME] = 512;
2473 }
2474 if (cm->reference_mode != SINGLE_REFERENCE) {
2475 vpx_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
2476 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2477
2478 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2479 base_cost += vp9_cost_bit(comp_inter_p, 1);
2480
2481 ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
2482 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
2483 } else {
2484 ref_costs_comp[LAST_FRAME] = 512;
2485 ref_costs_comp[GOLDEN_FRAME] = 512;
2486 }
2487 }
2488 }
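// Cost-units note (added; an inference from the 512 fallbacks above):
// vp9_cost_bit() returns rate in fixed-point units where one bit appears to
// be 512, so the constant 512 acts as a flat one-bit placeholder whenever a
// branch of the reference tree cannot occur (e.g. single-ref costs under
// pure COMPOUND_REFERENCE). The tree mirrors the bitstream: the intra/inter
// bit, an optional comp/single bit, then one or two ref-selection bits.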
2489
2490 static void store_coding_context(
2491 MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
2492 int64_t comp_pred_diff[REFERENCE_MODES],
2493 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS], int skippable) {
2494 MACROBLOCKD *const xd = &x->e_mbd;
2495
2496 // Take a snapshot of the coding context so it can be
2497 // restored if we decide to encode this way
2498 ctx->skip = x->skip;
2499 ctx->skippable = skippable;
2500 ctx->best_mode_index = mode_index;
2501 ctx->mic = *xd->mi[0];
2502 ctx->mbmi_ext = *x->mbmi_ext;
2503 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
2504 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
2505 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
2506
2507 memcpy(ctx->best_filter_diff, best_filter_diff,
2508 sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
2509 }
2510
2511 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
2512 MV_REFERENCE_FRAME ref_frame,
2513 BLOCK_SIZE block_size, int mi_row, int mi_col,
2514 int_mv frame_nearest_mv[MAX_REF_FRAMES],
2515 int_mv frame_near_mv[MAX_REF_FRAMES],
2516 struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
2517 const VP9_COMMON *cm = &cpi->common;
2518 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
2519 MACROBLOCKD *const xd = &x->e_mbd;
2520 MODE_INFO *const mi = xd->mi[0];
2521 int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
2522 const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
2523 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
2524
2525 assert(yv12 != NULL);
2526
2527 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
2528 // use the UV scaling factors.
2529 vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
2530
2531 // Gets an initial list of candidate vectors from neighbours and orders them
2532 vp9_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col,
2533 mbmi_ext->mode_context);
2534
2535 // Candidate refinement carried out at encoder and decoder
2536 vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
2537 &frame_nearest_mv[ref_frame],
2538 &frame_near_mv[ref_frame]);
2539
2540 // Further refinement that is encode side only to test the top few candidates
2541 // in full and choose the best as the centre point for subsequent searches.
2542 // The current implementation doesn't support scaling.
2543 if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
2544 vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
2545 block_size);
2546 }
2547
2548 #if CONFIG_NON_GREEDY_MV
2549 static int ref_frame_to_gf_rf_idx(int ref_frame) {
2550 if (ref_frame == GOLDEN_FRAME) {
2551 return 0;
2552 }
2553 if (ref_frame == LAST_FRAME) {
2554 return 1;
2555 }
2556 if (ref_frame == ALTREF_FRAME) {
2557 return 2;
2558 }
2559 assert(0);
2560 return -1;
2561 }
2562 #endif
2563
2564 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
2565 int mi_row, int mi_col, int_mv *tmp_mv,
2566 int *rate_mv) {
2567 MACROBLOCKD *xd = &x->e_mbd;
2568 const VP9_COMMON *cm = &cpi->common;
2569 MODE_INFO *mi = xd->mi[0];
2570 struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
2571 int step_param;
2572 MV mvp_full;
2573 int ref = mi->ref_frame[0];
2574 MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
2575 const MvLimits tmp_mv_limits = x->mv_limits;
2576 int cost_list[5];
2577 const int best_predmv_idx = x->mv_best_ref_index[ref];
2578 const YV12_BUFFER_CONFIG *scaled_ref_frame =
2579 vp9_get_scaled_ref_frame(cpi, ref);
2580 const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
2581 const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
2582 MV pred_mv[3];
2583
2584 int bestsme = INT_MAX;
2585 #if CONFIG_NON_GREEDY_MV
2586 int gf_group_idx = cpi->twopass.gf_group.index;
2587 int gf_rf_idx = ref_frame_to_gf_rf_idx(ref);
2588 BLOCK_SIZE square_bsize = get_square_block_size(bsize);
2589 int_mv nb_full_mvs[NB_MVS_NUM] = { 0 };
2590 MotionField *motion_field = vp9_motion_field_info_get_motion_field(
2591 &cpi->motion_field_info, gf_group_idx, gf_rf_idx, square_bsize);
2592 const int nb_full_mv_num =
2593 vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs);
2594 const int lambda = (pw * ph) / 4;
2595 assert(pw * ph == lambda << 2);
2596 #else // CONFIG_NON_GREEDY_MV
2597 int sadpb = x->sadperbit16;
2598 #endif // CONFIG_NON_GREEDY_MV
2599
2600 pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
2601 pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
2602 pred_mv[2] = x->pred_mv[ref];
2603
2604 if (scaled_ref_frame) {
2605 int i;
2606 // Swap out the reference frame for a version that's been scaled to
2607 // match the resolution of the current frame, allowing the existing
2608 // motion search code to be used without additional modifications.
2609 for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
2610
2611 vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2612 }
2613
2614 // Work out the size of the first step in the mv step search.
2615 // 0 here gives the maximum-length first step; 1 is VPXMAX >> 1, etc.
2616 if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
2617 // Take a weighted average of the step_params based on the last frame's
2618 // max mv magnitude and that based on the best ref mvs of the current
2619 // block for the given reference.
2620 step_param =
2621 (vp9_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
2622 2;
2623 } else {
2624 step_param = cpi->mv_step_param;
2625 }
2626
2627 if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
2628 const int boffset =
2629 2 * (b_width_log2_lookup[BLOCK_64X64] -
2630 VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
2631 step_param = VPXMAX(step_param, boffset);
2632 }
2633
2634 if (cpi->sf.adaptive_motion_search) {
2635 int bwl = b_width_log2_lookup[bsize];
2636 int bhl = b_height_log2_lookup[bsize];
2637 int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
2638
2639 if (tlevel < 5) step_param += 2;
2640
2641 // pred_mv_sad is not set up for dynamically scaled frames.
2642 if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
2643 int i;
2644 for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
2645 if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
2646 x->pred_mv[ref].row = INT16_MAX;
2647 x->pred_mv[ref].col = INT16_MAX;
2648 tmp_mv->as_int = INVALID_MV;
2649
2650 if (scaled_ref_frame) {
2651 int j;
2652 for (j = 0; j < MAX_MB_PLANE; ++j)
2653 xd->plane[j].pre[0] = backup_yv12[j];
2654 }
2655 return;
2656 }
2657 }
2658 }
2659 }
2660
2661 // Note: MV limits are modified here. Always restore the original values
2662 // after full-pixel motion search.
2663 vp9_set_mv_search_range(&x->mv_limits, &ref_mv);
2664
2665 mvp_full = pred_mv[best_predmv_idx];
2666 mvp_full.col >>= 3;
2667 mvp_full.row >>= 3;
2668
2669 #if CONFIG_NON_GREEDY_MV
2670 bestsme = vp9_full_pixel_diamond_new(cpi, x, bsize, &mvp_full, step_param,
2671 lambda, 1, nb_full_mvs, nb_full_mv_num,
2672 &tmp_mv->as_mv);
2673 #else // CONFIG_NON_GREEDY_MV
2674 bestsme = vp9_full_pixel_search(
2675 cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb,
2676 cond_cost_list(cpi, cost_list), &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
2677 #endif // CONFIG_NON_GREEDY_MV
2678
2679 if (cpi->sf.enhanced_full_pixel_motion_search) {
2680 int i;
2681 for (i = 0; i < 3; ++i) {
2682 int this_me;
2683 MV this_mv;
2684 int diff_row;
2685 int diff_col;
2686 int step;
2687
2688 if (pred_mv[i].row == INT16_MAX || pred_mv[i].col == INT16_MAX) continue;
2689 if (i == best_predmv_idx) continue;
2690
2691 diff_row = ((int)pred_mv[i].row -
2692 pred_mv[i > 0 ? (i - 1) : best_predmv_idx].row) >>
2693 3;
2694 diff_col = ((int)pred_mv[i].col -
2695 pred_mv[i > 0 ? (i - 1) : best_predmv_idx].col) >>
2696 3;
2697 if (diff_row == 0 && diff_col == 0) continue;
2698 if (diff_row < 0) diff_row = -diff_row;
2699 if (diff_col < 0) diff_col = -diff_col;
2700 step = get_msb((diff_row + diff_col + 1) >> 1);
2701 if (step <= 0) continue;
2702
2703 mvp_full = pred_mv[i];
2704 mvp_full.col >>= 3;
2705 mvp_full.row >>= 3;
2706 #if CONFIG_NON_GREEDY_MV
2707 this_me = vp9_full_pixel_diamond_new(
2708 cpi, x, bsize, &mvp_full,
2709 VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), lambda, 1, nb_full_mvs,
2710 nb_full_mv_num, &this_mv);
2711 #else // CONFIG_NON_GREEDY_MV
2712 this_me = vp9_full_pixel_search(
2713 cpi, x, bsize, &mvp_full,
2714 VPXMAX(step_param, MAX_MVSEARCH_STEPS - step),
2715 cpi->sf.mv.search_method, sadpb, cond_cost_list(cpi, cost_list),
2716 &ref_mv, &this_mv, INT_MAX, 1);
2717 #endif // CONFIG_NON_GREEDY_MV
2718 if (this_me < bestsme) {
2719 tmp_mv->as_mv = this_mv;
2720 bestsme = this_me;
2721 }
2722 }
2723 }
2724
2725 x->mv_limits = tmp_mv_limits;
2726
2727 if (bestsme < INT_MAX) {
2728 uint32_t dis; /* TODO: use dis in distortion calculation later. */
2729 cpi->find_fractional_mv_step(
2730 x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
2731 &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
2732 cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),
2733 x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph,
2734 cpi->sf.use_accurate_subpel_search);
2735 }
2736 *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
2737 x->mvcost, MV_COST_WEIGHT);
2738
2739 x->pred_mv[ref] = tmp_mv->as_mv;
2740
2741 if (scaled_ref_frame) {
2742 int i;
2743 for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
2744 }
2745 }
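// Pipeline summary (added note): (1) a full-pel search seeded from
// pred_mv[best_predmv_idx]; (2) with enhanced_full_pixel_motion_search, a
// re-search from each remaining predictor, with the step count reduced when
// the predictors nearly agree (via get_msb of the averaged full-pel gap);
// (3) subpel refinement through find_fractional_mv_step. The resulting
// tmp_mv is in 1/8-pel units.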
2746
2747 static INLINE void restore_dst_buf(MACROBLOCKD *xd,
2748 uint8_t *orig_dst[MAX_MB_PLANE],
2749 int orig_dst_stride[MAX_MB_PLANE]) {
2750 int i;
2751 for (i = 0; i < MAX_MB_PLANE; i++) {
2752 xd->plane[i].dst.buf = orig_dst[i];
2753 xd->plane[i].dst.stride = orig_dst_stride[i];
2754 }
2755 }
2756
2757 // In some situations we want to discount the apparent cost of a new motion
2758 // vector. Where there is a subtle motion field and especially where there is
2759 // low spatial complexity then it can be hard to cover the cost of a new motion
2760 // vector in a single block, even if that motion vector reduces distortion.
2761 // However, once established that vector may be usable through the nearest and
2762 // near mv modes to reduce distortion in subsequent blocks and also improve
2763 // visual quality.
2764 static int discount_newmv_test(VP9_COMP *cpi, int this_mode, int_mv this_mv,
2765 int_mv (*mode_mv)[MAX_REF_FRAMES], int ref_frame,
2766 int mi_row, int mi_col, BLOCK_SIZE bsize) {
2767 #if CONFIG_NON_GREEDY_MV
2768 (void)mode_mv;
2769 (void)this_mv;
2770 if (this_mode == NEWMV && bsize >= BLOCK_8X8 && cpi->tpl_ready) {
2771 const int gf_group_idx = cpi->twopass.gf_group.index;
2772 const int gf_rf_idx = ref_frame_to_gf_rf_idx(ref_frame);
2773 const TplDepFrame tpl_frame = cpi->tpl_stats[gf_group_idx];
2774 const MotionField *motion_field = vp9_motion_field_info_get_motion_field(
2775 &cpi->motion_field_info, gf_group_idx, gf_rf_idx, cpi->tpl_bsize);
2776 const int tpl_block_mi_h = num_8x8_blocks_high_lookup[cpi->tpl_bsize];
2777 const int tpl_block_mi_w = num_8x8_blocks_wide_lookup[cpi->tpl_bsize];
2778 const int tpl_mi_row = mi_row - (mi_row % tpl_block_mi_h);
2779 const int tpl_mi_col = mi_col - (mi_col % tpl_block_mi_w);
2780 const int mv_mode =
2781 tpl_frame
2782 .mv_mode_arr[gf_rf_idx][tpl_mi_row * tpl_frame.stride + tpl_mi_col];
2783 if (mv_mode == NEW_MV_MODE) {
2784 int_mv tpl_new_mv =
2785 vp9_motion_field_mi_get_mv(motion_field, tpl_mi_row, tpl_mi_col);
2786 int row_diff = abs(tpl_new_mv.as_mv.row - this_mv.as_mv.row);
2787 int col_diff = abs(tpl_new_mv.as_mv.col - this_mv.as_mv.col);
2788 if (VPXMAX(row_diff, col_diff) <= 8) {
2789 return 1;
2790 } else {
2791 return 0;
2792 }
2793 } else {
2794 return 0;
2795 }
2796 } else {
2797 return 0;
2798 }
2799 #else
2800 (void)mi_row;
2801 (void)mi_col;
2802 (void)bsize;
2803 return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) &&
2804 (this_mv.as_int != 0) &&
2805 ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
2806 (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
2807 ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
2808 (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
2809 #endif
2810 }
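// Discount magnitude (added example): when this test fires, the use sites
// below divide the mv rate by NEW_MV_DISCOUNT_FACTOR (8), e.g. a cost of
// 800 is charged as VPXMAX(800 / 8, 1) == 100, and the mode-bits cost is
// capped at the cheaper of the NEWMV and NEARESTMV signalling costs.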
2811
2812 static int64_t handle_inter_mode(
2813 VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int *rate2,
2814 int64_t *distortion, int *skippable, int *rate_y, int *rate_uv,
2815 struct buf_2d *recon, int *disable_skip, int_mv (*mode_mv)[MAX_REF_FRAMES],
2816 int mi_row, int mi_col, int_mv single_newmv[MAX_REF_FRAMES],
2817 INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
2818 int (*single_skippable)[MAX_REF_FRAMES], int *single_mode_rate,
2819 int64_t *psse, const int64_t ref_best_rd, int64_t *mask_filter,
2820 int64_t filter_cache[], int best_mode_index) {
2821 VP9_COMMON *cm = &cpi->common;
2822 MACROBLOCKD *xd = &x->e_mbd;
2823 MODE_INFO *mi = xd->mi[0];
2824 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
2825 const int is_comp_pred = has_second_ref(mi);
2826 const int this_mode = mi->mode;
2827 int_mv *frame_mv = mode_mv[this_mode];
2828 int i;
2829 int refs[2] = { mi->ref_frame[0],
2830 (mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1]) };
2831 int_mv cur_mv[2];
2832 #if CONFIG_VP9_HIGHBITDEPTH
2833 DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]);
2834 uint8_t *tmp_buf;
2835 #else
2836 DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]);
2837 #endif // CONFIG_VP9_HIGHBITDEPTH
2838 int intpel_mv;
2839 int64_t rd, tmp_rd = INT64_MAX, best_rd = INT64_MAX;
2840 int best_needs_copy = 0;
2841 uint8_t *orig_dst[MAX_MB_PLANE];
2842 int orig_dst_stride[MAX_MB_PLANE];
2843 int rs = 0;
2844 INTERP_FILTER best_filter = SWITCHABLE;
2845 uint8_t skip_txfm[MAX_MB_PLANE << 2] = { 0 };
2846 int64_t bsse[MAX_MB_PLANE << 2] = { 0 };
2847
2848 const int bsl = mi_width_log2_lookup[bsize];
2849 const int blk_parity = (((mi_row + mi_col) >> bsl) +
2850 get_chessboard_index(cm->current_video_frame)) &
2851 0x1;
2852 const int pred_filter_search =
2853 (cpi->sf.cb_pred_filter_search >= 2) && blk_parity;
2854
2855 int skip_txfm_sb = 0;
2856 int64_t skip_sse_sb = INT64_MAX;
2857 int64_t distortion_y = 0, distortion_uv = 0;
2858
2859 #if CONFIG_VP9_HIGHBITDEPTH
2860 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2861 tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
2862 } else {
2863 tmp_buf = (uint8_t *)tmp_buf16;
2864 }
2865 #endif // CONFIG_VP9_HIGHBITDEPTH
2866
2867 if (pred_filter_search) {
2868 INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
2869 if (xd->above_mi && is_inter_block(xd->above_mi))
2870 af = xd->above_mi->interp_filter;
2871 if (xd->left_mi && is_inter_block(xd->left_mi))
2872 lf = xd->left_mi->interp_filter;
2873
2874 if ((this_mode != NEWMV) || (af == lf)) best_filter = af;
2875 }

  if (is_comp_pred) {
    if (frame_mv[refs[0]].as_int == INVALID_MV ||
        frame_mv[refs[1]].as_int == INVALID_MV)
      return INT64_MAX;

    if (cpi->sf.adaptive_mode_search) {
      if (single_filter[this_mode][refs[0]] ==
          single_filter[this_mode][refs[1]])
        best_filter = single_filter[this_mode][refs[0]];
    }
  }

  if (this_mode == NEWMV) {
    int rate_mv;
    if (is_comp_pred) {
      // Decide number of joint motion search iterations
      const int num_joint_search_iters = get_joint_search_iters(
          cpi->sf.comp_inter_joint_search_iter_level, bsize);

      // Initialize mv using single prediction mode result.
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;

      if (num_joint_search_iters) {
#if CONFIG_COLLECT_COMPONENT_TIMING
        start_timing(cpi, joint_motion_search_time);
#endif
        joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col,
                            single_newmv, &rate_mv, num_joint_search_iters);
#if CONFIG_COLLECT_COMPONENT_TIMING
        end_timing(cpi, joint_motion_search_time);
#endif
      } else {
        rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
                                  &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
                                  x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
        rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
                                   &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      }
      *rate2 += rate_mv;
    } else {
      int_mv tmp_mv;
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, single_motion_search_time);
#endif
      single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, single_motion_search_time);
#endif
      if (tmp_mv.as_int == INVALID_MV) return INT64_MAX;

      frame_mv[refs[0]].as_int = xd->mi[0]->bmi[0].as_mv[0].as_int =
          tmp_mv.as_int;
      single_newmv[refs[0]].as_int = tmp_mv.as_int;

      // Estimate the rate implications of a new mv but discount this
      // under certain circumstances where we want to help initiate a weak
      // motion field, where the distortion gain for a single block may not
      // be enough to overcome the cost of a new mv.
      if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0], mi_row,
                              mi_col, bsize)) {
        *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
      } else {
        *rate2 += rate_mv;
      }
    }
  }

  for (i = 0; i < is_comp_pred + 1; ++i) {
    cur_mv[i] = frame_mv[refs[i]];
    // Clip "next_nearest" so that it does not extend too far out of the image.
    if (this_mode != NEWMV) clamp_mv2(&cur_mv[i].as_mv, xd);

    if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
    mi->mv[i].as_int = cur_mv[i].as_int;
  }

  // Do the first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  for (i = 0; i < MAX_MB_PLANE; i++) {
    orig_dst[i] = xd->plane[i].dst.buf;
    orig_dst_stride[i] = xd->plane[i].dst.stride;
  }

  // We don't include the cost of the second reference here, because there
  // are only two options: Last/ARF or Golden/ARF; the second one is always
  // known, which is ARF.
  //
  // Under some circumstances we discount the cost of new mv mode to encourage
  // initiation of a motion field.
  if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv, refs[0],
                          mi_row, mi_col, bsize)) {
    *rate2 +=
        VPXMIN(cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]),
               cost_mv_ref(cpi, NEARESTMV, mbmi_ext->mode_context[refs[0]]));
  } else {
    *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]);
  }

  if (!is_comp_pred && cpi->sf.prune_single_mode_based_on_mv_diff_mode_rate) {
    single_mode_rate[INTER_OFFSET(this_mode)] = *rate2;
    // Prune NEARMV and ZEROMV modes based on motion vector difference and mode
    // rate.
    if (skip_single_mode_based_on_mode_rate(mode_mv, single_mode_rate,
                                            this_mode, refs[0], *rate2,
                                            best_mode_index)) {
      // Check that, when a single inter mode is pruned, the NEARESTMV or NEWMV
      // modes were not early terminated. This ensures not all single modes get
      // skipped when the speed feature is enabled.
      assert(single_mode_rate[INTER_OFFSET(NEARESTMV)] != INT_MAX ||
             single_mode_rate[INTER_OFFSET(NEWMV)] != INT_MAX);
      return INT64_MAX;
    }
  }
  if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
      mi->mode != NEARESTMV)
    return INT64_MAX;

  // Are all MVs integer pel for Y and UV
  intpel_mv = !mv_has_subpel(&mi->mv[0].as_mv);
  if (is_comp_pred) intpel_mv &= !mv_has_subpel(&mi->mv[1].as_mv);

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, interp_filter_time);
#endif
  // Search for best switchable filter by checking the variance of
  // pred error irrespective of whether the filter will be used
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX;
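  // filter_cache[0 .. SWITCHABLE_FILTERS - 1] holds the modeled RD cost of
  // each filter without its signaling cost; filter_cache[SWITCHABLE_FILTERS]
  // tracks the minimum cost including the cost of signaling the filter choice.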

  if (cm->interp_filter != BILINEAR) {
    // Use cb pattern for filter eval when filter is not switchable
    const int enable_interp_search =
        (cpi->sf.cb_pred_filter_search && cm->interp_filter != SWITCHABLE)
            ? blk_parity
            : 1;
    if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
      best_filter = EIGHTTAP;
    } else if (best_filter == SWITCHABLE && enable_interp_search) {
      int newbest;
      int tmp_rate_sum = 0;
      int64_t tmp_dist_sum = 0;

      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        int j;
        int64_t rs_rd;
        int tmp_skip_sb = 0;
        int64_t tmp_skip_sse = INT64_MAX;
        const int enable_earlyterm =
            cpi->sf.early_term_interp_search_plane_rd && cm->interp_filter != i;
        int64_t filt_best_rd;

        mi->interp_filter = i;
        rs = vp9_get_switchable_rate(cpi, xd);
        rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);

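        // With integer-pel MVs every filter samples at whole-pixel positions
        // and produces the same prediction, so the rate/distortion measured
        // for the first filter can be reused for the remaining ones.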
        if (i > 0 && intpel_mv) {
          rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
          filter_cache[i] = rd;
          filter_cache[SWITCHABLE_FILTERS] =
              VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
          if (cm->interp_filter == SWITCHABLE) rd += rs_rd;
          *mask_filter = VPXMAX(*mask_filter, rd);
        } else {
          int rate_sum = 0;
          int64_t dist_sum = 0;
          if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
              (cpi->sf.interp_filter_search_mask & (1 << i))) {
            rate_sum = INT_MAX;
            dist_sum = INT64_MAX;
            continue;
          }

          if ((cm->interp_filter == SWITCHABLE && (!i || best_needs_copy)) ||
              (cm->interp_filter != SWITCHABLE &&
               (cm->interp_filter == mi->interp_filter ||
                (i == 0 && intpel_mv)))) {
            restore_dst_buf(xd, orig_dst, orig_dst_stride);
          } else {
            for (j = 0; j < MAX_MB_PLANE; j++) {
              xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
              xd->plane[j].dst.stride = 64;
            }
          }

          filt_best_rd =
              cm->interp_filter == SWITCHABLE ? (best_rd - rs_rd) : best_rd;
          if (build_inter_pred_model_rd_earlyterm(
                  cpi, mi_row, mi_col, bsize, x, xd, &rate_sum, &dist_sum,
                  &tmp_skip_sb, &tmp_skip_sse, enable_earlyterm,
                  filt_best_rd)) {
            filter_cache[i] = INT64_MAX;
            continue;
          }

          rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
          filter_cache[i] = rd;
          filter_cache[SWITCHABLE_FILTERS] =
              VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
          if (cm->interp_filter == SWITCHABLE) rd += rs_rd;
          *mask_filter = VPXMAX(*mask_filter, rd);

          if (i == 0 && intpel_mv) {
            tmp_rate_sum = rate_sum;
            tmp_dist_sum = dist_sum;
          }
        }

        if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
          if (rd / 2 > ref_best_rd) {
            restore_dst_buf(xd, orig_dst, orig_dst_stride);
            return INT64_MAX;
          }
        }
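        // Predictions alternate between the real dst buffer and tmp_buf; when
        // a new best filter is found at i > 0 (sub-pel case), toggle
        // best_needs_copy so the buffer currently holding the best predictor
        // is left untouched by subsequent trials.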
        newbest = i == 0 || rd < best_rd;

        if (newbest) {
          best_rd = rd;
          best_filter = mi->interp_filter;
          if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
            best_needs_copy = !best_needs_copy;
        }

        if ((cm->interp_filter == SWITCHABLE && newbest) ||
            (cm->interp_filter != SWITCHABLE &&
             cm->interp_filter == mi->interp_filter)) {
          tmp_rd = best_rd;

          skip_txfm_sb = tmp_skip_sb;
          skip_sse_sb = tmp_skip_sse;
          memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
          memcpy(bsse, x->bsse, sizeof(bsse));
        }
      }
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
    }
  }
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, interp_filter_time);
#endif
  // Set the appropriate filter
  mi->interp_filter =
      cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter;
  rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi, xd) : 0;

  if (tmp_rd != INT64_MAX) {
    if (best_needs_copy) {
      // Again temporarily set the buffers to local memory to prevent a memcpy
      for (i = 0; i < MAX_MB_PLANE; i++) {
        xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
        xd->plane[i].dst.stride = 64;
      }
    }
    rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
  } else {
    int tmp_rate;
    int64_t tmp_dist;
    // Handles the special case when a filter that is not in the
    // switchable list (e.g. bilinear) is indicated at the frame level, or
    // the skip condition holds.
    build_inter_pred_model_rd_earlyterm(
        cpi, mi_row, mi_col, bsize, x, xd, &tmp_rate, &tmp_dist, &skip_txfm_sb,
        &skip_sse_sb, 0 /*do_earlyterm*/, INT64_MAX);
    rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
    memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
    memcpy(bsse, x->bsse, sizeof(bsse));
  }

  if (!is_comp_pred) single_filter[this_mode][refs[0]] = mi->interp_filter;

  if (cpi->sf.adaptive_mode_search)
    if (is_comp_pred)
      if (single_skippable[this_mode][refs[0]] &&
          single_skippable[this_mode][refs[1]])
        memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));

  if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
    // If the current pred_error modeled rd is substantially more than the
    // best so far, do not bother doing full rd.
    if (rd / 2 > ref_best_rd) {
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }
  }

  if (cm->interp_filter == SWITCHABLE) *rate2 += rs;

  memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
  memcpy(x->bsse, bsse, sizeof(bsse));
  if (!skip_txfm_sb || xd->lossless) {
    int skippable_y, skippable_uv;
    int64_t sseuv = INT64_MAX;
    int64_t rdcosty = INT64_MAX;

    // Y cost and distortion
    vp9_subtract_plane(x, bsize, 0);
    super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, bsize,
                    ref_best_rd, recon);

    if (*rate_y == INT_MAX) {
      *rate2 = INT_MAX;
      *distortion = INT64_MAX;
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }

    *rate2 += *rate_y;
    *distortion += distortion_y;

    rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
    rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));

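    // rdcosty is the cheaper of (a) the rate/distortion accumulated so far
    // and (b) the cost of skipping with *psse as distortion; the remaining
    // budget ref_best_rd - rdcosty bounds the UV search below.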
    if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
                          &sseuv, bsize, ref_best_rd - rdcosty)) {
      *rate2 = INT_MAX;
      *distortion = INT64_MAX;
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }

    *psse += sseuv;
    *rate2 += *rate_uv;
    *distortion += distortion_uv;
    *skippable = skippable_y && skippable_uv;
  } else {
    x->skip = 1;
    *disable_skip = 1;

    // The cost of the skip bit needs to be added.
    *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);

    *distortion = skip_sse_sb;
  }

  if (!is_comp_pred) single_skippable[this_mode][refs[0]] = *skippable;

  restore_dst_buf(xd, orig_dst, orig_dst_stride);
  return 0;  // The rate-distortion cost will be re-calculated by caller.
}
#endif  // !CONFIG_REALTIME_ONLY

void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
                               BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                               int64_t best_rd) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = xd->plane;
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  int y_skip = 0, uv_skip = 0;
  int64_t dist_y = 0, dist_uv = 0;
  TX_SIZE max_uv_tx_size;
  x->skip_encode = 0;
  ctx->skip = 0;
  xd->mi[0]->ref_frame[0] = INTRA_FRAME;
  xd->mi[0]->ref_frame[1] = NO_REF_FRAME;
  // Initialize interp_filter here so we do not have to check for inter block
  // modes in get_pred_context_switchable_interp()
  xd->mi[0]->interp_filter = SWITCHABLE_FILTERS;

  if (bsize >= BLOCK_8X8) {
    if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
                               &y_skip, bsize, best_rd) >= best_rd) {
      rd_cost->rate = INT_MAX;
      return;
    }
  } else {
    y_skip = 0;
    if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
                                     &dist_y, best_rd) >= best_rd) {
      rd_cost->rate = INT_MAX;
      return;
    }
  }
  max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->tx_size]
                                   [pd[1].subsampling_x][pd[1].subsampling_y];
  rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv,
                          &uv_skip, VPXMAX(BLOCK_8X8, bsize), max_uv_tx_size);

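  // If both planes are fully skippable, signal skip = 1 and back out the
  // token-only coefficient costs; otherwise pay for the skip = 0 flag.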
  if (y_skip && uv_skip) {
    rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
                    vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
    rd_cost->dist = dist_y + dist_uv;
  } else {
    rd_cost->rate =
        rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
    rd_cost->dist = dist_y + dist_uv;
  }

  ctx->mic = *xd->mi[0];
  ctx->mbmi_ext = *x->mbmi_ext;
  rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
}

#if !CONFIG_REALTIME_ONLY
// This function is designed to apply a bias or adjustment to an rd value based
// on the relative variance of the source and reconstruction.
#define LOW_VAR_THRESH 250
#define VAR_MULT 250
static unsigned int max_var_adjust[VP9E_CONTENT_INVALID] = { 16, 16, 250 };

static void rd_variance_adjustment(VP9_COMP *cpi, MACROBLOCK *x,
                                   BLOCK_SIZE bsize, int64_t *this_rd,
                                   struct buf_2d *recon,
                                   MV_REFERENCE_FRAME ref_frame,
                                   MV_REFERENCE_FRAME second_ref_frame,
                                   PREDICTION_MODE this_mode) {
  MACROBLOCKD *const xd = &x->e_mbd;
  unsigned int rec_variance;
  unsigned int src_variance;
  unsigned int src_rec_min;
  unsigned int var_diff = 0;
  unsigned int var_factor = 0;
  unsigned int adj_max;
  unsigned int low_var_thresh = LOW_VAR_THRESH;
  const int bw = num_8x8_blocks_wide_lookup[bsize];
  const int bh = num_8x8_blocks_high_lookup[bsize];
  vp9e_tune_content content_type = cpi->oxcf.content;

  if (*this_rd == INT64_MAX) return;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    rec_variance = vp9_high_get_sby_variance(cpi, recon, bsize, xd->bd);
    src_variance =
        vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, xd->bd);
  } else {
    rec_variance = vp9_get_sby_variance(cpi, recon, bsize);
    src_variance = vp9_get_sby_variance(cpi, &x->plane[0].src, bsize);
  }
#else
  rec_variance = vp9_get_sby_variance(cpi, recon, bsize);
  src_variance = vp9_get_sby_variance(cpi, &x->plane[0].src, bsize);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Scale based on area in 8x8 blocks
  rec_variance /= (bw * bh);
  src_variance /= (bw * bh);

  if (content_type == VP9E_CONTENT_FILM) {
    if (cpi->oxcf.pass == 2) {
      // Adjust the low variance threshold based on the estimated group noise
      // energy.
      double noise_factor =
          (double)cpi->twopass.gf_group.group_noise_energy / SECTION_NOISE_DEF;
      low_var_thresh = (unsigned int)(low_var_thresh * noise_factor);

      if (ref_frame == INTRA_FRAME) {
        low_var_thresh *= 2;
        if (this_mode == DC_PRED) low_var_thresh *= 5;
      } else if (second_ref_frame > INTRA_FRAME) {
        low_var_thresh *= 2;
      }
    }
  } else {
    low_var_thresh = LOW_VAR_THRESH / 2;
  }

  // Take the lower of the source (raw per-pixel value) and recon variance.
  // Note that if the source per-pixel variance is 0 then the recon value here
  // will not be per pixel (see above) so will likely be much larger.
  src_rec_min = VPXMIN(src_variance, rec_variance);

  if (src_rec_min > low_var_thresh) return;

  // We care more when the reconstruction has lower variance so give this case
  // a stronger weighting.
  var_diff = (src_variance > rec_variance) ? (src_variance - rec_variance) * 2
                                           : (rec_variance - src_variance) / 2;

  adj_max = max_var_adjust[content_type];

  var_factor =
      (unsigned int)((int64_t)VAR_MULT * var_diff) / VPXMAX(1, src_variance);
  var_factor = VPXMIN(adj_max, var_factor);

  if ((content_type == VP9E_CONTENT_FILM) &&
      ((ref_frame == INTRA_FRAME) || (second_ref_frame > INTRA_FRAME))) {
    var_factor *= 2;
  }

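  // Apply the bias: rd' = rd * (100 + var_factor) / 100, i.e. var_factor is
  // the percentage inflation applied to the rd cost of this mode.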
  *this_rd += (*this_rd * var_factor) / 100;

  (void)xd;
}
#endif  // !CONFIG_REALTIME_ONLY

// Do we have an internal image edge (e.g. formatting bars).
int vp9_internal_image_edge(VP9_COMP *cpi) {
  return (cpi->oxcf.pass == 2) &&
         ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
          (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
}

// Checks to see if a super block is on a horizontal image edge.
// In most cases this is the "real" edge unless there are formatting
// bars embedded in the stream.
int vp9_active_h_edge(VP9_COMP *cpi, int mi_row, int mi_step) {
  int top_edge = 0;
  int bottom_edge = cpi->common.mi_rows;
  int is_active_h_edge = 0;

  // For two pass account for any formatting bars detected.
  if (cpi->oxcf.pass == 2) {
    TWO_PASS *twopass = &cpi->twopass;
    vpx_clear_system_state();

    // The inactive region is specified in MBs not mi units (hence the factor
    // of 2 when converting). The image edge is in the following MB row.
    top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);

    bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
    bottom_edge = VPXMAX(top_edge, bottom_edge);
  }

  if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
      ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
    is_active_h_edge = 1;
  }
  return is_active_h_edge;
}

// Checks to see if a super block is on a vertical image edge.
// In most cases this is the "real" edge unless there are formatting
// bars embedded in the stream.
int vp9_active_v_edge(VP9_COMP *cpi, int mi_col, int mi_step) {
  int left_edge = 0;
  int right_edge = cpi->common.mi_cols;
  int is_active_v_edge = 0;

  // For two pass account for any formatting bars detected.
  if (cpi->oxcf.pass == 2) {
    TWO_PASS *twopass = &cpi->twopass;
    vpx_clear_system_state();

    // The inactive region is specified in MBs not mi units (hence the factor
    // of 2 when converting). The image edge is in the following MB column.
    left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);

    right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
    right_edge = VPXMAX(left_edge, right_edge);
  }

  if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
      ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
    is_active_v_edge = 1;
  }
  return is_active_v_edge;
}

// Checks to see if a super block is at the edge of the active image.
// In most cases this is the "real" edge unless there are formatting
// bars embedded in the stream.
int vp9_active_edge_sb(VP9_COMP *cpi, int mi_row, int mi_col) {
  return vp9_active_h_edge(cpi, mi_row, MI_BLOCK_SIZE) ||
         vp9_active_v_edge(cpi, mi_col, MI_BLOCK_SIZE);
}

#if !CONFIG_REALTIME_ONLY
static void init_frame_mv(int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]) {
  for (int mode = 0; mode < MB_MODE_COUNT; ++mode) {
    for (int ref_frame = 0; ref_frame < MAX_REF_FRAMES; ++ref_frame) {
      frame_mv[mode][ref_frame].as_int = INVALID_MV;
    }
  }
}

void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
                               MACROBLOCK *x, int mi_row, int mi_col,
                               RD_COST *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  RD_OPT *const rd_opt = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const struct segmentation *const seg = &cm->seg;
  PREDICTION_MODE this_mode;
  MV_REFERENCE_FRAME ref_frame, second_ref_frame;
  unsigned char segment_id = mi->segment_id;
  int comp_pred, i, k;
  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  struct buf_2d yv12_mb[4][MAX_MB_PLANE] = { 0 };
  int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
  INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
  int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
  int single_mode_rate[MAX_REF_FRAMES][INTER_MODES];
  int64_t best_rd = best_rd_so_far;
  int64_t best_pred_diff[REFERENCE_MODES];
  int64_t best_pred_rd[REFERENCE_MODES];
  int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
  int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  MODE_INFO best_mbmode;
  int best_mode_skippable = 0;
  int midx, best_mode_index = -1;
  unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
  vpx_prob comp_mode_p;
  int64_t best_intra_rd = INT64_MAX;
  unsigned int best_pred_sse = UINT_MAX;
  PREDICTION_MODE best_intra_mode = DC_PRED;
  int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
  int64_t dist_uv[TX_SIZES];
  int skip_uv[TX_SIZES];
  PREDICTION_MODE mode_uv[TX_SIZES];
  const int intra_cost_penalty =
      vp9_get_intra_cost_penalty(cpi, bsize, cm->base_qindex, cm->y_dc_delta_q);
  int best_skip2 = 0;
  uint8_t ref_frame_skip_mask[2] = { 0, 1 };
  uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
  int mode_skip_start = sf->mode_skip_start + 1;
  const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
  const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
  int64_t mode_threshold[MAX_MODES];
  int8_t *tile_mode_map = tile_data->mode_map[bsize];
  int8_t mode_map[MAX_MODES];  // Maintain mode_map information locally to
                               // avoid the lock mechanism involved with reads
                               // from tile_mode_map.
  const int mode_search_skip_flags = sf->mode_search_skip_flags;
  const int is_rect_partition =
      num_4x4_blocks_wide_lookup[bsize] != num_4x4_blocks_high_lookup[bsize];
  int64_t mask_filter = 0;
  int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];

  struct buf_2d *recon;
  struct buf_2d recon_buf;
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, recon16[64 * 64]);
  recon_buf.buf = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH
                      ? CONVERT_TO_BYTEPTR(recon16)
                      : (uint8_t *)recon16;
#else
  DECLARE_ALIGNED(16, uint8_t, recon8[64 * 64]);
  recon_buf.buf = recon8;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  recon_buf.stride = 64;
  recon = cpi->oxcf.content == VP9E_CONTENT_FILM ? &recon_buf : 0;

  vp9_zero(best_mbmode);

  x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX;

  estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
                           &comp_mode_p);

  for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
    best_filter_rd[i] = INT64_MAX;
  for (i = 0; i < TX_SIZES; i++) rate_uv_intra[i] = INT_MAX;
  for (i = 0; i < MAX_REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = 0; i < MB_MODE_COUNT; ++i) {
    for (k = 0; k < MAX_REF_FRAMES; ++k) {
      single_inter_filter[i][k] = SWITCHABLE;
      single_skippable[i][k] = 0;
    }
  }

  rd_cost->rate = INT_MAX;

  init_frame_mv(frame_mv);

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    if ((cpi->ref_frame_flags & ref_frame_to_flag(ref_frame)) &&
        !(is_rect_partition && (ctx->skip_ref_frame_mask & (1 << ref_frame)))) {
      assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
      setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
                         frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
    }
    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
    frame_mv[ZEROMV][ref_frame].as_int = 0;
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    if (!(cpi->ref_frame_flags & ref_frame_to_flag(ref_frame))) {
      // Skip checking missing references in both single and compound reference
      // modes. Note that a mode will be skipped if both reference frames
      // are masked out.
      ref_frame_skip_mask[0] |= (1 << ref_frame);
      ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    } else if (sf->reference_masking) {
      for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
        // Skip fixed mv modes for poor references
        if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
          mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
          break;
        }
      }
    }
    // If the segment reference frame feature is enabled then mask out the
    // current ref frame if it is not allowed for this segment.
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      ref_frame_skip_mask[0] |= (1 << ref_frame);
      ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    }
  }

  // Disable this drop out case if the ref frame
  // segment level feature is enabled for this segment. This is to
  // prevent the possibility that we end up unable to pick any mode.
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
    // unless ARNR filtering is enabled in which case we want
    // an unfiltered alternative. We allow near/nearest as well
    // because they may result in zero-zero MVs but be cheaper.
    if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
      ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME);
      ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
      mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
      if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
        mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
      if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
        mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
    }
  }

  if (cpi->rc.is_src_frame_alt_ref) {
    if (sf->alt_ref_search_fp) {
      mode_skip_mask[ALTREF_FRAME] = 0;
      ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME) & 0xff;
      ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
    }
  }

  if (sf->alt_ref_search_fp)
    if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
      if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
        mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;

  if (sf->adaptive_mode_search) {
    if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
        cpi->rc.frames_since_golden >= 3)
      if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1))
        mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
  }

  if (bsize > sf->max_intra_bsize && cpi->ref_frame_flags != 0) {
    ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
    ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
  }

  mode_skip_mask[INTRA_FRAME] |=
      (uint16_t) ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);

  for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0;

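  // Thresholds are scaled by the per-mode frequency factor, i.e.
  // threshold = rd_thresh * freq_fact / 32, so modes that have not been
  // picked recently face a higher bar before being evaluated.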
  for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
    mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;

  midx = sf->schedule_mode_search ? mode_skip_start : 0;

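  // Bubble-sort the tail of the tile's mode map (entries 5 onward) into
  // ascending threshold order so cheaper modes are tried first; the first
  // five entries keep their fixed schedule.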
  while (midx > 4) {
    uint8_t end_pos = 0;
    for (i = 5; i < midx; ++i) {
      if (mode_threshold[tile_mode_map[i - 1]] >
          mode_threshold[tile_mode_map[i]]) {
        uint8_t tmp = tile_mode_map[i];
        tile_mode_map[i] = tile_mode_map[i - 1];
        tile_mode_map[i - 1] = tmp;
        end_pos = i;
      }
    }
    midx = end_pos;
  }

  memcpy(mode_map, tile_mode_map, sizeof(mode_map));

  for (midx = 0; midx < MAX_MODES; ++midx) {
    int mode_index = mode_map[midx];
    int mode_excluded = 0;
    int64_t this_rd = INT64_MAX;
    int disable_skip = 0;
    int compmode_cost = 0;
    int rate2 = 0, rate_y = 0, rate_uv = 0;
    int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
    int skippable = 0;
    int this_skip2 = 0;
    int64_t total_sse = INT64_MAX;
    int early_term = 0;

    this_mode = vp9_mode_order[mode_index].mode;
    ref_frame = vp9_mode_order[mode_index].ref_frame[0];
    second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];

    vp9_zero(x->sum_y_eobs);
    comp_pred = second_ref_frame > INTRA_FRAME;
    if (!comp_pred && ref_frame != INTRA_FRAME &&
        sf->prune_single_mode_based_on_mv_diff_mode_rate)
      single_mode_rate[ref_frame][INTER_OFFSET(this_mode)] = INT_MAX;

    if (is_rect_partition) {
      if (ctx->skip_ref_frame_mask & (1 << ref_frame)) continue;
      if (second_ref_frame > 0 &&
          (ctx->skip_ref_frame_mask & (1 << second_ref_frame)))
        continue;
    }

    // Look at the reference frame of the best mode so far and set the
    // skip mask to look at a subset of the remaining modes.
    if (midx == mode_skip_start && best_mode_index >= 0) {
      switch (best_mbmode.ref_frame[0]) {
        case INTRA_FRAME: break;
        case LAST_FRAME: ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK; break;
        case GOLDEN_FRAME:
          ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
          break;
        case ALTREF_FRAME: ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK; break;
        case NO_REF_FRAME:
        case MAX_REF_FRAMES: assert(0 && "Invalid Reference frame"); break;
      }
    }

    if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
        (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
      continue;

    if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue;

    // Test best rd so far against threshold for trying this mode.
    if (best_mode_skippable && sf->schedule_mode_search)
      mode_threshold[mode_index] <<= 1;

    if (best_rd < mode_threshold[mode_index]) continue;

    // This is only used in motion vector unit test.
    if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue;

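    // Motion field mode search: scan the row of blocks above and the column
    // to the left. If every neighbor uses the same reference frame and motion
    // vector, modes unlikely to beat that uniform motion field are pruned.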
    if (sf->motion_field_mode_search) {
      const int mi_width = VPXMIN(num_8x8_blocks_wide_lookup[bsize],
                                  tile_info->mi_col_end - mi_col);
      const int mi_height = VPXMIN(num_8x8_blocks_high_lookup[bsize],
                                   tile_info->mi_row_end - mi_row);
      const int bsl = mi_width_log2_lookup[bsize];
      int cb_partition_search_ctrl =
          (((mi_row + mi_col) >> bsl) +
           get_chessboard_index(cm->current_video_frame)) &
          0x1;
      MODE_INFO *ref_mi;
      int const_motion = 1;
      int skip_ref_frame = !cb_partition_search_ctrl;
      MV_REFERENCE_FRAME rf = NO_REF_FRAME;
      int_mv ref_mv;
      ref_mv.as_int = INVALID_MV;

      if ((mi_row - 1) >= tile_info->mi_row_start) {
        ref_mv = xd->mi[-xd->mi_stride]->mv[0];
        rf = xd->mi[-xd->mi_stride]->ref_frame[0];
        for (i = 0; i < mi_width; ++i) {
          ref_mi = xd->mi[-xd->mi_stride + i];
          const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) &&
                          (ref_frame == ref_mi->ref_frame[0]);
          skip_ref_frame &= (rf == ref_mi->ref_frame[0]);
        }
      }

      if ((mi_col - 1) >= tile_info->mi_col_start) {
        if (ref_mv.as_int == INVALID_MV) ref_mv = xd->mi[-1]->mv[0];
        if (rf == NO_REF_FRAME) rf = xd->mi[-1]->ref_frame[0];
        for (i = 0; i < mi_height; ++i) {
          ref_mi = xd->mi[i * xd->mi_stride - 1];
          const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) &&
                          (ref_frame == ref_mi->ref_frame[0]);
          skip_ref_frame &= (rf == ref_mi->ref_frame[0]);
        }
      }

      if (skip_ref_frame && this_mode != NEARESTMV && this_mode != NEWMV)
        if (rf > INTRA_FRAME)
          if (ref_frame != rf) continue;

      if (const_motion)
        if (this_mode == NEARMV || this_mode == ZEROMV) continue;
    }

    if (comp_pred) {
      if (!cpi->allow_comp_inter_inter) continue;

      if (cm->ref_frame_sign_bias[ref_frame] ==
          cm->ref_frame_sign_bias[second_ref_frame])
        continue;

      // Skip compound inter modes if ARF is not available.
      if (!(cpi->ref_frame_flags & ref_frame_to_flag(second_ref_frame)))
        continue;

      // Do not allow compound prediction if the segment level reference frame
      // feature is in use as in this case there can only be one reference.
      if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;

      if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
          best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
        continue;

      mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
    } else {
      if (ref_frame != INTRA_FRAME)
        mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
    }

    if (ref_frame == INTRA_FRAME) {
      if (sf->adaptive_mode_search)
        if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
          continue;

      if (this_mode != DC_PRED) {
        // Disable intra modes other than DC_PRED for blocks with low variance.
        // Threshold for intra skipping based on source variance.
        // TODO(debargha): Specialize the threshold for super block sizes
        const unsigned int skip_intra_var_thresh =
            (cpi->oxcf.content == VP9E_CONTENT_FILM) ? 0 : 64;
        if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
            x->source_variance < skip_intra_var_thresh)
          continue;
        // Only search the oblique modes if the best so far is
        // one of the neighboring directional modes
        if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
            (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
          if (best_mode_index >= 0 && best_mbmode.ref_frame[0] > INTRA_FRAME)
            continue;
        }
        if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
          if (conditional_skipintra(this_mode, best_intra_mode)) continue;
        }
      }
    } else {
      const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame };
      if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv, this_mode,
                              ref_frames))
        continue;
    }

    mi->mode = this_mode;
    mi->uv_mode = DC_PRED;
    mi->ref_frame[0] = ref_frame;
    mi->ref_frame[1] = second_ref_frame;
    // Evaluate all sub-pel filters irrespective of whether we can use
    // them for this frame.
    mi->interp_filter =
        cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter;
    mi->mv[0].as_int = mi->mv[1].as_int = 0;

    x->skip = 0;
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);

    // Select prediction reference frames.
    for (i = 0; i < MAX_MB_PLANE; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }

    if (ref_frame == INTRA_FRAME) {
      TX_SIZE uv_tx;
      struct macroblockd_plane *const pd = &xd->plane[1];
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, intra_mode_search_time);
#endif
      memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
      super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize,
                      best_rd, recon);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, intra_mode_search_time);
#endif
      if (rate_y == INT_MAX) continue;

      uv_tx = uv_txsize_lookup[bsize][mi->tx_size][pd->subsampling_x]
                              [pd->subsampling_y];
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, intra_mode_search_time);
#endif
      if (rate_uv_intra[uv_tx] == INT_MAX) {
        choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
                             &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
                             &skip_uv[uv_tx], &mode_uv[uv_tx]);
      }
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, intra_mode_search_time);
#endif
      rate_uv = rate_uv_tokenonly[uv_tx];
      distortion_uv = dist_uv[uv_tx];
      skippable = skippable && skip_uv[uv_tx];
      mi->uv_mode = mode_uv[uv_tx];

      rate2 = rate_y + cpi->mbmode_cost[mi->mode] + rate_uv_intra[uv_tx];
      if (this_mode != DC_PRED && this_mode != TM_PRED)
        rate2 += intra_cost_penalty;
      distortion2 = distortion_y + distortion_uv;
    } else {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, handle_inter_mode_time);
#endif
      this_rd = handle_inter_mode(
          cpi, x, bsize, &rate2, &distortion2, &skippable, &rate_y, &rate_uv,
          recon, &disable_skip, frame_mv, mi_row, mi_col, single_newmv,
          single_inter_filter, single_skippable,
          &single_mode_rate[ref_frame][0], &total_sse, best_rd, &mask_filter,
          filter_cache, best_mode_index);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, handle_inter_mode_time);
#endif
      if (this_rd == INT64_MAX) continue;

      compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);

      if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
    }

    // Estimate the reference frame signaling cost and add it
    // to the rolling cost variable.
    if (comp_pred) {
      rate2 += ref_costs_comp[ref_frame];
    } else {
      rate2 += ref_costs_single[ref_frame];
    }

    if (!disable_skip) {
      const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
      const int skip_cost0 = vp9_cost_bit(skip_prob, 0);
      const int skip_cost1 = vp9_cost_bit(skip_prob, 1);

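      // Three cases: (a) the block is skippable, so replace the coefficient
      // costs with the skip flag; (b) coding the residual with a skip = 0
      // flag beats forcing a skip with total_sse as distortion; (c) forcing
      // the skip is cheaper, so drop the coefficient costs and take total_sse.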
      if (skippable) {
        // Back out the coefficient coding costs
        rate2 -= (rate_y + rate_uv);

        // Cost the skip mb case
        rate2 += skip_cost1;
      } else if (ref_frame != INTRA_FRAME && !xd->lossless &&
                 !cpi->oxcf.sharpness) {
        if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + skip_cost0,
                   distortion2) <
            RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) {
          // Add in the cost of the no skip flag.
          rate2 += skip_cost0;
        } else {
          // FIXME(rbultje) make this work for splitmv also
          assert(total_sse >= 0);

          rate2 += skip_cost1;
          distortion2 = total_sse;
          rate2 -= (rate_y + rate_uv);
          this_skip2 = 1;
        }
      } else {
        // Add in the cost of the no skip flag.
        rate2 += skip_cost0;
      }

      // Calculate the final RD estimate for this mode.
      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    }

    if (recon) {
      // In film mode bias against DC pred and other intra if there is a
      // significant difference between the variance of the sub blocks in the
      // source. Also apply some bias against compound modes which also
      // tend to blur fine texture such as film grain over time.
      //
      // The sub block test here acts in the case where one or more sub
      // blocks have relatively high variance but others relatively low
      // variance. Here the high variance sub blocks may push the
      // total variance for the current block size over the thresholds
      // used in rd_variance_adjustment() below.
      if (cpi->oxcf.content == VP9E_CONTENT_FILM) {
        if (bsize >= BLOCK_16X16) {
          int min_energy, max_energy;
          vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy,
                                   &max_energy);
          if (max_energy > min_energy) {
            if (ref_frame == INTRA_FRAME) {
              if (this_mode == DC_PRED)
                this_rd += (this_rd * (max_energy - min_energy));
              else
                this_rd += (this_rd * (max_energy - min_energy)) / 4;
            } else if (second_ref_frame > INTRA_FRAME) {
              this_rd += this_rd / 4;
            }
          }
        }
      }
      // Apply an adjustment to the rd value based on the similarity of the
      // source variance and reconstructed variance.
      rd_variance_adjustment(cpi, x, bsize, &this_rd, recon, ref_frame,
                             second_ref_frame, this_mode);
    }

    if (ref_frame == INTRA_FRAME) {
      // Keep record of best intra rd
      if (this_rd < best_intra_rd) {
        best_intra_rd = this_rd;
        best_intra_mode = mi->mode;
      }
    }

    if (!disable_skip && ref_frame == INTRA_FRAME) {
      for (i = 0; i < REFERENCE_MODES; ++i)
        best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
        best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
    }

    // Did this mode help, i.e. is it the new best mode?
    if (this_rd < best_rd || x->skip) {
      int max_plane = MAX_MB_PLANE;
      if (!mode_excluded) {
        // Note index of best mode so far
        best_mode_index = mode_index;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mi->mv[0].as_int = 0;
          max_plane = 1;
          // Initialize interp_filter here so we do not have to check for
          // inter block modes in get_pred_context_switchable_interp()
          mi->interp_filter = SWITCHABLE_FILTERS;
        } else {
          best_pred_sse = x->pred_sse[ref_frame];
        }

        rd_cost->rate = rate2;
        rd_cost->dist = distortion2;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        best_mbmode = *mi;
        best_skip2 = this_skip2;
        best_mode_skippable = skippable;

        if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
        memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mi->tx_size],
               sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
        ctx->sum_y_eobs = x->sum_y_eobs[mi->tx_size];

        // TODO(debargha): enhance this test with a better distortion prediction
        // based on qp, activity mask and history
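        // Early termination: once the best distortion is already well below
        // one quantization step squared, later (more expensive) modes are
        // unlikely to improve the result.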
        if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
            (mode_index > MIN_EARLY_TERM_INDEX)) {
          int qstep = xd->plane[0].dequant[1];
          // TODO(debargha): Enhance this by specializing for each mode_index
          int scale = 4;
#if CONFIG_VP9_HIGHBITDEPTH
          if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
            qstep >>= (xd->bd - 8);
          }
#endif  // CONFIG_VP9_HIGHBITDEPTH
          if (x->source_variance < UINT_MAX) {
            const int var_adjust = (x->source_variance < 16);
            scale -= var_adjust;
          }
          if (ref_frame > INTRA_FRAME && distortion2 * scale < qstep * qstep) {
            early_term = 1;
          }
        }
      }
    }

    /* keep record of best compound/single-only prediction */
    if (!disable_skip && ref_frame != INTRA_FRAME) {
      int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cm->reference_mode == REFERENCE_MODE_SELECT) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);

      if (!comp_pred) {
        if (single_rd < best_pred_rd[SINGLE_REFERENCE])
          best_pred_rd[SINGLE_REFERENCE] = single_rd;
      } else {
        if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
          best_pred_rd[COMPOUND_REFERENCE] = single_rd;
      }
      if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
        best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;

      /* keep record of best filter type */
      if (!mode_excluded && cm->interp_filter != BILINEAR) {
        int64_t ref =
            filter_cache[cm->interp_filter == SWITCHABLE ? SWITCHABLE_FILTERS
                                                         : cm->interp_filter];

        for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
          int64_t adj_rd;
          if (ref == INT64_MAX)
            adj_rd = 0;
          else if (filter_cache[i] == INT64_MAX)
            // When early termination is triggered, the encoder does not have
            // access to the rate-distortion cost. It only knows that the cost
            // should be above the maximum valid value, so it takes the known
            // maximum plus an arbitrary constant as the rate-distortion cost.
            adj_rd = mask_filter - ref + 10;
          else
            adj_rd = filter_cache[i] - ref;

          adj_rd += this_rd;
          best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
        }
      }
    }

    if (early_term) break;

    if (x->skip && !comp_pred) break;
  }

  // The inter modes' rate costs are not calculated precisely in some cases.
  // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
  // ZEROMV. Here, checks are added for those cases, and the mode decisions
  // are corrected.
  if (best_mbmode.mode == NEWMV) {
    const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
                                         best_mbmode.ref_frame[1] };
    int comp_pred_mode = refs[1] > INTRA_FRAME;

    if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
        ((comp_pred_mode &&
          frame_mv[NEARESTMV][refs[1]].as_int == best_mbmode.mv[1].as_int) ||
         !comp_pred_mode))
      best_mbmode.mode = NEARESTMV;
    else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
             ((comp_pred_mode &&
               frame_mv[NEARMV][refs[1]].as_int == best_mbmode.mv[1].as_int) ||
              !comp_pred_mode))
      best_mbmode.mode = NEARMV;
    else if (best_mbmode.mv[0].as_int == 0 &&
             ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) ||
              !comp_pred_mode))
      best_mbmode.mode = ZEROMV;
  }

  if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
    // If adaptive interp filter is enabled, then the current leaf node of 8x8
    // data is needed for sub8x8. Hence preserve the context.
    if (bsize == BLOCK_8X8) ctx->mic = *xd->mi[0];
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  // If we used an estimate for the uv intra rd in the loop above...
  if (sf->use_uv_intra_rd_estimate) {
    // Do Intra UV best rd mode selection if best mode choice above was intra.
    if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
      TX_SIZE uv_tx_size;
      *mi = best_mbmode;
      uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]);
      rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
                              &rate_uv_tokenonly[uv_tx_size],
                              &dist_uv[uv_tx_size], &skip_uv[uv_tx_size],
                              bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
                              uv_tx_size);
    }
  }

  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == best_mbmode.interp_filter) ||
         !is_inter_block(&best_mbmode));

  if (!cpi->rc.is_src_frame_alt_ref)
    vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact,
                              sf->adaptive_rd_thresh, bsize, best_mode_index);

  // macroblock modes
  *mi = best_mbmode;
  x->skip |= best_skip2;

  for (i = 0; i < REFERENCE_MODES; ++i) {
    if (best_pred_rd[i] == INT64_MAX)
      best_pred_diff[i] = INT_MIN;
    else
      best_pred_diff[i] = best_rd - best_pred_rd[i];
  }

  if (!x->skip) {
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
      if (best_filter_rd[i] == INT64_MAX)
        best_filter_diff[i] = 0;
      else
        best_filter_diff[i] = best_rd - best_filter_rd[i];
    }
    if (cm->interp_filter == SWITCHABLE)
      assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
  } else {
    vp9_zero(best_filter_diff);
  }

  // TODO(yunqingwang): Moving this line in front of the above best_filter_diff
  // updating code causes a PSNR loss. Need to figure out the conflict.
  x->skip |= best_mode_skippable;

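  // If the winning mode left no significant high-frequency coefficients in
  // any coded plane, treat it as skippable for the stored coding context.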
  if (!x->skip && !x->select_tx_size) {
    int has_high_freq_coeff = 0;
    int plane;
    int max_plane = is_inter_block(xd->mi[0]) ? MAX_MB_PLANE : 1;
    for (plane = 0; plane < max_plane; ++plane) {
      x->plane[plane].eobs = ctx->eobs_pbuf[plane][1];
      has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);
    }

    for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) {
      x->plane[plane].eobs = ctx->eobs_pbuf[plane][2];
      has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);
    }

    best_mode_skippable |= !has_high_freq_coeff;
  }

  assert(best_mode_index >= 0);

  store_coding_context(x, ctx, best_mode_index, best_pred_diff,
                       best_filter_diff, best_mode_skippable);
}

void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, TileDataEnc *tile_data,
                                        MACROBLOCK *x, RD_COST *rd_cost,
                                        BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  unsigned char segment_id = mi->segment_id;
  const int comp_pred = 0;
  int i;
  int64_t best_pred_diff[REFERENCE_MODES];
  int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
  vpx_prob comp_mode_p;
  INTERP_FILTER best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  const int64_t distortion2 = 0;

  x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;

  estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
                           &comp_mode_p);

  for (i = 0; i < MAX_REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

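  // With SEG_LVL_SKIP active the block is coded as ZEROMV on LAST_FRAME with
  // no residual; only the interp filter and signaling costs remain to choose.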
  mi->mode = ZEROMV;
  mi->uv_mode = DC_PRED;
  mi->ref_frame[0] = LAST_FRAME;
  mi->ref_frame[1] = NO_REF_FRAME;
  mi->mv[0].as_int = 0;
  x->skip = 1;

  ctx->sum_y_eobs = 0;

  if (cm->interp_filter != BILINEAR) {
    best_filter = EIGHTTAP;
    if (cm->interp_filter == SWITCHABLE &&
        x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mi->interp_filter = i;
        rs = vp9_get_switchable_rate(cpi, xd);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mi->interp_filter;
        }
      }
    }
  }
  // Set the appropriate filter
  if (cm->interp_filter == SWITCHABLE) {
    mi->interp_filter = best_filter;
    rate2 += vp9_get_switchable_rate(cpi, xd);
  } else {
    mi->interp_filter = cm->interp_filter;
  }

  if (cm->reference_mode == REFERENCE_MODE_SELECT)
    rate2 += vp9_cost_bit(comp_mode_p, comp_pred);

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == mi->interp_filter));

  vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact,
                            cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);

  vp9_zero(best_pred_diff);
  vp9_zero(best_filter_diff);

  if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
  store_coding_context(x, ctx, THR_ZEROMV, best_pred_diff, best_filter_diff, 0);
}

void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
                                   MACROBLOCK *x, int mi_row, int mi_col,
                                   RD_COST *rd_cost, BLOCK_SIZE bsize,
                                   PICK_MODE_CONTEXT *ctx,
                                   int64_t best_rd_so_far) {
  VP9_COMMON *const cm = &cpi->common;
  RD_OPT *const rd_opt = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  const struct segmentation *const seg = &cm->seg;
  MV_REFERENCE_FRAME ref_frame, second_ref_frame;
  unsigned char segment_id = mi->segment_id;
  int comp_pred, i;
  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  struct buf_2d yv12_mb[4][MAX_MB_PLANE] = { 0 };
  int64_t best_rd = best_rd_so_far;
  int64_t best_yrd = best_rd_so_far;  // FIXME(rbultje) more precise
  int64_t best_pred_diff[REFERENCE_MODES];
  int64_t best_pred_rd[REFERENCE_MODES];
  int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
  int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  MODE_INFO best_mbmode;
  int ref_index, best_ref_index = 0;
  unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
  vpx_prob comp_mode_p;
  INTERP_FILTER tmp_best_filter = SWITCHABLE;
  int rate_uv_intra, rate_uv_tokenonly;
  int64_t dist_uv;
  int skip_uv;
  PREDICTION_MODE mode_uv = DC_PRED;
  const int intra_cost_penalty =
      vp9_get_intra_cost_penalty(cpi, bsize, cm->base_qindex, cm->y_dc_delta_q);
  int_mv seg_mvs[4][MAX_REF_FRAMES];
  b_mode_info best_bmodes[4];
  int best_skip2 = 0;
  int ref_frame_skip_mask[2] = { 0 };
  int64_t mask_filter = 0;
  int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
  int internal_active_edge =
      vp9_active_edge_sb(cpi, mi_row, mi_col) && vp9_internal_image_edge(cpi);
  const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];

  x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  memset(x->zcoeff_blk[TX_4X4], 0, 4);
  vp9_zero(best_mbmode);

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX;

  for (i = 0; i < 4; i++) {
    int j;
    for (j = 0; j < MAX_REF_FRAMES; j++) seg_mvs[i][j].as_int = INVALID_MV;
  }

  estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
                           &comp_mode_p);

  for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
    best_filter_rd[i] = INT64_MAX;
  rate_uv_intra = INT_MAX;

  rd_cost->rate = INT_MAX;

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
    if (cpi->ref_frame_flags & ref_frame_to_flag(ref_frame)) {
      setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
                         frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
    } else {
      ref_frame_skip_mask[0] |= (1 << ref_frame);
      ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    }
    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
    frame_mv[ZEROMV][ref_frame].as_int = 0;
  }
4370
  for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
    int mode_excluded = 0;
    int64_t this_rd = INT64_MAX;
    int disable_skip = 0;
    int compmode_cost = 0;
    int rate2 = 0, rate_y = 0, rate_uv = 0;
    int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
    int skippable = 0;
    int this_skip2 = 0;
    int64_t total_sse = INT_MAX;
    int early_term = 0;
    struct buf_2d backup_yv12[2][MAX_MB_PLANE];

    ref_frame = vp9_ref_order[ref_index].ref_frame[0];
    second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];

    vp9_zero(x->sum_y_eobs);

#if CONFIG_BETTER_HW_COMPATIBILITY
    // Forbid 8X4 and 4X8 partitions if any reference frame is scaled.
    if (bsize == BLOCK_8X4 || bsize == BLOCK_4X8) {
      int ref_scaled = ref_frame > INTRA_FRAME &&
                       vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf);
      if (second_ref_frame > INTRA_FRAME)
        ref_scaled += vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf);
      if (ref_scaled) continue;
    }
#endif
    // Look at the reference frame of the best mode so far and set the
    // skip mask to look at a subset of the remaining modes.
    if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {
      if (ref_index == 3) {
        switch (best_mbmode.ref_frame[0]) {
          case INTRA_FRAME: break;
          case LAST_FRAME:
            ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME);
            ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
            break;
          case GOLDEN_FRAME:
            ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME);
            ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
            break;
          case ALTREF_FRAME:
            ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME);
            break;
          case NO_REF_FRAME:
          case MAX_REF_FRAMES: assert(0 && "Invalid Reference frame"); break;
        }
      }
    }

    if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
        (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
      continue;

    // Test best rd so far against threshold for trying this mode.
    if (!internal_active_edge &&
        rd_less_than_thresh(best_rd,
                            rd_opt->threshes[segment_id][bsize][ref_index],
                            &rd_thresh_freq_fact[ref_index]))
      continue;

    // This is only used in the motion vector unit test.
    if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue;

    comp_pred = second_ref_frame > INTRA_FRAME;
    if (comp_pred) {
      if (!cpi->allow_comp_inter_inter) continue;

      if (cm->ref_frame_sign_bias[ref_frame] ==
          cm->ref_frame_sign_bias[second_ref_frame])
        continue;

      if (!(cpi->ref_frame_flags & ref_frame_to_flag(second_ref_frame)))
        continue;
      // Do not allow compound prediction if the segment-level reference frame
      // feature is in use, since in that case there can only be one reference.
      if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;

      if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
          best_mbmode.ref_frame[0] == INTRA_FRAME)
        continue;
    }

    if (comp_pred)
      mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
    else if (ref_frame != INTRA_FRAME)
      mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;

    // If the segment-level reference frame feature is enabled, then do
    // nothing if the current ref frame is not allowed.
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      continue;
      // The drop-out below is disabled when the segment-level reference frame
      // feature is active, to prevent the possibility that we end up unable
      // to pick any mode.
    } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
      // When this frame is the overlay for an unfiltered alt-ref frame (the
      // two are pixel-identical), ZEROMV/ALTREF prediction at larger block
      // sizes already predicts it exactly, so skip the sub8x8 search here.
      if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
        continue;
    }

    mi->tx_size = TX_4X4;
    mi->uv_mode = DC_PRED;
    mi->ref_frame[0] = ref_frame;
    mi->ref_frame[1] = second_ref_frame;
    // Evaluate all sub-pel filters irrespective of whether we can use
    // them for this frame.
    mi->interp_filter =
        cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter;
    x->skip = 0;
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);

    // Select prediction reference frames.
    for (i = 0; i < MAX_MB_PLANE; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }

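    // Intra path: pick per-4x4 luma modes, then reuse a single UV intra mode
    // that is computed once and cached across loop iterations.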
    if (ref_frame == INTRA_FRAME) {
      int rate;
      if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y,
                                       best_rd) >= best_rd)
        continue;
      rate2 += rate;
      rate2 += intra_cost_penalty;
      distortion2 += distortion_y;

      if (rate_uv_intra == INT_MAX) {
        choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4, &rate_uv_intra,
                             &rate_uv_tokenonly, &dist_uv, &skip_uv, &mode_uv);
      }
      rate2 += rate_uv_intra;
      rate_uv = rate_uv_tokenonly;
      distortion2 += dist_uv;
      distortion_uv = dist_uv;
      mi->uv_mode = mode_uv;
    } else {
      int rate;
      int64_t distortion;
      int64_t this_rd_thresh;
      int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
      int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
      int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
      int tmp_best_skippable = 0;
      int switchable_filter_index;
      int_mv *second_ref =
          comp_pred ? &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL;
      b_mode_info tmp_best_bmodes[16];
      MODE_INFO tmp_best_mbmode;
      BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
      int pred_exists = 0;
      int uv_skippable;

      YV12_BUFFER_CONFIG *scaled_ref_frame[2] = { NULL, NULL };
      int ref;

      for (ref = 0; ref < 2; ++ref) {
        scaled_ref_frame[ref] =
            mi->ref_frame[ref] > INTRA_FRAME
                ? vp9_get_scaled_ref_frame(cpi, mi->ref_frame[ref])
                : NULL;

        if (scaled_ref_frame[ref]) {
          // Swap out the reference frame for a version that's been scaled to
          // match the resolution of the current frame, allowing the existing
          // motion search code to be used without additional modifications.
          for (i = 0; i < MAX_MB_PLANE; i++)
            backup_yv12[ref][i] = xd->plane[i].pre[ref];
          vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
                               NULL);
        }
      }

      this_rd_thresh = (ref_frame == LAST_FRAME)
                           ? rd_opt->threshes[segment_id][bsize][THR_LAST]
                           : rd_opt->threshes[segment_id][bsize][THR_ALTR];
      this_rd_thresh = (ref_frame == GOLDEN_FRAME)
                           ? rd_opt->threshes[segment_id][bsize][THR_GOLD]
                           : this_rd_thresh;
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
        filter_cache[i] = INT64_MAX;

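      // Interpolation filter selection: use a fixed EIGHTTAP when the source
      // variance is low, or a previously predicted filter when the adaptive
      // speed features allow it; otherwise run an exhaustive RD search over
      // the switchable filters. filter_cache[] records the rd cost per
      // filter, with the extra SWITCHABLE_FILTERS slot holding the best cost
      // including the filter signaling rate.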
      if (cm->interp_filter != BILINEAR) {
        tmp_best_filter = EIGHTTAP;
        if (x->source_variance < sf->disable_filter_search_var_thresh) {
          tmp_best_filter = EIGHTTAP;
        } else if (sf->adaptive_pred_interp_filter == 1 &&
                   ctx->pred_interp_filter < SWITCHABLE) {
          tmp_best_filter = ctx->pred_interp_filter;
        } else if (sf->adaptive_pred_interp_filter == 2) {
          tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE
                                ? ctx->pred_interp_filter
                                : 0;
        } else {
          for (switchable_filter_index = 0;
               switchable_filter_index < SWITCHABLE_FILTERS;
               ++switchable_filter_index) {
            int newbest, rs;
            int64_t rs_rd;
            MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
            mi->interp_filter = switchable_filter_index;
            tmp_rd = rd_pick_best_sub8x8_mode(
                cpi, x, &mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd,
                &rate, &rate_y, &distortion, &skippable, &total_sse,
                (int)this_rd_thresh, seg_mvs, bsi, switchable_filter_index,
                mi_row, mi_col);

            if (tmp_rd == INT64_MAX) continue;
            rs = vp9_get_switchable_rate(cpi, xd);
            rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
            filter_cache[switchable_filter_index] = tmp_rd;
            filter_cache[SWITCHABLE_FILTERS] =
                VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd);
            if (cm->interp_filter == SWITCHABLE) tmp_rd += rs_rd;

            mask_filter = VPXMAX(mask_filter, tmp_rd);

            newbest = (tmp_rd < tmp_best_rd);
            if (newbest) {
              tmp_best_filter = mi->interp_filter;
              tmp_best_rd = tmp_rd;
            }
            if ((newbest && cm->interp_filter == SWITCHABLE) ||
                (mi->interp_filter == cm->interp_filter &&
                 cm->interp_filter != SWITCHABLE)) {
              tmp_best_rdu = tmp_rd;
              tmp_best_rate = rate;
              tmp_best_ratey = rate_y;
              tmp_best_distortion = distortion;
              tmp_best_sse = total_sse;
              tmp_best_skippable = skippable;
              tmp_best_mbmode = *mi;
              x->sum_y_eobs[TX_4X4] = 0;
              for (i = 0; i < 4; i++) {
                tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
                x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
                x->sum_y_eobs[TX_4X4] += x->plane[0].eobs[i];
              }
              pred_exists = 1;
              if (switchable_filter_index == 0 && sf->use_rd_breakout &&
                  best_rd < INT64_MAX) {
                if (tmp_best_rdu / 2 > best_rd) {
                  // Skip searching the other filters if the first one's rd
                  // cost is already substantially larger than the best so far.
                  tmp_best_filter = mi->interp_filter;
                  tmp_best_rdu = INT64_MAX;
                  break;
                }
              }
            }
          }  // switchable_filter_index loop
        }
      }

      if (tmp_best_rdu == INT64_MAX && pred_exists) continue;

      mi->interp_filter = (cm->interp_filter == SWITCHABLE ? tmp_best_filter
                                                           : cm->interp_filter);
      if (!pred_exists) {
        // Handles the special case when a filter that is not in the
        // switchable list (e.g. bilinear) is indicated at the frame level.
        tmp_rd = rd_pick_best_sub8x8_mode(
            cpi, x, &x->mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd,
            &rate, &rate_y, &distortion, &skippable, &total_sse,
            (int)this_rd_thresh, seg_mvs, bsi, 0, mi_row, mi_col);
        if (tmp_rd == INT64_MAX) continue;
        x->sum_y_eobs[TX_4X4] = 0;
        for (i = 0; i < 4; i++) {
          x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
          x->sum_y_eobs[TX_4X4] += x->plane[0].eobs[i];
        }
      } else {
        total_sse = tmp_best_sse;
        rate = tmp_best_rate;
        rate_y = tmp_best_ratey;
        distortion = tmp_best_distortion;
        skippable = tmp_best_skippable;
        *mi = tmp_best_mbmode;
        for (i = 0; i < 4; i++) xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
      }

      rate2 += rate;
      distortion2 += distortion;

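      // When the frame-level filter is SWITCHABLE the chosen filter is coded
      // per block, so its signaling rate is charged to this mode here.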
      if (cm->interp_filter == SWITCHABLE)
        rate2 += vp9_get_switchable_rate(cpi, xd);

      if (!mode_excluded)
        mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
                                  : cm->reference_mode == COMPOUND_REFERENCE;

      compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);

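      // Remaining rd budget for the UV planes: best_rd minus the cheaper of
      // coding this mode's residual (rate2, distortion2) or skipping the
      // residual entirely (zero rate, total_sse distortion). tmp_best_rdu is
      // reused here to hold that budget.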
      tmp_best_rdu =
          best_rd - VPXMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
                           RDCOST(x->rdmult, x->rddiv, 0, total_sse));

      if (tmp_best_rdu > 0) {
        // If even the 'Y' rd value of the split is higher than the best so
        // far, don't bother looking at UV.
        vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, BLOCK_8X8);
        memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
        if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
                              &uv_sse, BLOCK_8X8, tmp_best_rdu)) {
          for (ref = 0; ref < 2; ++ref) {
            if (scaled_ref_frame[ref]) {
              for (i = 0; i < MAX_MB_PLANE; ++i)
                xd->plane[i].pre[ref] = backup_yv12[ref][i];
            }
          }
          continue;
        }

        rate2 += rate_uv;
        distortion2 += distortion_uv;
        skippable = skippable && uv_skippable;
        total_sse += uv_sse;
      }

      for (ref = 0; ref < 2; ++ref) {
        if (scaled_ref_frame[ref]) {
          // Restore the prediction frame pointers to their unscaled versions.
          for (i = 0; i < MAX_MB_PLANE; ++i)
            xd->plane[i].pre[ref] = backup_yv12[ref][i];
        }
      }
    }

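    // Add the cost of signaling single vs. compound prediction when the
    // frame allows either to be selected per block.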
    if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;

    // Estimate the reference frame signaling cost and add it
    // to the rolling cost variable.
    if (second_ref_frame > INTRA_FRAME) {
      rate2 += ref_costs_comp[ref_frame];
    } else {
      rate2 += ref_costs_single[ref_frame];
    }

    if (!disable_skip) {
      const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
      const int skip_cost0 = vp9_cost_bit(skip_prob, 0);
      const int skip_cost1 = vp9_cost_bit(skip_prob, 1);

      // Skip is never coded at the segment level for sub8x8 blocks and instead
      // always coded in the bitstream at the mode info level.
      if (ref_frame != INTRA_FRAME && !xd->lossless) {
        if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + skip_cost0,
                   distortion2) <
            RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) {
          // Add in the cost of the no skip flag.
          rate2 += skip_cost0;
        } else {
          // FIXME(rbultje) make this work for splitmv also
          rate2 += skip_cost1;
          distortion2 = total_sse;
          assert(total_sse >= 0);
          rate2 -= (rate_y + rate_uv);
          rate_y = 0;
          rate_uv = 0;
          this_skip2 = 1;
        }
      } else {
        // Add in the cost of the no skip flag.
        rate2 += skip_cost0;
      }

      // Calculate the final RD estimate for this mode.
      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    }

    if (!disable_skip && ref_frame == INTRA_FRAME) {
      for (i = 0; i < REFERENCE_MODES; ++i)
        best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
        best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
    }

    // Did this mode help, i.e. is it the new best mode so far?
    if (this_rd < best_rd || x->skip) {
      if (!mode_excluded) {
        int max_plane = MAX_MB_PLANE;
        // Note index of best mode so far
        best_ref_index = ref_index;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mi->mv[0].as_int = 0;
          max_plane = 1;
          // Initialize interp_filter here so we do not have to check for
          // inter block modes in get_pred_context_switchable_interp()
          mi->interp_filter = SWITCHABLE_FILTERS;
        }

        rd_cost->rate = rate2;
        rd_cost->dist = distortion2;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
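        // best_yrd isolates the luma-only portion of the winning cost; it
        // bounds the Y-only sub8x8 searches in later iterations.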
        best_yrd =
            best_rd - RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
        best_mbmode = *mi;
        best_skip2 = this_skip2;
        if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
        memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
               sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
        ctx->sum_y_eobs = x->sum_y_eobs[TX_4X4];

        for (i = 0; i < 4; i++) best_bmodes[i] = xd->mi[0]->bmi[i];

        // TODO(debargha): enhance this test with a better distortion prediction
        // based on qp, activity mask and history
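        // Terminate early when an inter mode's distortion is already well
        // below one quantizer step squared (scaled), since later modes are
        // unlikely to improve the rd cost materially.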
        if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
            (ref_index > MIN_EARLY_TERM_INDEX)) {
          int qstep = xd->plane[0].dequant[1];
          // TODO(debargha): Enhance this by specializing for each mode_index
          int scale = 4;
#if CONFIG_VP9_HIGHBITDEPTH
          if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
            qstep >>= (xd->bd - 8);
          }
#endif  // CONFIG_VP9_HIGHBITDEPTH
          if (x->source_variance < UINT_MAX) {
            const int var_adjust = (x->source_variance < 16);
            scale -= var_adjust;
          }
          if (ref_frame > INTRA_FRAME && distortion2 * scale < qstep * qstep) {
            early_term = 1;
          }
        }
      }
    }

    /* keep record of best compound/single-only prediction */
    if (!disable_skip && ref_frame != INTRA_FRAME) {
      int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cm->reference_mode == REFERENCE_MODE_SELECT) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);

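      // Note: single_rd excludes the comp-mode selection bit, i.e. it is the
      // cost under a fixed reference mode, so it is recorded in whichever
      // slot (single or compound) matches this prediction type.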
      if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE])
        best_pred_rd[SINGLE_REFERENCE] = single_rd;
      else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE])
        best_pred_rd[COMPOUND_REFERENCE] = single_rd;

      if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
        best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
    }

    /* keep record of best filter type */
    if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
        cm->interp_filter != BILINEAR) {
      int64_t ref =
          filter_cache[cm->interp_filter == SWITCHABLE ? SWITCHABLE_FILTERS
                                                       : cm->interp_filter];
      int64_t adj_rd;
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
        if (ref == INT64_MAX)
          adj_rd = 0;
        else if (filter_cache[i] == INT64_MAX)
          // When early termination is triggered, the encoder does not have
          // access to the rate-distortion cost. It only knows that the cost
          // should be above the maximum valid value, hence it takes the known
          // maximum plus an arbitrary constant as the rate-distortion cost.
          adj_rd = mask_filter - ref + 10;
        else
          adj_rd = filter_cache[i] - ref;

        adj_rd += this_rd;
        best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
      }
    }

    if (early_term) break;

    if (x->skip && !comp_pred) break;
  }

  if (best_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  // If we used an estimate for the UV intra rd in the loop above, redo the
  // full Intra UV mode selection, but only if the best mode chosen above
  // was intra.
  if (sf->use_uv_intra_rd_estimate) {
    if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
      *mi = best_mbmode;
      rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra, &rate_uv_tokenonly,
                              &dist_uv, &skip_uv, BLOCK_8X8, TX_4X4);
    }
  }

  if (best_rd == INT64_MAX) {
    rd_cost->rate = INT_MAX;
    rd_cost->dist = INT64_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

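  // Sanity check: unless the frame-level filter is SWITCHABLE, any inter
  // best mode must carry the frame-level filter.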
  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == best_mbmode.interp_filter) ||
         !is_inter_block(&best_mbmode));

  vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, sf->adaptive_rd_thresh,
                            bsize, best_ref_index);

  // Copy back the selected macroblock modes.
  *mi = best_mbmode;
  x->skip |= best_skip2;
  if (!is_inter_block(&best_mbmode)) {
    for (i = 0; i < 4; i++) xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
  } else {
    for (i = 0; i < 4; ++i)
      memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));

    mi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
    mi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
  }
  // If the second reference does not exist, set the corresponding mv to zero.
  if (mi->ref_frame[1] == NO_REF_FRAME) {
    mi->mv[1].as_int = 0;
    for (i = 0; i < 4; ++i) {
      mi->bmi[i].as_mv[1].as_int = 0;
    }
  }

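  // Record how much worse each reference mode and filter choice was relative
  // to the overall best rd; these diffs feed the frame-level decisions on
  // reference mode and interpolation filter.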
  for (i = 0; i < REFERENCE_MODES; ++i) {
    if (best_pred_rd[i] == INT64_MAX)
      best_pred_diff[i] = INT_MIN;
    else
      best_pred_diff[i] = best_rd - best_pred_rd[i];
  }

  if (!x->skip) {
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
      if (best_filter_rd[i] == INT64_MAX)
        best_filter_diff[i] = 0;
      else
        best_filter_diff[i] = best_rd - best_filter_rd[i];
    }
    if (cm->interp_filter == SWITCHABLE)
      assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
  } else {
    vp9_zero(best_filter_diff);
  }

  store_coding_context(x, ctx, best_ref_index, best_pred_diff, best_filter_diff,
                       0);
}
#endif  // !CONFIG_REALTIME_ONLY
4934