1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include <limits.h>
14 #include <math.h>
15 #include <stdio.h>
16
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_mem/aom_mem.h"
19 #include "aom_ports/bitops.h"
20 #include "aom_ports/mem.h"
21 #include "aom_ports/aom_once.h"
22
23 #include "av1/common/common.h"
24 #include "av1/common/entropy.h"
25 #include "av1/common/entropymode.h"
26 #include "av1/common/pred_common.h"
27 #include "av1/common/quant_common.h"
28 #include "av1/common/reconinter.h"
29 #include "av1/common/reconintra.h"
30 #include "av1/common/seg_common.h"
31
32 #include "av1/encoder/cost.h"
33 #include "av1/encoder/encodemv.h"
34 #include "av1/encoder/encoder.h"
35 #include "av1/encoder/nonrd_opt.h"
36 #include "av1/encoder/ratectrl.h"
37 #include "av1/encoder/rd.h"
38 #include "config/aom_config.h"
39
40 #define RD_THRESH_POW 1.25
41
42 // The baseline rd thresholds for breaking out of the rd loop for
43 // certain modes are assumed to be based on 8x8 blocks.
44 // This table is used to correct for block size.
45 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
// Indexed by BLOCK_SIZE in BLOCK_SIZES_ALL enum order (square sizes first,
// then the rectangular sizes at the end).
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
};
49
// For each intra extended-tx set, flags which of the EXT_TX_SIZES square
// transform sizes use that set (1 = used).
static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA]
                                            [EXT_TX_SIZES] = {
                                              { 1, 1, 1, 1 },  // unused
                                              { 1, 1, 0, 0 },
                                              { 0, 0, 1, 0 },
                                            };
56
// For each inter extended-tx set, flags which of the EXT_TX_SIZES square
// transform sizes use that set (1 = used).
static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER]
                                            [EXT_TX_SIZES] = {
                                              { 1, 1, 1, 1 },  // unused
                                              { 1, 1, 0, 0 },
                                              { 0, 0, 1, 0 },
                                              { 0, 1, 1, 1 },
                                            };
64
// Maps a tx-set index to its TxSetType; row 0 is for intra sets, row 1 for
// inter sets.
static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
                                                      EXT_TX_SETS_INTER)] = {
  {
      // Intra
      EXT_TX_SET_DCTONLY,
      EXT_TX_SET_DTT4_IDTX_1DDCT,
      EXT_TX_SET_DTT4_IDTX,
  },
  {
      // Inter
      EXT_TX_SET_DCTONLY,
      EXT_TX_SET_ALL16,
      EXT_TX_SET_DTT9_IDTX_1DDCT,
      EXT_TX_SET_DCT_IDTX,
  },
};
81
// Computes the bit cost of every mode/partition/reference syntax symbol
// from the CDFs in |fc| and caches the results in |mode_costs| for the
// encoder's RD search. Inter-prediction related costs are only filled for
// frames that are not intra-only.
void av1_fill_mode_rates(AV1_COMMON *const cm, ModeCosts *mode_costs,
                         FRAME_CONTEXT *fc) {
  int i, j;

  // Block partition costs, per partition context.
  for (i = 0; i < PARTITION_CONTEXTS; ++i)
    av1_cost_tokens_from_cdf(mode_costs->partition_cost[i],
                             fc->partition_cdf[i], NULL);

  // Skip-mode flag costs (only meaningful when skip mode is enabled for
  // the current frame).
  if (cm->current_frame.skip_mode_info.skip_mode_flag) {
    for (i = 0; i < SKIP_MODE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->skip_mode_cost[i],
                               fc->skip_mode_cdfs[i], NULL);
    }
  }

  // Skip-transform flag costs.
  for (i = 0; i < SKIP_CONTEXTS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->skip_txfm_cost[i],
                             fc->skip_txfm_cdfs[i], NULL);
  }

  // Key-frame luma intra mode costs, contexted on the two neighbors.
  for (i = 0; i < KF_MODE_CONTEXTS; ++i)
    for (j = 0; j < KF_MODE_CONTEXTS; ++j)
      av1_cost_tokens_from_cdf(mode_costs->y_mode_costs[i][j],
                               fc->kf_y_cdf[i][j], NULL);

  // Non-key-frame luma intra mode costs, per block-size group.
  for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
    av1_cost_tokens_from_cdf(mode_costs->mbmode_cost[i], fc->y_mode_cdf[i],
                             NULL);
  // Chroma intra mode costs (separate tables for CfL-allowed blocks).
  for (i = 0; i < CFL_ALLOWED_TYPES; ++i)
    for (j = 0; j < INTRA_MODES; ++j)
      av1_cost_tokens_from_cdf(mode_costs->intra_uv_mode_cost[i][j],
                               fc->uv_mode_cdf[i][j], NULL);

  // Filter-intra mode costs, plus the per-block-size enable flag where
  // filter intra is allowed.
  av1_cost_tokens_from_cdf(mode_costs->filter_intra_mode_cost,
                           fc->filter_intra_mode_cdf, NULL);
  for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
    if (av1_filter_intra_allowed_bsize(cm, i))
      av1_cost_tokens_from_cdf(mode_costs->filter_intra_cost[i],
                               fc->filter_intra_cdfs[i], NULL);
  }

  // Switchable interpolation filter costs.
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
    av1_cost_tokens_from_cdf(mode_costs->switchable_interp_costs[i],
                             fc->switchable_interp_cdf[i], NULL);

  // Palette size and palette enable costs (Y and UV).
  for (i = 0; i < PALATTE_BSIZE_CTXS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->palette_y_size_cost[i],
                             fc->palette_y_size_cdf[i], NULL);
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_size_cost[i],
                             fc->palette_uv_size_cdf[i], NULL);
    for (j = 0; j < PALETTE_Y_MODE_CONTEXTS; ++j) {
      av1_cost_tokens_from_cdf(mode_costs->palette_y_mode_cost[i][j],
                               fc->palette_y_mode_cdf[i][j], NULL);
    }
  }

  for (i = 0; i < PALETTE_UV_MODE_CONTEXTS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_mode_cost[i],
                             fc->palette_uv_mode_cdf[i], NULL);
  }

  // Palette color-index costs.
  for (i = 0; i < PALETTE_SIZES; ++i) {
    for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) {
      av1_cost_tokens_from_cdf(mode_costs->palette_y_color_cost[i][j],
                               fc->palette_y_color_index_cdf[i][j], NULL);
      av1_cost_tokens_from_cdf(mode_costs->palette_uv_color_cost[i][j],
                               fc->palette_uv_color_index_cdf[i][j], NULL);
    }
  }

  // CfL (chroma-from-luma) alpha costs. The joint-sign cost is folded into
  // the U table only — presumably because U is costed first so the joint
  // sign is paid exactly once per block; confirm against the CfL RD code.
  int sign_cost[CFL_JOINT_SIGNS];
  av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL);
  for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
    int *cost_u = mode_costs->cfl_cost[joint_sign][CFL_PRED_U];
    int *cost_v = mode_costs->cfl_cost[joint_sign][CFL_PRED_V];
    if (CFL_SIGN_U(joint_sign) == CFL_SIGN_ZERO) {
      // Zero sign: no alpha magnitude is coded, so the cost is zero.
      memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u));
    } else {
      const aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
      av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL);
    }
    if (CFL_SIGN_V(joint_sign) == CFL_SIGN_ZERO) {
      memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v));
    } else {
      const aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
      av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL);
    }
    for (int u = 0; u < CFL_ALPHABET_SIZE; u++)
      cost_u[u] += sign_cost[joint_sign];
  }

  // Transform size costs per depth category and context.
  for (i = 0; i < MAX_TX_CATS; ++i)
    for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
      av1_cost_tokens_from_cdf(mode_costs->tx_size_cost[i][j],
                               fc->tx_size_cdf[i][j], NULL);

  // Recursive transform partition split costs.
  for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->txfm_partition_cost[i],
                             fc->txfm_partition_cdf[i], NULL);
  }

  // Extended transform type costs. Set 0 is DCT-only and needs no costs;
  // the inverse mapping reorders symbols to TX_TYPE indices.
  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
    int s;
    for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
      if (use_inter_ext_tx_for_txsize[s][i]) {
        av1_cost_tokens_from_cdf(
            mode_costs->inter_tx_type_costs[s][i], fc->inter_ext_tx_cdf[s][i],
            av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]);
      }
    }
    for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
      if (use_intra_ext_tx_for_txsize[s][i]) {
        for (j = 0; j < INTRA_MODES; ++j) {
          av1_cost_tokens_from_cdf(
              mode_costs->intra_tx_type_costs[s][i][j],
              fc->intra_ext_tx_cdf[s][i][j],
              av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][s]]);
        }
      }
    }
  }
  // Directional intra mode angle-delta costs.
  for (i = 0; i < DIRECTIONAL_MODES; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->angle_delta_cost[i],
                             fc->angle_delta_cdf[i], NULL);
  }
  av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL);

  // Segmentation syntax costs (spatial and temporal prediction).
  for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->spatial_pred_cost[i],
                             fc->seg.spatial_pred_seg_cdf[i], NULL);
  }

  for (i = 0; i < SEG_TEMPORAL_PRED_CTXS; ++i) {
    av1_cost_tokens_from_cdf(mode_costs->tmp_pred_cost[i], fc->seg.pred_cdf[i],
                             NULL);
  }

  // Inter-prediction syntax costs are only needed for inter frames.
  if (!frame_is_intra_only(cm)) {
    // Single vs compound prediction.
    for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->comp_inter_cost[i],
                               fc->comp_inter_cdf[i], NULL);
    }

    // Single reference-frame selection tree.
    for (i = 0; i < REF_CONTEXTS; ++i) {
      for (j = 0; j < SINGLE_REFS - 1; ++j) {
        av1_cost_tokens_from_cdf(mode_costs->single_ref_cost[i][j],
                                 fc->single_ref_cdf[i][j], NULL);
      }
    }

    // Compound reference type and reference selection trees.
    for (i = 0; i < COMP_REF_TYPE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->comp_ref_type_cost[i],
                               fc->comp_ref_type_cdf[i], NULL);
    }

    for (i = 0; i < UNI_COMP_REF_CONTEXTS; ++i) {
      for (j = 0; j < UNIDIR_COMP_REFS - 1; ++j) {
        av1_cost_tokens_from_cdf(mode_costs->uni_comp_ref_cost[i][j],
                                 fc->uni_comp_ref_cdf[i][j], NULL);
      }
    }

    for (i = 0; i < REF_CONTEXTS; ++i) {
      for (j = 0; j < FWD_REFS - 1; ++j) {
        av1_cost_tokens_from_cdf(mode_costs->comp_ref_cost[i][j],
                                 fc->comp_ref_cdf[i][j], NULL);
      }
    }

    for (i = 0; i < REF_CONTEXTS; ++i) {
      for (j = 0; j < BWD_REFS - 1; ++j) {
        av1_cost_tokens_from_cdf(mode_costs->comp_bwdref_cost[i][j],
                                 fc->comp_bwdref_cdf[i][j], NULL);
      }
    }

    // Intra-vs-inter flag.
    for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[i],
                               fc->intra_inter_cdf[i], NULL);
    }

    // Inter mode syntax: NEWMV / GLOBALMV / REFMV flags and DRL index.
    for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->newmv_mode_cost[i], fc->newmv_cdf[i],
                               NULL);
    }

    for (i = 0; i < GLOBALMV_MODE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->zeromv_mode_cost[i],
                               fc->zeromv_cdf[i], NULL);
    }

    for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->refmv_mode_cost[i], fc->refmv_cdf[i],
                               NULL);
    }

    for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->drl_mode_cost0[i], fc->drl_cdf[i],
                               NULL);
    }
    // Compound inter mode and compound type (wedge etc.) costs.
    for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
      av1_cost_tokens_from_cdf(mode_costs->inter_compound_mode_cost[i],
                               fc->inter_compound_mode_cdf[i], NULL);
    for (i = 0; i < BLOCK_SIZES_ALL; ++i)
      av1_cost_tokens_from_cdf(mode_costs->compound_type_cost[i],
                               fc->compound_type_cdf[i], NULL);
    for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
      if (av1_is_wedge_used(i)) {
        av1_cost_tokens_from_cdf(mode_costs->wedge_idx_cost[i],
                                 fc->wedge_idx_cdf[i], NULL);
      }
    }
    // Inter-intra mode costs.
    for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->interintra_cost[i],
                               fc->interintra_cdf[i], NULL);
      av1_cost_tokens_from_cdf(mode_costs->interintra_mode_cost[i],
                               fc->interintra_mode_cdf[i], NULL);
    }
    for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[i],
                               fc->wedge_interintra_cdf[i], NULL);
    }
    // Motion mode (SIMPLE/OBMC/WARP) costs; only coded for >= 8x8 blocks.
    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
      av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost[i],
                               fc->motion_mode_cdf[i], NULL);
    }
    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
      av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost1[i],
                               fc->obmc_cdf[i], NULL);
    }
    // Compound weighting (distance-weighted vs averaged, masked group).
    for (i = 0; i < COMP_INDEX_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->comp_idx_cost[i],
                               fc->compound_index_cdf[i], NULL);
    }
    for (i = 0; i < COMP_GROUP_IDX_CONTEXTS; ++i) {
      av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[i],
                               fc->comp_group_idx_cdf[i], NULL);
    }
  }
}
322
323 #if !CONFIG_REALTIME_ONLY
// Fills the loop-restoration filter-selection cost tables in |mode_costs|
// from the frame CDFs. Only needed by the non-realtime RD path.
void av1_fill_lr_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
  // Cost of selecting among restoration types in RESTORE_SWITCHABLE units.
  av1_cost_tokens_from_cdf(mode_costs->switchable_restore_cost,
                           fc->switchable_restore_cdf, NULL);
  // Binary enable costs for the Wiener and self-guided (sgrproj) filters.
  av1_cost_tokens_from_cdf(mode_costs->wiener_restore_cost,
                           fc->wiener_restore_cdf, NULL);
  av1_cost_tokens_from_cdf(mode_costs->sgrproj_restore_cost,
                           fc->sgrproj_restore_cdf, NULL);
}
332 #endif // !CONFIG_REALTIME_ONLY
333
// Values are now correlated to quantizer.
// Per-qindex SAD-per-bit lookup tables, one per supported bit depth;
// filled once by init_me_luts().
static int sad_per_bit_lut_8[QINDEX_RANGE];
static int sad_per_bit_lut_10[QINDEX_RANGE];
static int sad_per_bit_lut_12[QINDEX_RANGE];
338
// Fills |bit16lut| (length |range|) with the SAD-per-bit value for every
// qindex at the given bit depth. A linear model of the quantizer step is
// used rather than hand-tuned constants so that changes to the quantizer
// tables automatically propagate here.
static void init_me_luts_bd(int *bit16lut, int range,
                            aom_bit_depth_t bit_depth) {
  for (int idx = 0; idx < range; ++idx) {
    const double qstep = av1_convert_qindex_to_q(idx, bit_depth);
    bit16lut[idx] = (int)(0.0418 * qstep + 2.4107);
  }
}
350
init_me_luts(void)351 static void init_me_luts(void) {
352 init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, AOM_BITS_8);
353 init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, AOM_BITS_10);
354 init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
355 }
356
// Thread-safe one-time initialization of the SAD-per-bit lookup tables.
void av1_init_me_luts(void) { aom_once(init_me_luts); }
358
// ARF-boost-dependent rdmult bump factors in Q7 fixed point, indexed by
// gfu_boost / 100 (clamped to 15): larger boost -> larger bump.
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
                                         8,  8,  4,  4,  2,  2,  1,  0 };

// Per-pyramid-level rdmult scale factors in Q7 fixed point, indexed by
// GF-group layer depth (clamped to 6).
static const int rd_layer_depth_factor[7] = {
  160, 160, 160, 160, 192, 208, 224
};
365
// Returns the default rd multiplier for inter frames for a given qindex.
// First-pass linear fit estimated from a previous Vizer run.
static double def_inter_rd_multiplier(int qindex) {
  const double base = 3.2;
  const double slope = 0.0015;
  return base + slope * (double)qindex;
}
372
// Returns the default rd multiplier for ARF/Golden frames for a given
// qindex. First-pass linear fit estimated from a previous Vizer run.
static double def_arf_rd_multiplier(int qindex) {
  const double base = 3.25;
  const double slope = 0.0015;
  return base + slope * (double)qindex;
}
379
// Returns the default rd multiplier for key frames for a given qindex.
// First-pass linear fit estimated from a previous Vizer run.
static double def_kf_rd_multiplier(int qindex) {
  const double base = 3.3;
  const double slope = 0.0015;
  return base + slope * (double)qindex;
}
386
// Returns the base rd multiplier (Lagrangian lambda scale) for the given
// qindex, bit depth and frame update type. Returns -1 on an unsupported
// bit depth (debug builds assert first); otherwise the result is clamped
// into [1, INT_MAX].
int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
                                        FRAME_UPDATE_TYPE update_type,
                                        int qindex) {
  const int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
  // lambda is proportional to the square of the quantizer step.
  int64_t rdmult = q * q;
  // NOTE(review): the def_*_rd_multiplier() helpers are documented as
  // taking a qindex but are passed the DC quant step |q| here — presumably
  // the multipliers were tuned against this value; confirm before changing.
  if (update_type == KF_UPDATE) {
    double def_rd_q_mult = def_kf_rd_multiplier(q);
    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
  } else if ((update_type == GF_UPDATE) || (update_type == ARF_UPDATE)) {
    double def_rd_q_mult = def_arf_rd_multiplier(q);
    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
  } else {
    double def_rd_q_mult = def_inter_rd_multiplier(q);
    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
  }

  // Normalize for the extra precision of higher bit depths: quant steps
  // scale by 4x per 2 extra bits, so q*q scales by 16x per depth step.
  switch (bit_depth) {
    case AOM_BITS_8: break;
    case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
    case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }
  // Never return a non-positive multiplier.
  return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
}
413
av1_compute_rd_mult(const int qindex,const aom_bit_depth_t bit_depth,const FRAME_UPDATE_TYPE update_type,const int layer_depth,const int boost_index,const FRAME_TYPE frame_type,const int use_fixed_qp_offsets,const int is_stat_consumption_stage)414 int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
415 const FRAME_UPDATE_TYPE update_type,
416 const int layer_depth, const int boost_index,
417 const FRAME_TYPE frame_type,
418 const int use_fixed_qp_offsets,
419 const int is_stat_consumption_stage) {
420 int64_t rdmult =
421 av1_compute_rd_mult_based_on_qindex(bit_depth, update_type, qindex);
422 if (is_stat_consumption_stage && !use_fixed_qp_offsets &&
423 (frame_type != KEY_FRAME)) {
424 // Layer depth adjustment
425 rdmult = (rdmult * rd_layer_depth_factor[layer_depth]) >> 7;
426 // ARF boost adjustment
427 rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
428 }
429 return (int)rdmult;
430 }
431
// Returns the signed qindex offset whose quantizer step best matches
// q / sqrt(beta), i.e. the delta-q needed to scale the rate-distortion
// trade-off by |beta| (> 0). Positive beta < 1 raises q, beta > 1 lowers it.
int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
  assert(beta > 0.0);
  const int orig_qindex = qindex;
  int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
  const int target_q = (int)rint(q / sqrt(beta));
  if (target_q == q) return 0;

  // Walk the qindex in the direction of the target step until the target
  // is reached/passed or the index range is exhausted.
  const int step = (target_q < q) ? -1 : 1;
  while ((step < 0) ? (qindex > 0) : (qindex < MAXQ)) {
    qindex += step;
    q = av1_dc_quant_QTX(qindex, 0, bit_depth);
    const int reached = (step < 0) ? (target_q >= q) : (target_q <= q);
    if (reached) break;
  }
  return qindex - orig_qindex;
}
459
av1_adjust_q_from_delta_q_res(int delta_q_res,int prev_qindex,int curr_qindex)460 int av1_adjust_q_from_delta_q_res(int delta_q_res, int prev_qindex,
461 int curr_qindex) {
462 curr_qindex = clamp(curr_qindex, delta_q_res, 256 - delta_q_res);
463 const int sign_deltaq_index = curr_qindex - prev_qindex >= 0 ? 1 : -1;
464 const int deltaq_deadzone = delta_q_res / 4;
465 const int qmask = ~(delta_q_res - 1);
466 int abs_deltaq_index = abs(curr_qindex - prev_qindex);
467 abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
468 int adjust_qindex = prev_qindex + sign_deltaq_index * abs_deltaq_index;
469 adjust_qindex = AOMMAX(adjust_qindex, MINQ + 1);
470 return adjust_qindex;
471 }
472
473 #if !CONFIG_REALTIME_ONLY
av1_get_adaptive_rdmult(const AV1_COMP * cpi,double beta)474 int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
475 assert(beta > 0.0);
476 const AV1_COMMON *cm = &cpi->common;
477
478 const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
479 const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
480 const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
481 const FRAME_TYPE frame_type = cm->current_frame.frame_type;
482
483 const int qindex_rdmult = cm->quant_params.base_qindex;
484 return (int)(av1_compute_rd_mult(
485 qindex_rdmult, cm->seq_params->bit_depth,
486 cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
487 layer_depth, boost_index, frame_type,
488 cpi->oxcf.q_cfg.use_fixed_qp_offsets,
489 is_stat_consumption_stage(cpi)) /
490 beta);
491 }
492 #endif // !CONFIG_REALTIME_ONLY
493
// Returns the qindex-dependent base factor for the mode pruning
// thresholds, or -1 on an unsupported bit depth (debug builds assert).
static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
  // Normalize the DC quant step back to 8-bit scale: each extra 2 bits of
  // depth multiplies the step by 4.
  double divisor;
  switch (bit_depth) {
    case AOM_BITS_8: divisor = 4.0; break;
    case AOM_BITS_10: divisor = 16.0; break;
    case AOM_BITS_12: divisor = 64.0; break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }
  const double q = av1_dc_quant_QTX(qindex, 0, bit_depth) / divisor;
  // TODO(debargha): Adjust the function below.
  return AOMMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}
511
av1_set_sad_per_bit(const AV1_COMP * cpi,int * sadperbit,int qindex)512 void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
513 switch (cpi->common.seq_params->bit_depth) {
514 case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
515 case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
516 case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
517 default:
518 assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
519 }
520 }
521
// Computes per-segment, per-block-size, per-mode early-termination
// thresholds for the mode search and stores them in rd->threshes. For the
// nonRD (real-time) path only the modes in the RTC mode list are filled.
static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd,
                                 int use_nonrd_pick_mode) {
  int i, bsize, segment_id;
  THR_MODES mode_indices[RTC_REFS * RTC_MODES] = { 0 };
  int num_modes_count = use_nonrd_pick_mode ? 0 : MAX_MODES;

  // For the nonRD path, gather the THR_MODES indices of the reference /
  // mode combinations actually searched in real-time encoding.
  if (use_nonrd_pick_mode) {
    for (int r_idx = 0; r_idx < RTC_REFS; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref != INTRA_FRAME) {
        for (i = 0; i < RTC_INTER_MODES; i++)
          mode_indices[num_modes_count++] =
              mode_idx[ref][mode_offset(inter_mode_list[i])];
      } else {
        for (i = 0; i < RTC_INTRA_MODES; i++)
          mode_indices[num_modes_count++] =
              mode_idx[ref][mode_offset(intra_mode_list[i])];
      }
    }
  }

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    // Effective qindex for this segment, including the luma DC delta.
    const int qindex = clamp(
        av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex) +
            cm->quant_params.y_dc_delta_q,
        0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->seq_params->bit_depth);

    for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      // Saturate to INT_MAX where thresh_mult * t would overflow.
      for (i = 0; i < num_modes_count; ++i) {
        const int mode_index = use_nonrd_pick_mode ? mode_indices[i] : i;
        rd->threshes[segment_id][bsize][mode_index] =
            rd->thresh_mult[mode_index] < thresh_max
                ? rd->thresh_mult[mode_index] * t / 4
                : INT_MAX;
      }
    }
  }
}
566
// Fills the level-map coefficient coding cost tables in |coeff_costs| from
// the CDFs in |fc|, for up to PLANE_TYPES plane types.
// (Cleanup: removed long-dead commented-out debug printf blocks.)
void av1_fill_coeff_costs(CoeffCosts *coeff_costs, FRAME_CONTEXT *fc,
                          const int num_planes) {
  const int nplanes = AOMMIN(num_planes, PLANE_TYPES);
  // End-of-block position costs: a separate CDF per EOB multi-size class
  // (16..1024 coefficients), plane type and context.
  for (int eob_multi_size = 0; eob_multi_size < 7; ++eob_multi_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_EOB_COST *pcost = &coeff_costs->eob_costs[eob_multi_size][plane];

      for (int ctx = 0; ctx < 2; ++ctx) {
        aom_cdf_prob *pcdf;
        switch (eob_multi_size) {
          case 0: pcdf = fc->eob_flag_cdf16[plane][ctx]; break;
          case 1: pcdf = fc->eob_flag_cdf32[plane][ctx]; break;
          case 2: pcdf = fc->eob_flag_cdf64[plane][ctx]; break;
          case 3: pcdf = fc->eob_flag_cdf128[plane][ctx]; break;
          case 4: pcdf = fc->eob_flag_cdf256[plane][ctx]; break;
          case 5: pcdf = fc->eob_flag_cdf512[plane][ctx]; break;
          case 6:
          default: pcdf = fc->eob_flag_cdf1024[plane][ctx]; break;
        }
        av1_cost_tokens_from_cdf(pcost->eob_cost[ctx], pcdf, NULL);
      }
    }
  }
  for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_COEFF_COST *pcost = &coeff_costs->coeff_costs[tx_size][plane];

      // Transform-block skip flag costs.
      for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx)
        av1_cost_tokens_from_cdf(pcost->txb_skip_cost[ctx],
                                 fc->txb_skip_cdf[tx_size][ctx], NULL);

      // Base-level costs for the EOB coefficient and regular coefficients.
      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx)
        av1_cost_tokens_from_cdf(pcost->base_eob_cost[ctx],
                                 fc->coeff_base_eob_cdf[tx_size][plane][ctx],
                                 NULL);
      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx)
        av1_cost_tokens_from_cdf(pcost->base_cost[ctx],
                                 fc->coeff_base_cdf[tx_size][plane][ctx], NULL);

      // Entries 4..7 are derived from entries 0..3: entry 4 is zero, 5..7
      // are deltas between successive base levels (entry 5 also adds one
      // literal bit). NOTE(review): presumably precomputed for incremental
      // lookups in the hot costing loop — confirm against the consumers.
      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
        pcost->base_cost[ctx][4] = 0;
        pcost->base_cost[ctx][5] = pcost->base_cost[ctx][1] +
                                   av1_cost_literal(1) -
                                   pcost->base_cost[ctx][0];
        pcost->base_cost[ctx][6] =
            pcost->base_cost[ctx][2] - pcost->base_cost[ctx][1];
        pcost->base_cost[ctx][7] =
            pcost->base_cost[ctx][3] - pcost->base_cost[ctx][2];
      }

      // Extra EOB position bits.
      for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx)
        av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx],
                                 fc->eob_extra_cdf[tx_size][plane][ctx], NULL);

      // DC sign costs.
      for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx)
        av1_cost_tokens_from_cdf(pcost->dc_sign_cost[ctx],
                                 fc->dc_sign_cdf[plane][ctx], NULL);

      // Coefficient base-range costs. The range is coded in groups of
      // (BR_CDF_SIZE - 1) symbols; each group's final symbol means
      // "continue to the next group" and its cost accumulates in
      // |prev_cost|.
      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
        int br_rate[BR_CDF_SIZE];
        int prev_cost = 0;
        int i, j;
        av1_cost_tokens_from_cdf(
            br_rate, fc->coeff_br_cdf[AOMMIN(tx_size, TX_32X32)][plane][ctx],
            NULL);
        for (i = 0; i < COEFF_BASE_RANGE; i += BR_CDF_SIZE - 1) {
          for (j = 0; j < BR_CDF_SIZE - 1; j++) {
            pcost->lps_cost[ctx][i + j] = prev_cost + br_rate[j];
          }
          // After the inner loop j == BR_CDF_SIZE - 1: add the cost of the
          // group's "continue" symbol before filling the next group.
          prev_cost += br_rate[j];
        }
        pcost->lps_cost[ctx][i] = prev_cost;
      }
      // Entries COEFF_BASE_RANGE+1 .. 2*COEFF_BASE_RANGE+1 hold deltas
      // between consecutive base-range costs for incremental use.
      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
        pcost->lps_cost[ctx][0 + COEFF_BASE_RANGE + 1] =
            pcost->lps_cost[ctx][0];
        for (int i = 1; i <= COEFF_BASE_RANGE; ++i) {
          pcost->lps_cost[ctx][i + COEFF_BASE_RANGE + 1] =
              pcost->lps_cost[ctx][i] - pcost->lps_cost[ctx][i - 1];
        }
      }
    }
  }
}
659
// Builds the motion-vector cost tables in |mv_costs| from the nmv context
// |nmvc|. |integer_mv| selects full-pel-only costing; otherwise |usehp|
// chooses between the high-precision and standard subpel tables.
void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
                       MvCosts *mv_costs) {
  // Avoid accessing 'mv_costs' when it is not allocated.
  if (mv_costs == NULL) return;

  // Re-bias the per-component pointers to the middle of their backing
  // arrays so they can be indexed with signed MV components.
  for (int comp = 0; comp < 2; ++comp) {
    mv_costs->nmv_cost[comp] = &mv_costs->nmv_cost_alloc[comp][MV_MAX];
    mv_costs->nmv_cost_hp[comp] = &mv_costs->nmv_cost_hp_alloc[comp][MV_MAX];
  }

  if (integer_mv) {
    mv_costs->mv_cost_stack = (int **)&mv_costs->nmv_cost;
    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
                             nmvc, MV_SUBPEL_NONE);
  } else {
    mv_costs->mv_cost_stack =
        usehp ? mv_costs->nmv_cost_hp : mv_costs->nmv_cost;
    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
                             nmvc, usehp);
  }
}
680
// Builds the intra-block-copy displacement-vector cost tables in
// |dv_costs| from the ndv context. DVs are always full-pel.
void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs) {
  // Bias each component pointer to the middle of its allocation so that
  // negative DV components index correctly.
  for (int comp = 0; comp < 2; ++comp)
    dv_costs->dv_costs[comp] = &dv_costs->dv_costs_alloc[comp][MV_MAX];
  av1_build_nmv_cost_table(dv_costs->joint_mv, dv_costs->dv_costs, ndvc,
                           MV_SUBPEL_NONE);
}
687
688 // Populates speed features based on codec control settings (of type
689 // COST_UPDATE_TYPE) and expected speed feature settings (of type
690 // INTERNAL_COST_UPDATE_TYPE) by considering the least frequent cost update.
691 // The populated/updated speed features are used for cost updates in the
692 // encoder.
693 // WARNING: Population of unified cost update frequency needs to be taken care
694 // accordingly, in case of any modifications/additions to the enum
695 // COST_UPDATE_TYPE/INTERNAL_COST_UPDATE_TYPE.
static inline void populate_unified_cost_update_freq(
    const CostUpdateFreq cost_upd_freq, SPEED_FEATURES *const sf) {
  // Mapping of entropy cost update frequency from the encoder's codec
  // control settings of type COST_UPDATE_TYPE to speed features of type
  // INTERNAL_COST_UPDATE_TYPE.
  static const INTERNAL_COST_UPDATE_TYPE
      map_cost_upd_to_internal_cost_upd[NUM_COST_UPDATE_TYPES] = {
        INTERNAL_COST_UPD_SB, INTERNAL_COST_UPD_SBROW, INTERNAL_COST_UPD_TILE,
        INTERNAL_COST_UPD_OFF
      };
  const INTERNAL_COST_UPDATE_TYPE *const remap =
      map_cost_upd_to_internal_cost_upd;
  INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;

  // For each syntax class, keep whichever update level is lower — i.e.
  // the least frequent of the existing speed feature and the request.
  inter_sf->mv_cost_upd_level =
      AOMMIN(inter_sf->mv_cost_upd_level, remap[cost_upd_freq.mv]);
  inter_sf->coeff_cost_upd_level =
      AOMMIN(inter_sf->coeff_cost_upd_level, remap[cost_upd_freq.coeff]);
  inter_sf->mode_cost_upd_level =
      AOMMIN(inter_sf->mode_cost_upd_level, remap[cost_upd_freq.mode]);
  sf->intra_sf.dv_cost_upd_level =
      AOMMIN(sf->intra_sf.dv_cost_upd_level, remap[cost_upd_freq.dv]);
}
721
722 // Checks if entropy costs should be initialized/updated at frame level or not.
is_frame_level_cost_upd_freq_set(const AV1_COMMON * const cm,const INTERNAL_COST_UPDATE_TYPE cost_upd_level,const int use_nonrd_pick_mode,const int frames_since_key)723 static inline int is_frame_level_cost_upd_freq_set(
724 const AV1_COMMON *const cm, const INTERNAL_COST_UPDATE_TYPE cost_upd_level,
725 const int use_nonrd_pick_mode, const int frames_since_key) {
726 const int fill_costs =
727 frame_is_intra_only(cm) ||
728 (use_nonrd_pick_mode ? frames_since_key < 2
729 : (cm->current_frame.frame_number & 0x07) == 1);
730 return ((!use_nonrd_pick_mode && cost_upd_level != INTERNAL_COST_UPD_OFF) ||
731 cost_upd_level == INTERNAL_COST_UPD_TILE || fill_costs);
732 }
733
// Decide whether we want to update the mode entropy cost for the current frame.
// The logic is currently inherited from selective_disable_cdf_rtc.
should_force_mode_cost_update(const AV1_COMP * cpi)736 static inline int should_force_mode_cost_update(const AV1_COMP *cpi) {
737 const REAL_TIME_SPEED_FEATURES *const rt_sf = &cpi->sf.rt_sf;
738 if (!rt_sf->frame_level_mode_cost_update) {
739 return false;
740 }
741
742 if (cpi->oxcf.algo_cfg.cdf_update_mode == 2) {
743 return cpi->frames_since_last_update == 1;
744 } else if (cpi->oxcf.algo_cfg.cdf_update_mode == 1) {
745 if (cpi->svc.number_spatial_layers == 1 &&
746 cpi->svc.number_temporal_layers == 1) {
747 const AV1_COMMON *const cm = &cpi->common;
748 const RATE_CONTROL *const rc = &cpi->rc;
749
750 return frame_is_intra_only(cm) || is_frame_resize_pending(cpi) ||
751 rc->high_source_sad || rc->frames_since_key < 10 ||
752 cpi->cyclic_refresh->counter_encode_maxq_scene_change < 10 ||
753 cm->current_frame.frame_number % 8 == 0;
754 } else if (cpi->svc.number_temporal_layers > 1) {
755 return cpi->svc.temporal_layer_id != cpi->svc.number_temporal_layers - 1;
756 }
757 }
758
759 return false;
760 }
761
// Computes and caches all frame-level RD constants on |cpi|: the rd
// multiplier (RDMULT), error-per-bit, mode pruning thresholds, and —
// subject to the configured cost-update frequencies — the frame-level
// mv / coefficient / mode / dv entropy cost tables.
void av1_initialize_rd_consts(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  SPEED_FEATURES *const sf = &cpi->sf;
  RD_OPT *const rd = &cpi->rd;
  int use_nonrd_pick_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
  int frames_since_key = cpi->rc.frames_since_key;

  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
  const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
  const FRAME_TYPE frame_type = cm->current_frame.frame_type;

  // Base the multiplier on the effective luma DC qindex.
  const int qindex_rdmult =
      cm->quant_params.base_qindex + cm->quant_params.y_dc_delta_q;
  rd->RDMULT = av1_compute_rd_mult(
      qindex_rdmult, cm->seq_params->bit_depth,
      cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
      boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
      is_stat_consumption_stage(cpi));
#if CONFIG_RD_COMMAND
  // Externally supplied rdmult (RD-command file) overrides the computed
  // value in pass 2.
  if (cpi->oxcf.pass == 2) {
    const RD_COMMAND *rd_command = &cpi->rd_command;
    if (rd_command->option_ls[rd_command->frame_index] ==
        RD_OPTION_SET_Q_RDMULT) {
      rd->RDMULT = rd_command->rdmult_ls[rd_command->frame_index];
    }
  }
#endif  // CONFIG_RD_COMMAND

  av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);

  set_block_thresholds(cm, rd, cpi->sf.rt_sf.use_nonrd_pick_mode);

  populate_unified_cost_update_freq(cpi->oxcf.cost_upd_freq, sf);
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;
  // Frame level mv cost update
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->mv_cost_upd_level,
                                       use_nonrd_pick_mode, frames_since_key))
    av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
                      cm->features.allow_high_precision_mv, x->mv_costs);

  // Frame level coefficient cost update
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->coeff_cost_upd_level,
                                       use_nonrd_pick_mode, frames_since_key))
    av1_fill_coeff_costs(&x->coeff_costs, cm->fc, av1_num_planes(cm));

  // Frame level mode cost update
  if (should_force_mode_cost_update(cpi) ||
      is_frame_level_cost_upd_freq_set(cm, inter_sf->mode_cost_upd_level,
                                       use_nonrd_pick_mode, frames_since_key))
    av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);

  // Frame level dv cost update; the dv cost table is allocated lazily on
  // first use.
  if (av1_need_dv_costs(cpi)) {
    if (cpi->td.dv_costs_alloc == NULL) {
      CHECK_MEM_ERROR(
          cm, cpi->td.dv_costs_alloc,
          (IntraBCMVCosts *)aom_malloc(sizeof(*cpi->td.dv_costs_alloc)));
      cpi->td.mb.dv_costs = cpi->td.dv_costs_alloc;
    }
    av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
  }
}
826
// Looks up (and linearly interpolates) normalized rate and distortion for a
// Laplacian source, as a function of xsq_q10 = x^2 in Q10, where
// x = qpstep / sqrt(variance). Outputs r_q10 / d_q10 are also Q10.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
    4044,  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
    3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
    2290,  2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
    1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
    911,   864,  821,  781,  745,  680,  623,  574,  530,  490,  455,  424,
    395,   345,  304,  269,  239,  213,  190,  171,  154,  126,  104,  87,
    73,    61,   52,   44,   38,   28,   21,   16,   12,   10,   8,    6,
    5,     3,    2,    1,    1,    1,    0,    0,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
    0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,
    5,    6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,
    18,   21,   24,   26,   29,   31,   34,   36,   39,   44,   49,   54,
    59,   64,   69,   73,   78,   88,   97,   106,  115,  124,  133,  142,
    151,  167,  184,  200,  215,  231,  245,  260,  274,  301,  327,  351,
    375,  397,  418,  439,  458,  495,  528,  559,  587,  613,  637,  659,
    680,  717,  749,  777,  801,  823,  842,  859,  874,  899,  919,  936,
    949,  960,  969,  977,  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
  };
  // Sample points (x^2 in Q10) at which the two tables above are sampled.
  // The grid is piecewise-uniform: spacing doubles every 8 entries, which is
  // what allows the index computation below to use the MSB position.
  static const int xsq_iq_q10[] = {
    0,      4,      8,      12,     16,     20,     24,     28,     32,
    40,     48,     56,     64,     72,     80,     88,     96,     112,
    128,    144,    160,    176,    192,    208,    224,    256,    288,
    320,    352,    384,    416,    448,    480,    544,    608,    672,
    736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
    1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
    3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
    7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
    16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
    36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
    81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
    180192, 196576, 212960, 229344, 245728,
  };
  // Derive table index xq: k is the "octave" (from the MSB of the biased
  // value), and the next 3 bits select the position within the octave.
  const int tmp = (xsq_q10 >> 2) + 8;
  const int k = get_msb(tmp) - 3;
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  // Q10 linear-interpolation weights between grid points xq and xq + 1.
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}
892
av1_model_rd_from_var_lapndz(int64_t var,unsigned int n_log2,unsigned int qstep,int * rate,int64_t * dist)893 void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
894 unsigned int qstep, int *rate,
895 int64_t *dist) {
896 // This function models the rate and distortion for a Laplacian
897 // source with given variance when quantized with a uniform quantizer
898 // with given stepsize. The closed form expressions are in:
899 // Hang and Chen, "Source Model for transform video coder and its
900 // application - Part I: Fundamental Theory", IEEE Trans. Circ.
901 // Sys. for Video Tech., April 1997.
902 if (var == 0) {
903 *rate = 0;
904 *dist = 0;
905 } else {
906 int d_q10, r_q10;
907 static const uint32_t MAX_XSQ_Q10 = 245727;
908 const uint64_t xsq_q10_64 =
909 (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
910 const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
911 model_rd_norm(xsq_q10, &r_q10, &d_q10);
912 *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
913 *dist = (var * (int64_t)d_q10 + 512) >> 10;
914 }
915 }
916
// Cubic interpolation through four equally spaced samples p[0..3],
// evaluated at offset x in [0, 1] between p[1] and p[2]; p[0] and p[3]
// serve as outer support points (interp_cubic(p, 0) == p[1],
// interp_cubic(p, 1) == p[2]).
static double interp_cubic(const double *p, double x) {
  const double linear_c = p[2] - p[0];
  const double quad_c = 2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3];
  const double cubic_c = 3.0 * (p[1] - p[2]) + p[3] - p[0];
  // Horner evaluation; operation order matches the reference form so the
  // floating-point result is bit-identical.
  return p[1] + 0.5 * x * (linear_c + x * (quad_c + x * cubic_c));
}
923
924 /*
925 static double interp_bicubic(const double *p, int p_stride, double x,
926 double y) {
927 double q[4];
928 q[0] = interp_cubic(p, x);
929 q[1] = interp_cubic(p + p_stride, x);
930 q[2] = interp_cubic(p + 2 * p_stride, x);
931 q[3] = interp_cubic(p + 3 * p_stride, x);
932 return interp_cubic(q, y);
933 }
934 */
935
// Maps each block size to one of four rate-model categories; the category
// selects a row of interp_rgrid_curv[] in av1_model_rd_curvfit().
static const uint8_t bsize_curvfit_model_cat_lookup[BLOCK_SIZES_ALL] = {
  0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 1, 1, 2, 2, 3, 3
};
939
// Selects one of two distortion-model categories (rows of
// interp_dgrid_curv[]): 0 for normalized SSE at or below 16, 1 above it.
static int sse_norm_curvfit_model_cat_lookup(double sse_norm) {
  return (sse_norm > 16.0) ? 1 : 0;
}
943
// Curve-fitted rate samples used by av1_model_rd_curvfit(): 65 samples on a
// uniform grid (x from -15.5 to 16.5, step 0.5), one row per block-size
// category from bsize_curvfit_model_cat_lookup[].
static const double interp_rgrid_curv[4][65] = {
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    118.257702,  120.210658,  121.434853,  122.100487,
      122.377758,  122.436865,  72.290102,   96.974289,   101.652727,
      126.830141,  140.417377,  157.644879,  184.315291,  215.823873,
      262.300169,  335.919859,  420.624173,  519.185032,  619.854243,
      726.053595,  827.663369,  933.127475,  1037.988755, 1138.839609,
      1233.342933, 1333.508064, 1428.760126, 1533.396364, 1616.952052,
      1744.539319, 1803.413586, 1951.466618, 1994.227838, 2086.031680,
      2148.635443, 2239.068450, 2222.590637, 2338.859809, 2402.929011,
      2418.727875, 2435.342670, 2471.159469, 2523.187446, 2591.183827,
      2674.905840, 2774.110714, 2888.555675, 3017.997952, 3162.194773,
      3320.903365, 3493.880956, 3680.884773, 3881.672045, 4096.000000,
  },
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    13.087244,   15.919735,   25.930313,   24.412411,
      28.567417,   29.924194,   30.857010,   32.742979,   36.382570,
      39.210386,   42.265690,   47.378572,   57.014850,   82.740067,
      137.346562,  219.968084,  316.781856,  415.643773,  516.706538,
      614.914364,  714.303763,  815.512135,  911.210485,  1008.501528,
      1109.787854, 1213.772279, 1322.922561, 1414.752579, 1510.505641,
      1615.741888, 1697.989032, 1780.123933, 1847.453790, 1913.742309,
      1960.828122, 2047.500168, 2085.454095, 2129.230668, 2158.171824,
      2182.231724, 2217.684864, 2269.589211, 2337.264824, 2420.618694,
      2519.557814, 2633.989178, 2763.819779, 2908.956609, 3069.306660,
      3244.776927, 3435.274401, 3640.706076, 3860.978945, 4096.000000,
  },
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    4.656893,    5.123633,    5.594132,    6.162376,
      6.918433,    7.768444,    8.739415,    10.105862,   11.477328,
      13.236604,   15.421030,   19.093623,   25.801871,   46.724612,
      98.841054,   181.113466,  272.586364,  359.499769,  445.546343,
      525.944439,  605.188743,  681.793483,  756.668359,  838.486885,
      926.950356,  1015.482542, 1113.353926, 1204.897193, 1288.871992,
      1373.464145, 1455.746628, 1527.796460, 1588.475066, 1658.144771,
      1710.302500, 1807.563351, 1863.197608, 1927.281616, 1964.450872,
      2022.719898, 2100.041145, 2185.205712, 2280.993936, 2387.616216,
      2505.282950, 2634.204540, 2774.591385, 2926.653884, 3090.602436,
      3266.647443, 3454.999303, 3655.868416, 3869.465182, 4096.000000,
  },
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.337370,    0.391916,    0.468839,    0.566334,
      0.762564,    1.069225,    1.384361,    1.787581,    2.293948,
      3.251909,    4.412991,    8.050068,    11.606073,   27.668092,
      65.227758,   128.463938,  202.097653,  262.715851,  312.464873,
      355.601398,  400.609054,  447.201352,  495.761568,  552.871938,
      619.067625,  691.984883,  773.753288,  860.628503,  946.262808,
      1019.805896, 1106.061360, 1178.422145, 1244.852258, 1302.173987,
      1399.650266, 1548.092912, 1545.928652, 1670.817500, 1694.523823,
      1779.195362, 1882.155494, 1990.662097, 2108.325181, 2235.456119,
      2372.366287, 2519.367059, 2676.769812, 2844.885918, 3024.026754,
      3214.503695, 3416.628115, 3630.711389, 3857.064892, 4096.000000,
  },
};
1006
// Curve-fitted distortion-per-SSE samples used by av1_model_rd_curvfit(),
// on the same 65-point grid as interp_rgrid_curv[]. Declared with 3 rows
// but only two are initialized; sse_norm_curvfit_model_cat_lookup() only
// ever selects row 0 or 1 (the third row stays zero-initialized).
static const double interp_dgrid_curv[3][65] = {
  {
      16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
      15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
      15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
      13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
      7.487633,  5.688649,  4.267515,  3.196300,  2.434201,  1.834064,
      1.369920,  1.035921,  0.775279,  0.574895,  0.427232,  0.314123,
      0.233236,  0.171440,  0.128188,  0.092762,  0.067569,  0.049324,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  0.000000,
  },
  {
      16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
      15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
      15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
      13.073692, 12.222005, 11.237799, 9.985848,  8.898823,  7.423519,
      5.995325,  4.773152,  3.744032,  2.938217,  2.294526,  1.762412,
      1.327145,  1.020728,  0.765535,  0.570548,  0.425833,  0.313825,
      0.232959,  0.171324,  0.128174,  0.092750,  0.067558,  0.049319,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  -0.000000,
  },
};
1035
// Curve-fitted RD model: clamps xqr into the sampled grid range, then
// cubic-interpolates the rate grid (row chosen by block size) and the
// distortion-per-SSE grid (row chosen by normalized SSE).
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
                          double *rate_f, double *distbysse_f) {
  const double grid_start = -15.5;
  const double grid_end = 16.5;
  const double grid_step = 0.5;
  const double eps = 1e-6;
  const int rcat = bsize_curvfit_model_cat_lookup[bsize];
  const int dcat = sse_norm_curvfit_model_cat_lookup(sse_norm);
  (void)grid_end;

  // Keep xqr strictly inside [start + step, end - step] so that the four
  // support points needed by interp_cubic() all exist.
  xqr = AOMMAX(xqr, grid_start + grid_step + eps);
  xqr = AOMMIN(xqr, grid_end - grid_step - eps);

  // Convert xqr to a fractional grid index: integer part xi, fraction xo.
  const double x = (xqr - grid_start) / grid_step;
  const int xi = (int)floor(x);
  const double xo = x - xi;

  assert(xi > 0);

  *rate_f = interp_cubic(&interp_rgrid_curv[rcat][xi - 1], xo);
  *distbysse_f = interp_cubic(&interp_dgrid_curv[dcat][xi - 1], xo);
}
1059
// Snapshots the plane's above/left entropy contexts into the caller's
// arrays, sized by the block's width/height in 4x4 (mi) units.
static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
                                       const struct macroblockd_plane *pd,
                                       ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                                       ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  const int width_units = mi_size_wide[plane_bsize];
  const int height_units = mi_size_high[plane_bsize];
  memcpy(t_above, pd->above_entropy_context,
         sizeof(ENTROPY_CONTEXT) * width_units);
  memcpy(t_left, pd->left_entropy_context,
         sizeof(ENTROPY_CONTEXT) * height_units);
}
1072
// Public wrapper: validates the plane block size, then copies the plane's
// above/left entropy contexts into t_above / t_left.
void av1_get_entropy_contexts(BLOCK_SIZE plane_bsize,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  assert(plane_bsize < BLOCK_SIZES_ALL);
  get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left);
}
1080
1081 // Special clamping used in the encoder when calculating a prediction
1082 //
1083 // Logically, all pixel fetches used for prediction are clamped against the
1084 // edges of the frame. But doing this directly is slow, so instead we allocate
1085 // a finite border around the frame and fill it with copies of the outermost
1086 // pixels.
1087 //
1088 // Since this border is finite, we need to clamp the motion vector before
1089 // prediction in order to avoid out-of-bounds reads. At the same time, this
1090 // clamp must not change the prediction result.
1091 //
1092 // We can balance both of these concerns by calculating how far we would have
1093 // to go in each direction before the extended prediction region (the current
1094 // block + AOM_INTERP_EXTEND many pixels around the block) would be mapped
1095 // so that it touches the frame only at one row or column. This is a special
1096 // point because any more extreme MV will always lead to the same prediction.
1097 // So it is safe to clamp at that point.
1098 //
1099 // In the worst case, this requires a border of
1100 // max_block_width + 2*AOM_INTERP_EXTEND = 128 + 2*4 = 136 pixels
1101 // around the frame edges.
enc_clamp_mv(const AV1_COMMON * cm,const MACROBLOCKD * xd,MV * mv)1102 static inline void enc_clamp_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd,
1103 MV *mv) {
1104 int bw = xd->width << MI_SIZE_LOG2;
1105 int bh = xd->height << MI_SIZE_LOG2;
1106
1107 int px_to_left_edge = xd->mi_col << MI_SIZE_LOG2;
1108 int px_to_right_edge = (cm->mi_params.mi_cols - xd->mi_col) << MI_SIZE_LOG2;
1109 int px_to_top_edge = xd->mi_row << MI_SIZE_LOG2;
1110 int px_to_bottom_edge = (cm->mi_params.mi_rows - xd->mi_row) << MI_SIZE_LOG2;
1111
1112 const SubpelMvLimits mv_limits = {
1113 .col_min = -GET_MV_SUBPEL(px_to_left_edge + bw + AOM_INTERP_EXTEND),
1114 .col_max = GET_MV_SUBPEL(px_to_right_edge + AOM_INTERP_EXTEND),
1115 .row_min = -GET_MV_SUBPEL(px_to_top_edge + bh + AOM_INTERP_EXTEND),
1116 .row_max = GET_MV_SUBPEL(px_to_bottom_edge + AOM_INTERP_EXTEND)
1117 };
1118 clamp_mv(mv, &mv_limits);
1119 }
1120
// Evaluates the SAD of up to two stacked reference-MV candidates for
// ref_frame against the source block, recording per-candidate SADs, the
// best SAD, and the largest candidate MV magnitude (in full pels) in x.
void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
  // First two single-ref MV candidates from the reference MV stack.
  const int_mv ref_mv =
      av1_get_ref_mv_from_stack(0, ref_frames, 0, &x->mbmi_ext);
  const int_mv ref_mv1 =
      av1_get_ref_mv_from_stack(0, ref_frames, 1, &x->mbmi_ext);
  MV pred_mv[MAX_MV_REF_CANDIDATES + 1];
  int num_mv_refs = 0;
  pred_mv[num_mv_refs++] = ref_mv.as_mv;
  // Skip the second candidate when it duplicates the first.
  if (ref_mv.as_int != ref_mv1.as_int) {
    pred_mv[num_mv_refs++] = ref_mv1.as_mv;
  }

  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  const uint8_t *const src_y_ptr = x->plane[0].src.buf;
  int zero_seen = 0;
  int best_sad = INT_MAX;
  int max_mv = 0;
  // Get the sad for each candidate reference mv.
  for (int i = 0; i < num_mv_refs; ++i) {
    MV *this_mv = &pred_mv[i];
    enc_clamp_mv(&cpi->common, &x->e_mbd, this_mv);

    // Round the 1/8-pel MV to full pel (round-half-away-from-zero).
    const int fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
    const int fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
    max_mv = AOMMAX(max_mv, AOMMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);

    // Evaluate the zero MV at most once across candidates.
    if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
    zero_seen |= (fp_row == 0 && fp_col == 0);

    const uint8_t *const ref_y_ptr =
        &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    // Find sad for current vector.
    const int this_sad = cpi->ppi->fn_ptr[block_size].sdf(
        src_y_ptr, x->plane[0].src.stride, ref_y_ptr, ref_y_stride);
    // Note if it is the best so far.
    if (this_sad < best_sad) {
      best_sad = this_sad;
    }
    // Also record each candidate's SAD individually.
    if (i == 0)
      x->pred_mv0_sad[ref_frame] = this_sad;
    else if (i == 1)
      x->pred_mv1_sad[ref_frame] = this_sad;
  }

  // Record the largest candidate MV magnitude and the best SAD found for
  // this reference frame.
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
1172
av1_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,const struct scale_factors * scale,const struct scale_factors * scale_uv,const int num_planes)1173 void av1_setup_pred_block(const MACROBLOCKD *xd,
1174 struct buf_2d dst[MAX_MB_PLANE],
1175 const YV12_BUFFER_CONFIG *src,
1176 const struct scale_factors *scale,
1177 const struct scale_factors *scale_uv,
1178 const int num_planes) {
1179 dst[0].buf = src->y_buffer;
1180 dst[0].stride = src->y_stride;
1181 dst[1].buf = src->u_buffer;
1182 dst[2].buf = src->v_buffer;
1183 dst[1].stride = dst[2].stride = src->uv_stride;
1184
1185 const int mi_row = xd->mi_row;
1186 const int mi_col = xd->mi_col;
1187 for (int i = 0; i < num_planes; ++i) {
1188 setup_pred_plane(dst + i, xd->mi[0]->bsize, dst[i].buf,
1189 i ? src->uv_crop_width : src->y_crop_width,
1190 i ? src->uv_crop_height : src->y_crop_height,
1191 dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
1192 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
1193 }
1194 }
1195
av1_get_scaled_ref_frame(const AV1_COMP * cpi,int ref_frame)1196 YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
1197 int ref_frame) {
1198 assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
1199 RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame - 1];
1200 const RefCntBuffer *const ref_buf =
1201 get_ref_frame_buf(&cpi->common, ref_frame);
1202 return (scaled_buf != ref_buf && scaled_buf != NULL) ? &scaled_buf->buf
1203 : NULL;
1204 }
1205
// Returns the rate cost of signaling the block's interpolation filter(s).
// Non-zero only when the frame-level filter is SWITCHABLE; with dual_filter
// both directions are costed, otherwise only the first.
int av1_get_switchable_rate(const MACROBLOCK *x, const MACROBLOCKD *xd,
                            InterpFilter interp_filter, int dual_filter) {
  if (interp_filter != SWITCHABLE) return 0;

  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_dirs = dual_filter ? 2 : 1;
  int total_cost = 0;
  for (int dir = 0; dir < num_dirs; ++dir) {
    const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
    const InterpFilter filter =
        av1_extract_interp_filter(mbmi->interp_filters, dir);
    total_cost += x->mode_costs.switchable_interp_costs[ctx][filter];
  }
  return SWITCHABLE_INTERP_RATE_FACTOR * total_cost;
}
1223
// Initializes the per-mode RD threshold multipliers used to early-terminate
// the mode search. Lower values make a mode cheaper to try; 0 (from
// av1_zero) leaves a mode unconstrained by this mechanism.
void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
  RD_OPT *const rd = &cpi->rd;

  // Set baseline threshold values.
  av1_zero(rd->thresh_mult);

  // Single-reference NEARESTMV modes.
  rd->thresh_mult[THR_NEARESTMV] = 300;
  rd->thresh_mult[THR_NEARESTL2] = 300;
  rd->thresh_mult[THR_NEARESTL3] = 300;
  rd->thresh_mult[THR_NEARESTB] = 300;
  rd->thresh_mult[THR_NEARESTA2] = 300;
  rd->thresh_mult[THR_NEARESTA] = 300;
  rd->thresh_mult[THR_NEARESTG] = 300;

  // Single-reference NEWMV modes.
  rd->thresh_mult[THR_NEWMV] = 1000;
  rd->thresh_mult[THR_NEWL2] = 1000;
  rd->thresh_mult[THR_NEWL3] = 1000;
  rd->thresh_mult[THR_NEWB] = 1000;
  rd->thresh_mult[THR_NEWA2] = 1100;
  rd->thresh_mult[THR_NEWA] = 1000;
  rd->thresh_mult[THR_NEWG] = 1000;

  // Single-reference NEARMV modes.
  rd->thresh_mult[THR_NEARMV] = 1000;
  rd->thresh_mult[THR_NEARL2] = 1000;
  rd->thresh_mult[THR_NEARL3] = 1000;
  rd->thresh_mult[THR_NEARB] = 1000;
  rd->thresh_mult[THR_NEARA2] = 1000;
  rd->thresh_mult[THR_NEARA] = 1000;
  rd->thresh_mult[THR_NEARG] = 1000;

  // Single-reference GLOBALMV modes.
  rd->thresh_mult[THR_GLOBALMV] = 2200;
  rd->thresh_mult[THR_GLOBALL2] = 2000;
  rd->thresh_mult[THR_GLOBALL3] = 2000;
  rd->thresh_mult[THR_GLOBALB] = 2400;
  rd->thresh_mult[THR_GLOBALA2] = 2000;
  rd->thresh_mult[THR_GLOBALG] = 2000;
  rd->thresh_mult[THR_GLOBALA] = 2400;

  // Compound NEAREST_NEAREST modes (forward/backward reference pairs).
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] = 1100;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] = 800;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] = 900;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA2] = 1000;

  // Compound NEAREST_NEAREST modes (unidirectional reference pairs).
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] = 2000;

  // Remaining compound modes, one group per reference pair.
  rd->thresh_mult[THR_COMP_NEAR_NEARLA] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLA] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLA] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLA] = 1530;
  rd->thresh_mult[THR_COMP_NEW_NEARLA] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEWLA] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] = 2750;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2A] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2A] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEARL2A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2A] = 1800;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3A] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL3A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3A] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] = 3000;

  rd->thresh_mult[THR_COMP_NEAR_NEARGA] = 1320;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGA] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGA] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGA] = 2040;
  rd->thresh_mult[THR_COMP_NEW_NEARGA] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGA] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] = 2250;

  rd->thresh_mult[THR_COMP_NEAR_NEARLB] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLB] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLB] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLB] = 1360;
  rd->thresh_mult[THR_COMP_NEW_NEARLB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWLB] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] = 2250;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2B] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL2B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2B] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2B] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3B] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3B] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEARL3B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3B] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3B] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARGB] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGB] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGB] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARGB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGB] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGB] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARLA2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] = 1800;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARLA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWLA2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2A2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL2A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2A2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] = 1440;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL3A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3A2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARGA2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGA2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGA2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARGA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGA2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] = 2750;

  rd->thresh_mult[THR_COMP_NEAR_NEARLL2] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWLL2] = 2640;
  rd->thresh_mult[THR_COMP_NEW_NEARLL2] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEWLL2] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL2] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARLL3] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] = 1800;
  rd->thresh_mult[THR_COMP_NEAR_NEWLL3] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEARLL3] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEWLL3] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL3] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARLG] = 1760;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLG] = 2400;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLG] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWLG] = 1760;
  rd->thresh_mult[THR_COMP_NEW_NEARLG] = 2640;
  rd->thresh_mult[THR_COMP_NEW_NEWLG] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLG] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARBA] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWBA] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTBA] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWBA] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEARBA] = 1980;
  rd->thresh_mult[THR_COMP_NEW_NEWBA] = 2640;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALBA] = 3200;

  // Intra prediction modes.
  rd->thresh_mult[THR_DC] = 1000;
  rd->thresh_mult[THR_PAETH] = 1000;
  rd->thresh_mult[THR_SMOOTH] = 2200;
  rd->thresh_mult[THR_SMOOTH_V] = 2000;
  rd->thresh_mult[THR_SMOOTH_H] = 2000;
  rd->thresh_mult[THR_H_PRED] = 2000;
  rd->thresh_mult[THR_V_PRED] = 1800;
  rd->thresh_mult[THR_D135_PRED] = 2500;
  rd->thresh_mult[THR_D203_PRED] = 2000;
  rd->thresh_mult[THR_D157_PRED] = 2500;
  rd->thresh_mult[THR_D67_PRED] = 2000;
  rd->thresh_mult[THR_D113_PRED] = 2500;
  rd->thresh_mult[THR_D45_PRED] = 2500;
}
1422
// Adaptively rescales the per-(block size, mode) RD threshold factors over
// the mode range [mode_start, mode_end) and size range [min_size, max_size]:
// the factor for the winning mode decays, every other mode's factor grows,
// saturating at max_rd_thresh_factor. Each cell is updated independently,
// so the iteration order does not matter.
static inline void update_thr_fact(int (*factor_buf)[MAX_MODES],
                                   THR_MODES best_mode_index,
                                   THR_MODES mode_start, THR_MODES mode_end,
                                   BLOCK_SIZE min_size, BLOCK_SIZE max_size,
                                   int max_rd_thresh_factor) {
  for (BLOCK_SIZE bs = min_size; bs <= max_size; ++bs) {
    for (THR_MODES mode = mode_start; mode < mode_end; ++mode) {
      int *const fact = &factor_buf[bs][mode];
      if (mode == best_mode_index) {
        *fact -= (*fact >> RD_THRESH_LOG_DEC_FACTOR);
      } else {
        *fact = AOMMIN(*fact + RD_THRESH_INC, max_rd_thresh_factor);
      }
    }
  }
}
1439
// Updates the adaptive RD threshold factors for both the inter and intra
// mode ranges after a mode decision at block size bsize.
void av1_update_rd_thresh_fact(
    const AV1_COMMON *const cm, int (*factor_buf)[MAX_MODES],
    int use_adaptive_rd_thresh, BLOCK_SIZE bsize, THR_MODES best_mode_index,
    THR_MODES inter_mode_start, THR_MODES inter_mode_end,
    THR_MODES intra_mode_start, THR_MODES intra_mode_end) {
  assert(use_adaptive_rd_thresh > 0);
  const int max_rd_thresh_factor = use_adaptive_rd_thresh * RD_THRESH_MAX_FACT;

  // Block sizes beyond the superblock correspond to 1:4 / 4:1 aspect
  // ratios; those update only their own size.
  // TODO(any): Experiment with threshold update for parent/child blocks.
  BLOCK_SIZE min_size = bsize;
  BLOCK_SIZE max_size = bsize;
  if (bsize <= cm->seq_params->sb_size) {
    // Normal sizes: update a neighborhood of sizes around bsize.
    min_size = AOMMAX(bsize - 2, BLOCK_4X4);
    max_size = AOMMIN(bsize + 2, (int)cm->seq_params->sb_size);
  }

  update_thr_fact(factor_buf, best_mode_index, inter_mode_start, inter_mode_end,
                  min_size, max_size, max_rd_thresh_factor);
  update_thr_fact(factor_buf, best_mode_index, intra_mode_start, intra_mode_end,
                  min_size, max_size, max_rd_thresh_factor);
}
1465
// Returns an RD cost penalty applied to intra modes, derived from the DC
// quantizer at the given qindex/qdelta. A smaller multiplier is used at
// higher bit depths (q values are on a larger scale there — TODO confirm
// the exact intended scaling). Returns -1 on an invalid bit depth.
int av1_get_intra_cost_penalty(int qindex, int qdelta,
                               aom_bit_depth_t bit_depth) {
  const int q = av1_dc_quant_QTX(qindex, qdelta, bit_depth);
  if (bit_depth == AOM_BITS_8) return 20 * q;
  if (bit_depth == AOM_BITS_10) return 5 * q;
  if (bit_depth == AOM_BITS_12) return ROUND_POWER_OF_TWO(5 * q, 2);
  assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
  return -1;
}
1478