xref: /aosp_15_r20/external/libaom/av1/encoder/rd.c (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <limits.h>
14 #include <math.h>
15 #include <stdio.h>
16 
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_mem/aom_mem.h"
19 #include "aom_ports/bitops.h"
20 #include "aom_ports/mem.h"
21 #include "aom_ports/aom_once.h"
22 
23 #include "av1/common/common.h"
24 #include "av1/common/entropy.h"
25 #include "av1/common/entropymode.h"
26 #include "av1/common/pred_common.h"
27 #include "av1/common/quant_common.h"
28 #include "av1/common/reconinter.h"
29 #include "av1/common/reconintra.h"
30 #include "av1/common/seg_common.h"
31 
32 #include "av1/encoder/cost.h"
33 #include "av1/encoder/encodemv.h"
34 #include "av1/encoder/encoder.h"
35 #include "av1/encoder/nonrd_opt.h"
36 #include "av1/encoder/ratectrl.h"
37 #include "av1/encoder/rd.h"
38 #include "config/aom_config.h"
39 
40 #define RD_THRESH_POW 1.25
41 
42 // The baseline rd thresholds for breaking out of the rd loop for
43 // certain modes are assumed to be based on 8x8 blocks.
44 // This table is used to correct for block size.
45 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
46 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
47   2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
48 };
49 
50 static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA]
51                                             [EXT_TX_SIZES] = {
52                                               { 1, 1, 1, 1 },  // unused
53                                               { 1, 1, 0, 0 },
54                                               { 0, 0, 1, 0 },
55                                             };
56 
57 static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER]
58                                             [EXT_TX_SIZES] = {
59                                               { 1, 1, 1, 1 },  // unused
60                                               { 1, 1, 0, 0 },
61                                               { 0, 0, 1, 0 },
62                                               { 0, 1, 1, 1 },
63                                             };
64 
65 static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
66                                                       EXT_TX_SETS_INTER)] = {
67   {
68       // Intra
69       EXT_TX_SET_DCTONLY,
70       EXT_TX_SET_DTT4_IDTX_1DDCT,
71       EXT_TX_SET_DTT4_IDTX,
72   },
73   {
74       // Inter
75       EXT_TX_SET_DCTONLY,
76       EXT_TX_SET_ALL16,
77       EXT_TX_SET_DTT9_IDTX_1DDCT,
78       EXT_TX_SET_DCT_IDTX,
79   },
80 };
81 
// Fills the partition, skip-mode and skip-txfm symbol costs.
static void fill_partition_and_skip_rates(AV1_COMMON *const cm,
                                          ModeCosts *mode_costs,
                                          FRAME_CONTEXT *fc) {
  for (int ctx = 0; ctx < PARTITION_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->partition_cost[ctx],
                             fc->partition_cdf[ctx], NULL);
  }

  // Skip-mode costs are only computed when the frame's skip_mode_flag is set.
  if (cm->current_frame.skip_mode_info.skip_mode_flag) {
    for (int ctx = 0; ctx < SKIP_MODE_CONTEXTS; ++ctx) {
      av1_cost_tokens_from_cdf(mode_costs->skip_mode_cost[ctx],
                               fc->skip_mode_cdfs[ctx], NULL);
    }
  }

  for (int ctx = 0; ctx < SKIP_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->skip_txfm_cost[ctx],
                             fc->skip_txfm_cdfs[ctx], NULL);
  }
}

// Fills the luma/chroma intra mode costs and the filter-intra costs.
static void fill_intra_pred_rates(AV1_COMMON *const cm, ModeCosts *mode_costs,
                                  FRAME_CONTEXT *fc) {
  for (int ctx0 = 0; ctx0 < KF_MODE_CONTEXTS; ++ctx0) {
    for (int ctx1 = 0; ctx1 < KF_MODE_CONTEXTS; ++ctx1) {
      av1_cost_tokens_from_cdf(mode_costs->y_mode_costs[ctx0][ctx1],
                               fc->kf_y_cdf[ctx0][ctx1], NULL);
    }
  }

  for (int group = 0; group < BLOCK_SIZE_GROUPS; ++group) {
    av1_cost_tokens_from_cdf(mode_costs->mbmode_cost[group],
                             fc->y_mode_cdf[group], NULL);
  }

  for (int cfl = 0; cfl < CFL_ALLOWED_TYPES; ++cfl) {
    for (int mode = 0; mode < INTRA_MODES; ++mode) {
      av1_cost_tokens_from_cdf(mode_costs->intra_uv_mode_cost[cfl][mode],
                               fc->uv_mode_cdf[cfl][mode], NULL);
    }
  }

  av1_cost_tokens_from_cdf(mode_costs->filter_intra_mode_cost,
                           fc->filter_intra_mode_cdf, NULL);
  // Per-block-size filter-intra costs are only computed for block sizes
  // accepted by av1_filter_intra_allowed_bsize().
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    if (av1_filter_intra_allowed_bsize(cm, bsize)) {
      av1_cost_tokens_from_cdf(mode_costs->filter_intra_cost[bsize],
                               fc->filter_intra_cdfs[bsize], NULL);
    }
  }
}

// Fills the palette size, mode and color-index costs for luma and chroma.
static void fill_palette_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
  for (int bctx = 0; bctx < PALATTE_BSIZE_CTXS; ++bctx) {
    av1_cost_tokens_from_cdf(mode_costs->palette_y_size_cost[bctx],
                             fc->palette_y_size_cdf[bctx], NULL);
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_size_cost[bctx],
                             fc->palette_uv_size_cdf[bctx], NULL);
    for (int mctx = 0; mctx < PALETTE_Y_MODE_CONTEXTS; ++mctx) {
      av1_cost_tokens_from_cdf(mode_costs->palette_y_mode_cost[bctx][mctx],
                               fc->palette_y_mode_cdf[bctx][mctx], NULL);
    }
  }

  for (int ctx = 0; ctx < PALETTE_UV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_mode_cost[ctx],
                             fc->palette_uv_mode_cdf[ctx], NULL);
  }

  for (int size = 0; size < PALETTE_SIZES; ++size) {
    for (int ctx = 0; ctx < PALETTE_COLOR_INDEX_CONTEXTS; ++ctx) {
      av1_cost_tokens_from_cdf(mode_costs->palette_y_color_cost[size][ctx],
                               fc->palette_y_color_index_cdf[size][ctx], NULL);
      av1_cost_tokens_from_cdf(mode_costs->palette_uv_color_cost[size][ctx],
                               fc->palette_uv_color_index_cdf[size][ctx],
                               NULL);
    }
  }
}

// Fills the chroma-from-luma costs. For every joint sign the U and V alpha
// costs are computed (or zeroed when that plane's sign is CFL_SIGN_ZERO), and
// the cost of the joint sign itself is folded into the U costs only.
static void fill_cfl_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
  int sign_cost[CFL_JOINT_SIGNS];
  av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL);

  for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
    int *cost_u = mode_costs->cfl_cost[joint_sign][CFL_PRED_U];
    int *cost_v = mode_costs->cfl_cost[joint_sign][CFL_PRED_V];

    if (CFL_SIGN_U(joint_sign) == CFL_SIGN_ZERO) {
      memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u));
    } else {
      const aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
      av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL);
    }

    if (CFL_SIGN_V(joint_sign) == CFL_SIGN_ZERO) {
      memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v));
    } else {
      const aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
      av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL);
    }

    for (int u = 0; u < CFL_ALPHABET_SIZE; u++) {
      cost_u[u] += sign_cost[joint_sign];
    }
  }
}

// Fills the transform size, transform partition and transform type costs.
static void fill_tx_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
  for (int cat = 0; cat < MAX_TX_CATS; ++cat) {
    for (int ctx = 0; ctx < TX_SIZE_CONTEXTS; ++ctx) {
      av1_cost_tokens_from_cdf(mode_costs->tx_size_cost[cat][ctx],
                               fc->tx_size_cdf[cat][ctx], NULL);
    }
  }

  for (int ctx = 0; ctx < TXFM_PARTITION_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->txfm_partition_cost[ctx],
                             fc->txfm_partition_cdf[ctx], NULL);
  }

  // Transform-type costs: set 0 is skipped, and the remaining sets are only
  // costed for the transform sizes flagged in use_*_ext_tx_for_txsize[].
  for (int tx = TX_4X4; tx < EXT_TX_SIZES; ++tx) {
    for (int set = 1; set < EXT_TX_SETS_INTER; ++set) {
      if (use_inter_ext_tx_for_txsize[set][tx]) {
        av1_cost_tokens_from_cdf(
            mode_costs->inter_tx_type_costs[set][tx],
            fc->inter_ext_tx_cdf[set][tx],
            av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][set]]);
      }
    }
    for (int set = 1; set < EXT_TX_SETS_INTRA; ++set) {
      if (use_intra_ext_tx_for_txsize[set][tx]) {
        for (int mode = 0; mode < INTRA_MODES; ++mode) {
          av1_cost_tokens_from_cdf(
              mode_costs->intra_tx_type_costs[set][tx][mode],
              fc->intra_ext_tx_cdf[set][tx][mode],
              av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][set]]);
        }
      }
    }
  }
}

// Fills the costs that av1_fill_mode_rates() only needs for frames that are
// not intra-only: reference selection, inter modes, compound prediction,
// inter-intra and motion modes.
static void fill_inter_frame_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
  for (int ctx = 0; ctx < COMP_INTER_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_inter_cost[ctx],
                             fc->comp_inter_cdf[ctx], NULL);
  }

  for (int ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < SINGLE_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->single_ref_cost[ctx][ref],
                               fc->single_ref_cdf[ctx][ref], NULL);
    }
  }

  for (int ctx = 0; ctx < COMP_REF_TYPE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_ref_type_cost[ctx],
                             fc->comp_ref_type_cdf[ctx], NULL);
  }

  for (int ctx = 0; ctx < UNI_COMP_REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < UNIDIR_COMP_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->uni_comp_ref_cost[ctx][ref],
                               fc->uni_comp_ref_cdf[ctx][ref], NULL);
    }
  }

  for (int ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < FWD_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->comp_ref_cost[ctx][ref],
                               fc->comp_ref_cdf[ctx][ref], NULL);
    }
  }

  for (int ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < BWD_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->comp_bwdref_cost[ctx][ref],
                               fc->comp_bwdref_cdf[ctx][ref], NULL);
    }
  }

  for (int ctx = 0; ctx < INTRA_INTER_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[ctx],
                             fc->intra_inter_cdf[ctx], NULL);
  }

  for (int ctx = 0; ctx < NEWMV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->newmv_mode_cost[ctx],
                             fc->newmv_cdf[ctx], NULL);
  }

  for (int ctx = 0; ctx < GLOBALMV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->zeromv_mode_cost[ctx],
                             fc->zeromv_cdf[ctx], NULL);
  }

  for (int ctx = 0; ctx < REFMV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->refmv_mode_cost[ctx],
                             fc->refmv_cdf[ctx], NULL);
  }

  for (int ctx = 0; ctx < DRL_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->drl_mode_cost0[ctx], fc->drl_cdf[ctx],
                             NULL);
  }

  for (int ctx = 0; ctx < INTER_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->inter_compound_mode_cost[ctx],
                             fc->inter_compound_mode_cdf[ctx], NULL);
  }

  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    av1_cost_tokens_from_cdf(mode_costs->compound_type_cost[bsize],
                             fc->compound_type_cdf[bsize], NULL);
  }

  // Wedge index costs are only computed for block sizes accepted by
  // av1_is_wedge_used().
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    if (av1_is_wedge_used(bsize)) {
      av1_cost_tokens_from_cdf(mode_costs->wedge_idx_cost[bsize],
                               fc->wedge_idx_cdf[bsize], NULL);
    }
  }

  for (int group = 0; group < BLOCK_SIZE_GROUPS; ++group) {
    av1_cost_tokens_from_cdf(mode_costs->interintra_cost[group],
                             fc->interintra_cdf[group], NULL);
    av1_cost_tokens_from_cdf(mode_costs->interintra_mode_cost[group],
                             fc->interintra_mode_cdf[group], NULL);
  }

  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[bsize],
                             fc->wedge_interintra_cdf[bsize], NULL);
  }

  // Motion-mode costs are left untouched for block sizes below BLOCK_8X8.
  for (int bsize = BLOCK_8X8; bsize < BLOCK_SIZES_ALL; bsize++) {
    av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost[bsize],
                             fc->motion_mode_cdf[bsize], NULL);
  }
  for (int bsize = BLOCK_8X8; bsize < BLOCK_SIZES_ALL; bsize++) {
    av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost1[bsize],
                             fc->obmc_cdf[bsize], NULL);
  }

  for (int ctx = 0; ctx < COMP_INDEX_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_idx_cost[ctx],
                             fc->compound_index_cdf[ctx], NULL);
  }

  for (int ctx = 0; ctx < COMP_GROUP_IDX_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[ctx],
                             fc->comp_group_idx_cdf[ctx], NULL);
  }
}

// Converts the mode-related CDFs in 'fc' into per-symbol costs stored in
// 'mode_costs' by calling av1_cost_tokens_from_cdf() on each one.
//
// cm:         read for the skip-mode flag, the filter-intra block-size check
//             and the intra-only test.
// mode_costs: output; every cost table named below is overwritten.
// fc:         frame context supplying the CDFs.
//
// Costs that apply to every frame type are always filled. The inter-specific
// tables are only filled when frame_is_intra_only(cm) is false.
void av1_fill_mode_rates(AV1_COMMON *const cm, ModeCosts *mode_costs,
                         FRAME_CONTEXT *fc) {
  fill_partition_and_skip_rates(cm, mode_costs, fc);
  fill_intra_pred_rates(cm, mode_costs, fc);

  for (int ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->switchable_interp_costs[ctx],
                             fc->switchable_interp_cdf[ctx], NULL);
  }

  fill_palette_rates(mode_costs, fc);
  fill_cfl_rates(mode_costs, fc);
  fill_tx_rates(mode_costs, fc);

  for (int mode = 0; mode < DIRECTIONAL_MODES; ++mode) {
    av1_cost_tokens_from_cdf(mode_costs->angle_delta_cost[mode],
                             fc->angle_delta_cdf[mode], NULL);
  }
  av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL);

  for (int ctx = 0; ctx < SPATIAL_PREDICTION_PROBS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->spatial_pred_cost[ctx],
                             fc->seg.spatial_pred_seg_cdf[ctx], NULL);
  }

  for (int ctx = 0; ctx < SEG_TEMPORAL_PRED_CTXS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->tmp_pred_cost[ctx],
                             fc->seg.pred_cdf[ctx], NULL);
  }

  if (!frame_is_intra_only(cm)) {
    fill_inter_frame_rates(mode_costs, fc);
  }
}
322 
323 #if !CONFIG_REALTIME_ONLY
// Converts the three loop-restoration CDFs in 'fc' (switchable, Wiener and
// sgrproj) into the matching cost tables in 'mode_costs'.
void av1_fill_lr_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
  int *const switchable_cost = mode_costs->switchable_restore_cost;
  int *const wiener_cost = mode_costs->wiener_restore_cost;
  int *const sgrproj_cost = mode_costs->sgrproj_restore_cost;

  av1_cost_tokens_from_cdf(switchable_cost, fc->switchable_restore_cdf, NULL);
  av1_cost_tokens_from_cdf(wiener_cost, fc->wiener_restore_cdf, NULL);
  av1_cost_tokens_from_cdf(sgrproj_cost, fc->sgrproj_restore_cdf, NULL);
}
332 #endif  // !CONFIG_REALTIME_ONLY
333 
334 // Values are now correlated to quantizer.
335 static int sad_per_bit_lut_8[QINDEX_RANGE];
336 static int sad_per_bit_lut_10[QINDEX_RANGE];
337 static int sad_per_bit_lut_12[QINDEX_RANGE];
338 
// Fills bit16lut[0 .. range-1] for one bit depth. Entry 'qindex' is a linear
// function of the real quantizer value returned by av1_convert_qindex_to_q(),
// truncated to int.
//
// The table is derived from a formula (rather than hand-tuned values) so that
// the effect of experimental quantizer-table changes is easier to resolve.
static void init_me_luts_bd(int *bit16lut, int range,
                            aom_bit_depth_t bit_depth) {
  for (int qindex = 0; qindex < range; ++qindex) {
    const double q = av1_convert_qindex_to_q(qindex, bit_depth);
    bit16lut[qindex] = (int)(0.0418 * q + 2.4107);
  }
}
350 
init_me_luts(void)351 static void init_me_luts(void) {
352   init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, AOM_BITS_8);
353   init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, AOM_BITS_10);
354   init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
355 }
356 
av1_init_me_luts(void)357 void av1_init_me_luts(void) { aom_once(init_me_luts); }
358 
// ARF boost adjustment, indexed by boost_index in av1_compute_rd_mult(): the
// RD multiplier is increased by (rdmult * factor) >> 7, i.e. by factor / 128
// of its value.
static const int rd_boost_factor[16] = {
  64, 32, 32, 32, 24, 16, 12, 12,  //
  8,  8,  4,  4,  2,  2,  1,  0
};

// Layer depth adjustment, indexed by layer_depth in av1_compute_rd_mult():
// the RD multiplier is scaled to (rdmult * factor) >> 7, i.e. factor / 128.
static const int rd_layer_depth_factor[7] = { 160, 160, 160, 160,
                                              192, 208, 224 };
365 
// Default RD multiplier scale for inter frames: a linear function of its
// argument. The coefficients are a first-pass estimate based on data from a
// previous Vizer run.
// NOTE(review): the parameter is named 'qindex', but the caller visible in
// this file, av1_compute_rd_mult_based_on_qindex(), passes the DC quantizer
// value instead.
static double def_inter_rd_multiplier(int qindex) {
  static const double kOffset = 3.2;
  static const double kSlope = 0.0015;
  return kOffset + (kSlope * (double)qindex);
}
372 
// Default RD multiplier scale for ARF/Golden frames: a linear function of its
// argument. The coefficients are a first-pass estimate based on data from a
// previous Vizer run.
// NOTE(review): the parameter is named 'qindex', but the caller visible in
// this file, av1_compute_rd_mult_based_on_qindex(), passes the DC quantizer
// value instead.
static double def_arf_rd_multiplier(int qindex) {
  static const double kOffset = 3.25;
  static const double kSlope = 0.0015;
  return kOffset + (kSlope * (double)qindex);
}
379 
// Default RD multiplier scale for key frames: a linear function of its
// argument. The coefficients are a first-pass estimate based on data from a
// previous Vizer run.
// NOTE(review): the parameter is named 'qindex', but the caller visible in
// this file, av1_compute_rd_mult_based_on_qindex(), passes the DC quantizer
// value instead.
static double def_kf_rd_multiplier(int qindex) {
  static const double kOffset = 3.3;
  static const double kSlope = 0.0015;
  return kOffset + (kSlope * (double)qindex);
}
386 
// Computes the base RD multiplier for a frame from its quantizer.
//
// bit_depth:   selects the quantizer table and the final normalization.
// update_type: chooses the scale function; KF_UPDATE uses the key-frame
//              one, GF_UPDATE/ARF_UPDATE the ARF one, anything else the
//              inter one.
// qindex:      quantizer index, converted to the DC quantizer value q via
//              av1_dc_quant_QTX().
//
// The result is q * q times the chosen scale (itself evaluated at q), rounded
// down by 4 bits for 10-bit and 8 bits for 12-bit input. It is clamped to the
// range [1, INT_MAX]. Returns -1 for an unsupported bit depth (after
// asserting in debug builds).
int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
                                        FRAME_UPDATE_TYPE update_type,
                                        int qindex) {
  const int q = av1_dc_quant_QTX(qindex, 0, bit_depth);

  double def_rd_q_mult;
  if (update_type == KF_UPDATE) {
    def_rd_q_mult = def_kf_rd_multiplier(q);
  } else if ((update_type == GF_UPDATE) || (update_type == ARF_UPDATE)) {
    def_rd_q_mult = def_arf_rd_multiplier(q);
  } else {
    def_rd_q_mult = def_inter_rd_multiplier(q);
  }

  // Widen before squaring so the product is formed in 64 bits rather than
  // being computed in int and only then converted.
  const int64_t q_squared = (int64_t)q * q;
  int64_t rdmult = (int64_t)((double)q_squared * def_rd_q_mult);

  switch (bit_depth) {
    case AOM_BITS_8: break;
    case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
    case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }
  return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
}
413 
av1_compute_rd_mult(const int qindex,const aom_bit_depth_t bit_depth,const FRAME_UPDATE_TYPE update_type,const int layer_depth,const int boost_index,const FRAME_TYPE frame_type,const int use_fixed_qp_offsets,const int is_stat_consumption_stage)414 int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
415                         const FRAME_UPDATE_TYPE update_type,
416                         const int layer_depth, const int boost_index,
417                         const FRAME_TYPE frame_type,
418                         const int use_fixed_qp_offsets,
419                         const int is_stat_consumption_stage) {
420   int64_t rdmult =
421       av1_compute_rd_mult_based_on_qindex(bit_depth, update_type, qindex);
422   if (is_stat_consumption_stage && !use_fixed_qp_offsets &&
423       (frame_type != KEY_FRAME)) {
424     // Layer depth adjustment
425     rdmult = (rdmult * rd_layer_depth_factor[layer_depth]) >> 7;
426     // ARF boost adjustment
427     rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
428   }
429   return (int)rdmult;
430 }
431 
// Returns the qindex offset (new qindex minus the given one) whose DC
// quantizer value approximates q / sqrt(beta), where q is the DC quantizer
// value at 'qindex'.
//
// The search walks qindex one step at a time in the required direction and
// stops at the first index whose quantizer value reaches or passes the
// target, or at the end of the range (0 when decreasing, MAXQ when
// increasing). Returns 0 when the rounded target equals the current value.
// 'beta' must be positive.
int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
  assert(beta > 0.0);
  const int start_qindex = qindex;
  const int start_q = av1_dc_quant_QTX(qindex, 0, bit_depth);
  const int target_q = (int)rint(start_q / sqrt(beta));

  if (target_q == start_q) {
    return 0;
  }

  if (target_q < start_q) {
    // Need a smaller quantizer: step down until it is no larger than target.
    while (qindex > 0) {
      --qindex;
      if (av1_dc_quant_QTX(qindex, 0, bit_depth) <= target_q) {
        break;
      }
    }
  } else {
    // Need a larger quantizer: step up until it is no smaller than target.
    while (qindex < MAXQ) {
      ++qindex;
      if (av1_dc_quant_QTX(qindex, 0, bit_depth) >= target_q) {
        break;
      }
    }
  }
  return qindex - start_qindex;
}
459 
av1_adjust_q_from_delta_q_res(int delta_q_res,int prev_qindex,int curr_qindex)460 int av1_adjust_q_from_delta_q_res(int delta_q_res, int prev_qindex,
461                                   int curr_qindex) {
462   curr_qindex = clamp(curr_qindex, delta_q_res, 256 - delta_q_res);
463   const int sign_deltaq_index = curr_qindex - prev_qindex >= 0 ? 1 : -1;
464   const int deltaq_deadzone = delta_q_res / 4;
465   const int qmask = ~(delta_q_res - 1);
466   int abs_deltaq_index = abs(curr_qindex - prev_qindex);
467   abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
468   int adjust_qindex = prev_qindex + sign_deltaq_index * abs_deltaq_index;
469   adjust_qindex = AOMMAX(adjust_qindex, MINQ + 1);
470   return adjust_qindex;
471 }
472 
473 #if !CONFIG_REALTIME_ONLY
av1_get_adaptive_rdmult(const AV1_COMP * cpi,double beta)474 int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
475   assert(beta > 0.0);
476   const AV1_COMMON *cm = &cpi->common;
477 
478   const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
479   const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
480   const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
481   const FRAME_TYPE frame_type = cm->current_frame.frame_type;
482 
483   const int qindex_rdmult = cm->quant_params.base_qindex;
484   return (int)(av1_compute_rd_mult(
485                    qindex_rdmult, cm->seq_params->bit_depth,
486                    cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
487                    layer_depth, boost_index, frame_type,
488                    cpi->oxcf.q_cfg.use_fixed_qp_offsets,
489                    is_stat_consumption_stage(cpi)) /
490                beta);
491 }
492 #endif  // !CONFIG_REALTIME_ONLY
493 
// Returns the RD threshold factor for 'qindex': the DC quantizer value is
// divided by 4, 16 or 64 for 8-, 10- or 12-bit input respectively, raised to
// the power RD_THRESH_POW, multiplied by 5.12 and truncated to int, with a
// floor of 8. Returns -1 for an unsupported bit depth (after asserting in
// debug builds).
static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
  double divisor;
  switch (bit_depth) {
    case AOM_BITS_8: divisor = 4.0; break;
    case AOM_BITS_10: divisor = 16.0; break;
    case AOM_BITS_12: divisor = 64.0; break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }
  const double q = av1_dc_quant_QTX(qindex, 0, bit_depth) / divisor;
  // TODO(debargha): Adjust the function below.
  const int factor = (int)(pow(q, RD_THRESH_POW) * 5.12);
  return AOMMAX(factor, 8);
}
511 
av1_set_sad_per_bit(const AV1_COMP * cpi,int * sadperbit,int qindex)512 void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
513   switch (cpi->common.seq_params->bit_depth) {
514     case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
515     case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
516     case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
517     default:
518       assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
519   }
520 }
521 
// Fills rd->threshes[segment][block size][mode] from rd->thresh_mult[].
//
// For each segment, a threshold factor is derived from the segment's qindex
// (plus y_dc_delta_q, clamped to [0, MAXQ]). It is multiplied by the
// block-size factor and by the mode's multiplier, then divided by 4. Entries
// whose product would overflow are set to INT_MAX.
//
// With use_nonrd_pick_mode set, only the modes reachable from the real-time
// reference/mode lists are updated; otherwise all MAX_MODES entries are.
static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd,
                                 int use_nonrd_pick_mode) {
  THR_MODES mode_indices[RTC_REFS * RTC_MODES] = { 0 };
  int num_modes_count = MAX_MODES;

  if (use_nonrd_pick_mode) {
    // Collect the mode indices used by the non-RD path, per reference.
    num_modes_count = 0;
    for (int r_idx = 0; r_idx < RTC_REFS; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref == INTRA_FRAME) {
        for (int m = 0; m < RTC_INTRA_MODES; m++) {
          mode_indices[num_modes_count++] =
              mode_idx[ref][mode_offset(intra_mode_list[m])];
        }
      } else {
        for (int m = 0; m < RTC_INTER_MODES; m++) {
          mode_indices[num_modes_count++] =
              mode_idx[ref][mode_offset(inter_mode_list[m])];
        }
      }
    }
  }

  for (int segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int qindex = clamp(
        av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex) +
            cm->quant_params.y_dc_delta_q,
        0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->seq_params->bit_depth);

    for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      for (int n = 0; n < num_modes_count; ++n) {
        const int mode_index = use_nonrd_pick_mode ? mode_indices[n] : n;
        if (rd->thresh_mult[mode_index] < thresh_max) {
          rd->threshes[segment_id][bsize][mode_index] =
              rd->thresh_mult[mode_index] * t / 4;
        } else {
          rd->threshes[segment_id][bsize][mode_index] = INT_MAX;
        }
      }
    }
  }
}
566 
// Converts the coefficient-coding CDFs in 'fc' into the cost tables of
// 'coeff_costs', for min(num_planes, PLANE_TYPES) plane types.
//
// Two groups of tables are produced:
//  - eob_costs[size][plane]: end-of-block flag costs for the seven EOB CDF
//    sizes (16 .. 1024).
//  - coeff_costs[tx_size][plane]: txb-skip, base, base-eob, eob-extra,
//    dc-sign and level (lps) costs.
void av1_fill_coeff_costs(CoeffCosts *coeff_costs, FRAME_CONTEXT *fc,
                          const int num_planes) {
  const int nplanes = AOMMIN(num_planes, PLANE_TYPES);

  for (int eob_multi_size = 0; eob_multi_size < 7; ++eob_multi_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_EOB_COST *const eob =
          &coeff_costs->eob_costs[eob_multi_size][plane];

      for (int ctx = 0; ctx < 2; ++ctx) {
        // One CDF per EOB size class, in increasing size order.
        aom_cdf_prob *const eob_flag_cdfs[7] = {
          fc->eob_flag_cdf16[plane][ctx],  fc->eob_flag_cdf32[plane][ctx],
          fc->eob_flag_cdf64[plane][ctx],  fc->eob_flag_cdf128[plane][ctx],
          fc->eob_flag_cdf256[plane][ctx], fc->eob_flag_cdf512[plane][ctx],
          fc->eob_flag_cdf1024[plane][ctx]
        };
        av1_cost_tokens_from_cdf(eob->eob_cost[ctx],
                                 eob_flag_cdfs[eob_multi_size], NULL);
      }
    }
  }

  for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
    // The base-range CDFs are shared by all sizes above TX_32X32.
    const int br_tx_size = AOMMIN(tx_size, TX_32X32);

    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_COEFF_COST *const cost =
          &coeff_costs->coeff_costs[tx_size][plane];

      // Note: the txb-skip CDF is not indexed by plane.
      for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(cost->txb_skip_cost[ctx],
                                 fc->txb_skip_cdf[tx_size][ctx], NULL);
      }

      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx) {
        av1_cost_tokens_from_cdf(cost->base_eob_cost[ctx],
                                 fc->coeff_base_eob_cdf[tx_size][plane][ctx],
                                 NULL);
      }

      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
        int *const base = cost->base_cost[ctx];
        av1_cost_tokens_from_cdf(base,
                                 fc->coeff_base_cdf[tx_size][plane][ctx], NULL);

        // Entries 4..7 hold values derived from the four symbol costs:
        // entry 5 adds one literal bit to the 0 -> 1 step, entries 6 and 7
        // are the 1 -> 2 and 2 -> 3 cost differences.
        base[4] = 0;
        base[5] = base[1] + av1_cost_literal(1) - base[0];
        base[6] = base[2] - base[1];
        base[7] = base[3] - base[2];
      }

      for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(cost->eob_extra_cost[ctx],
                                 fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
      }

      // Note: the dc-sign CDF is not indexed by transform size.
      for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(cost->dc_sign_cost[ctx],
                                 fc->dc_sign_cdf[plane][ctx], NULL);
      }

      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
        int *const lps = cost->lps_cost[ctx];
        int br_rate[BR_CDF_SIZE];
        av1_cost_tokens_from_cdf(br_rate,
                                 fc->coeff_br_cdf[br_tx_size][plane][ctx],
                                 NULL);

        // Cumulative level costs: each group of BR_CDF_SIZE - 1 levels costs
        // the escapes taken so far plus the symbol within the group; the
        // last symbol of the CDF is the escape to the next group.
        int escape_cost = 0;
        int level;
        for (level = 0; level < COEFF_BASE_RANGE; level += BR_CDF_SIZE - 1) {
          for (int sym = 0; sym < BR_CDF_SIZE - 1; sym++) {
            lps[level + sym] = escape_cost + br_rate[sym];
          }
          escape_cost += br_rate[BR_CDF_SIZE - 1];
        }
        lps[level] = escape_cost;

        // Second half of the row: per-level cost differences, with entry 0
        // copied unchanged.
        lps[0 + COEFF_BASE_RANGE + 1] = lps[0];
        for (int i = 1; i <= COEFF_BASE_RANGE; ++i) {
          lps[i + COEFF_BASE_RANGE + 1] = lps[i] - lps[i - 1];
        }
      }
    }
  }
}
659 
// Builds the motion vector cost tables in 'mv_costs' from 'nmvc'.
//
// Does nothing when mv_costs is NULL (i.e. the cost buffers were never
// allocated). Otherwise each cost pointer is aimed at element MV_MAX of its
// backing array, and mv_cost_stack is set to:
//  - the regular table, built with MV_SUBPEL_NONE, when integer_mv is set;
//  - else the high-precision table when usehp is set, or the regular table
//    when it is not, built with precision 'usehp'.
void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
                       MvCosts *mv_costs) {
  if (mv_costs == NULL) {
    return;
  }

  for (int comp = 0; comp < 2; ++comp) {
    mv_costs->nmv_cost[comp] = &mv_costs->nmv_cost_alloc[comp][MV_MAX];
    mv_costs->nmv_cost_hp[comp] = &mv_costs->nmv_cost_hp_alloc[comp][MV_MAX];
  }

  if (integer_mv) {
    mv_costs->mv_cost_stack = (int **)&mv_costs->nmv_cost;
    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
                             nmvc, MV_SUBPEL_NONE);
    return;
  }

  if (usehp) {
    mv_costs->mv_cost_stack = mv_costs->nmv_cost_hp;
  } else {
    mv_costs->mv_cost_stack = mv_costs->nmv_cost;
  }
  av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
                           nmvc, usehp);
}
680 
// Builds the intra block copy displacement-vector cost tables in 'dv_costs'
// from 'ndvc', using MV_SUBPEL_NONE precision. Each cost pointer is first
// aimed at element MV_MAX of its backing array.
// NOTE(review): unlike av1_fill_mv_costs(), 'dv_costs' is dereferenced
// without a NULL check; callers must pass an allocated structure.
void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs) {
  for (int comp = 0; comp < 2; ++comp) {
    dv_costs->dv_costs[comp] = &dv_costs->dv_costs_alloc[comp][MV_MAX];
  }
  av1_build_nmv_cost_table(dv_costs->joint_mv, dv_costs->dv_costs, ndvc,
                           MV_SUBPEL_NONE);
}
687 
688 // Populates speed features based on codec control settings (of type
689 // COST_UPDATE_TYPE) and expected speed feature settings (of type
690 // INTERNAL_COST_UPDATE_TYPE) by considering the least frequent cost update.
691 // The populated/updated speed features are used for cost updates in the
692 // encoder.
693 // WARNING: Population of unified cost update frequency needs to be taken care
694 // accordingly, in case of any modifications/additions to the enum
695 // COST_UPDATE_TYPE/INTERNAL_COST_UPDATE_TYPE.
populate_unified_cost_update_freq(const CostUpdateFreq cost_upd_freq,SPEED_FEATURES * const sf)696 static inline void populate_unified_cost_update_freq(
697     const CostUpdateFreq cost_upd_freq, SPEED_FEATURES *const sf) {
698   INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
699   // Mapping of entropy cost update frequency from the encoder's codec control
700   // settings of type COST_UPDATE_TYPE to speed features of type
701   // INTERNAL_COST_UPDATE_TYPE.
702   static const INTERNAL_COST_UPDATE_TYPE
703       map_cost_upd_to_internal_cost_upd[NUM_COST_UPDATE_TYPES] = {
704         INTERNAL_COST_UPD_SB, INTERNAL_COST_UPD_SBROW, INTERNAL_COST_UPD_TILE,
705         INTERNAL_COST_UPD_OFF
706       };
707 
708   inter_sf->mv_cost_upd_level =
709       AOMMIN(inter_sf->mv_cost_upd_level,
710              map_cost_upd_to_internal_cost_upd[cost_upd_freq.mv]);
711   inter_sf->coeff_cost_upd_level =
712       AOMMIN(inter_sf->coeff_cost_upd_level,
713              map_cost_upd_to_internal_cost_upd[cost_upd_freq.coeff]);
714   inter_sf->mode_cost_upd_level =
715       AOMMIN(inter_sf->mode_cost_upd_level,
716              map_cost_upd_to_internal_cost_upd[cost_upd_freq.mode]);
717   sf->intra_sf.dv_cost_upd_level =
718       AOMMIN(sf->intra_sf.dv_cost_upd_level,
719              map_cost_upd_to_internal_cost_upd[cost_upd_freq.dv]);
720 }
721 
722 // Checks if entropy costs should be initialized/updated at frame level or not.
is_frame_level_cost_upd_freq_set(const AV1_COMMON * const cm,const INTERNAL_COST_UPDATE_TYPE cost_upd_level,const int use_nonrd_pick_mode,const int frames_since_key)723 static inline int is_frame_level_cost_upd_freq_set(
724     const AV1_COMMON *const cm, const INTERNAL_COST_UPDATE_TYPE cost_upd_level,
725     const int use_nonrd_pick_mode, const int frames_since_key) {
726   const int fill_costs =
727       frame_is_intra_only(cm) ||
728       (use_nonrd_pick_mode ? frames_since_key < 2
729                            : (cm->current_frame.frame_number & 0x07) == 1);
730   return ((!use_nonrd_pick_mode && cost_upd_level != INTERNAL_COST_UPD_OFF) ||
731           cost_upd_level == INTERNAL_COST_UPD_TILE || fill_costs);
732 }
733 
734 // Decide whether we want to update the mode entropy cost for the current frame.
735 // The logit is currently inherited from selective_disable_cdf_rtc.
should_force_mode_cost_update(const AV1_COMP * cpi)736 static inline int should_force_mode_cost_update(const AV1_COMP *cpi) {
737   const REAL_TIME_SPEED_FEATURES *const rt_sf = &cpi->sf.rt_sf;
738   if (!rt_sf->frame_level_mode_cost_update) {
739     return false;
740   }
741 
742   if (cpi->oxcf.algo_cfg.cdf_update_mode == 2) {
743     return cpi->frames_since_last_update == 1;
744   } else if (cpi->oxcf.algo_cfg.cdf_update_mode == 1) {
745     if (cpi->svc.number_spatial_layers == 1 &&
746         cpi->svc.number_temporal_layers == 1) {
747       const AV1_COMMON *const cm = &cpi->common;
748       const RATE_CONTROL *const rc = &cpi->rc;
749 
750       return frame_is_intra_only(cm) || is_frame_resize_pending(cpi) ||
751              rc->high_source_sad || rc->frames_since_key < 10 ||
752              cpi->cyclic_refresh->counter_encode_maxq_scene_change < 10 ||
753              cm->current_frame.frame_number % 8 == 0;
754     } else if (cpi->svc.number_temporal_layers > 1) {
755       return cpi->svc.temporal_layer_id != cpi->svc.number_temporal_layers - 1;
756     }
757   }
758 
759   return false;
760 }
761 
// Per-frame initialization of the rate-distortion constants held in `cpi`.
//
// In order, this:
//   1. derives rd->RDMULT from the frame qindex (base_qindex + y_dc_delta_q)
//      and the GF group state (optionally overridden by an RD command when
//      CONFIG_RD_COMMAND is enabled),
//   2. derives the error-per-bit value from RDMULT,
//   3. sets the block thresholds,
//   4. merges the codec-control cost update frequencies into the speed
//      features, and
//   5. refreshes the frame-level mv, coefficient, mode and dv cost tables when
//      the corresponding update level asks for it.
// The dv cost storage is allocated on first use.
void av1_initialize_rd_consts(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  SPEED_FEATURES *const sf = &cpi->sf;
  RD_OPT *const rd = &cpi->rd;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  int use_nonrd_pick_mode = sf->rt_sf.use_nonrd_pick_mode;
  int frames_since_key = cpi->rc.frames_since_key;

  // Inputs of the RD multiplier; boost index and layer depth are capped.
  const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
  const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
  const FRAME_TYPE frame_type = cm->current_frame.frame_type;
  const int qindex_rdmult =
      cm->quant_params.base_qindex + cm->quant_params.y_dc_delta_q;

  rd->RDMULT = av1_compute_rd_mult(
      qindex_rdmult, cm->seq_params->bit_depth,
      gf_group->update_type[cpi->gf_frame_index], layer_depth, boost_index,
      frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
      is_stat_consumption_stage(cpi));
#if CONFIG_RD_COMMAND
  if (cpi->oxcf.pass == 2) {
    const RD_COMMAND *rd_command = &cpi->rd_command;
    if (rd_command->option_ls[rd_command->frame_index] ==
        RD_OPTION_SET_Q_RDMULT) {
      rd->RDMULT = rd_command->rdmult_ls[rd_command->frame_index];
    }
  }
#endif  // CONFIG_RD_COMMAND

  av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);

  set_block_thresholds(cm, rd, use_nonrd_pick_mode);

  // The speed features must be unified before any update level is read.
  populate_unified_cost_update_freq(cpi->oxcf.cost_upd_freq, sf);
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;

  // Frame level mv cost update
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->mv_cost_upd_level,
                                       use_nonrd_pick_mode,
                                       frames_since_key)) {
    av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
                      cm->features.allow_high_precision_mv, x->mv_costs);
  }

  // Frame level coefficient cost update
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->coeff_cost_upd_level,
                                       use_nonrd_pick_mode,
                                       frames_since_key)) {
    av1_fill_coeff_costs(&x->coeff_costs, cm->fc, av1_num_planes(cm));
  }

  // Frame level mode cost update
  if (should_force_mode_cost_update(cpi) ||
      is_frame_level_cost_upd_freq_set(cm, inter_sf->mode_cost_upd_level,
                                       use_nonrd_pick_mode,
                                       frames_since_key)) {
    av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);
  }

  // Frame level dv cost update
  if (av1_need_dv_costs(cpi)) {
    // Lazily allocate the dv cost storage and hook it up to the macroblock.
    if (cpi->td.dv_costs_alloc == NULL) {
      CHECK_MEM_ERROR(
          cm, cpi->td.dv_costs_alloc,
          (IntraBCMVCosts *)aom_malloc(sizeof(*cpi->td.dv_costs_alloc)));
      x->dv_costs = cpi->td.dv_costs_alloc;
    }
    av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
  }
}
826 
// Looks up the normalized rate (*r_q10) and normalized distortion (*d_q10), in
// Q10, for the Q10 input xsq_q10 by linear interpolation between neighbouring
// samples of the tables below.
//
// The input must be non-negative and small enough that the computed index
// plus one stays inside the tables; the caller in this file clamps it to
// 245727 before calling.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
    4044,  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
    3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
    2290,  2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
    1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
    911,   864,  821,  781,  745,  680,  623,  574,  530,  490,  455,  424,
    395,   345,  304,  269,  239,  213,  190,  171,  154,  126,  104,  87,
    73,    61,   52,   44,   38,   28,   21,   16,   12,   10,   8,    6,
    5,     3,    2,    1,    1,    1,    0,    0,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
    0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,
    5,    6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,
    18,   21,   24,   26,   29,   31,   34,   36,   39,   44,   49,   54,
    59,   64,   69,   73,   78,   88,   97,   106,  115,  124,  133,  142,
    151,  167,  184,  200,  215,  231,  245,  260,  274,  301,  327,  351,
    375,  397,  418,  439,  458,  495,  528,  559,  587,  613,  637,  659,
    680,  717,  749,  777,  801,  823,  842,  859,  874,  899,  919,  936,
    949,  960,  969,  977,  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
  };
  // Q10 abscissa of every table sample. The spacing doubles every eight
  // entries (4, 8, 16, ...).
  static const int xsq_iq_q10[] = {
    0,      4,      8,      12,     16,     20,     24,     28,     32,
    40,     48,     56,     64,     72,     80,     88,     96,     112,
    128,    144,    160,    176,    192,    208,    224,    256,    288,
    320,    352,    384,    416,    448,    480,    544,    608,    672,
    736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
    1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
    3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
    7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
    16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
    36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
    81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
    180192, 196576, 212960, 229344, 245728,
  };

  // Locate the sample at or below the input: `seg` selects the group of eight
  // equally spaced samples and the three bits below the leading one of
  // `biased` select the sample inside that group. get_msb() is expected to
  // return the index of the highest set bit.
  const int biased = (xsq_q10 >> 2) + 8;
  const int seg = get_msb(biased) - 3;
  const int idx = (seg << 3) + ((biased >> seg) & 0x7);

  // Q10 interpolation weights. Neighbouring samples in group `seg` are
  // (4 << seg) apart, hence the right shift by (2 + seg).
  const int w_hi = ((xsq_q10 - xsq_iq_q10[idx]) << 10) >> (2 + seg);
  const int w_lo = (1 << 10) - w_hi;

  *r_q10 = (rate_tab_q10[idx] * w_lo + rate_tab_q10[idx + 1] * w_hi) >> 10;
  *d_q10 = (dist_tab_q10[idx] * w_lo + dist_tab_q10[idx + 1] * w_hi) >> 10;
}
892 
av1_model_rd_from_var_lapndz(int64_t var,unsigned int n_log2,unsigned int qstep,int * rate,int64_t * dist)893 void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
894                                   unsigned int qstep, int *rate,
895                                   int64_t *dist) {
896   // This function models the rate and distortion for a Laplacian
897   // source with given variance when quantized with a uniform quantizer
898   // with given stepsize. The closed form expressions are in:
899   // Hang and Chen, "Source Model for transform video coder and its
900   // application - Part I: Fundamental Theory", IEEE Trans. Circ.
901   // Sys. for Video Tech., April 1997.
902   if (var == 0) {
903     *rate = 0;
904     *dist = 0;
905   } else {
906     int d_q10, r_q10;
907     static const uint32_t MAX_XSQ_Q10 = 245727;
908     const uint64_t xsq_q10_64 =
909         (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
910     const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
911     model_rd_norm(xsq_q10, &r_q10, &d_q10);
912     *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
913     *dist = (var * (int64_t)d_q10 + 512) >> 10;
914   }
915 }
916 
// Cubic (Catmull-Rom form) interpolation through four equally spaced samples
// p[0..3]. `x` is the fractional position between p[1] and p[2]: x == 0
// returns p[1] and x == 1 evaluates to p[2].
static double interp_cubic(const double *p, double x) {
  // Polynomial coefficients, evaluated below in Horner form.
  const double c1 = p[2] - p[0];
  const double c2 = 2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3];
  const double c3 = 3.0 * (p[1] - p[2]) + p[3] - p[0];
  return p[1] + 0.5 * x * (c1 + x * (c2 + x * c3));
}
923 
924 /*
925 static double interp_bicubic(const double *p, int p_stride, double x,
926                              double y) {
927   double q[4];
928   q[0] = interp_cubic(p, x);
929   q[1] = interp_cubic(p + p_stride, x);
930   q[2] = interp_cubic(p + 2 * p_stride, x);
931   q[3] = interp_cubic(p + 3 * p_stride, x);
932   return interp_cubic(q, y);
933 }
934 */
935 
936 static const uint8_t bsize_curvfit_model_cat_lookup[BLOCK_SIZES_ALL] = {
937   0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 1, 1, 2, 2, 3, 3
938 };
939 
// Returns the distortion model category for a normalized SSE: 1 when sse_norm
// is strictly greater than 16.0, otherwise 0.
static int sse_norm_curvfit_model_cat_lookup(double sse_norm) {
  return sse_norm > 16.0 ? 1 : 0;
}
943 
// Rate samples used by av1_model_rd_curvfit(). There is one row per category
// returned by bsize_curvfit_model_cat_lookup[] and 65 samples per row; sample
// i corresponds to xqr = -15.5 + 0.5 * i.
static const double interp_rgrid_curv[4][65] = {
  // Category 0.
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    118.257702,  120.210658,  121.434853,  122.100487,
      122.377758,  122.436865,  72.290102,   96.974289,   101.652727,
      126.830141,  140.417377,  157.644879,  184.315291,  215.823873,
      262.300169,  335.919859,  420.624173,  519.185032,  619.854243,
      726.053595,  827.663369,  933.127475,  1037.988755, 1138.839609,
      1233.342933, 1333.508064, 1428.760126, 1533.396364, 1616.952052,
      1744.539319, 1803.413586, 1951.466618, 1994.227838, 2086.031680,
      2148.635443, 2239.068450, 2222.590637, 2338.859809, 2402.929011,
      2418.727875, 2435.342670, 2471.159469, 2523.187446, 2591.183827,
      2674.905840, 2774.110714, 2888.555675, 3017.997952, 3162.194773,
      3320.903365, 3493.880956, 3680.884773, 3881.672045, 4096.000000,
  },
  // Category 1.
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    13.087244,   15.919735,   25.930313,   24.412411,
      28.567417,   29.924194,   30.857010,   32.742979,   36.382570,
      39.210386,   42.265690,   47.378572,   57.014850,   82.740067,
      137.346562,  219.968084,  316.781856,  415.643773,  516.706538,
      614.914364,  714.303763,  815.512135,  911.210485,  1008.501528,
      1109.787854, 1213.772279, 1322.922561, 1414.752579, 1510.505641,
      1615.741888, 1697.989032, 1780.123933, 1847.453790, 1913.742309,
      1960.828122, 2047.500168, 2085.454095, 2129.230668, 2158.171824,
      2182.231724, 2217.684864, 2269.589211, 2337.264824, 2420.618694,
      2519.557814, 2633.989178, 2763.819779, 2908.956609, 3069.306660,
      3244.776927, 3435.274401, 3640.706076, 3860.978945, 4096.000000,
  },
  // Category 2.
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    4.656893,    5.123633,    5.594132,    6.162376,
      6.918433,    7.768444,    8.739415,    10.105862,   11.477328,
      13.236604,   15.421030,   19.093623,   25.801871,   46.724612,
      98.841054,   181.113466,  272.586364,  359.499769,  445.546343,
      525.944439,  605.188743,  681.793483,  756.668359,  838.486885,
      926.950356,  1015.482542, 1113.353926, 1204.897193, 1288.871992,
      1373.464145, 1455.746628, 1527.796460, 1588.475066, 1658.144771,
      1710.302500, 1807.563351, 1863.197608, 1927.281616, 1964.450872,
      2022.719898, 2100.041145, 2185.205712, 2280.993936, 2387.616216,
      2505.282950, 2634.204540, 2774.591385, 2926.653884, 3090.602436,
      3266.647443, 3454.999303, 3655.868416, 3869.465182, 4096.000000,
  },
  // Category 3.
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.337370,    0.391916,    0.468839,    0.566334,
      0.762564,    1.069225,    1.384361,    1.787581,    2.293948,
      3.251909,    4.412991,    8.050068,    11.606073,   27.668092,
      65.227758,   128.463938,  202.097653,  262.715851,  312.464873,
      355.601398,  400.609054,  447.201352,  495.761568,  552.871938,
      619.067625,  691.984883,  773.753288,  860.628503,  946.262808,
      1019.805896, 1106.061360, 1178.422145, 1244.852258, 1302.173987,
      1399.650266, 1548.092912, 1545.928652, 1670.817500, 1694.523823,
      1779.195362, 1882.155494, 1990.662097, 2108.325181, 2235.456119,
      2372.366287, 2519.367059, 2676.769812, 2844.885918, 3024.026754,
      3214.503695, 3416.628115, 3630.711389, 3857.064892, 4096.000000,
  },
};
1006 
// Distortion-by-SSE samples used by av1_model_rd_curvfit(). The row is chosen
// by sse_norm_curvfit_model_cat_lookup() (0 or 1); sample i corresponds to
// xqr = -15.5 + 0.5 * i.
// NOTE(review): the array is declared with three rows but only two are
// initialized, so row 2 is implicitly all zeros. The lookup visible here only
// yields 0 or 1 -- confirm nothing else reads row 2 before shrinking it.
static const double interp_dgrid_curv[3][65] = {
  // Category 0 (sse_norm <= 16.0).
  {
      16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
      15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
      15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
      13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
      7.487633,  5.688649,  4.267515,  3.196300,  2.434201,  1.834064,
      1.369920,  1.035921,  0.775279,  0.574895,  0.427232,  0.314123,
      0.233236,  0.171440,  0.128188,  0.092762,  0.067569,  0.049324,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  0.000000,
  },
  // Category 1 (sse_norm > 16.0).
  {
      16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
      15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
      15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
      13.073692, 12.222005, 11.237799, 9.985848,  8.898823,  7.423519,
      5.995325,  4.773152,  3.744032,  2.938217,  2.294526,  1.762412,
      1.327145,  1.020728,  0.765535,  0.570548,  0.425833,  0.313825,
      0.232959,  0.171324,  0.128174,  0.092750,  0.067558,  0.049319,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  -0.000000,
  },
};
1035 
// Evaluates the curve-fit rate and distortion models at `xqr`.
//
//   bsize       - block size; selects the rate curve through
//                 bsize_curvfit_model_cat_lookup[].
//   sse_norm    - normalized SSE; selects the distortion curve (values above
//                 16.0 use the second one).
//   xqr         - model argument. It is clamped so that the four samples
//                 needed by the cubic interpolation always lie inside the
//                 65-entry grids.
//   rate_f      - output: interpolated value of the rate curve.
//   distbysse_f - output: interpolated value of the distortion curve.
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
                          double *rate_f, double *distbysse_f) {
  // Grid geometry: samples every 0.5 from -15.5 up to 16.5.
  const double x_start = -15.5;
  const double x_end = 16.5;
  const double x_step = 0.5;
  const double epsilon = 1e-6;
  // Keep one full step away from both ends of the grid.
  const double xqr_lo = x_start + x_step + epsilon;
  const double xqr_hi = x_end - x_step - epsilon;

  const int rate_cat = bsize_curvfit_model_cat_lookup[bsize];
  const int dist_cat = sse_norm_curvfit_model_cat_lookup(sse_norm);

  xqr = AOMMAX(xqr, xqr_lo);
  xqr = AOMMIN(xqr, xqr_hi);

  // Split the grid position into the sample index and the fractional offset.
  const double grid_pos = (xqr - x_start) / x_step;
  const int xi = (int)floor(grid_pos);
  const double frac = grid_pos - xi;

  assert(xi > 0);

  // interp_cubic() reads the samples xi - 1 .. xi + 2.
  *rate_f = interp_cubic(&interp_rgrid_curv[rate_cat][xi - 1], frac);
  *distbysse_f = interp_cubic(&interp_dgrid_curv[dist_cat][xi - 1], frac);
}
1059 
// Copies the plane's above and left entropy contexts into the caller's
// buffers: mi_size_wide[plane_bsize] entries into t_above and
// mi_size_high[plane_bsize] entries into t_left.
static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
                                       const struct macroblockd_plane *pd,
                                       ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                                       ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  const size_t above_bytes =
      sizeof(ENTROPY_CONTEXT) * mi_size_wide[plane_bsize];
  const size_t left_bytes =
      sizeof(ENTROPY_CONTEXT) * mi_size_high[plane_bsize];

  memcpy(t_above, pd->above_entropy_context, above_bytes);
  memcpy(t_left, pd->left_entropy_context, left_bytes);
}
1072 
// Public entry point for fetching a plane's above/left entropy contexts into
// t_above and t_left. plane_bsize must be a valid block size (checked with an
// assert only); the copy itself is done by get_entropy_contexts_plane().
void av1_get_entropy_contexts(BLOCK_SIZE plane_bsize,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  assert(plane_bsize < BLOCK_SIZES_ALL);

  get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left);
}
1080 
1081 // Special clamping used in the encoder when calculating a prediction
1082 //
1083 // Logically, all pixel fetches used for prediction are clamped against the
1084 // edges of the frame. But doing this directly is slow, so instead we allocate
1085 // a finite border around the frame and fill it with copies of the outermost
1086 // pixels.
1087 //
1088 // Since this border is finite, we need to clamp the motion vector before
1089 // prediction in order to avoid out-of-bounds reads. At the same time, this
1090 // clamp must not change the prediction result.
1091 //
1092 // We can balance both of these concerns by calculating how far we would have
1093 // to go in each direction before the extended prediction region (the current
1094 // block + AOM_INTERP_EXTEND many pixels around the block) would be mapped
1095 // so that it touches the frame only at one row or column. This is a special
1096 // point because any more extreme MV will always lead to the same prediction.
1097 // So it is safe to clamp at that point.
1098 //
1099 // In the worst case, this requires a border of
1100 //   max_block_width + 2*AOM_INTERP_EXTEND = 128 + 2*4 = 136 pixels
1101 // around the frame edges.
enc_clamp_mv(const AV1_COMMON * cm,const MACROBLOCKD * xd,MV * mv)1102 static inline void enc_clamp_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd,
1103                                 MV *mv) {
1104   int bw = xd->width << MI_SIZE_LOG2;
1105   int bh = xd->height << MI_SIZE_LOG2;
1106 
1107   int px_to_left_edge = xd->mi_col << MI_SIZE_LOG2;
1108   int px_to_right_edge = (cm->mi_params.mi_cols - xd->mi_col) << MI_SIZE_LOG2;
1109   int px_to_top_edge = xd->mi_row << MI_SIZE_LOG2;
1110   int px_to_bottom_edge = (cm->mi_params.mi_rows - xd->mi_row) << MI_SIZE_LOG2;
1111 
1112   const SubpelMvLimits mv_limits = {
1113     .col_min = -GET_MV_SUBPEL(px_to_left_edge + bw + AOM_INTERP_EXTEND),
1114     .col_max = GET_MV_SUBPEL(px_to_right_edge + AOM_INTERP_EXTEND),
1115     .row_min = -GET_MV_SUBPEL(px_to_top_edge + bh + AOM_INTERP_EXTEND),
1116     .row_max = GET_MV_SUBPEL(px_to_bottom_edge + AOM_INTERP_EXTEND)
1117   };
1118   clamp_mv(mv, &mv_limits);
1119 }
1120 
// Evaluates the SAD of the first two reference MV candidates of `ref_frame`
// against the luma source block and records the results in `x`:
//   x->pred_mv0_sad[ref_frame]   - SAD of candidate 0,
//   x->pred_mv1_sad[ref_frame]   - SAD of candidate 1, written only when that
//                                  candidate is actually evaluated,
//   x->pred_mv_sad[ref_frame]    - smallest SAD found,
//   x->max_mv_context[ref_frame] - largest full-pel MV component magnitude
//                                  among the (clamped) candidates.
// Candidate 1 is considered only if it differs from candidate 0, and it is
// skipped if both candidates round to the zero full-pel position.
// NOTE(review): when candidate 1 is not evaluated, pred_mv1_sad[ref_frame]
// keeps whatever value it had before the call -- confirm callers expect that.
void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
  const int_mv ref_mv =
      av1_get_ref_mv_from_stack(0, ref_frames, 0, &x->mbmi_ext);
  const int_mv ref_mv1 =
      av1_get_ref_mv_from_stack(0, ref_frames, 1, &x->mbmi_ext);

  // Collect the distinct candidates.
  MV pred_mv[MAX_MV_REF_CANDIDATES + 1];
  int num_mv_refs = 0;
  pred_mv[num_mv_refs++] = ref_mv.as_mv;
  if (ref_mv.as_int != ref_mv1.as_int) {
    pred_mv[num_mv_refs++] = ref_mv1.as_mv;
  }

  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  const uint8_t *const src_y_ptr = x->plane[0].src.buf;
  int zero_seen = 0;
  int best_sad = INT_MAX;
  int max_mv = 0;
  // Get the sad for each candidate reference mv.
  for (int i = 0; i < num_mv_refs; ++i) {
    MV *const cand = &pred_mv[i];
    enc_clamp_mv(&cpi->common, &x->e_mbd, cand);

    // Round the 1/8-pel candidate to the nearest full-pel position.
    const int fp_row = (cand->row + 3 + (cand->row >= 0)) >> 3;
    const int fp_col = (cand->col + 3 + (cand->col >= 0)) >> 3;
    const int cand_mag = AOMMAX(abs(cand->row), abs(cand->col)) >> 3;
    max_mv = AOMMAX(max_mv, cand_mag);

    // The zero full-pel position only needs to be evaluated once.
    const int is_zero = (fp_row == 0 && fp_col == 0);
    if (is_zero && zero_seen) continue;
    zero_seen |= is_zero;

    const uint8_t *const ref_y_ptr =
        &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    // Find sad for current vector.
    const int this_sad = cpi->ppi->fn_ptr[block_size].sdf(
        src_y_ptr, x->plane[0].src.stride, ref_y_ptr, ref_y_stride);
    // Note if it is the best so far.
    if (this_sad < best_sad) best_sad = this_sad;

    switch (i) {
      case 0: x->pred_mv0_sad[ref_frame] = this_sad; break;
      case 1: x->pred_mv1_sad[ref_frame] = this_sad; break;
      default: break;
    }
  }

  // Record the largest full-pel MV magnitude and the best SAD.
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
1172 
av1_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,const struct scale_factors * scale,const struct scale_factors * scale_uv,const int num_planes)1173 void av1_setup_pred_block(const MACROBLOCKD *xd,
1174                           struct buf_2d dst[MAX_MB_PLANE],
1175                           const YV12_BUFFER_CONFIG *src,
1176                           const struct scale_factors *scale,
1177                           const struct scale_factors *scale_uv,
1178                           const int num_planes) {
1179   dst[0].buf = src->y_buffer;
1180   dst[0].stride = src->y_stride;
1181   dst[1].buf = src->u_buffer;
1182   dst[2].buf = src->v_buffer;
1183   dst[1].stride = dst[2].stride = src->uv_stride;
1184 
1185   const int mi_row = xd->mi_row;
1186   const int mi_col = xd->mi_col;
1187   for (int i = 0; i < num_planes; ++i) {
1188     setup_pred_plane(dst + i, xd->mi[0]->bsize, dst[i].buf,
1189                      i ? src->uv_crop_width : src->y_crop_width,
1190                      i ? src->uv_crop_height : src->y_crop_height,
1191                      dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
1192                      xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
1193   }
1194 }
1195 
av1_get_scaled_ref_frame(const AV1_COMP * cpi,int ref_frame)1196 YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
1197                                              int ref_frame) {
1198   assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
1199   RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame - 1];
1200   const RefCntBuffer *const ref_buf =
1201       get_ref_frame_buf(&cpi->common, ref_frame);
1202   return (scaled_buf != ref_buf && scaled_buf != NULL) ? &scaled_buf->buf
1203                                                        : NULL;
1204 }
1205 
// Returns the cost of signalling the current block's interpolation filter(s).
// The cost is 0 unless `interp_filter` is SWITCHABLE. In that case the
// per-direction costs from x->mode_costs.switchable_interp_costs are summed
// (direction 0 only, or directions 0 and 1 when `dual_filter` is set) and
// scaled by SWITCHABLE_INTERP_RATE_FACTOR.
int av1_get_switchable_rate(const MACROBLOCK *x, const MACROBLOCKD *xd,
                            InterpFilter interp_filter, int dual_filter) {
  if (interp_filter != SWITCHABLE) return 0;

  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_dirs = dual_filter ? 2 : 1;
  int filter_cost_sum = 0;
  for (int dir = 0; dir < num_dirs; ++dir) {
    const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
    const InterpFilter filter =
        av1_extract_interp_filter(mbmi->interp_filters, dir);
    filter_cost_sum += x->mode_costs.switchable_interp_costs[ctx][filter];
  }
  return SWITCHABLE_INTERP_RATE_FACTOR * filter_cost_sum;
}
1223 
av1_set_rd_speed_thresholds(AV1_COMP * cpi)1224 void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
1225   RD_OPT *const rd = &cpi->rd;
1226 
1227   // Set baseline threshold values.
1228   av1_zero(rd->thresh_mult);
1229 
1230   rd->thresh_mult[THR_NEARESTMV] = 300;
1231   rd->thresh_mult[THR_NEARESTL2] = 300;
1232   rd->thresh_mult[THR_NEARESTL3] = 300;
1233   rd->thresh_mult[THR_NEARESTB] = 300;
1234   rd->thresh_mult[THR_NEARESTA2] = 300;
1235   rd->thresh_mult[THR_NEARESTA] = 300;
1236   rd->thresh_mult[THR_NEARESTG] = 300;
1237 
1238   rd->thresh_mult[THR_NEWMV] = 1000;
1239   rd->thresh_mult[THR_NEWL2] = 1000;
1240   rd->thresh_mult[THR_NEWL3] = 1000;
1241   rd->thresh_mult[THR_NEWB] = 1000;
1242   rd->thresh_mult[THR_NEWA2] = 1100;
1243   rd->thresh_mult[THR_NEWA] = 1000;
1244   rd->thresh_mult[THR_NEWG] = 1000;
1245 
1246   rd->thresh_mult[THR_NEARMV] = 1000;
1247   rd->thresh_mult[THR_NEARL2] = 1000;
1248   rd->thresh_mult[THR_NEARL3] = 1000;
1249   rd->thresh_mult[THR_NEARB] = 1000;
1250   rd->thresh_mult[THR_NEARA2] = 1000;
1251   rd->thresh_mult[THR_NEARA] = 1000;
1252   rd->thresh_mult[THR_NEARG] = 1000;
1253 
1254   rd->thresh_mult[THR_GLOBALMV] = 2200;
1255   rd->thresh_mult[THR_GLOBALL2] = 2000;
1256   rd->thresh_mult[THR_GLOBALL3] = 2000;
1257   rd->thresh_mult[THR_GLOBALB] = 2400;
1258   rd->thresh_mult[THR_GLOBALA2] = 2000;
1259   rd->thresh_mult[THR_GLOBALG] = 2000;
1260   rd->thresh_mult[THR_GLOBALA] = 2400;
1261 
1262   rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] = 1100;
1263   rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] = 1000;
1264   rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] = 800;
1265   rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] = 900;
1266   rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] = 1000;
1267   rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] = 1000;
1268   rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] = 1000;
1269   rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] = 1000;
1270   rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA2] = 1000;
1271   rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A2] = 1000;
1272   rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A2] = 1000;
1273   rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA2] = 1000;
1274 
1275   rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] = 2000;
1276   rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] = 2000;
1277   rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] = 2000;
1278   rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] = 2000;
1279 
1280   rd->thresh_mult[THR_COMP_NEAR_NEARLA] = 1200;
1281   rd->thresh_mult[THR_COMP_NEAREST_NEWLA] = 1500;
1282   rd->thresh_mult[THR_COMP_NEW_NEARESTLA] = 1500;
1283   rd->thresh_mult[THR_COMP_NEAR_NEWLA] = 1530;
1284   rd->thresh_mult[THR_COMP_NEW_NEARLA] = 1870;
1285   rd->thresh_mult[THR_COMP_NEW_NEWLA] = 2400;
1286   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] = 2750;
1287 
1288   rd->thresh_mult[THR_COMP_NEAR_NEARL2A] = 1200;
1289   rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] = 1500;
1290   rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] = 1500;
1291   rd->thresh_mult[THR_COMP_NEAR_NEWL2A] = 1870;
1292   rd->thresh_mult[THR_COMP_NEW_NEARL2A] = 1700;
1293   rd->thresh_mult[THR_COMP_NEW_NEWL2A] = 1800;
1294   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A] = 2500;
1295 
1296   rd->thresh_mult[THR_COMP_NEAR_NEARL3A] = 1200;
1297   rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] = 1500;
1298   rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] = 1500;
1299   rd->thresh_mult[THR_COMP_NEAR_NEWL3A] = 1700;
1300   rd->thresh_mult[THR_COMP_NEW_NEARL3A] = 1700;
1301   rd->thresh_mult[THR_COMP_NEW_NEWL3A] = 2000;
1302   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] = 3000;
1303 
1304   rd->thresh_mult[THR_COMP_NEAR_NEARGA] = 1320;
1305   rd->thresh_mult[THR_COMP_NEAREST_NEWGA] = 1500;
1306   rd->thresh_mult[THR_COMP_NEW_NEARESTGA] = 1500;
1307   rd->thresh_mult[THR_COMP_NEAR_NEWGA] = 2040;
1308   rd->thresh_mult[THR_COMP_NEW_NEARGA] = 1700;
1309   rd->thresh_mult[THR_COMP_NEW_NEWGA] = 2000;
1310   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] = 2250;
1311 
1312   rd->thresh_mult[THR_COMP_NEAR_NEARLB] = 1200;
1313   rd->thresh_mult[THR_COMP_NEAREST_NEWLB] = 1500;
1314   rd->thresh_mult[THR_COMP_NEW_NEARESTLB] = 1500;
1315   rd->thresh_mult[THR_COMP_NEAR_NEWLB] = 1360;
1316   rd->thresh_mult[THR_COMP_NEW_NEARLB] = 1700;
1317   rd->thresh_mult[THR_COMP_NEW_NEWLB] = 2400;
1318   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] = 2250;
1319 
1320   rd->thresh_mult[THR_COMP_NEAR_NEARL2B] = 1200;
1321   rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] = 1500;
1322   rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] = 1500;
1323   rd->thresh_mult[THR_COMP_NEAR_NEWL2B] = 1700;
1324   rd->thresh_mult[THR_COMP_NEW_NEARL2B] = 1700;
1325   rd->thresh_mult[THR_COMP_NEW_NEWL2B] = 2000;
1326   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2B] = 2500;
1327 
1328   rd->thresh_mult[THR_COMP_NEAR_NEARL3B] = 1200;
1329   rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] = 1500;
1330   rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] = 1500;
1331   rd->thresh_mult[THR_COMP_NEAR_NEWL3B] = 1870;
1332   rd->thresh_mult[THR_COMP_NEW_NEARL3B] = 1700;
1333   rd->thresh_mult[THR_COMP_NEW_NEWL3B] = 2000;
1334   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3B] = 2500;
1335 
1336   rd->thresh_mult[THR_COMP_NEAR_NEARGB] = 1200;
1337   rd->thresh_mult[THR_COMP_NEAREST_NEWGB] = 1500;
1338   rd->thresh_mult[THR_COMP_NEW_NEARESTGB] = 1500;
1339   rd->thresh_mult[THR_COMP_NEAR_NEWGB] = 1700;
1340   rd->thresh_mult[THR_COMP_NEW_NEARGB] = 1700;
1341   rd->thresh_mult[THR_COMP_NEW_NEWGB] = 2000;
1342   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGB] = 2500;
1343 
1344   rd->thresh_mult[THR_COMP_NEAR_NEARLA2] = 1200;
1345   rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] = 1800;
1346   rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] = 1500;
1347   rd->thresh_mult[THR_COMP_NEAR_NEWLA2] = 1700;
1348   rd->thresh_mult[THR_COMP_NEW_NEARLA2] = 1700;
1349   rd->thresh_mult[THR_COMP_NEW_NEWLA2] = 2000;
1350   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA2] = 2500;
1351 
1352   rd->thresh_mult[THR_COMP_NEAR_NEARL2A2] = 1200;
1353   rd->thresh_mult[THR_COMP_NEAREST_NEWL2A2] = 1500;
1354   rd->thresh_mult[THR_COMP_NEW_NEARESTL2A2] = 1500;
1355   rd->thresh_mult[THR_COMP_NEAR_NEWL2A2] = 1700;
1356   rd->thresh_mult[THR_COMP_NEW_NEARL2A2] = 1700;
1357   rd->thresh_mult[THR_COMP_NEW_NEWL2A2] = 2000;
1358   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A2] = 2500;
1359 
1360   rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] = 1440;
1361   rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] = 1500;
1362   rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] = 1500;
1363   rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] = 1700;
1364   rd->thresh_mult[THR_COMP_NEW_NEARL3A2] = 1700;
1365   rd->thresh_mult[THR_COMP_NEW_NEWL3A2] = 2000;
1366   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A2] = 2500;
1367 
1368   rd->thresh_mult[THR_COMP_NEAR_NEARGA2] = 1200;
1369   rd->thresh_mult[THR_COMP_NEAREST_NEWGA2] = 1500;
1370   rd->thresh_mult[THR_COMP_NEW_NEARESTGA2] = 1500;
1371   rd->thresh_mult[THR_COMP_NEAR_NEWGA2] = 1700;
1372   rd->thresh_mult[THR_COMP_NEW_NEARGA2] = 1700;
1373   rd->thresh_mult[THR_COMP_NEW_NEWGA2] = 2000;
1374   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] = 2750;
1375 
1376   rd->thresh_mult[THR_COMP_NEAR_NEARLL2] = 1600;
1377   rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] = 2000;
1378   rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] = 2000;
1379   rd->thresh_mult[THR_COMP_NEAR_NEWLL2] = 2640;
1380   rd->thresh_mult[THR_COMP_NEW_NEARLL2] = 2200;
1381   rd->thresh_mult[THR_COMP_NEW_NEWLL2] = 2400;
1382   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL2] = 3200;
1383 
1384   rd->thresh_mult[THR_COMP_NEAR_NEARLL3] = 1600;
1385   rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] = 2000;
1386   rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] = 1800;
1387   rd->thresh_mult[THR_COMP_NEAR_NEWLL3] = 2200;
1388   rd->thresh_mult[THR_COMP_NEW_NEARLL3] = 2200;
1389   rd->thresh_mult[THR_COMP_NEW_NEWLL3] = 2400;
1390   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL3] = 3200;
1391 
1392   rd->thresh_mult[THR_COMP_NEAR_NEARLG] = 1760;
1393   rd->thresh_mult[THR_COMP_NEAREST_NEWLG] = 2400;
1394   rd->thresh_mult[THR_COMP_NEW_NEARESTLG] = 2000;
1395   rd->thresh_mult[THR_COMP_NEAR_NEWLG] = 1760;
1396   rd->thresh_mult[THR_COMP_NEW_NEARLG] = 2640;
1397   rd->thresh_mult[THR_COMP_NEW_NEWLG] = 2400;
1398   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLG] = 3200;
1399 
1400   rd->thresh_mult[THR_COMP_NEAR_NEARBA] = 1600;
1401   rd->thresh_mult[THR_COMP_NEAREST_NEWBA] = 2000;
1402   rd->thresh_mult[THR_COMP_NEW_NEARESTBA] = 2000;
1403   rd->thresh_mult[THR_COMP_NEAR_NEWBA] = 2200;
1404   rd->thresh_mult[THR_COMP_NEW_NEARBA] = 1980;
1405   rd->thresh_mult[THR_COMP_NEW_NEWBA] = 2640;
1406   rd->thresh_mult[THR_COMP_GLOBAL_GLOBALBA] = 3200;
1407 
1408   rd->thresh_mult[THR_DC] = 1000;
1409   rd->thresh_mult[THR_PAETH] = 1000;
1410   rd->thresh_mult[THR_SMOOTH] = 2200;
1411   rd->thresh_mult[THR_SMOOTH_V] = 2000;
1412   rd->thresh_mult[THR_SMOOTH_H] = 2000;
1413   rd->thresh_mult[THR_H_PRED] = 2000;
1414   rd->thresh_mult[THR_V_PRED] = 1800;
1415   rd->thresh_mult[THR_D135_PRED] = 2500;
1416   rd->thresh_mult[THR_D203_PRED] = 2000;
1417   rd->thresh_mult[THR_D157_PRED] = 2500;
1418   rd->thresh_mult[THR_D67_PRED] = 2000;
1419   rd->thresh_mult[THR_D113_PRED] = 2500;
1420   rd->thresh_mult[THR_D45_PRED] = 2500;
1421 }
1422 
// Adapts the threshold factors stored in factor_buf[block_size][mode] for
// every mode in [mode_start, mode_end) and every block size in
// [min_size, max_size] (both ends inclusive).
//
// The entry belonging to best_mode_index is reduced by
// (factor >> RD_THRESH_LOG_DEC_FACTOR); every other entry is increased by
// RD_THRESH_INC and clamped so that it never exceeds max_rd_thresh_factor.
static inline void update_thr_fact(int (*factor_buf)[MAX_MODES],
                                   THR_MODES best_mode_index,
                                   THR_MODES mode_start, THR_MODES mode_end,
                                   BLOCK_SIZE min_size, BLOCK_SIZE max_size,
                                   int max_rd_thresh_factor) {
  // Each (block size, mode) cell is updated independently of all others, so
  // the traversal order does not affect the result.
  for (BLOCK_SIZE size_idx = min_size; size_idx <= max_size; ++size_idx) {
    for (THR_MODES mode_idx = mode_start; mode_idx < mode_end; ++mode_idx) {
      int *const slot = &factor_buf[size_idx][mode_idx];

      if (mode_idx == best_mode_index) {
        // Winning mode: decay its factor.
        *slot -= (*slot >> RD_THRESH_LOG_DEC_FACTOR);
        continue;
      }

      // Losing mode: grow its factor, saturating at the supplied cap.
      const int raised = *slot + RD_THRESH_INC;
      *slot = AOMMIN(raised, max_rd_thresh_factor);
    }
  }
}
1439 
// Updates the adaptive threshold factors in factor_buf after a mode decision
// for a block of size bsize.
//
// The cap applied to growing factors is
// use_adaptive_rd_thresh * RD_THRESH_MAX_FACT; use_adaptive_rd_thresh must be
// positive (asserted). Both the inter range
// [inter_mode_start, inter_mode_end) and the intra range
// [intra_mode_start, intra_mode_end) are processed via update_thr_fact(),
// with best_mode_index identifying the mode whose factor is decayed.
//
// The block sizes touched are:
//   - only bsize itself when bsize compares greater than the sequence's
//     superblock size (the 1:4 / 4:1 aspect-ratio block sizes), or
//   - otherwise bsize - 2 .. bsize + 2, clamped to
//     [BLOCK_4X4, cm->seq_params->sb_size].
void av1_update_rd_thresh_fact(
    const AV1_COMMON *const cm, int (*factor_buf)[MAX_MODES],
    int use_adaptive_rd_thresh, BLOCK_SIZE bsize, THR_MODES best_mode_index,
    THR_MODES inter_mode_start, THR_MODES inter_mode_end,
    THR_MODES intra_mode_start, THR_MODES intra_mode_end) {
  assert(use_adaptive_rd_thresh > 0);
  const int factor_cap = use_adaptive_rd_thresh * RD_THRESH_MAX_FACT;

  // Block sizes with 1:4 and 4:1 aspect ratios are handled on their own.
  // TODO(any): Experiment with threshold update for parent/child blocks
  const int within_sb_sizes = bsize <= cm->seq_params->sb_size;
  const BLOCK_SIZE first_size =
      within_sb_sizes ? AOMMAX(bsize - 2, BLOCK_4X4) : bsize;
  const BLOCK_SIZE last_size =
      within_sb_sizes ? AOMMIN(bsize + 2, (int)cm->seq_params->sb_size) : bsize;

  // Inter modes first, then intra modes, over the same block-size window.
  update_thr_fact(factor_buf, best_mode_index, inter_mode_start, inter_mode_end,
                  first_size, last_size, factor_cap);
  update_thr_fact(factor_buf, best_mode_index, intra_mode_start, intra_mode_end,
                  first_size, last_size, factor_cap);
}
1465 
// Returns an intra cost penalty derived from the DC quantizer value that
// av1_dc_quant_QTX() yields for (qindex, qdelta, bit_depth).
//
// The quantizer value is scaled by a bit-depth dependent factor:
//   AOM_BITS_8  -> 20 * q
//   AOM_BITS_10 ->  5 * q
//   AOM_BITS_12 -> (5 * q) / 4, rounded via ROUND_POWER_OF_TWO(5 * q, 2)
// NOTE(review): the 4x reduction per bit-depth step presumably offsets a
// larger quantizer value at higher bit depths -- confirm against
// av1_dc_quant_QTX().
//
// Any other bit_depth trips an assert and, when asserts are compiled out,
// returns -1.
int av1_get_intra_cost_penalty(int qindex, int qdelta,
                               aom_bit_depth_t bit_depth) {
  const int dc_q = av1_dc_quant_QTX(qindex, qdelta, bit_depth);
  int penalty = -1;

  if (bit_depth == AOM_BITS_8) {
    penalty = 20 * dc_q;
  } else if (bit_depth == AOM_BITS_10) {
    penalty = 5 * dc_q;
  } else if (bit_depth == AOM_BITS_12) {
    penalty = ROUND_POWER_OF_TWO(5 * dc_q, 2);
  } else {
    assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
  }

  return penalty;
}
1478