/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <limits.h>
#include <float.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/binary_codes_writer.h"
#include "aom_ports/mem.h"
#include "aom_ports/aom_timer.h"
#include "aom_util/aom_pthread.h"
#if CONFIG_MISMATCH_DEBUG
#include "aom_util/debug_util.h"
#endif  // CONFIG_MISMATCH_DEBUG

#include "av1/common/cfl.h"
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mv.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconintra.h"
#include "av1/common/reconinter.h"
#include "av1/common/seg_common.h"
#include "av1/common/tile_common.h"
#include "av1/common/warped_motion.h"

#include "av1/encoder/allintra_vis.h"
#include "av1/encoder/aq_complexity.h"
#include "av1/encoder/aq_cyclicrefresh.h"
#include "av1/encoder/aq_variance.h"
#include "av1/encoder/global_motion_facade.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encodeframe_utils.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encodetxb.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/extend.h"
#include "av1/encoder/intra_mode_search_utils.h"
#include "av1/encoder/ml.h"
#include "av1/encoder/motion_search_facade.h"
#include "av1/encoder/partition_strategy.h"
#if !CONFIG_REALTIME_ONLY
#include "av1/encoder/partition_model_weights.h"
#endif
#include "av1/encoder/partition_search.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/segmentation.h"
#include "av1/encoder/tokenize.h"
#include "av1/encoder/tpl_model.h"
#include "av1/encoder/var_based_part.h"

#if CONFIG_TUNE_VMAF
#include "av1/encoder/tune_vmaf.h"
#endif

/*!\cond */
// This is used as a reference when computing the source variance for the
//  purposes of activity masking.
// Eventually this should be replaced by custom no-reference routines,
//  which will be faster.
static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

#if CONFIG_AV1_HIGHBITDEPTH
static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};

static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16
};
#endif  // CONFIG_AV1_HIGHBITDEPTH
/*!\endcond */

// For the given bit depth, returns a constant array used to assist the
// calculation of source block variance, which will then be used to decide
// adaptive quantizers.
static const uint8_t *get_var_offs(int use_hbd, int bd) {
#if CONFIG_AV1_HIGHBITDEPTH
  if (use_hbd) {
    assert(bd == 8 || bd == 10 || bd == 12);
    const int off_index = (bd - 8) >> 1;
    static const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
                                                AV1_HIGH_VAR_OFFS_10,
                                                AV1_HIGH_VAR_OFFS_12 };
    return CONVERT_TO_BYTEPTR(high_var_offs[off_index]);
  }
#else
  (void)use_hbd;
  (void)bd;
  assert(!use_hbd);
#endif
  assert(bd == 8);
  return AV1_VAR_OFFS;
}

void av1_init_rtc_counters(MACROBLOCK *const x) {
  av1_init_cyclic_refresh_counters(x);
  x->cnt_zeromv = 0;
}

void av1_accumulate_rtc_counters(AV1_COMP *cpi, const MACROBLOCK *const x) {
  if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ)
    av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh, x);
  cpi->rc.cnt_zeromv += x->cnt_zeromv;
}

unsigned int av1_get_perpixel_variance(const AV1_COMP *cpi,
                                       const MACROBLOCKD *xd,
                                       const struct buf_2d *ref,
                                       BLOCK_SIZE bsize, int plane,
                                       int use_hbd) {
  const int subsampling_x = xd->plane[plane].subsampling_x;
  const int subsampling_y = xd->plane[plane].subsampling_y;
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, subsampling_x, subsampling_y);
  unsigned int sse;
  const unsigned int var = cpi->ppi->fn_ptr[plane_bsize].vf(
      ref->buf, ref->stride, get_var_offs(use_hbd, xd->bd), 0, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[plane_bsize]);
}
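
// Editorial sketch, not part of libaom and disabled from the build: a scalar
// reference for the rounding done by ROUND_POWER_OF_TWO() above, assuming its
// usual round-to-nearest definition ((x) + (1 << ((n) - 1))) >> (n). E.g. a
// raw variance of 5000 on a 16x16 block (num_pels_log2 = 8) yields a
// per-pixel variance of (5000 + 128) >> 8 = 20.
#if 0
static unsigned int per_pixel_variance_example(unsigned int raw_var,
                                               int num_pels_log2) {
  return (raw_var + (1u << (num_pels_log2 - 1))) >> num_pels_log2;
}
#endif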

unsigned int av1_get_perpixel_variance_facade(const AV1_COMP *cpi,
                                              const MACROBLOCKD *xd,
                                              const struct buf_2d *ref,
                                              BLOCK_SIZE bsize, int plane) {
  const int use_hbd = is_cur_buf_hbd(xd);
  return av1_get_perpixel_variance(cpi, xd, ref, bsize, plane, use_hbd);
}

void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col, const int num_planes,
                          BLOCK_SIZE bsize) {
  // Set current frame pointer.
  x->e_mbd.cur_buf = src;

  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
  // the static analysis warnings.
  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
    const int is_uv = i > 0;
    setup_pred_plane(
        &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv],
        src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL,
        x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y);
  }
}

#if !CONFIG_REALTIME_ONLY
/*!\brief Assigns different quantization parameters to each super
 * block based on its TPL weight.
 *
 * \ingroup tpl_modelling
 *
 * \param[in]     cpi         Top level encoder instance structure
 * \param[in,out] td          Thread data structure
 * \param[in,out] x           Macro block level data for this block.
 * \param[in]     tile_info   Tile information / identification
 * \param[in]     mi_row      Block row (in "MI_SIZE" units) index
 * \param[in]     mi_col      Block column (in "MI_SIZE" units) index
 * \param[in]     num_planes  Number of image planes (e.g. Y,U,V)
 *
 * \remark No return value but updates macroblock and thread data
 * related to the q / q delta to be used.
 */
static inline void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
                                 MACROBLOCK *const x,
                                 const TileInfo *const tile_info, int mi_row,
                                 int mi_col, int num_planes) {
  AV1_COMMON *const cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  assert(delta_q_info->delta_q_present_flag);

  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  // Delta-q modulation based on variance
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);

  const int delta_q_res = delta_q_info->delta_q_res;
  int current_qindex = cm->quant_params.base_qindex;
  if (cpi->use_ducky_encode && cpi->ducky_encode_info.frame_info.qp_mode ==
                                   DUCKY_ENCODE_FRAME_MODE_QINDEX) {
    const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
    const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
    const int sb_cols =
        CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, cm->seq_params->mib_size_log2);
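    // Editorial example (hypothetical numbers): CEIL_POWER_OF_TWO() above is
    // a ceiling division by the superblock width in MI units, e.g.
    // mi_cols = 100 with mib_size_log2 = 5 gives ceil(100 / 32) = 4
    // superblock columns, so sb_index below is the row-major superblock index
    // within the frame.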
    const int sb_index = sb_row * sb_cols + sb_col;
    current_qindex =
        cpi->ducky_encode_info.frame_info.superblock_encode_qindex[sb_index];
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL) {
    if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
      const int block_wavelet_energy_level =
          av1_block_wavelet_energy_level(cpi, x, sb_size);
      x->sb_energy_level = block_wavelet_energy_level;
      current_qindex = av1_compute_q_from_energy_level_deltaq_mode(
          cpi, block_wavelet_energy_level);
    } else {
      const int block_var_level = av1_log_block_var(cpi, x, sb_size);
      x->sb_energy_level = block_var_level;
      current_qindex =
          av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level);
    }
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE &&
             cpi->oxcf.algo_cfg.enable_tpl_model) {
    // Setup deltaq based on tpl stats
    current_qindex =
        av1_get_q_for_deltaq_objective(cpi, td, NULL, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL_AI) {
    current_qindex = av1_get_sbq_perceptual_ai(cpi, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) {
    current_qindex = av1_get_sbq_user_rating_based(cpi, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.enable_hdr_deltaq) {
    current_qindex = av1_get_q_for_hdr(cpi, x, sb_size, mi_row, mi_col);
  }

  x->rdmult_cur_qindex = current_qindex;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int adjusted_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);
  if (cpi->use_ducky_encode) {
    assert(adjusted_qindex == current_qindex);
  }
  current_qindex = adjusted_qindex;

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
  x->rdmult_delta_qindex = x->delta_qindex;

  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);

  // keep track of any non-zero delta-q used
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;
    // Pre-set the delta lf for the loop filter. Note that this value is set
    // before mi is assigned for each block in the current superblock.
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }
}
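
// Editorial sketch, not part of libaom and disabled from the build: the
// delta-lf derivation above rounds x->delta_qindex / 4 to the nearest
// multiple of delta_lf_res (assumed to be a power of two, as the mask trick
// requires). E.g. with delta_lf_res = 4: delta_qindex = 14 gives
// 14 / 4 + 2 = 5 and 5 & ~3 = 4, while delta_qindex = -14 gives
// -14 / 4 + 2 = -1 and -1 & ~3 = -4 in two's complement.
#if 0
static int delta_lf_from_base_example(int delta_qindex, int delta_lf_res) {
  const int lfmask = ~(delta_lf_res - 1);
  return (delta_qindex / 4 + delta_lf_res / 2) & lfmask;
}
#endif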

static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
                                 int mi_col) {
  const AV1_COMMON *cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  MACROBLOCK *x = &td->mb;
  const int frame_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;

  av1_zero(x->tpl_keep_ref_frame);

  if (!av1_tpl_stats_ready(tpl_data, frame_idx)) return;
  if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return;
  if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;

  const int is_overlay =
      cpi->ppi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
  if (is_overlay) {
    memset(x->tpl_keep_ref_frame, 1, sizeof(x->tpl_keep_ref_frame));
    return;
  }

  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int tpl_stride = tpl_frame->stride;
  int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
  const int step = 1 << block_mis_log2;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;

  const int mi_row_end =
      AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      AOMMIN(coded_to_superres_mi(mi_col + mi_size_wide[sb_size],
                                  cm->superres_scale_denominator),
             mi_cols_sr);
  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < mi_row_end; row += row_step) {
    for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
      const TplDepStats *this_stats =
          &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
      int64_t tpl_pred_error[INTER_REFS_PER_FRAME] = { 0 };
      // Find the winner ref frame idx for the current block
      int64_t best_inter_cost = this_stats->pred_error[0];
      int best_rf_idx = 0;
      for (int idx = 1; idx < INTER_REFS_PER_FRAME; ++idx) {
        if ((this_stats->pred_error[idx] < best_inter_cost) &&
            (this_stats->pred_error[idx] != 0)) {
          best_inter_cost = this_stats->pred_error[idx];
          best_rf_idx = idx;
        }
      }
      // tpl_pred_error is the pred_error reduction of best_ref w.r.t.
      // LAST_FRAME.
      tpl_pred_error[best_rf_idx] = this_stats->pred_error[best_rf_idx] -
                                    this_stats->pred_error[LAST_FRAME - 1];

      for (int rf_idx = 1; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx)
        inter_cost[rf_idx] += tpl_pred_error[rf_idx];
    }
  }

  int rank_index[INTER_REFS_PER_FRAME - 1];
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
    rank_index[idx] = idx + 1;
    for (int i = idx; i > 0; --i) {
      if (inter_cost[rank_index[i - 1]] > inter_cost[rank_index[i]]) {
        const int tmp = rank_index[i - 1];
        rank_index[i - 1] = rank_index[i];
        rank_index[i] = tmp;
      }
    }
  }
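  // Editorial note (hypothetical numbers, reduced to three inter refs for
  // brevity): the loop above is an insertion sort of the inter ref indices
  // 1..INTER_REFS_PER_FRAME-1 by ascending inter_cost. With
  // inter_cost = { 0, -900, -100, -500 }, rank_index would go from
  // { 1, 2, 3 } to { 1, 3, 2 }: the most beneficial reference (most negative
  // accumulated cost) first.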

  x->tpl_keep_ref_frame[INTRA_FRAME] = 1;
  x->tpl_keep_ref_frame[LAST_FRAME] = 1;

  int cutoff_ref = 0;
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
    x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 1;
    if (idx > 2) {
      if (!cutoff_ref) {
        // If the predictive coding gain of this frame is smaller than that of
        // the previous, more relevant frame by a certain amount, discard this
        // frame and all the frames after it.
        if (llabs(inter_cost[rank_index[idx]]) <
                llabs(inter_cost[rank_index[idx - 1]]) / 8 ||
            inter_cost[rank_index[idx]] == 0)
          cutoff_ref = 1;
      }

      if (cutoff_ref) x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 0;
    }
  }
}

static inline void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
                                           int mi_row, int mi_col) {
  const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
  const int orig_rdmult = cpi->rd.RDMULT;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int gf_group_index = cpi->gf_frame_index;
  if (cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ &&
      cpi->oxcf.q_cfg.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 &&
      cpi->ppi->gf_group.update_type[gf_group_index] == ARF_UPDATE) {
    const int dr =
        av1_get_rdmult_delta(cpi, sb_size, mi_row, mi_col, orig_rdmult);
    x->rdmult = dr;
  }
}
#endif  // !CONFIG_REALTIME_ONLY

#if CONFIG_RT_ML_PARTITIONING
// Get a prediction (stored in x->est_pred) for the whole superblock.
static void get_estimated_pred(AV1_COMP *cpi, const TileInfo *const tile,
                               MACROBLOCK *x, int mi_row, int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);
  MACROBLOCKD *xd = &x->e_mbd;

  // TODO(kyslov) Extend to 128x128
  assert(cm->seq_params->sb_size == BLOCK_64X64);

  av1_set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);

  if (!is_key_frame) {
    MB_MODE_INFO *mi = xd->mi[0];
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);

    assert(yv12 != NULL);

    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                         get_ref_scale_factors(cm, LAST_FRAME), 1);
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE;
    mi->bsize = BLOCK_64X64;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);

    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);

    xd->plane[0].dst.buf = x->est_pred;
    xd->plane[0].dst.stride = 64;
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
  } else {
#if CONFIG_AV1_HIGHBITDEPTH
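    // Editorial note: memset() stores its fill value modulo 256 into each
    // byte, so the 128 * 4 and 128 * 16 arguments below write 0 into every
    // byte rather than the 10-/12-bit mid-range sample values they suggest.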
    switch (xd->bd) {
      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
      case 10:
        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
        break;
      case 12:
        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
        break;
    }
#else
    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
#endif  // CONFIG_AV1_HIGHBITDEPTH
  }
}
#endif  // CONFIG_RT_ML_PARTITIONING

#define AVG_CDF_WEIGHT_LEFT 3
#define AVG_CDF_WEIGHT_TOP_RIGHT 1
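
// Editorial note: these weights are used when averaging CDF contexts for
// row-multithreaded encoding (see encode_sb_row() below); the context carried
// over from the left superblock is weighted 3:1 against the saved top-right
// row context.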

/*!\brief Encode a superblock (minimal RD search involved)
 *
 * \ingroup partition_search
 * Encodes the superblock with a pre-determined partition pattern; only minor
 * rd-based searches are allowed to adjust the initial pattern. It is only
 * used by realtime encoding.
 */
static inline void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
                                   TileDataEnc *tile_data, TokenExtra **tp,
                                   const int mi_row, const int mi_col,
                                   const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  PC_TREE *const pc_root = td->pc_root;

#if CONFIG_RT_ML_PARTITIONING
  if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) {
    RD_STATS dummy_rdc;
    get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
    av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, pc_root);
    return;
  }
#endif
  // Set the partition
  if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
      (sf->rt_sf.use_fast_fixed_part && x->sb_force_fixed_part == 1 &&
       (!frame_is_intra_only(cm) &&
        (!cpi->ppi->use_svc ||
         !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)))) {
    // set a fixed-size partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    BLOCK_SIZE bsize_select = sf->part_sf.fixed_partition_size;
    if (sf->rt_sf.use_fast_fixed_part &&
        x->content_state_sb.source_sad_nonrd < kLowSad) {
      bsize_select = cm->seq_params->sb_size;
    }
    const BLOCK_SIZE bsize = seg_skip ? sb_size : bsize_select;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
  } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // set a variance-based partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
  }
  assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
         sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
  set_cb_offsets(td->mb.cb_offset, 0, 0);

  // Initialize the flag to skip cdef to 1.
  if (sf->rt_sf.skip_cdef_sb) {
    const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
    // If 128x128 block is used, we need to set the flag for all 4 64x64 sub
    // "blocks".
    for (int r = 0; r < block64_in_sb; ++r) {
      for (int c = 0; c < block64_in_sb; ++c) {
        const int idx_in_sb =
            r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
        if (mi[idx_in_sb]) mi[idx_in_sb]->cdef_strength = 1;
      }
    }
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, nonrd_use_partition_time);
#endif
  av1_nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                          pc_root);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, nonrd_use_partition_time);
#endif
}

// This function initializes the stats for encode_rd_sb.
static inline void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                     const TileDataEnc *tile_data,
                                     SIMPLE_MOTION_DATA_TREE *sms_root,
                                     RD_STATS *rd_cost, int mi_row, int mi_col,
                                     int gather_tpl_data) {
  const AV1_COMMON *cm = &cpi->common;
  const TileInfo *tile_info = &tile_data->tile_info;
  MACROBLOCK *x = &td->mb;

  const SPEED_FEATURES *sf = &cpi->sf;
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_root,
                                             mi_row, mi_col);
  }

#if !CONFIG_REALTIME_ONLY
  if (!(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
        cpi->oxcf.gf_cfg.lag_in_frames == 0)) {
    init_ref_frame_space(cpi, td, mi_row, mi_col);
    x->sb_energy_level = 0;
    x->part_search_info.cnn_output_valid = 0;
    if (gather_tpl_data) {
      if (cm->delta_q_info.delta_q_present_flag) {
        const int num_planes = av1_num_planes(cm);
        const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
        setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
        av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
      }

      // TODO(jingning): revisit this function.
      if (cpi->oxcf.algo_cfg.enable_tpl_model && (0)) {
        adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
      }
    }
  }
#else
  (void)tile_info;
  (void)mi_row;
  (void)mi_col;
  (void)gather_tpl_data;
#endif

  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}

#if !CONFIG_REALTIME_ONLY
static void sb_qp_sweep_init_quantizers(AV1_COMP *cpi, ThreadData *td,
                                        const TileDataEnc *tile_data,
                                        SIMPLE_MOTION_DATA_TREE *sms_tree,
                                        RD_STATS *rd_cost, int mi_row,
                                        int mi_col, int delta_qp_ofs) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const TileInfo *tile_info = &tile_data->tile_info;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  assert(delta_q_info->delta_q_present_flag);
  const int delta_q_res = delta_q_info->delta_q_res;

  const SPEED_FEATURES *sf = &cpi->sf;
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_tree,
                                             mi_row, mi_col);
  }

  int current_qindex = x->rdmult_cur_qindex + delta_qp_ofs;

  MACROBLOCKD *const xd = &x->e_mbd;
  current_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;

  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);

  // keep track of any non-zero delta-q used
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // Pre-set the delta lf for the loop filter. Note that this value is set
    // before mi is assigned for each block in the current superblock.
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }

  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}

static int sb_qp_sweep(AV1_COMP *const cpi, ThreadData *td,
                       TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
                       int mi_col, BLOCK_SIZE bsize,
                       SIMPLE_MOTION_DATA_TREE *sms_tree,
                       SB_FIRST_PASS_STATS *sb_org_stats) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  RD_STATS rdc_winner, cur_rdc;
  av1_invalid_rd_stats(&rdc_winner);

  int best_qindex = td->mb.rdmult_delta_qindex;
  const int start = cm->current_frame.frame_type == KEY_FRAME ? -20 : -12;
  const int end = cm->current_frame.frame_type == KEY_FRAME ? 20 : 12;
  const int step = cm->delta_q_info.delta_q_res;

  for (int sweep_qp_delta = start; sweep_qp_delta <= end;
       sweep_qp_delta += step) {
    sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_tree, &cur_rdc, mi_row,
                                mi_col, sweep_qp_delta);

    const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
    const int backup_current_qindex =
        cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

    av1_reset_mbmi(&cm->mi_params, bsize, mi_row, mi_col);
    av1_restore_sb_state(sb_org_stats, cpi, td, tile_data, mi_row, mi_col);
    cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex = backup_current_qindex;

    td->pc_root = av1_alloc_pc_tree_node(bsize);
    if (!td->pc_root)
      aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize,
                          &cur_rdc, cur_rdc, td->pc_root, sms_tree, NULL,
                          SB_DRY_PASS, NULL);

    if ((rdc_winner.rdcost > cur_rdc.rdcost) ||
        (abs(sweep_qp_delta) < abs(best_qindex - x->rdmult_delta_qindex) &&
         rdc_winner.rdcost == cur_rdc.rdcost)) {
      rdc_winner = cur_rdc;
      best_qindex = x->rdmult_delta_qindex + sweep_qp_delta;
    }
  }

  return best_qindex;
}
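
// Editorial note (hypothetical numbers): the sweep above evaluates qindex
// offsets from start to end in steps of delta_q_res; e.g. with
// delta_q_res = 4 that is 11 candidates (-20, -16, ..., 20) on key frames and
// 7 candidates (-12, -8, ..., 12) otherwise, keeping the offset with the
// lowest rdcost and preferring the smaller |offset| on rdcost ties.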
#endif  // !CONFIG_REALTIME_ONLY

/*!\brief Encode a superblock (RD-search-based)
 *
 * \ingroup partition_search
 * Conducts partition search for a superblock, based on rate-distortion costs,
 * from scratch or adjusting from a pre-calculated partition pattern.
 */
static inline void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, TokenExtra **tp,
                                const int mi_row, const int mi_col,
                                const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int num_planes = av1_num_planes(cm);
  int dummy_rate;
  int64_t dummy_dist;
  RD_STATS dummy_rdc;
  SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root;

#if CONFIG_REALTIME_ONLY
  (void)seg_skip;
#endif  // CONFIG_REALTIME_ONLY

  init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
                    1);

  // Encode the superblock
  if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // partition search starting from a variance-based partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_use_partition_time);
#endif
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_use_partition_time);
#endif
  }
#if !CONFIG_REALTIME_ONLY
  else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
    // partition search by adjusting a fixed-size partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    const BLOCK_SIZE bsize =
        seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
  } else {
    // The most exhaustive recursive partition search
    SuperBlockEnc *sb_enc = &x->sb_enc;
    // No stats for overlay frames. Exclude key frame.
    av1_get_tpl_stats_sb(cpi, sb_size, mi_row, mi_col, sb_enc);

    // Reset the tree for simple motion search data
    av1_reset_simple_motion_tree_partition(sms_root, sb_size);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_pick_partition_time);
#endif

    // Estimate the maximum square partition block size, which will be used
    // as the starting block size for partitioning the sb
    set_max_min_partition_size(sb_enc, cpi, x, sf, sb_size, mi_row, mi_col);

    // The superblock can be searched only once, or twice consecutively for
    // better quality. Note that the meaning of passes here is different from
    // the general concept of 1-pass/2-pass encoders.
    const int num_passes =
        cpi->oxcf.unit_test_cfg.sb_multipass_unit_test ? 2 : 1;

    if (cpi->oxcf.sb_qp_sweep &&
        !(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
          cpi->oxcf.gf_cfg.lag_in_frames == 0) &&
        cm->delta_q_info.delta_q_present_flag) {
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_stats_cache,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_stats_cache)));
      av1_backup_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                          mi_col);
      assert(x->rdmult_delta_qindex == x->delta_qindex);

      const int best_qp_diff =
          sb_qp_sweep(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, sms_root,
                      td->mb.sb_stats_cache) -
          x->rdmult_delta_qindex;

      sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_root, &dummy_rdc,
                                  mi_row, mi_col, best_qp_diff);

      const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
      const int backup_current_qindex =
          cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_restore_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                           mi_col);

      cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex =
          backup_current_qindex;
      aom_free(td->mb.sb_stats_cache);
      td->mb.sb_stats_cache = NULL;
    }
    if (num_passes == 1) {
#if CONFIG_PARTITION_SEARCH_ORDER
      if (cpi->ext_part_controller.ready && !frame_is_intra_only(cm)) {
        av1_reset_part_sf(&cpi->sf.part_sf);
        av1_reset_sf_for_ext_part(cpi);
        RD_STATS this_rdc;
        av1_rd_partition_search(cpi, td, tile_data, tp, sms_root, mi_row,
                                mi_col, sb_size, &this_rdc);
      } else {
        td->pc_root = av1_alloc_pc_tree_node(sb_size);
        if (!td->pc_root)
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                             "Failed to allocate PC_TREE");
        av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                              &dummy_rdc, dummy_rdc, td->pc_root, sms_root,
                              NULL, SB_SINGLE_PASS, NULL);
      }
#else
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_SINGLE_PASS, NULL);
#endif  // CONFIG_PARTITION_SEARCH_ORDER
    } else {
      // First pass
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_fp_stats,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_fp_stats)));
      av1_backup_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                          mi_col);
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_DRY_PASS, NULL);

      // Second pass
      init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
                        mi_col, 0);
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_reset_simple_motion_tree_partition(sms_root, sb_size);

      av1_restore_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                           mi_col);

      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_WET_PASS, NULL);
      aom_free(td->mb.sb_fp_stats);
      td->mb.sb_fp_stats = NULL;
    }

    // Reset to 0 so that it wouldn't be used elsewhere mistakenly.
    sb_enc->tpl_data_count = 0;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_pick_partition_time);
#endif
  }
#endif  // !CONFIG_REALTIME_ONLY

  // Update the inter rd model
  // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
  if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
      cm->tiles.cols == 1 && cm->tiles.rows == 1) {
    av1_inter_mode_data_fit(tile_data, x->rdmult);
  }
}

// Check if the cost update frequencies of the mode, coeff and dv symbols are
// tile-level or off.
static inline int is_mode_coeff_dv_upd_freq_tile_or_off(
    const AV1_COMP *const cpi) {
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;

  return (inter_sf->coeff_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
          inter_sf->mode_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
          cpi->sf.intra_sf.dv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
}

// When row-mt is enabled and cost update frequencies are set to off/tile,
// processing of current SB can start even before processing of top-right SB
// is finished. This function checks if it is sufficient to wait for top SB
// to finish processing before current SB starts processing.
static inline int delay_wait_for_top_right_sb(const AV1_COMP *const cpi) {
  const MODE mode = cpi->oxcf.mode;
  if (mode == GOOD) return 0;

  if (mode == ALLINTRA)
    return is_mode_coeff_dv_upd_freq_tile_or_off(cpi);
  else if (mode == REALTIME)
    return (is_mode_coeff_dv_upd_freq_tile_or_off(cpi) &&
            cpi->sf.inter_sf.mv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
  else
    return 0;
}
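
// Editorial note: in encode_sb_row() the value returned above is subtracted
// from the superblock column passed to sync_read_ptr(), so returning 1
// relaxes the usual dependency on the top-right superblock of the previous
// row to a dependency on the top superblock only.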

/*!\brief Calculate source SAD at superblock level using 64x64 block source SAD
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
static inline uint64_t get_sb_source_sad(const AV1_COMP *cpi, int mi_row,
                                         int mi_col) {
  if (cpi->src_sad_blk_64x64 == NULL) return UINT64_MAX;

  const AV1_COMMON *const cm = &cpi->common;
  const int blk_64x64_in_mis = (cm->seq_params->sb_size == BLOCK_128X128)
                                   ? (cm->seq_params->mib_size >> 1)
                                   : cm->seq_params->mib_size;
  const int num_blk_64x64_cols =
      (cm->mi_params.mi_cols + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
  const int num_blk_64x64_rows =
      (cm->mi_params.mi_rows + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
  const int blk_64x64_col_index = mi_col / blk_64x64_in_mis;
  const int blk_64x64_row_index = mi_row / blk_64x64_in_mis;
  uint64_t curr_sb_sad = UINT64_MAX;
  // Avoid the border as sad_blk_64x64 may not be set for the border
  // in the scene detection.
  if ((blk_64x64_row_index >= num_blk_64x64_rows - 1) ||
      (blk_64x64_col_index >= num_blk_64x64_cols - 1)) {
    return curr_sb_sad;
  }
  const uint64_t *const src_sad_blk_64x64_data =
      &cpi->src_sad_blk_64x64[blk_64x64_col_index +
                              blk_64x64_row_index * num_blk_64x64_cols];
  if (cm->seq_params->sb_size == BLOCK_128X128) {
    // Calculate SB source SAD by accumulating source SAD of 64x64 blocks in the
    // superblock
    curr_sb_sad = src_sad_blk_64x64_data[0] + src_sad_blk_64x64_data[1] +
                  src_sad_blk_64x64_data[num_blk_64x64_cols] +
                  src_sad_blk_64x64_data[num_blk_64x64_cols + 1];
  } else if (cm->seq_params->sb_size == BLOCK_64X64) {
    curr_sb_sad = src_sad_blk_64x64_data[0];
  }
  return curr_sb_sad;
}
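
// Editorial example (hypothetical numbers): with a 128x128 superblock,
// blk_64x64_in_mis = mib_size >> 1 = 32 >> 1 = 16 MI units (64 pixels), so a
// superblock at mi_row = 32, mi_col = 48 maps to position (row 2, col 3) in
// the 64x64 SAD grid, and its SAD is the sum of the 2x2 group of entries
// starting there.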

/*!\brief Determine whether grading content can be skipped based on SAD stats
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
static inline bool is_calc_src_content_needed(AV1_COMP *cpi,
                                              MACROBLOCK *const x, int mi_row,
                                              int mi_col) {
  if (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
    return true;
  const uint64_t curr_sb_sad = get_sb_source_sad(cpi, mi_row, mi_col);
  if (curr_sb_sad == UINT64_MAX) return true;
  if (curr_sb_sad == 0) {
    x->content_state_sb.source_sad_nonrd = kZeroSad;
    return false;
  }
  AV1_COMMON *const cm = &cpi->common;
  bool do_calc_src_content = true;

  if (cpi->oxcf.speed < 9) return do_calc_src_content;

  // TODO(yunqing): Tune/validate the thresholds for 128x128 SB size.
  if (AOMMIN(cm->width, cm->height) < 360) {
    // Derive Average 64x64 block source SAD from SB source SAD
    const uint64_t avg_64x64_blk_sad =
        (cm->seq_params->sb_size == BLOCK_128X128) ? ((curr_sb_sad + 2) >> 2)
                                                   : curr_sb_sad;
    // The thresholds are determined based on the kLowSad and kHighSad
    // thresholds and test results.
    uint64_t thresh_low = 15000;
    uint64_t thresh_high = 40000;

    if (cpi->sf.rt_sf.increase_source_sad_thresh) {
      thresh_low = thresh_low << 1;
      thresh_high = thresh_high << 1;
    }

    if (avg_64x64_blk_sad > thresh_low && avg_64x64_blk_sad < thresh_high) {
      do_calc_src_content = false;
      // Note: set x->content_state_sb.source_sad_rd as well if this is extended
      // to RTC rd path.
      x->content_state_sb.source_sad_nonrd = kMedSad;
    }
  }

  return do_calc_src_content;
}
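
// Editorial sketch, not part of libaom and disabled from the build: the
// mid-SAD shortcut above, assuming the default thresholds. A 128x128
// superblock SAD of 100000 gives an average 64x64 SAD of
// (100000 + 2) >> 2 = 25000, which falls inside (15000, 40000), so the block
// is graded kMedSad without running av1_source_content_sb().
#if 0
static int is_mid_sad_example(uint64_t sb_sad, int is_128x128) {
  const uint64_t avg = is_128x128 ? ((sb_sad + 2) >> 2) : sb_sad;
  return avg > 15000 && avg < 40000;
}
#endif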

/*!\brief Determine whether grading content is needed based on sf and frame
 * stats
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
// TODO(any): consolidate sfs to make interface cleaner
static inline void grade_source_content_sb(AV1_COMP *cpi, MACROBLOCK *const x,
                                           TileDataEnc *tile_data, int mi_row,
                                           int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  if (cm->current_frame.frame_type == KEY_FRAME ||
      (cpi->ppi->use_svc &&
       cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
    assert(x->content_state_sb.source_sad_nonrd == kMedSad);
    assert(x->content_state_sb.source_sad_rd == kMedSad);
    return;
  }
  bool calc_src_content = false;

  if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
    if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0) {
      calc_src_content = is_calc_src_content_needed(cpi, x, mi_row, mi_col);
    } else {
      x->content_state_sb.source_sad_nonrd = kZeroSad;
    }
  } else if ((cpi->sf.rt_sf.var_part_based_on_qidx >= 1) &&
             (cm->width * cm->height <= 352 * 288)) {
    if (cpi->rc.frame_source_sad > 0)
      calc_src_content = true;
    else
      x->content_state_sb.source_sad_rd = kZeroSad;
  }
  if (calc_src_content)
    av1_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
}

/*!\brief Encode a superblock row by breaking it into superblocks
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 * Do partition and mode search for an sb row: one row of superblocks filling up
 * the width of the current tile.
 */
static inline void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
                                 TileDataEnc *tile_data, int mi_row,
                                 TokenExtra **tp) {
  AV1_COMMON *const cm = &cpi->common;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MultiThreadInfo *const mt_info = &cpi->mt_info;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
  AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
  bool row_mt_enabled = mt_info->row_mt_enabled;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int mib_size = cm->seq_params->mib_size;
  const int mib_size_log2 = cm->seq_params->mib_size_log2;
  const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
  const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, encode_sb_row_time);
#endif

  // Initialize the left context for the new SB row
  av1_zero_left_context(xd);

  // Reset delta for quantizer and loop filters at the beginning of every tile
1153   if (mi_row == tile_info->mi_row_start || row_mt_enabled) {
1154     if (cm->delta_q_info.delta_q_present_flag)
1155       xd->current_base_qindex = cm->quant_params.base_qindex;
1156     if (cm->delta_q_info.delta_lf_present_flag) {
1157       av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
1158     }
1159   }
1160 
1161   reset_thresh_freq_fact(x);
1162 
1163   // Code each SB in the row
1164   for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
1165        mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
1166     // In realtime/allintra mode and when frequency of cost updates is off/tile,
1167     // wait for the top superblock to finish encoding. Otherwise, wait for the
1168     // top-right superblock to finish encoding.
1169     enc_row_mt->sync_read_ptr(
1170         row_mt_sync, sb_row, sb_col_in_tile - delay_wait_for_top_right_sb(cpi));
1171 
1172 #if CONFIG_MULTITHREAD
1173     if (row_mt_enabled) {
1174       pthread_mutex_lock(enc_row_mt->mutex_);
1175       const bool row_mt_exit = enc_row_mt->row_mt_exit;
1176       pthread_mutex_unlock(enc_row_mt->mutex_);
1177       // Exit in case any worker has encountered an error.
1178       if (row_mt_exit) return;
1179     }
1180 #endif
1181 
1182     const int update_cdf = tile_data->allow_update_cdf && row_mt_enabled;
1183     if (update_cdf && (tile_info->mi_row_start != mi_row)) {
1184       if (tile_info->mi_col_start == mi_col) {
1185         // restore frame context at the 1st column sb
1186         memcpy(xd->tile_ctx, x->row_ctx, sizeof(*xd->tile_ctx));
1187       } else {
1188         // update context
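        // av1_avg_cdf_symbols() blends the two contexts element-wise,
        // roughly cdf = (wt_left * cdf_left + wt_tr * cdf_tr) /
        // (wt_left + wt_tr), where cdf_left is this row's running context
        // (xd->tile_ctx) and cdf_tr is the snapshot saved by the row above
        // (x->row_ctx).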
1189         int wt_left = AVG_CDF_WEIGHT_LEFT;
1190         int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
1191         if (tile_info->mi_col_end > (mi_col + mib_size))
1192           av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile,
1193                               wt_left, wt_tr);
1194         else
1195           av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
1196                               wt_left, wt_tr);
1197       }
1198     }
1199 
1200     // Update the rate cost tables for some symbols
1201     av1_set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);
1202 
1203     // Reset color coding related parameters
1204     av1_zero(x->color_sensitivity_sb);
1205     av1_zero(x->color_sensitivity_sb_g);
1206     av1_zero(x->color_sensitivity_sb_alt);
1207     av1_zero(x->color_sensitivity);
1208     x->content_state_sb.source_sad_nonrd = kMedSad;
1209     x->content_state_sb.source_sad_rd = kMedSad;
1210     x->content_state_sb.lighting_change = 0;
1211     x->content_state_sb.low_sumdiff = 0;
1212     x->force_zeromv_skip_for_sb = 0;
1213     x->sb_me_block = 0;
1214     x->sb_me_partition = 0;
1215     x->sb_me_mv.as_int = 0;
1216     x->sb_force_fixed_part = 1;
1217     x->color_palette_thresh = 64;
1218     x->nonrd_prune_ref_frame_search =
1219         cpi->sf.rt_sf.nonrd_prune_ref_frame_search;
1220 
1221     if (cpi->oxcf.mode == ALLINTRA) {
1222       x->intra_sb_rdmult_modifier = 128;
1223     }
1224 
1225     xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
1226     x->source_variance = UINT_MAX;
1227     td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);
1228 
1229     // Get segment id and skip flag
1230     const struct segmentation *const seg = &cm->seg;
1231     int seg_skip = 0;
1232     if (seg->enabled) {
1233       const uint8_t *const map =
1234           seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
1235       const uint8_t segment_id =
1236           map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
1237               : 0;
1238       seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
1239     }
1240 
1241     produce_gradients_for_sb(cpi, x, sb_size, mi_row, mi_col);
1242 
1243     init_src_var_info_of_4x4_sub_blocks(cpi, x->src_var_info_of_4x4_sub_blocks,
1244                                         sb_size);
1245 
1246     // Grade the temporal variation of the SB; the grade will be used to decide
1247     // the fast mode-search strategy for coding blocks.
1248     if (!seg_skip) grade_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
1249 
1250     // encode the superblock
1251     if (use_nonrd_mode) {
1252       encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
1253     } else {
1254       encode_rd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
1255     }
1256 
1257     // Update the top-right context in row_mt coding
1258     if (update_cdf && (tile_info->mi_row_end > (mi_row + mib_size))) {
1259       if (sb_cols_in_tile == 1)
1260         memcpy(x->row_ctx, xd->tile_ctx, sizeof(*xd->tile_ctx));
1261       else if (sb_col_in_tile >= 1)
1262         memcpy(x->row_ctx + sb_col_in_tile - 1, xd->tile_ctx,
1263                sizeof(*xd->tile_ctx));
1264     }
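    // The snapshot saved above (at index sb_col_in_tile - 1) is what the next
    // SB row reads back as its top-right context in the averaging step at the
    // start of this loop.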
1265     enc_row_mt->sync_write_ptr(row_mt_sync, sb_row, sb_col_in_tile,
1266                                sb_cols_in_tile);
1267   }
1268 
1269 #if CONFIG_COLLECT_COMPONENT_TIMING
1270   end_timing(cpi, encode_sb_row_time);
1271 #endif
1272 }
1273 
1274 static inline void init_encode_frame_mb_context(AV1_COMP *cpi) {
1275   AV1_COMMON *const cm = &cpi->common;
1276   const int num_planes = av1_num_planes(cm);
1277   MACROBLOCK *const x = &cpi->td.mb;
1278   MACROBLOCKD *const xd = &x->e_mbd;
1279 
1280   // Copy data over into macro block data structures.
1281   av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
1282                        cm->seq_params->sb_size);
1283 
1284   av1_setup_block_planes(xd, cm->seq_params->subsampling_x,
1285                          cm->seq_params->subsampling_y, num_planes);
1286 }
1287 
1288 void av1_alloc_tile_data(AV1_COMP *cpi) {
1289   AV1_COMMON *const cm = &cpi->common;
1290   AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt;
1291   const int tile_cols = cm->tiles.cols;
1292   const int tile_rows = cm->tiles.rows;
1293 
1294   av1_row_mt_mem_dealloc(cpi);
1295 
1296   aom_free(cpi->tile_data);
1297   cpi->allocated_tiles = 0;
1298   enc_row_mt->allocated_tile_cols = 0;
1299   enc_row_mt->allocated_tile_rows = 0;
1300 
1301   CHECK_MEM_ERROR(
1302       cm, cpi->tile_data,
1303       aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));
1304 
1305   cpi->allocated_tiles = tile_cols * tile_rows;
1306   enc_row_mt->allocated_tile_cols = tile_cols;
1307   enc_row_mt->allocated_tile_rows = tile_rows;
1308   for (int tile_row = 0; tile_row < tile_rows; ++tile_row) {
1309     for (int tile_col = 0; tile_col < tile_cols; ++tile_col) {
1310       const int tile_index = tile_row * tile_cols + tile_col;
1311       TileDataEnc *const this_tile = &cpi->tile_data[tile_index];
1312       av1_zero(this_tile->row_mt_sync);
1313       this_tile->row_ctx = NULL;
1314     }
1315   }
1316 }
1317 
1318 void av1_init_tile_data(AV1_COMP *cpi) {
1319   AV1_COMMON *const cm = &cpi->common;
1320   const int num_planes = av1_num_planes(cm);
1321   const int tile_cols = cm->tiles.cols;
1322   const int tile_rows = cm->tiles.rows;
1323   int tile_col, tile_row;
1324   TokenInfo *const token_info = &cpi->token_info;
1325   TokenExtra *pre_tok = token_info->tile_tok[0][0];
1326   TokenList *tplist = token_info->tplist[0][0];
1327   unsigned int tile_tok = 0;
1328   int tplist_count = 0;
1329 
1330   if (!is_stat_generation_stage(cpi) &&
1331       cm->features.allow_screen_content_tools) {
1332     // Number of tokens for which token info needs to be allocated.
1333     unsigned int tokens_required =
1334         get_token_alloc(cm->mi_params.mb_rows, cm->mi_params.mb_cols,
1335                         MAX_SB_SIZE_LOG2, num_planes);
1336     // Allocate/reallocate memory for token related info if the number of tokens
1337     // required is more than the number of tokens already allocated. This could
1338     // occur in case of the following:
1339     // 1) If the memory is not yet allocated
1340     // 2) If the frame dimensions have changed
1341     const bool realloc_tokens = tokens_required > token_info->tokens_allocated;
1342     if (realloc_tokens) {
1343       free_token_info(token_info);
1344       alloc_token_info(cm, token_info, tokens_required);
1345       pre_tok = token_info->tile_tok[0][0];
1346       tplist = token_info->tplist[0][0];
1347     }
1348   }
1349 
1350   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1351     for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1352       TileDataEnc *const tile_data =
1353           &cpi->tile_data[tile_row * tile_cols + tile_col];
1354       TileInfo *const tile_info = &tile_data->tile_info;
1355       av1_tile_init(tile_info, cm, tile_row, tile_col);
1356       tile_data->firstpass_top_mv = kZeroMv;
1357       tile_data->abs_sum_level = 0;
1358 
1359       if (is_token_info_allocated(token_info)) {
1360         token_info->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
1361         pre_tok = token_info->tile_tok[tile_row][tile_col];
1362         tile_tok = allocated_tokens(
1363             tile_info, cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1364             num_planes);
1365         token_info->tplist[tile_row][tile_col] = tplist + tplist_count;
1366         tplist = token_info->tplist[tile_row][tile_col];
1367         tplist_count = av1_get_sb_rows_in_tile(cm, tile_info);
1368       }
1369       tile_data->allow_update_cdf = !cm->tiles.large_scale;
1370       tile_data->allow_update_cdf = tile_data->allow_update_cdf &&
1371                                     !cm->features.disable_cdf_update &&
1372                                     !delay_wait_for_top_right_sb(cpi);
1373       tile_data->tctx = *cm->fc;
1374     }
1375   }
1376 }
1377 
1378 // Populate the start palette token info prior to encoding an SB row.
1379 static inline void get_token_start(AV1_COMP *cpi, const TileInfo *tile_info,
1380                                    int tile_row, int tile_col, int mi_row,
1381                                    TokenExtra **tp) {
1382   const TokenInfo *token_info = &cpi->token_info;
1383   if (!is_token_info_allocated(token_info)) return;
1384 
1385   const AV1_COMMON *cm = &cpi->common;
1386   const int num_planes = av1_num_planes(cm);
1387   TokenList *const tplist = cpi->token_info.tplist[tile_row][tile_col];
1388   const int sb_row_in_tile =
1389       (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1390 
1391   get_start_tok(cpi, tile_row, tile_col, mi_row, tp,
1392                 cm->seq_params->mib_size_log2 + MI_SIZE_LOG2, num_planes);
1393   assert(tplist != NULL);
1394   tplist[sb_row_in_tile].start = *tp;
1395 }
1396 
1397 // Populate the token count after encoding an SB row.
1398 static inline void populate_token_count(AV1_COMP *cpi,
1399                                         const TileInfo *tile_info, int tile_row,
1400                                         int tile_col, int mi_row,
1401                                         TokenExtra *tok) {
1402   const TokenInfo *token_info = &cpi->token_info;
1403   if (!is_token_info_allocated(token_info)) return;
1404 
1405   const AV1_COMMON *cm = &cpi->common;
1406   const int num_planes = av1_num_planes(cm);
1407   TokenList *const tplist = token_info->tplist[tile_row][tile_col];
1408   const int sb_row_in_tile =
1409       (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
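  // Unit conversions for the assertion below: an mi unit is 4 pixels and an
  // MB is 16 pixels, so (mi_width + 2) >> 2 converts the tile width from mi
  // units to MB units with rounding, and (sb_pixels + 8) >> 4 converts the SB
  // size in pixels to MB rows (e.g. a 128-pixel SB gives (128 + 8) >> 4 = 8).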
1410   const int tile_mb_cols =
1411       (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
1412   const int num_mb_rows_in_sb =
1413       ((1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
1414   tplist[sb_row_in_tile].count =
1415       (unsigned int)(tok - tplist[sb_row_in_tile].start);
1416 
1417   assert((unsigned int)(tok - tplist[sb_row_in_tile].start) <=
1418          get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
1419                          cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1420                          num_planes));
1421 
1422   (void)num_planes;
1423   (void)tile_mb_cols;
1424   (void)num_mb_rows_in_sb;
1425 }
1426 
1427 /*!\brief Encode a superblock row
1428  *
1429  * \ingroup partition_search
1430  */
1431 void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
1432                        int tile_col, int mi_row) {
1433   AV1_COMMON *const cm = &cpi->common;
1434   const int tile_cols = cm->tiles.cols;
1435   TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
1436   const TileInfo *const tile_info = &this_tile->tile_info;
1437   TokenExtra *tok = NULL;
1438 
1439   get_token_start(cpi, tile_info, tile_row, tile_col, mi_row, &tok);
1440 
1441   encode_sb_row(cpi, td, this_tile, mi_row, &tok);
1442 
1443   populate_token_count(cpi, tile_info, tile_row, tile_col, mi_row, tok);
1444 }
1445 
1446 /*!\brief Encode a tile
1447  *
1448  * \ingroup partition_search
1449  */
1450 void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
1451                      int tile_col) {
1452   AV1_COMMON *const cm = &cpi->common;
1453   TileDataEnc *const this_tile =
1454       &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1455   const TileInfo *const tile_info = &this_tile->tile_info;
1456 
1457   if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile);
1458 
1459   av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
1460                          tile_info->mi_col_end, tile_row);
1461   av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
1462                          &td->mb.e_mbd);
1463 
1464 #if !CONFIG_REALTIME_ONLY
1465   if (cpi->oxcf.intra_mode_cfg.enable_cfl_intra)
1466     cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);
1467 #endif
1468 
1469   if (td->mb.txfm_search_info.mb_rd_record != NULL) {
1470     av1_crc32c_calculator_init(
1471         &td->mb.txfm_search_info.mb_rd_record->crc_calculator);
1472   }
1473 
1474   for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
1475        mi_row += cm->seq_params->mib_size) {
1476     av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
1477   }
1478   this_tile->abs_sum_level = td->abs_sum_level;
1479 }
1480 
1481 /*!\brief Break one frame into tiles and encode the tiles
1482  *
1483  * \ingroup partition_search
1484  *
1485  * \param[in]    cpi    Top-level encoder structure
1486  */
1487 static inline void encode_tiles(AV1_COMP *cpi) {
1488   AV1_COMMON *const cm = &cpi->common;
1489   const int tile_cols = cm->tiles.cols;
1490   const int tile_rows = cm->tiles.rows;
1491   int tile_col, tile_row;
1492 
1493   MACROBLOCK *const mb = &cpi->td.mb;
1494   assert(IMPLIES(cpi->tile_data == NULL,
1495                  cpi->allocated_tiles < tile_cols * tile_rows));
1496   if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi);
1497 
1498   av1_init_tile_data(cpi);
1499   av1_alloc_mb_data(cpi, mb);
1500 
1501   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1502     for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1503       TileDataEnc *const this_tile =
1504           &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1505       cpi->td.intrabc_used = 0;
1506       cpi->td.deltaq_used = 0;
1507       cpi->td.abs_sum_level = 0;
1508       cpi->td.rd_counts.seg_tmp_pred_cost[0] = 0;
1509       cpi->td.rd_counts.seg_tmp_pred_cost[1] = 0;
1510       cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
1511       cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
1512       av1_init_rtc_counters(&cpi->td.mb);
1513       cpi->td.mb.palette_pixels = 0;
1514       av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
1515       if (!frame_is_intra_only(&cpi->common))
1516         av1_accumulate_rtc_counters(cpi, &cpi->td.mb);
1517       cpi->palette_pixel_num += cpi->td.mb.palette_pixels;
1518       cpi->intrabc_used |= cpi->td.intrabc_used;
1519       cpi->deltaq_used |= cpi->td.deltaq_used;
1520     }
1521   }
1522 
1523   av1_dealloc_mb_data(mb, av1_num_planes(cm));
1524 }
1525 
1526 // Set the relative distance of a reference frame w.r.t. the current frame.
1527 static inline void set_rel_frame_dist(
1528     const AV1_COMMON *const cm, RefFrameDistanceInfo *const ref_frame_dist_info,
1529     const int ref_frame_flags) {
1530   MV_REFERENCE_FRAME ref_frame;
1531   int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX;
1532   ref_frame_dist_info->nearest_past_ref = NONE_FRAME;
1533   ref_frame_dist_info->nearest_future_ref = NONE_FRAME;
1534   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
1535     ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = 0;
1536     if (ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
1537       int dist = av1_encoder_get_relative_dist(
1538           cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME],
1539           cm->current_frame.display_order_hint);
1540       ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = dist;
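      // Sign convention: dist < 0 means the reference precedes the current
      // frame in display order (a past reference); dist > 0 means it follows
      // (a future reference).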
1541       // Get the nearest ref_frame in the past
1542       if (abs(dist) < min_past_dist && dist < 0) {
1543         ref_frame_dist_info->nearest_past_ref = ref_frame;
1544         min_past_dist = abs(dist);
1545       }
1546       // Get the nearest ref_frame in the future
1547       if (dist < min_future_dist && dist > 0) {
1548         ref_frame_dist_info->nearest_future_ref = ref_frame;
1549         min_future_dist = dist;
1550       }
1551     }
1552   }
1553 }
1554 
1555 static inline int refs_are_one_sided(const AV1_COMMON *cm) {
1556   assert(!frame_is_intra_only(cm));
1557 
1558   int one_sided_refs = 1;
1559   const int cur_display_order_hint = cm->current_frame.display_order_hint;
1560   for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
1561     const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
1562     if (buf == NULL) continue;
1563     if (av1_encoder_get_relative_dist(buf->display_order_hint,
1564                                       cur_display_order_hint) > 0) {
1565       one_sided_refs = 0;  // bwd reference
1566       break;
1567     }
1568   }
1569   return one_sided_refs;
1570 }
1571 
1572 static inline void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
1573                                              int ref_order_hint[2]) {
1574   const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
1575   ref_order_hint[0] = ref_order_hint[1] = 0;
1576   if (!skip_mode_info->skip_mode_allowed) return;
1577 
1578   const RefCntBuffer *const buf_0 =
1579       get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
1580   const RefCntBuffer *const buf_1 =
1581       get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
1582   assert(buf_0 != NULL && buf_1 != NULL);
1583 
1584   ref_order_hint[0] = buf_0->order_hint;
1585   ref_order_hint[1] = buf_1->order_hint;
1586 }
1587 
1588 static int check_skip_mode_enabled(AV1_COMP *const cpi) {
1589   AV1_COMMON *const cm = &cpi->common;
1590 
1591   av1_setup_skip_mode_allowed(cm);
1592   if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;
1593 
1594   // Turn off skip mode if the temporal distances of the reference pair to the
1595   // current frame differ by more than 1 frame.
1596   const int cur_offset = (int)cm->current_frame.order_hint;
1597   int ref_offset[2];
1598   get_skip_mode_ref_offsets(cm, ref_offset);
1599   const int cur_to_ref0 = get_relative_dist(&cm->seq_params->order_hint_info,
1600                                             cur_offset, ref_offset[0]);
1601   const int cur_to_ref1 = abs(get_relative_dist(
1602       &cm->seq_params->order_hint_info, cur_offset, ref_offset[1]));
1603   if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;
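  // Worked example: with ref0 two frames in the past (cur_to_ref0 = 2) and
  // ref1 one frame in the future (cur_to_ref1 = |-1| = 1), |2 - 1| = 1, so
  // skip mode stays enabled; a (3, 1) pair would be rejected here.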
1604 
1605   // High Latency: Turn off skip mode if all refs are fwd.
1606   if (cpi->all_one_sided_refs && cpi->oxcf.gf_cfg.lag_in_frames > 0) return 0;
1607 
1608   const int ref_frame[2] = {
1609     cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME,
1610     cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME
1611   };
1612   if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[0]]) ||
1613       !(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[1]]))
1614     return 0;
1615 
1616   return 1;
1617 }
1618 
1619 static inline void set_default_interp_skip_flags(
1620     const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
1621   const int num_planes = av1_num_planes(cm);
1622   interp_search_flags->default_interp_skip_flags =
1623       (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
1624                         : INTERP_SKIP_LUMA_SKIP_CHROMA;
1625 }
1626 
1627 static inline void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
1628   if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
1629        cpi->sf.inter_sf.disable_onesided_comp) &&
1630       cpi->all_one_sided_refs) {
1631     // Disable all compound references
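    // Compound pairs occupy bit positions REF_FRAMES .. MODE_CTX_REF_FRAMES-1
    // in this mask, so the expression below sets exactly those bits while
    // leaving the single-reference bits clear.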
1632     cpi->prune_ref_frame_mask = (1 << MODE_CTX_REF_FRAMES) - (1 << REF_FRAMES);
1633   } else if (!cpi->sf.rt_sf.use_nonrd_pick_mode &&
1634              cpi->sf.inter_sf.selective_ref_frame >= 2) {
1635     AV1_COMMON *const cm = &cpi->common;
1636     const int cur_frame_display_order_hint =
1637         cm->current_frame.display_order_hint;
1638     unsigned int *ref_display_order_hint =
1639         cm->cur_frame->ref_display_order_hint;
1640     const int arf2_dist = av1_encoder_get_relative_dist(
1641         ref_display_order_hint[ALTREF2_FRAME - LAST_FRAME],
1642         cur_frame_display_order_hint);
1643     const int bwd_dist = av1_encoder_get_relative_dist(
1644         ref_display_order_hint[BWDREF_FRAME - LAST_FRAME],
1645         cur_frame_display_order_hint);
1646 
1647     for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
1648       MV_REFERENCE_FRAME rf[2];
1649       av1_set_ref_frame(rf, ref_idx);
1650       if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) ||
1651           !(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]])) {
1652         continue;
1653       }
1654 
1655       if (!cpi->all_one_sided_refs) {
1656         int ref_dist[2];
1657         for (int i = 0; i < 2; ++i) {
1658           ref_dist[i] = av1_encoder_get_relative_dist(
1659               ref_display_order_hint[rf[i] - LAST_FRAME],
1660               cur_frame_display_order_hint);
1661         }
1662 
1663         // One-sided compound is used only when all reference frames are
1664         // one-sided.
1665         if ((ref_dist[0] > 0) == (ref_dist[1] > 0)) {
1666           cpi->prune_ref_frame_mask |= 1 << ref_idx;
1667         }
1668       }
1669 
1670       if (cpi->sf.inter_sf.selective_ref_frame >= 4 &&
1671           (rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME) &&
1672           (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
1673         // Check if both ALTREF2_FRAME and BWDREF_FRAME are future references.
1674         if (arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist) {
1675           // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
1676           // reference to the current frame than ALTREF2_FRAME
1677           cpi->prune_ref_frame_mask |= 1 << ref_idx;
1678         }
1679       }
1680     }
1681   }
1682 }
1683 
1684 static int allow_deltaq_mode(AV1_COMP *cpi) {
1685 #if !CONFIG_REALTIME_ONLY
1686   AV1_COMMON *const cm = &cpi->common;
1687   BLOCK_SIZE sb_size = cm->seq_params->sb_size;
1688   int sbs_wide = mi_size_wide[sb_size];
1689   int sbs_high = mi_size_high[sb_size];
1690 
1691   int64_t delta_rdcost = 0;
1692   for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sbs_high) {
1693     for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += sbs_wide) {
1694       int64_t this_delta_rdcost = 0;
1695       av1_get_q_for_deltaq_objective(cpi, &cpi->td, &this_delta_rdcost, sb_size,
1696                                      mi_row, mi_col);
1697       delta_rdcost += this_delta_rdcost;
1698     }
1699   }
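  // A negative aggregate delta means per-SB delta-q is estimated to lower the
  // overall RD cost, so the mode is enabled only when it is expected to help.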
1700   return delta_rdcost < 0;
1701 #else
1702   (void)cpi;
1703   return 1;
1704 #endif  // !CONFIG_REALTIME_ONLY
1705 }
1706 
1707 #define FORCE_ZMV_SKIP_128X128_BLK_DIFF 10000
1708 #define FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF 4
1709 
1710 // Populates block-level thresholds for the force zeromv-skip decision.
1711 static void populate_thresh_to_force_zeromv_skip(AV1_COMP *cpi) {
1712   if (cpi->sf.rt_sf.part_early_exit_zeromv == 0) return;
1713 
1714   // Threshold for forcing zeromv-skip decision is as below:
1715   // For 128x128 blocks, threshold is 10000 and per pixel threshold is 0.6103.
1716   // For 64x64 blocks, threshold is 5000 and per pixel threshold is 1.221
1717   // allowing slightly higher error for smaller blocks.
1718   // Per Pixel Threshold of 64x64 block        Area of 64x64 block         1  1
1719   // ------------------------------------=sqrt(---------------------)=sqrt(-)=-
1720   // Per Pixel Threshold of 128x128 block      Area of 128x128 block       4  2
1721   // Thus, per pixel thresholds for blocks of size 32x32, 16x16,...  can be
1722   // chosen as 2.442, 4.884,.... As the per pixel error tends to be higher for
1723   // small blocks, the same is clipped to 4.
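  // Worked example: a 64x64 block has a quarter of the 128x128 area, so its
  // threshold is 10000 * sqrt(1/4) = 5000 (~1.22 per pixel); a 32x32 block
  // gets 10000 * sqrt(1/16) = 2500 (~2.44 per pixel).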
1724   const unsigned int thresh_exit_128x128_part = FORCE_ZMV_SKIP_128X128_BLK_DIFF;
1725   const int num_128x128_pix =
1726       block_size_wide[BLOCK_128X128] * block_size_high[BLOCK_128X128];
1727 
1728   for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
1729     const int num_block_pix = block_size_wide[bsize] * block_size_high[bsize];
1730 
1731     // Calculate the threshold for zeromv-skip decision based on area of the
1732     // partition
1733     unsigned int thresh_exit_part_blk =
1734         (unsigned int)(thresh_exit_128x128_part *
1735                            sqrt((double)num_block_pix / num_128x128_pix) +
1736                        0.5);
1737     thresh_exit_part_blk = AOMMIN(
1738         thresh_exit_part_blk,
1739         (unsigned int)(FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF * num_block_pix));
1740     cpi->zeromv_skip_thresh_exit_part[bsize] = thresh_exit_part_blk;
1741   }
1742 }
1743 
1744 static void free_block_hash_buffers(uint32_t *block_hash_values[2][2],
1745                                     int8_t *is_block_same[2][3]) {
1746   for (int k = 0; k < 2; ++k) {
1747     for (int j = 0; j < 2; ++j) {
1748       aom_free(block_hash_values[k][j]);
1749     }
1750 
1751     for (int j = 0; j < 3; ++j) {
1752       aom_free(is_block_same[k][j]);
1753     }
1754   }
1755 }
1756 
1757 /*!\brief Encoder setup (only for the current frame), encoding, and reconstruction
1758  * for a single frame
1759  *
1760  * \ingroup high_level_algo
1761  */
1762 static inline void encode_frame_internal(AV1_COMP *cpi) {
1763   ThreadData *const td = &cpi->td;
1764   MACROBLOCK *const x = &td->mb;
1765   AV1_COMMON *const cm = &cpi->common;
1766   CommonModeInfoParams *const mi_params = &cm->mi_params;
1767   FeatureFlags *const features = &cm->features;
1768   MACROBLOCKD *const xd = &x->e_mbd;
1769   RD_COUNTS *const rdc = &cpi->td.rd_counts;
1770 #if CONFIG_FPMT_TEST
1771   FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
1772   FrameProbInfo *const temp_frame_probs_simulation =
1773       &cpi->ppi->temp_frame_probs_simulation;
1774 #endif
1775   FrameProbInfo *const frame_probs = &cpi->ppi->frame_probs;
1776   IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
1777   MultiThreadInfo *const mt_info = &cpi->mt_info;
1778   AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
1779   const AV1EncoderConfig *const oxcf = &cpi->oxcf;
1780   const DELTAQ_MODE deltaq_mode = oxcf->q_cfg.deltaq_mode;
1781   int i;
1782 
1783   if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
1784     mi_params->setup_mi(mi_params);
1785   }
1786 
1787   set_mi_offsets(mi_params, xd, 0, 0);
1788 
1789   av1_zero(*td->counts);
1790   av1_zero(rdc->tx_type_used);
1791   av1_zero(rdc->obmc_used);
1792   av1_zero(rdc->warped_used);
1793   av1_zero(rdc->seg_tmp_pred_cost);
1794 
1795   // Reset the flag.
1796   cpi->intrabc_used = 0;
1797   // Need to disable intrabc when superres is selected
1798   if (av1_superres_scaled(cm)) {
1799     features->allow_intrabc = 0;
1800   }
1801 
1802   features->allow_intrabc &= (oxcf->kf_cfg.enable_intrabc);
1803 
1804   if (features->allow_warped_motion &&
1805       cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
1806     const FRAME_UPDATE_TYPE update_type =
1807         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1808     int warped_probability =
1809 #if CONFIG_FPMT_TEST
1810         cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE
1811             ? temp_frame_probs->warped_probs[update_type]
1812             :
1813 #endif  // CONFIG_FPMT_TEST
1814             frame_probs->warped_probs[update_type];
1815     if (warped_probability < cpi->sf.inter_sf.prune_warped_prob_thresh)
1816       features->allow_warped_motion = 0;
1817   }
1818 
1819   int hash_table_created = 0;
1820   if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
1821       !cpi->sf.rt_sf.use_nonrd_pick_mode) {
1822     // TODO(any): move this outside of the recoding loop to avoid recalculating
1823     // the hash table.
1824     // add to hash table
1825     const int pic_width = cpi->source->y_crop_width;
1826     const int pic_height = cpi->source->y_crop_height;
1827     uint32_t *block_hash_values[2][2] = { { NULL } };
1828     int8_t *is_block_same[2][3] = { { NULL } };
1829     int k, j;
1830     bool error = false;
1831 
1832     for (k = 0; k < 2 && !error; ++k) {
1833       for (j = 0; j < 2; ++j) {
1834         block_hash_values[k][j] = (uint32_t *)aom_malloc(
1835             sizeof(*block_hash_values[0][0]) * pic_width * pic_height);
1836         if (!block_hash_values[k][j]) {
1837           error = true;
1838           break;
1839         }
1840       }
1841 
1842       for (j = 0; j < 3 && !error; ++j) {
1843         is_block_same[k][j] = (int8_t *)aom_malloc(
1844             sizeof(*is_block_same[0][0]) * pic_width * pic_height);
1845         if (!is_block_same[k][j]) error = true;
1846       }
1847     }
1848 
1849     av1_hash_table_init(intrabc_hash_info);
1850     if (error ||
1851         !av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table)) {
1852       free_block_hash_buffers(block_hash_values, is_block_same);
1853       aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
1854                          "Error allocating intrabc_hash_table and buffers");
1855     }
1856     hash_table_created = 1;
1857     av1_generate_block_2x2_hash_value(intrabc_hash_info, cpi->source,
1858                                       block_hash_values[0], is_block_same[0]);
1859     // Hash data generated for screen contents is used for intraBC ME
1860     const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
1861     const int max_sb_size =
1862         (1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2));
1863     int src_idx = 0;
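    // Hash values are built bottom-up: each pass doubles the block size
    // (4x4 -> 8x8 -> ... -> SB size), deriving each level from the previous
    // one, with block_hash_values[src_idx] / [dst_idx] ping-ponging between
    // the two buffer sets.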
1864     for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
1865       const int dst_idx = !src_idx;
1866       av1_generate_block_hash_value(
1867           intrabc_hash_info, cpi->source, size, block_hash_values[src_idx],
1868           block_hash_values[dst_idx], is_block_same[src_idx],
1869           is_block_same[dst_idx]);
1870       if (size >= min_alloc_size) {
1871         if (!av1_add_to_hash_map_by_row_with_precal_data(
1872                 &intrabc_hash_info->intrabc_hash_table,
1873                 block_hash_values[dst_idx], is_block_same[dst_idx][2],
1874                 pic_width, pic_height, size)) {
1875           error = true;
1876           break;
1877         }
1878       }
1879     }
1880 
1881     free_block_hash_buffers(block_hash_values, is_block_same);
1882 
1883     if (error) {
1884       aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
1885                          "Error adding data to intrabc_hash_table");
1886     }
1887   }
1888 
1889   const CommonQuantParams *quant_params = &cm->quant_params;
1890   for (i = 0; i < MAX_SEGMENTS; ++i) {
1891     const int qindex =
1892         cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
1893                         : quant_params->base_qindex;
1894     xd->lossless[i] =
1895         qindex == 0 && quant_params->y_dc_delta_q == 0 &&
1896         quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
1897         quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
1898     if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
1899     xd->qindex[i] = qindex;
1900     if (xd->lossless[i]) {
1901       cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
1902     } else {
1903       cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
1904     }
1905   }
1906   features->coded_lossless = is_coded_lossless(cm, xd);
1907   features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm);
1908 
1909   // Fix delta q resolution for the moment
1910 
1911   cm->delta_q_info.delta_q_res = 0;
1912   if (cpi->use_ducky_encode) {
1913     cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_DUCKY_ENCODE;
1914   } else if (cpi->oxcf.q_cfg.aq_mode != CYCLIC_REFRESH_AQ) {
1915     if (deltaq_mode == DELTA_Q_OBJECTIVE)
1916       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE;
1917     else if (deltaq_mode == DELTA_Q_PERCEPTUAL)
1918       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1919     else if (deltaq_mode == DELTA_Q_PERCEPTUAL_AI)
1920       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1921     else if (deltaq_mode == DELTA_Q_USER_RATING_BASED)
1922       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1923     else if (deltaq_mode == DELTA_Q_HDR)
1924       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1925     // Set delta_q_present_flag before it is used for the first time
1926     cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
1927     cm->delta_q_info.delta_q_present_flag = deltaq_mode != NO_DELTA_Q;
1928 
1929     // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q
1930     // is used for ineligible frames; that effectively turns off row_mt
1931     // usage. Note that currently only altref frames are eligible for
1932     // objective delta_q and tpl.
1933     const GF_GROUP *gf_group = &cpi->ppi->gf_group;
1934     if (cm->delta_q_info.delta_q_present_flag) {
1935       if (deltaq_mode == DELTA_Q_OBJECTIVE &&
1936           gf_group->update_type[cpi->gf_frame_index] == LF_UPDATE)
1937         cm->delta_q_info.delta_q_present_flag = 0;
1938 
1939       if (deltaq_mode == DELTA_Q_OBJECTIVE &&
1940           cm->delta_q_info.delta_q_present_flag) {
1941         cm->delta_q_info.delta_q_present_flag &= allow_deltaq_mode(cpi);
1942       }
1943     }
1944 
1945     // Reset delta_q_used flag
1946     cpi->deltaq_used = 0;
1947 
1948     cm->delta_q_info.delta_lf_present_flag =
1949         cm->delta_q_info.delta_q_present_flag &&
1950         oxcf->tool_cfg.enable_deltalf_mode;
1951     cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
1952 
1953     // update delta_q_present_flag and delta_lf_present_flag based on
1954     // base_qindex
1955     cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
1956     cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
1957   } else if (cpi->cyclic_refresh->apply_cyclic_refresh ||
1958              cpi->svc.number_temporal_layers == 1) {
1959     cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
1960     cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
1961   }
1962   cpi->rc.cnt_zeromv = 0;
1963 
1964   av1_frame_init_quantizer(cpi);
1965   init_encode_frame_mb_context(cpi);
1966   set_default_interp_skip_flags(cm, &cpi->interp_search_flags);
1967 
1968   if (cm->prev_frame && cm->prev_frame->seg.enabled)
1969     cm->last_frame_seg_map = cm->prev_frame->seg_map;
1970   else
1971     cm->last_frame_seg_map = NULL;
1972   if (features->allow_intrabc || features->coded_lossless) {
1973     av1_set_default_ref_deltas(cm->lf.ref_deltas);
1974     av1_set_default_mode_deltas(cm->lf.mode_deltas);
1975   } else if (cm->prev_frame) {
1976     memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
1977     memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
1978   }
1979   memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
1980   memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
1981 
1982   cpi->all_one_sided_refs =
1983       frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm);
1984 
1985   cpi->prune_ref_frame_mask = 0;
1986   // Figure out which ref frames can be skipped at frame level.
1987   setup_prune_ref_frame_mask(cpi);
1988 
1989   x->txfm_search_info.txb_split_count = 0;
1990 #if CONFIG_SPEED_STATS
1991   x->txfm_search_info.tx_search_count = 0;
1992 #endif  // CONFIG_SPEED_STATS
1993 
1994 #if !CONFIG_REALTIME_ONLY
1995 #if CONFIG_COLLECT_COMPONENT_TIMING
1996   start_timing(cpi, av1_compute_global_motion_time);
1997 #endif
1998   av1_compute_global_motion_facade(cpi);
1999 #if CONFIG_COLLECT_COMPONENT_TIMING
2000   end_timing(cpi, av1_compute_global_motion_time);
2001 #endif
2002 #endif  // !CONFIG_REALTIME_ONLY
2003 
2004 #if CONFIG_COLLECT_COMPONENT_TIMING
2005   start_timing(cpi, av1_setup_motion_field_time);
2006 #endif
2007   av1_calculate_ref_frame_side(cm);
2008   if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
2009 #if CONFIG_COLLECT_COMPONENT_TIMING
2010   end_timing(cpi, av1_setup_motion_field_time);
2011 #endif
2012 
2013   cm->current_frame.skip_mode_info.skip_mode_flag =
2014       check_skip_mode_enabled(cpi);
2015 
2016   // Initialization of skip mode cost depends on the value of
2017   // 'skip_mode_flag'. This initialization happens in the function
2018   // av1_fill_mode_rates(), which is in turn called in
2019   // av1_initialize_rd_consts(). Thus, av1_initialize_rd_consts()
2020   // has to be called after 'skip_mode_flag' is initialized.
2021   av1_initialize_rd_consts(cpi);
2022   av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
2023   populate_thresh_to_force_zeromv_skip(cpi);
2024 
2025   enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
2026   enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
2027   mt_info->row_mt_enabled = 0;
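  // Bitstream packing is multithreaded only when more than one pack worker
  // and more than one tile are both available; tiles appear to be the unit of
  // parallel packing here.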
2028   mt_info->pack_bs_mt_enabled = AOMMIN(mt_info->num_mod_workers[MOD_PACK_BS],
2029                                        cm->tiles.cols * cm->tiles.rows) > 1;
2030 
2031   if (oxcf->row_mt && (mt_info->num_workers > 1)) {
2032     mt_info->row_mt_enabled = 1;
2033     enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
2034     enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
2035     av1_encode_tiles_row_mt(cpi);
2036   } else {
2037     if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1) {
2038       av1_encode_tiles_mt(cpi);
2039     } else {
2040       // Preallocate the pc_tree for realtime coding to reduce the cost of
2041       // memory allocation.
2042       const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
2043       if (use_nonrd_mode) {
2044         td->pc_root = av1_alloc_pc_tree_node(cm->seq_params->sb_size);
2045         if (!td->pc_root)
2046           aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2047                              "Failed to allocate PC_TREE");
2048       } else {
2049         td->pc_root = NULL;
2050       }
2051 
2052       encode_tiles(cpi);
2053       av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
2054                                  cpi->sf.part_sf.partition_search_type);
2055       td->pc_root = NULL;
2056     }
2057   }
2058 
2059   // If intrabc is allowed but never selected, reset the allow_intrabc flag.
2060   if (features->allow_intrabc && !cpi->intrabc_used) {
2061     features->allow_intrabc = 0;
2062   }
2063   if (features->allow_intrabc) {
2064     cm->delta_q_info.delta_lf_present_flag = 0;
2065   }
2066 
2067   if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
2068     cm->delta_q_info.delta_q_present_flag = 0;
2069   }
2070 
2071   // Set the transform size appropriately before bitstream creation
2072   const MODE_EVAL_TYPE eval_type =
2073       cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch
2074           ? WINNER_MODE_EVAL
2075           : DEFAULT_EVAL;
2076   const TX_SIZE_SEARCH_METHOD tx_search_type =
2077       cpi->winner_mode_params.tx_size_search_methods[eval_type];
2078   assert(oxcf->txfm_cfg.enable_tx64 || tx_search_type != USE_LARGESTALL);
2079   features->tx_mode = select_tx_mode(cm, tx_search_type);
2080 
2081   // Retain the frame-level probability update conditions for parallel frames.
2082   // These conditions will be consumed during the postencode stage to update
2083   // the probabilities.
2084   if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2085     cpi->do_update_frame_probs_txtype[cpi->num_frame_recode] =
2086         cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats;
2087     cpi->do_update_frame_probs_obmc[cpi->num_frame_recode] =
2088         (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2089          cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX);
2090     cpi->do_update_frame_probs_warp[cpi->num_frame_recode] =
2091         (features->allow_warped_motion &&
2092          cpi->sf.inter_sf.prune_warped_prob_thresh > 0);
2093     cpi->do_update_frame_probs_interpfilter[cpi->num_frame_recode] =
2094         (cm->current_frame.frame_type != KEY_FRAME &&
2095          cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2096          features->interp_filter == SWITCHABLE);
2097   }
2098 
2099   if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats ||
2100       ((cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh !=
2101         INT_MAX) &&
2102        (cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh != 0))) {
2103     const FRAME_UPDATE_TYPE update_type =
2104         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2105     for (i = 0; i < TX_SIZES_ALL; i++) {
2106       int sum = 0;
2107       int j;
2108       int left = MAX_TX_TYPE_PROB;
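      // 'left' tracks the probability mass not yet assigned; iterating j from
      // high to low and folding the remainder into j == 0 keeps the updated
      // probabilities summing exactly to MAX_TX_TYPE_PROB despite integer
      // truncation in each division.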
2109 
2110       for (j = 0; j < TX_TYPES; j++)
2111         sum += cpi->td.rd_counts.tx_type_used[i][j];
2112 
2113       for (j = TX_TYPES - 1; j >= 0; j--) {
2114         int update_txtype_frameprobs = 1;
2115         const int new_prob =
2116             sum ? (int)((int64_t)MAX_TX_TYPE_PROB *
2117                         cpi->td.rd_counts.tx_type_used[i][j] / sum)
2118                 : (j ? 0 : MAX_TX_TYPE_PROB);
2119 #if CONFIG_FPMT_TEST
2120         if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2121           if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2122               0) {
2123             int prob =
2124                 (temp_frame_probs_simulation->tx_type_probs[update_type][i][j] +
2125                  new_prob) >>
2126                 1;
2127             left -= prob;
2128             if (j == 0) prob += left;
2129             temp_frame_probs_simulation->tx_type_probs[update_type][i][j] =
2130                 prob;
2131             // Copy temp_frame_probs_simulation to temp_frame_probs
2132             for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2133                  update_type_idx++) {
2134               temp_frame_probs->tx_type_probs[update_type_idx][i][j] =
2135                   temp_frame_probs_simulation
2136                       ->tx_type_probs[update_type_idx][i][j];
2137             }
2138           }
2139           update_txtype_frameprobs = 0;
2140         }
2141 #endif  // CONFIG_FPMT_TEST
2142         // Track the frame probabilities of parallel encode frames to update
2143         // during postencode stage.
2144         if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2145           update_txtype_frameprobs = 0;
2146           cpi->frame_new_probs[cpi->num_frame_recode]
2147               .tx_type_probs[update_type][i][j] = new_prob;
2148         }
2149         if (update_txtype_frameprobs) {
2150           int prob =
2151               (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
2152           left -= prob;
2153           if (j == 0) prob += left;
2154           frame_probs->tx_type_probs[update_type][i][j] = prob;
2155         }
2156       }
2157     }
2158   }
2159 
2160   if (cm->seg.enabled) {
2161     cm->seg.temporal_update = 1;
2162     if (rdc->seg_tmp_pred_cost[0] < rdc->seg_tmp_pred_cost[1])
2163       cm->seg.temporal_update = 0;
2164   }
2165 
2166   if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2167       cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
2168     const FRAME_UPDATE_TYPE update_type =
2169         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2170 
2171     for (i = 0; i < BLOCK_SIZES_ALL; i++) {
2172       int sum = 0;
2173       int update_obmc_frameprobs = 1;
2174       for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];
2175 
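      // new_prob is the per-block-size frequency of OBMC usage in 1/128
      // units; the stored probability is then updated as a running average,
      // (old + new) >> 1, so history decays by half each frame.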
2176       const int new_prob =
2177           sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
2178 #if CONFIG_FPMT_TEST
2179       if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2180         if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2181           temp_frame_probs_simulation->obmc_probs[update_type][i] =
2182               (temp_frame_probs_simulation->obmc_probs[update_type][i] +
2183                new_prob) >>
2184               1;
2185           // Copy temp_frame_probs_simulation to temp_frame_probs
2186           for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2187                update_type_idx++) {
2188             temp_frame_probs->obmc_probs[update_type_idx][i] =
2189                 temp_frame_probs_simulation->obmc_probs[update_type_idx][i];
2190           }
2191         }
2192         update_obmc_frameprobs = 0;
2193       }
2194 #endif  // CONFIG_FPMT_TEST
2195       // Track the frame probabilities of parallel encode frames to update
2196       // during postencode stage.
2197       if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2198         update_obmc_frameprobs = 0;
2199         cpi->frame_new_probs[cpi->num_frame_recode].obmc_probs[update_type][i] =
2200             new_prob;
2201       }
2202       if (update_obmc_frameprobs) {
2203         frame_probs->obmc_probs[update_type][i] =
2204             (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
2205       }
2206     }
2207   }
2208 
2209   if (features->allow_warped_motion &&
2210       cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
2211     const FRAME_UPDATE_TYPE update_type =
2212         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2213     int update_warp_frameprobs = 1;
2214     int sum = 0;
2215     for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
2216     const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
2217 #if CONFIG_FPMT_TEST
2218     if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2219       if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2220         temp_frame_probs_simulation->warped_probs[update_type] =
2221             (temp_frame_probs_simulation->warped_probs[update_type] +
2222              new_prob) >>
2223             1;
2224         // Copy temp_frame_probs_simulation to temp_frame_probs
2225         for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2226              update_type_idx++) {
2227           temp_frame_probs->warped_probs[update_type_idx] =
2228               temp_frame_probs_simulation->warped_probs[update_type_idx];
2229         }
2230       }
2231       update_warp_frameprobs = 0;
2232     }
2233 #endif  // CONFIG_FPMT_TEST
2234     // Track the frame probabilities of parallel encode frames to update
2235     // during postencode stage.
2236     if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2237       update_warp_frameprobs = 0;
2238       cpi->frame_new_probs[cpi->num_frame_recode].warped_probs[update_type] =
2239           new_prob;
2240     }
2241     if (update_warp_frameprobs) {
2242       frame_probs->warped_probs[update_type] =
2243           (frame_probs->warped_probs[update_type] + new_prob) >> 1;
2244     }
2245   }
2246 
2247   if (cm->current_frame.frame_type != KEY_FRAME &&
2248       cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2249       features->interp_filter == SWITCHABLE) {
2250     const FRAME_UPDATE_TYPE update_type =
2251         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2252 
2253     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
2254       int sum = 0;
2255       int j;
2256       int left = 1536;
2257 
2258       for (j = 0; j < SWITCHABLE_FILTERS; j++) {
2259         sum += cpi->td.counts->switchable_interp[i][j];
2260       }
2261 
2262       for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
2263         int update_interpfilter_frameprobs = 1;
2264         const int new_prob =
2265             sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
2266                 : (j ? 0 : 1536);
2267 #if CONFIG_FPMT_TEST
2268         if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2269           if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2270               0) {
2271             int prob = (temp_frame_probs_simulation
2272                             ->switchable_interp_probs[update_type][i][j] +
2273                         new_prob) >>
2274                        1;
2275             left -= prob;
2276             if (j == 0) prob += left;
2277             temp_frame_probs_simulation
2278                 ->switchable_interp_probs[update_type][i][j] = prob;
2279             // Copy temp_frame_probs_simulation to temp_frame_probs
2280             for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2281                  update_type_idx++) {
2282               temp_frame_probs->switchable_interp_probs[update_type_idx][i][j] =
2283                   temp_frame_probs_simulation
2284                       ->switchable_interp_probs[update_type_idx][i][j];
2285             }
2286           }
2287           update_interpfilter_frameprobs = 0;
2288         }
2289 #endif  // CONFIG_FPMT_TEST
2290         // Track the frame probabilities of parallel encode frames to update
2291         // during postencode stage.
2292         if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2293           update_interpfilter_frameprobs = 0;
2294           cpi->frame_new_probs[cpi->num_frame_recode]
2295               .switchable_interp_probs[update_type][i][j] = new_prob;
2296         }
2297         if (update_interpfilter_frameprobs) {
2298           int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
2299                       new_prob) >>
2300                      1;
2301           left -= prob;
2302           if (j == 0) prob += left;
2303           frame_probs->switchable_interp_probs[update_type][i][j] = prob;
2304         }
2305       }
2306     }
2307   }
2308   if (hash_table_created) {
2309     av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
2310   }
2311 }
2312 
2313 /*!\brief Setup reference frame buffers and encode a frame
2314  *
2315  * \ingroup high_level_algo
2316  * \callgraph
2317  * \callergraph
2318  *
2319  * \param[in]    cpi    Top-level encoder structure
2320  */
2321 void av1_encode_frame(AV1_COMP *cpi) {
2322   AV1_COMMON *const cm = &cpi->common;
2323   CurrentFrame *const current_frame = &cm->current_frame;
2324   FeatureFlags *const features = &cm->features;
2325   RD_COUNTS *const rdc = &cpi->td.rd_counts;
2326   const AV1EncoderConfig *const oxcf = &cpi->oxcf;
2327   // Indicates whether or not to use a default reduced set for ext-tx
2328   // rather than the potential full set of 16 transforms
2329   features->reduced_tx_set_used = oxcf->txfm_cfg.reduced_tx_type_set;
2330 
2331   // Make sure segment_id is no larger than last_active_segid.
2332   if (cm->seg.enabled && cm->seg.update_map) {
2333     const int mi_rows = cm->mi_params.mi_rows;
2334     const int mi_cols = cm->mi_params.mi_cols;
2335     const int last_active_segid = cm->seg.last_active_segid;
2336     uint8_t *map = cpi->enc_seg.map;
2337     for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
2338       for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
2339         map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
2340       }
2341       map += mi_cols;
2342     }
2343   }
2344 
2345   av1_setup_frame_buf_refs(cm);
2346   enforce_max_ref_frames(cpi, &cpi->ref_frame_flags,
2347                          cm->cur_frame->ref_display_order_hint,
2348                          cm->current_frame.display_order_hint);
2349   set_rel_frame_dist(&cpi->common, &cpi->ref_frame_dist_info,
2350                      cpi->ref_frame_flags);
2351   av1_setup_frame_sign_bias(cm);
2352 
2353   // If global motion is enabled, then every buffer which is used as either
2354   // a source or a ref frame should have an image pyramid allocated.
2355   // Check here so that issues can be caught early in debug mode
2356 #if !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
2357   if (cpi->alloc_pyramid) {
2358     assert(cpi->source->y_pyramid);
2359     for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
2360       const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
2361       if (buf != NULL) {
2362         assert(buf->buf.y_pyramid);
2363       }
2364     }
2365   }
2366 #endif  // !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
2367 
2368 #if CONFIG_MISMATCH_DEBUG
2369   mismatch_reset_frame(av1_num_planes(cm));
2370 #endif
2371 
2372   rdc->newmv_or_intra_blocks = 0;
2373   cpi->palette_pixel_num = 0;
2374 
2375   if (cpi->sf.hl_sf.frame_parameter_update ||
2376       cpi->sf.rt_sf.use_comp_ref_nonrd) {
2377     if (frame_is_intra_only(cm))
2378       current_frame->reference_mode = SINGLE_REFERENCE;
2379     else
2380       current_frame->reference_mode = REFERENCE_MODE_SELECT;
2381 
2382     features->interp_filter = SWITCHABLE;
2383     if (cm->tiles.large_scale) features->interp_filter = EIGHTTAP_REGULAR;
2384 
2385     features->switchable_motion_mode = is_switchable_motion_mode_allowed(
2386         features->allow_warped_motion, oxcf->motion_mode_cfg.enable_obmc);
2387 
2388     rdc->compound_ref_used_flag = 0;
2389     rdc->skip_mode_used_flag = 0;
2390 
2391     encode_frame_internal(cpi);
2392 
2393     if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
2394       // Use a flag that includes 4x4 blocks
2395       if (rdc->compound_ref_used_flag == 0) {
2396         current_frame->reference_mode = SINGLE_REFERENCE;
2397 #if CONFIG_ENTROPY_STATS
2398         av1_zero(cpi->td.counts->comp_inter);
2399 #endif  // CONFIG_ENTROPY_STATS
2400       }
2401     }
2402     // Re-check the skip mode status, as the reference mode may have
2403     // changed.
2404     SkipModeInfo *const skip_mode_info = &current_frame->skip_mode_info;
2405     if (frame_is_intra_only(cm) ||
2406         current_frame->reference_mode == SINGLE_REFERENCE) {
2407       skip_mode_info->skip_mode_allowed = 0;
2408       skip_mode_info->skip_mode_flag = 0;
2409     }
2410     if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
2411       skip_mode_info->skip_mode_flag = 0;
2412 
2413     if (!cm->tiles.large_scale) {
2414       if (features->tx_mode == TX_MODE_SELECT &&
2415           cpi->td.mb.txfm_search_info.txb_split_count == 0)
2416         features->tx_mode = TX_MODE_LARGEST;
2417     }
2418   } else {
2419     // This is needed if real-time speed setting is changed on the fly
2420     // from one using compound prediction to one using single reference.
2421     if (current_frame->reference_mode == REFERENCE_MODE_SELECT)
2422       current_frame->reference_mode = SINGLE_REFERENCE;
2423     encode_frame_internal(cpi);
2424   }
2425 }
2426