/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <limits.h>
#include <float.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/binary_codes_writer.h"
#include "aom_ports/mem.h"
#include "aom_ports/aom_timer.h"
#include "aom_util/aom_pthread.h"
#if CONFIG_MISMATCH_DEBUG
#include "aom_util/debug_util.h"
#endif  // CONFIG_MISMATCH_DEBUG

#include "av1/common/cfl.h"
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mv.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconintra.h"
#include "av1/common/reconinter.h"
#include "av1/common/seg_common.h"
#include "av1/common/tile_common.h"
#include "av1/common/warped_motion.h"

#include "av1/encoder/allintra_vis.h"
#include "av1/encoder/aq_complexity.h"
#include "av1/encoder/aq_cyclicrefresh.h"
#include "av1/encoder/aq_variance.h"
#include "av1/encoder/global_motion_facade.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encodeframe_utils.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encodetxb.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/extend.h"
#include "av1/encoder/intra_mode_search_utils.h"
#include "av1/encoder/ml.h"
#include "av1/encoder/motion_search_facade.h"
#include "av1/encoder/partition_strategy.h"
#if !CONFIG_REALTIME_ONLY
#include "av1/encoder/partition_model_weights.h"
#endif
#include "av1/encoder/partition_search.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/segmentation.h"
#include "av1/encoder/tokenize.h"
#include "av1/encoder/tpl_model.h"
#include "av1/encoder/var_based_part.h"

#if CONFIG_TUNE_VMAF
#include "av1/encoder/tune_vmaf.h"
#endif

/*!\cond */
// This is used as a reference when computing the source variance for the
// purposes of activity masking.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

#if CONFIG_AV1_HIGHBITDEPTH
static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};

static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16
};
#endif  // CONFIG_AV1_HIGHBITDEPTH
/*!\endcond */

// For the given bit depth, returns a constant array used to assist the
// calculation of source block variance, which will then be used to decide
// adaptive quantizers.
static const uint8_t *get_var_offs(int use_hbd, int bd) {
#if CONFIG_AV1_HIGHBITDEPTH
  if (use_hbd) {
    assert(bd == 8 || bd == 10 || bd == 12);
    const int off_index = (bd - 8) >> 1;
    static const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
                                                AV1_HIGH_VAR_OFFS_10,
                                                AV1_HIGH_VAR_OFFS_12 };
    return CONVERT_TO_BYTEPTR(high_var_offs[off_index]);
  }
#else
  (void)use_hbd;
  (void)bd;
  assert(!use_hbd);
#endif
  assert(bd == 8);
  return AV1_VAR_OFFS;
}

void av1_init_rtc_counters(MACROBLOCK *const x) {
  av1_init_cyclic_refresh_counters(x);
  x->cnt_zeromv = 0;
}

void av1_accumulate_rtc_counters(AV1_COMP *cpi, const MACROBLOCK *const x) {
  if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ)
    av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh, x);
  cpi->rc.cnt_zeromv += x->cnt_zeromv;
}

unsigned int av1_get_perpixel_variance(const AV1_COMP *cpi,
                                       const MACROBLOCKD *xd,
                                       const struct buf_2d *ref,
                                       BLOCK_SIZE bsize, int plane,
                                       int use_hbd) {
  const int subsampling_x = xd->plane[plane].subsampling_x;
  const int subsampling_y = xd->plane[plane].subsampling_y;
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, subsampling_x, subsampling_y);
  unsigned int sse;
  const unsigned int var = cpi->ppi->fn_ptr[plane_bsize].vf(
      ref->buf, ref->stride, get_var_offs(use_hbd, xd->bd), 0, &sse);
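  // Example: num_pels_log2_lookup[BLOCK_16X16] is 8 (256 pels), so a raw
  // block variance of 5120 for a 16x16 plane block returns
  // ROUND_POWER_OF_TWO(5120, 8) == 20, i.e. an average variance of about 20
  // per pixel.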
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[plane_bsize]);
}

unsigned int av1_get_perpixel_variance_facade(const AV1_COMP *cpi,
                                              const MACROBLOCKD *xd,
                                              const struct buf_2d *ref,
                                              BLOCK_SIZE bsize, int plane) {
  const int use_hbd = is_cur_buf_hbd(xd);
  return av1_get_perpixel_variance(cpi, xd, ref, bsize, plane, use_hbd);
}

void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col, const int num_planes,
                          BLOCK_SIZE bsize) {
  // Set current frame pointer.
  x->e_mbd.cur_buf = src;

  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
  // the static analysis warnings.
  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
    const int is_uv = i > 0;
    setup_pred_plane(
        &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv],
        src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL,
        x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y);
  }
}

#if !CONFIG_REALTIME_ONLY
/*!\brief Assigns different quantization parameters to each super
 * block based on its TPL weight.
 *
 * \ingroup tpl_modelling
 *
 * \param[in]     cpi        Top level encoder instance structure
 * \param[in,out] td         Thread data structure
 * \param[in,out] x          Macro block level data for this block.
 * \param[in]     tile_info  Tile information / identification
 * \param[in]     mi_row     Block row (in "MI_SIZE" units) index
 * \param[in]     mi_col     Block column (in "MI_SIZE" units) index
 * \param[in]     num_planes Number of image planes (e.g. Y, U, V)
 *
 * \remark No return value but updates macroblock and thread data
 * related to the q / q delta to be used.
 */
static inline void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
                                 MACROBLOCK *const x,
                                 const TileInfo *const tile_info, int mi_row,
                                 int mi_col, int num_planes) {
  AV1_COMMON *const cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  assert(delta_q_info->delta_q_present_flag);

  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  // Delta-q modulation based on variance
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);

  const int delta_q_res = delta_q_info->delta_q_res;
  int current_qindex = cm->quant_params.base_qindex;
  if (cpi->use_ducky_encode && cpi->ducky_encode_info.frame_info.qp_mode ==
                                   DUCKY_ENCODE_FRAME_MODE_QINDEX) {
    const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
    const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
    const int sb_cols =
        CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, cm->seq_params->mib_size_log2);
    const int sb_index = sb_row * sb_cols + sb_col;
    current_qindex =
        cpi->ducky_encode_info.frame_info.superblock_encode_qindex[sb_index];
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL) {
    if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
      const int block_wavelet_energy_level =
          av1_block_wavelet_energy_level(cpi, x, sb_size);
      x->sb_energy_level = block_wavelet_energy_level;
      current_qindex = av1_compute_q_from_energy_level_deltaq_mode(
          cpi, block_wavelet_energy_level);
    } else {
      const int block_var_level = av1_log_block_var(cpi, x, sb_size);
      x->sb_energy_level = block_var_level;
      current_qindex =
          av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level);
    }
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE &&
             cpi->oxcf.algo_cfg.enable_tpl_model) {
    // Setup deltaq based on tpl stats
    current_qindex =
        av1_get_q_for_deltaq_objective(cpi, td, NULL, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL_AI) {
    current_qindex = av1_get_sbq_perceptual_ai(cpi, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) {
    current_qindex = av1_get_sbq_user_rating_based(cpi, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.enable_hdr_deltaq) {
    current_qindex = av1_get_q_for_hdr(cpi, x, sb_size, mi_row, mi_col);
  }

  x->rdmult_cur_qindex = current_qindex;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int adjusted_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);
  if (cpi->use_ducky_encode) {
    assert(adjusted_qindex == current_qindex);
  }
  current_qindex = adjusted_qindex;

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
  x->rdmult_delta_qindex = x->delta_qindex;

  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);

  // keep track of any non-zero delta-q used
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
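    // The masking below assumes delta_lf_res is a power of two (as the mask
    // construction implies): adding half a step and then clearing the low
    // bits rounds x->delta_qindex / 4 to the nearest multiple of
    // delta_lf_res.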
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // pre-set the delta lf for loop filter. Note that this value is set
    // before mi is assigned for each block in current superblock
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }
}

static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
                                 int mi_col) {
  const AV1_COMMON *cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  MACROBLOCK *x = &td->mb;
  const int frame_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;

  av1_zero(x->tpl_keep_ref_frame);

  if (!av1_tpl_stats_ready(tpl_data, frame_idx)) return;
  if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return;
  if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;

  const int is_overlay =
      cpi->ppi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
  if (is_overlay) {
    memset(x->tpl_keep_ref_frame, 1, sizeof(x->tpl_keep_ref_frame));
    return;
  }

  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int tpl_stride = tpl_frame->stride;
  int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
  const int step = 1 << block_mis_log2;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;

  const int mi_row_end =
      AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      AOMMIN(coded_to_superres_mi(mi_col + mi_size_wide[sb_size],
                                  cm->superres_scale_denominator),
             mi_cols_sr);
  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < mi_row_end; row += row_step) {
    for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
      const TplDepStats *this_stats =
          &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
      int64_t tpl_pred_error[INTER_REFS_PER_FRAME] = { 0 };
      // Find the winner ref frame idx for the current block
      int64_t best_inter_cost = this_stats->pred_error[0];
      int best_rf_idx = 0;
      for (int idx = 1; idx < INTER_REFS_PER_FRAME; ++idx) {
        if ((this_stats->pred_error[idx] < best_inter_cost) &&
            (this_stats->pred_error[idx] != 0)) {
          best_inter_cost = this_stats->pred_error[idx];
          best_rf_idx = idx;
        }
      }
      // tpl_pred_error is the pred_error reduction of best_ref w.r.t.
      // LAST_FRAME.
      tpl_pred_error[best_rf_idx] = this_stats->pred_error[best_rf_idx] -
                                    this_stats->pred_error[LAST_FRAME - 1];

      for (int rf_idx = 1; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx)
        inter_cost[rf_idx] += tpl_pred_error[rf_idx];
    }
  }

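  // Rank the remaining references (all but LAST_FRAME) by accumulated TPL
  // cost in ascending order via insertion sort, so the reference with the
  // largest prediction-error reduction relative to LAST_FRAME comes first.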
  int rank_index[INTER_REFS_PER_FRAME - 1];
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
    rank_index[idx] = idx + 1;
    for (int i = idx; i > 0; --i) {
      if (inter_cost[rank_index[i - 1]] > inter_cost[rank_index[i]]) {
        const int tmp = rank_index[i - 1];
        rank_index[i - 1] = rank_index[i];
        rank_index[i] = tmp;
      }
    }
  }

  x->tpl_keep_ref_frame[INTRA_FRAME] = 1;
  x->tpl_keep_ref_frame[LAST_FRAME] = 1;

  int cutoff_ref = 0;
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
    x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 1;
    if (idx > 2) {
      if (!cutoff_ref) {
        // If the predictive coding gain of this frame is smaller than that of
        // the previous, more relevant frame by a certain amount, discard this
        // frame and all the frames after it.
        if (llabs(inter_cost[rank_index[idx]]) <
                llabs(inter_cost[rank_index[idx - 1]]) / 8 ||
            inter_cost[rank_index[idx]] == 0)
          cutoff_ref = 1;
      }

      if (cutoff_ref) x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 0;
    }
  }
}

static inline void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
                                           int mi_row, int mi_col) {
  const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
  const int orig_rdmult = cpi->rd.RDMULT;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int gf_group_index = cpi->gf_frame_index;
  if (cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ &&
      cpi->oxcf.q_cfg.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 &&
      cpi->ppi->gf_group.update_type[gf_group_index] == ARF_UPDATE) {
    const int dr =
        av1_get_rdmult_delta(cpi, sb_size, mi_row, mi_col, orig_rdmult);
    x->rdmult = dr;
  }
}
#endif  // !CONFIG_REALTIME_ONLY

#if CONFIG_RT_ML_PARTITIONING
// Get a prediction (stored in x->est_pred) for the whole superblock.
static void get_estimated_pred(AV1_COMP *cpi, const TileInfo *const tile,
                               MACROBLOCK *x, int mi_row, int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);
  MACROBLOCKD *xd = &x->e_mbd;

  // TODO(kyslov) Extend to 128x128
  assert(cm->seq_params->sb_size == BLOCK_64X64);

  av1_set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);

  if (!is_key_frame) {
    MB_MODE_INFO *mi = xd->mi[0];
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);

    assert(yv12 != NULL);

    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                         get_ref_scale_factors(cm, LAST_FRAME), 1);
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE;
    mi->bsize = BLOCK_64X64;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);

    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);

    xd->plane[0].dst.buf = x->est_pred;
    xd->plane[0].dst.stride = 64;
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
  } else {
#if CONFIG_AV1_HIGHBITDEPTH
    switch (xd->bd) {
      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
      case 10:
        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
        break;
      case 12:
        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
        break;
    }
#else
    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
#endif  // CONFIG_AV1_HIGHBITDEPTH
  }
}
#endif  // CONFIG_RT_ML_PARTITIONING

#define AVG_CDF_WEIGHT_LEFT 3
#define AVG_CDF_WEIGHT_TOP_RIGHT 1
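// These weights feed av1_avg_cdf_symbols() in encode_sb_row() below: when
// row-mt is enabled, the entropy context carried from the left superblock is
// averaged with the saved top-right row context using a 3:1 blend that
// favors the left neighbor.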

/*!\brief Encode a superblock (minimal RD search involved)
 *
 * \ingroup partition_search
 * Encodes the superblock by a pre-determined partition pattern, only minor
 * rd-based searches are allowed to adjust the initial pattern. It is only used
 * by realtime encoding.
 */
static inline void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
                                   TileDataEnc *tile_data, TokenExtra **tp,
                                   const int mi_row, const int mi_col,
                                   const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  PC_TREE *const pc_root = td->pc_root;

#if CONFIG_RT_ML_PARTITIONING
  if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) {
    RD_STATS dummy_rdc;
    get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
    av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, pc_root);
    return;
  }
#endif
  // Set the partition
  if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
      (sf->rt_sf.use_fast_fixed_part && x->sb_force_fixed_part == 1 &&
       (!frame_is_intra_only(cm) &&
        (!cpi->ppi->use_svc ||
         !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)))) {
    // set a fixed-size partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    BLOCK_SIZE bsize_select = sf->part_sf.fixed_partition_size;
    if (sf->rt_sf.use_fast_fixed_part &&
        x->content_state_sb.source_sad_nonrd < kLowSad) {
      bsize_select = cm->seq_params->sb_size;
    }
    const BLOCK_SIZE bsize = seg_skip ? sb_size : bsize_select;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
  } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // set a variance-based partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
  }
  assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
         sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
  set_cb_offsets(td->mb.cb_offset, 0, 0);

  // Initialize the flag to skip cdef to 1.
  if (sf->rt_sf.skip_cdef_sb) {
    const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
    // If 128x128 block is used, we need to set the flag for all 4 64x64 sub
    // "blocks".
    for (int r = 0; r < block64_in_sb; ++r) {
      for (int c = 0; c < block64_in_sb; ++c) {
        const int idx_in_sb =
            r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
        if (mi[idx_in_sb]) mi[idx_in_sb]->cdef_strength = 1;
      }
    }
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, nonrd_use_partition_time);
#endif
  av1_nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                          pc_root);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, nonrd_use_partition_time);
#endif
}

// This function initializes the stats for encode_rd_sb.
static inline void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                     const TileDataEnc *tile_data,
                                     SIMPLE_MOTION_DATA_TREE *sms_root,
                                     RD_STATS *rd_cost, int mi_row, int mi_col,
                                     int gather_tpl_data) {
  const AV1_COMMON *cm = &cpi->common;
  const TileInfo *tile_info = &tile_data->tile_info;
  MACROBLOCK *x = &td->mb;

  const SPEED_FEATURES *sf = &cpi->sf;
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_root,
                                             mi_row, mi_col);
  }

#if !CONFIG_REALTIME_ONLY
  if (!(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
        cpi->oxcf.gf_cfg.lag_in_frames == 0)) {
    init_ref_frame_space(cpi, td, mi_row, mi_col);
    x->sb_energy_level = 0;
    x->part_search_info.cnn_output_valid = 0;
    if (gather_tpl_data) {
      if (cm->delta_q_info.delta_q_present_flag) {
        const int num_planes = av1_num_planes(cm);
        const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
        setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
        av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
      }

      // TODO(jingning): revisit this function.
      if (cpi->oxcf.algo_cfg.enable_tpl_model && (0)) {
        adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
      }
    }
  }
#else
  (void)tile_info;
  (void)mi_row;
  (void)mi_col;
  (void)gather_tpl_data;
#endif

  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}

#if !CONFIG_REALTIME_ONLY
static void sb_qp_sweep_init_quantizers(AV1_COMP *cpi, ThreadData *td,
                                        const TileDataEnc *tile_data,
                                        SIMPLE_MOTION_DATA_TREE *sms_tree,
                                        RD_STATS *rd_cost, int mi_row,
                                        int mi_col, int delta_qp_ofs) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const TileInfo *tile_info = &tile_data->tile_info;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  assert(delta_q_info->delta_q_present_flag);
  const int delta_q_res = delta_q_info->delta_q_res;

  const SPEED_FEATURES *sf = &cpi->sf;
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_tree,
                                             mi_row, mi_col);
  }

  int current_qindex = x->rdmult_cur_qindex + delta_qp_ofs;

  MACROBLOCKD *const xd = &x->e_mbd;
  current_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;

  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);

  // keep track of any non-zero delta-q used
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // pre-set the delta lf for loop filter. Note that this value is set
    // before mi is assigned for each block in current superblock
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }

  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}

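// Sweeps superblock-level delta-qp offsets around the current rdmult delta
// qindex (a wider range for key frames), re-encoding the SB in SB_DRY_PASS
// mode for each candidate, and returns the delta qindex with the lowest RD
// cost; ties are broken toward the smaller absolute offset.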
static int sb_qp_sweep(AV1_COMP *const cpi, ThreadData *td,
                       TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
                       int mi_col, BLOCK_SIZE bsize,
                       SIMPLE_MOTION_DATA_TREE *sms_tree,
                       SB_FIRST_PASS_STATS *sb_org_stats) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  RD_STATS rdc_winner, cur_rdc;
  av1_invalid_rd_stats(&rdc_winner);

  int best_qindex = td->mb.rdmult_delta_qindex;
  const int start = cm->current_frame.frame_type == KEY_FRAME ? -20 : -12;
  const int end = cm->current_frame.frame_type == KEY_FRAME ? 20 : 12;
  const int step = cm->delta_q_info.delta_q_res;

  for (int sweep_qp_delta = start; sweep_qp_delta <= end;
       sweep_qp_delta += step) {
    sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_tree, &cur_rdc, mi_row,
                                mi_col, sweep_qp_delta);

    const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
    const int backup_current_qindex =
        cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

    av1_reset_mbmi(&cm->mi_params, bsize, mi_row, mi_col);
    av1_restore_sb_state(sb_org_stats, cpi, td, tile_data, mi_row, mi_col);
    cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex = backup_current_qindex;

    td->pc_root = av1_alloc_pc_tree_node(bsize);
    if (!td->pc_root)
      aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize,
                          &cur_rdc, cur_rdc, td->pc_root, sms_tree, NULL,
                          SB_DRY_PASS, NULL);

    if ((rdc_winner.rdcost > cur_rdc.rdcost) ||
        (abs(sweep_qp_delta) < abs(best_qindex - x->rdmult_delta_qindex) &&
         rdc_winner.rdcost == cur_rdc.rdcost)) {
      rdc_winner = cur_rdc;
      best_qindex = x->rdmult_delta_qindex + sweep_qp_delta;
    }
  }

  return best_qindex;
}
#endif  // !CONFIG_REALTIME_ONLY

/*!\brief Encode a superblock (RD-search-based)
 *
 * \ingroup partition_search
 * Conducts partition search for a superblock, based on rate-distortion costs,
 * from scratch or adjusting from a pre-calculated partition pattern.
 */
static inline void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, TokenExtra **tp,
                                const int mi_row, const int mi_col,
                                const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int num_planes = av1_num_planes(cm);
  int dummy_rate;
  int64_t dummy_dist;
  RD_STATS dummy_rdc;
  SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root;

#if CONFIG_REALTIME_ONLY
  (void)seg_skip;
#endif  // CONFIG_REALTIME_ONLY

  init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
                    1);

  // Encode the superblock
  if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // partition search starting from a variance-based partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_use_partition_time);
#endif
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_use_partition_time);
#endif
  }
#if !CONFIG_REALTIME_ONLY
  else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
    // partition search by adjusting a fixed-size partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    const BLOCK_SIZE bsize =
        seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
  } else {
    // The most exhaustive recursive partition search
    SuperBlockEnc *sb_enc = &x->sb_enc;
    // No stats for overlay frames. Exclude key frame.
    av1_get_tpl_stats_sb(cpi, sb_size, mi_row, mi_col, sb_enc);

    // Reset the tree for simple motion search data
    av1_reset_simple_motion_tree_partition(sms_root, sb_size);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_pick_partition_time);
#endif

    // Estimate the maximum square partition block size, which will be used
    // as the starting block size for partitioning the sb
    set_max_min_partition_size(sb_enc, cpi, x, sf, sb_size, mi_row, mi_col);

    // The superblock can be searched only once, or twice consecutively for
    // better quality. Note that the meaning of passes here is different from
    // the general concept of 1-pass/2-pass encoders.
    const int num_passes =
        cpi->oxcf.unit_test_cfg.sb_multipass_unit_test ? 2 : 1;

    if (cpi->oxcf.sb_qp_sweep &&
        !(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
          cpi->oxcf.gf_cfg.lag_in_frames == 0) &&
        cm->delta_q_info.delta_q_present_flag) {
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_stats_cache,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_stats_cache)));
      av1_backup_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                          mi_col);
      assert(x->rdmult_delta_qindex == x->delta_qindex);

      const int best_qp_diff =
          sb_qp_sweep(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, sms_root,
                      td->mb.sb_stats_cache) -
          x->rdmult_delta_qindex;

      sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_root, &dummy_rdc,
                                  mi_row, mi_col, best_qp_diff);

      const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
      const int backup_current_qindex =
          cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_restore_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                           mi_col);

      cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex =
          backup_current_qindex;
      aom_free(td->mb.sb_stats_cache);
      td->mb.sb_stats_cache = NULL;
    }
    if (num_passes == 1) {
#if CONFIG_PARTITION_SEARCH_ORDER
      if (cpi->ext_part_controller.ready && !frame_is_intra_only(cm)) {
        av1_reset_part_sf(&cpi->sf.part_sf);
        av1_reset_sf_for_ext_part(cpi);
        RD_STATS this_rdc;
        av1_rd_partition_search(cpi, td, tile_data, tp, sms_root, mi_row,
                                mi_col, sb_size, &this_rdc);
      } else {
        td->pc_root = av1_alloc_pc_tree_node(sb_size);
        if (!td->pc_root)
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                             "Failed to allocate PC_TREE");
        av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                              &dummy_rdc, dummy_rdc, td->pc_root, sms_root,
                              NULL, SB_SINGLE_PASS, NULL);
      }
#else
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_SINGLE_PASS, NULL);
#endif  // CONFIG_PARTITION_SEARCH_ORDER
    } else {
      // First pass
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_fp_stats,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_fp_stats)));
      av1_backup_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                          mi_col);
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_DRY_PASS, NULL);

      // Second pass
      init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
                        mi_col, 0);
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_reset_simple_motion_tree_partition(sms_root, sb_size);

      av1_restore_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                           mi_col);

      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_WET_PASS, NULL);
      aom_free(td->mb.sb_fp_stats);
      td->mb.sb_fp_stats = NULL;
    }

    // Reset to 0 so that it wouldn't be used elsewhere mistakenly.
    sb_enc->tpl_data_count = 0;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_pick_partition_time);
#endif
  }
#endif  // !CONFIG_REALTIME_ONLY

  // Update the inter rd model
  // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
  if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
      cm->tiles.cols == 1 && cm->tiles.rows == 1) {
    av1_inter_mode_data_fit(tile_data, x->rdmult);
  }
}

// Check whether the cost update levels for mode, coeff and dv symbols are set
// to tile level or off.
static inline int is_mode_coeff_dv_upd_freq_tile_or_off(
    const AV1_COMP *const cpi) {
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;

  return (inter_sf->coeff_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
          inter_sf->mode_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
          cpi->sf.intra_sf.dv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
}

// When row-mt is enabled and cost update frequencies are set to off/tile,
// processing of current SB can start even before processing of top-right SB
// is finished. This function checks if it is sufficient to wait for top SB
// to finish processing before current SB starts processing.
static inline int delay_wait_for_top_right_sb(const AV1_COMP *const cpi) {
  const MODE mode = cpi->oxcf.mode;
  if (mode == GOOD) return 0;

  if (mode == ALLINTRA)
    return is_mode_coeff_dv_upd_freq_tile_or_off(cpi);
  else if (mode == REALTIME)
    return (is_mode_coeff_dv_upd_freq_tile_or_off(cpi) &&
            cpi->sf.inter_sf.mv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
  else
    return 0;
}

/*!\brief Calculate source SAD at superblock level using 64x64 block source SAD
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
static inline uint64_t get_sb_source_sad(const AV1_COMP *cpi, int mi_row,
                                         int mi_col) {
  if (cpi->src_sad_blk_64x64 == NULL) return UINT64_MAX;

  const AV1_COMMON *const cm = &cpi->common;
  const int blk_64x64_in_mis = (cm->seq_params->sb_size == BLOCK_128X128)
                                   ? (cm->seq_params->mib_size >> 1)
                                   : cm->seq_params->mib_size;
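  // blk_64x64_in_mis is the span of a 64x64 block in MI units: half the
  // superblock's mib_size for 128x128 superblocks, the full mib_size for
  // 64x64 superblocks.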
  const int num_blk_64x64_cols =
      (cm->mi_params.mi_cols + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
  const int num_blk_64x64_rows =
      (cm->mi_params.mi_rows + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
  const int blk_64x64_col_index = mi_col / blk_64x64_in_mis;
  const int blk_64x64_row_index = mi_row / blk_64x64_in_mis;
  uint64_t curr_sb_sad = UINT64_MAX;
  // Avoid the border as sad_blk_64x64 may not be set for the border
  // in the scene detection.
  if ((blk_64x64_row_index >= num_blk_64x64_rows - 1) ||
      (blk_64x64_col_index >= num_blk_64x64_cols - 1)) {
    return curr_sb_sad;
  }
  const uint64_t *const src_sad_blk_64x64_data =
      &cpi->src_sad_blk_64x64[blk_64x64_col_index +
                              blk_64x64_row_index * num_blk_64x64_cols];
  if (cm->seq_params->sb_size == BLOCK_128X128) {
    // Calculate SB source SAD by accumulating source SAD of 64x64 blocks in
    // the superblock
    curr_sb_sad = src_sad_blk_64x64_data[0] + src_sad_blk_64x64_data[1] +
                  src_sad_blk_64x64_data[num_blk_64x64_cols] +
                  src_sad_blk_64x64_data[num_blk_64x64_cols + 1];
  } else if (cm->seq_params->sb_size == BLOCK_64X64) {
    curr_sb_sad = src_sad_blk_64x64_data[0];
  }
  return curr_sb_sad;
}

/*!\brief Determine whether grading content can be skipped based on sad stat
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
static inline bool is_calc_src_content_needed(AV1_COMP *cpi,
                                              MACROBLOCK *const x, int mi_row,
                                              int mi_col) {
  if (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
    return true;
  const uint64_t curr_sb_sad = get_sb_source_sad(cpi, mi_row, mi_col);
  if (curr_sb_sad == UINT64_MAX) return true;
  if (curr_sb_sad == 0) {
    x->content_state_sb.source_sad_nonrd = kZeroSad;
    return false;
  }
  AV1_COMMON *const cm = &cpi->common;
  bool do_calc_src_content = true;

  if (cpi->oxcf.speed < 9) return do_calc_src_content;

  // TODO(yunqing): Tune/validate the thresholds for 128x128 SB size.
  if (AOMMIN(cm->width, cm->height) < 360) {
    // Derive average 64x64 block source SAD from SB source SAD
    const uint64_t avg_64x64_blk_sad =
        (cm->seq_params->sb_size == BLOCK_128X128) ? ((curr_sb_sad + 2) >> 2)
                                                   : curr_sb_sad;
    // The thresholds are determined based on the kLowSad and kHighSad
    // thresholds and test results.
    uint64_t thresh_low = 15000;
    uint64_t thresh_high = 40000;

    if (cpi->sf.rt_sf.increase_source_sad_thresh) {
      thresh_low = thresh_low << 1;
      thresh_high = thresh_high << 1;
    }

    if (avg_64x64_blk_sad > thresh_low && avg_64x64_blk_sad < thresh_high) {
      do_calc_src_content = false;
      // Note: set x->content_state_sb.source_sad_rd as well if this is extended
      // to RTC rd path.
      x->content_state_sb.source_sad_nonrd = kMedSad;
    }
  }

  return do_calc_src_content;
}

/*!\brief Determine whether grading content is needed based on sf and frame stat
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
// TODO(any): consolidate sfs to make interface cleaner
static inline void grade_source_content_sb(AV1_COMP *cpi, MACROBLOCK *const x,
                                           TileDataEnc *tile_data, int mi_row,
                                           int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  if (cm->current_frame.frame_type == KEY_FRAME ||
      (cpi->ppi->use_svc &&
       cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
    assert(x->content_state_sb.source_sad_nonrd == kMedSad);
    assert(x->content_state_sb.source_sad_rd == kMedSad);
    return;
  }
  bool calc_src_content = false;

  if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
    if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0) {
      calc_src_content = is_calc_src_content_needed(cpi, x, mi_row, mi_col);
    } else {
      x->content_state_sb.source_sad_nonrd = kZeroSad;
    }
  } else if ((cpi->sf.rt_sf.var_part_based_on_qidx >= 1) &&
             (cm->width * cm->height <= 352 * 288)) {
    if (cpi->rc.frame_source_sad > 0)
      calc_src_content = true;
    else
      x->content_state_sb.source_sad_rd = kZeroSad;
  }
  if (calc_src_content)
    av1_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
}

/*!\brief Encode a superblock row by breaking it into superblocks
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 * Do partition and mode search for an sb row: one row of superblocks filling up
 * the width of the current tile.
 */
static inline void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
                                 TileDataEnc *tile_data, int mi_row,
                                 TokenExtra **tp) {
  AV1_COMMON *const cm = &cpi->common;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MultiThreadInfo *const mt_info = &cpi->mt_info;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
  AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
  bool row_mt_enabled = mt_info->row_mt_enabled;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int mib_size = cm->seq_params->mib_size;
  const int mib_size_log2 = cm->seq_params->mib_size_log2;
  const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
  const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, encode_sb_row_time);
#endif

  // Initialize the left context for the new SB row
  av1_zero_left_context(xd);

  // Reset the deltas for quantizer and loop filter at the beginning of every
  // tile (and of every SB row when row-mt is enabled)
  if (mi_row == tile_info->mi_row_start || row_mt_enabled) {
    if (cm->delta_q_info.delta_q_present_flag)
      xd->current_base_qindex = cm->quant_params.base_qindex;
    if (cm->delta_q_info.delta_lf_present_flag) {
      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
    }
  }

  reset_thresh_freq_fact(x);

  // Code each SB in the row
  for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
       mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
    // In realtime/allintra mode and when frequency of cost updates is off/tile,
    // wait for the top superblock to finish encoding. Otherwise, wait for the
    // top-right superblock to finish encoding.
    enc_row_mt->sync_read_ptr(
        row_mt_sync, sb_row, sb_col_in_tile - delay_wait_for_top_right_sb(cpi));

#if CONFIG_MULTITHREAD
    if (row_mt_enabled) {
      pthread_mutex_lock(enc_row_mt->mutex_);
      const bool row_mt_exit = enc_row_mt->row_mt_exit;
      pthread_mutex_unlock(enc_row_mt->mutex_);
      // Exit in case any worker has encountered an error.
      if (row_mt_exit) return;
    }
#endif

    const int update_cdf = tile_data->allow_update_cdf && row_mt_enabled;
    if (update_cdf && (tile_info->mi_row_start != mi_row)) {
      if ((tile_info->mi_col_start == mi_col)) {
        // restore frame context at the 1st column sb
        memcpy(xd->tile_ctx, x->row_ctx, sizeof(*xd->tile_ctx));
      } else {
        // update context
        int wt_left = AVG_CDF_WEIGHT_LEFT;
        int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
        if (tile_info->mi_col_end > (mi_col + mib_size))
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile,
                              wt_left, wt_tr);
        else
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
                              wt_left, wt_tr);
      }
    }

    // Update the rate cost tables for some symbols
    av1_set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);

    // Reset color coding related parameters
    av1_zero(x->color_sensitivity_sb);
    av1_zero(x->color_sensitivity_sb_g);
    av1_zero(x->color_sensitivity_sb_alt);
    av1_zero(x->color_sensitivity);
    x->content_state_sb.source_sad_nonrd = kMedSad;
    x->content_state_sb.source_sad_rd = kMedSad;
    x->content_state_sb.lighting_change = 0;
    x->content_state_sb.low_sumdiff = 0;
    x->force_zeromv_skip_for_sb = 0;
    x->sb_me_block = 0;
    x->sb_me_partition = 0;
    x->sb_me_mv.as_int = 0;
    x->sb_force_fixed_part = 1;
    x->color_palette_thresh = 64;
    x->nonrd_prune_ref_frame_search =
        cpi->sf.rt_sf.nonrd_prune_ref_frame_search;

    if (cpi->oxcf.mode == ALLINTRA) {
      x->intra_sb_rdmult_modifier = 128;
    }

    xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
    x->source_variance = UINT_MAX;
    td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);

    // Get segment id and skip flag
    const struct segmentation *const seg = &cm->seg;
    int seg_skip = 0;
    if (seg->enabled) {
      const uint8_t *const map =
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
      const uint8_t segment_id =
          map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
              : 0;
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
    }

    produce_gradients_for_sb(cpi, x, sb_size, mi_row, mi_col);

    init_src_var_info_of_4x4_sub_blocks(cpi, x->src_var_info_of_4x4_sub_blocks,
                                        sb_size);

    // Grade the temporal variation of the SB; the grade will be used to decide
    // the fast mode search strategy for coding blocks
1248 if (!seg_skip) grade_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
1249
1250 // encode the superblock
1251 if (use_nonrd_mode) {
1252 encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
1253 } else {
1254 encode_rd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
1255 }
1256
1257 // Update the top-right context in row_mt coding
1258 if (update_cdf && (tile_info->mi_row_end > (mi_row + mib_size))) {
1259 if (sb_cols_in_tile == 1)
1260 memcpy(x->row_ctx, xd->tile_ctx, sizeof(*xd->tile_ctx));
1261 else if (sb_col_in_tile >= 1)
1262 memcpy(x->row_ctx + sb_col_in_tile - 1, xd->tile_ctx,
1263 sizeof(*xd->tile_ctx));
1264 }
1265 enc_row_mt->sync_write_ptr(row_mt_sync, sb_row, sb_col_in_tile,
1266 sb_cols_in_tile);
1267 }
1268
1269 #if CONFIG_COLLECT_COMPONENT_TIMING
1270 end_timing(cpi, encode_sb_row_time);
1271 #endif
1272 }
1273
init_encode_frame_mb_context(AV1_COMP * cpi)1274 static inline void init_encode_frame_mb_context(AV1_COMP *cpi) {
1275 AV1_COMMON *const cm = &cpi->common;
1276 const int num_planes = av1_num_planes(cm);
1277 MACROBLOCK *const x = &cpi->td.mb;
1278 MACROBLOCKD *const xd = &x->e_mbd;
1279
1280 // Copy data over into macro block data structures.
1281 av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
1282 cm->seq_params->sb_size);
1283
1284 av1_setup_block_planes(xd, cm->seq_params->subsampling_x,
1285 cm->seq_params->subsampling_y, num_planes);
1286 }
1287
av1_alloc_tile_data(AV1_COMP * cpi)1288 void av1_alloc_tile_data(AV1_COMP *cpi) {
1289 AV1_COMMON *const cm = &cpi->common;
1290 AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt;
1291 const int tile_cols = cm->tiles.cols;
1292 const int tile_rows = cm->tiles.rows;
1293
1294 av1_row_mt_mem_dealloc(cpi);
1295
1296 aom_free(cpi->tile_data);
1297 cpi->allocated_tiles = 0;
1298 enc_row_mt->allocated_tile_cols = 0;
1299 enc_row_mt->allocated_tile_rows = 0;
1300
1301 CHECK_MEM_ERROR(
1302 cm, cpi->tile_data,
1303 aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));
1304
1305 cpi->allocated_tiles = tile_cols * tile_rows;
1306 enc_row_mt->allocated_tile_cols = tile_cols;
1307 enc_row_mt->allocated_tile_rows = tile_rows;
1308 for (int tile_row = 0; tile_row < tile_rows; ++tile_row) {
1309 for (int tile_col = 0; tile_col < tile_cols; ++tile_col) {
1310 const int tile_index = tile_row * tile_cols + tile_col;
1311 TileDataEnc *const this_tile = &cpi->tile_data[tile_index];
1312 av1_zero(this_tile->row_mt_sync);
1313 this_tile->row_ctx = NULL;
1314 }
1315 }
1316 }
1317
av1_init_tile_data(AV1_COMP * cpi)1318 void av1_init_tile_data(AV1_COMP *cpi) {
1319 AV1_COMMON *const cm = &cpi->common;
1320 const int num_planes = av1_num_planes(cm);
1321 const int tile_cols = cm->tiles.cols;
1322 const int tile_rows = cm->tiles.rows;
1323 int tile_col, tile_row;
1324 TokenInfo *const token_info = &cpi->token_info;
1325 TokenExtra *pre_tok = token_info->tile_tok[0][0];
1326 TokenList *tplist = token_info->tplist[0][0];
1327 unsigned int tile_tok = 0;
1328 int tplist_count = 0;
1329
1330 if (!is_stat_generation_stage(cpi) &&
1331 cm->features.allow_screen_content_tools) {
1332 // Number of tokens for which token info needs to be allocated.
1333 unsigned int tokens_required =
1334 get_token_alloc(cm->mi_params.mb_rows, cm->mi_params.mb_cols,
1335 MAX_SB_SIZE_LOG2, num_planes);
1336 // Allocate/reallocate memory for token related info if the number of tokens
1337 // required is more than the number of tokens already allocated. This could
1338 // occur in case of the following:
1339 // 1) If the memory is not yet allocated
1340 // 2) If the frame dimensions have changed
    const bool realloc_tokens = tokens_required > token_info->tokens_allocated;
    if (realloc_tokens) {
      free_token_info(token_info);
      alloc_token_info(cm, token_info, tokens_required);
      pre_tok = token_info->tile_tok[0][0];
      tplist = token_info->tplist[0][0];
    }
  }

  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
      TileDataEnc *const tile_data =
          &cpi->tile_data[tile_row * tile_cols + tile_col];
      TileInfo *const tile_info = &tile_data->tile_info;
      av1_tile_init(tile_info, cm, tile_row, tile_col);
      tile_data->firstpass_top_mv = kZeroMv;
      tile_data->abs_sum_level = 0;

      if (is_token_info_allocated(token_info)) {
        token_info->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
        pre_tok = token_info->tile_tok[tile_row][tile_col];
        tile_tok = allocated_tokens(
            tile_info, cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
            num_planes);
        token_info->tplist[tile_row][tile_col] = tplist + tplist_count;
        tplist = token_info->tplist[tile_row][tile_col];
        tplist_count = av1_get_sb_rows_in_tile(cm, tile_info);
      }
      tile_data->allow_update_cdf = !cm->tiles.large_scale;
      tile_data->allow_update_cdf = tile_data->allow_update_cdf &&
                                    !cm->features.disable_cdf_update &&
                                    !delay_wait_for_top_right_sb(cpi);
      tile_data->tctx = *cm->fc;
    }
  }
}

// Populate the start palette token info prior to encoding an SB row.
static inline void get_token_start(AV1_COMP *cpi, const TileInfo *tile_info,
                                   int tile_row, int tile_col, int mi_row,
                                   TokenExtra **tp) {
  const TokenInfo *token_info = &cpi->token_info;
  if (!is_token_info_allocated(token_info)) return;

  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  TokenList *const tplist = cpi->token_info.tplist[tile_row][tile_col];
  const int sb_row_in_tile =
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;

  get_start_tok(cpi, tile_row, tile_col, mi_row, tp,
                cm->seq_params->mib_size_log2 + MI_SIZE_LOG2, num_planes);
  assert(tplist != NULL);
  tplist[sb_row_in_tile].start = *tp;
}

// Populate the token count after encoding an SB row.
static inline void populate_token_count(AV1_COMP *cpi,
                                        const TileInfo *tile_info, int tile_row,
                                        int tile_col, int mi_row,
                                        TokenExtra *tok) {
  const TokenInfo *token_info = &cpi->token_info;
  if (!is_token_info_allocated(token_info)) return;

  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  TokenList *const tplist = token_info->tplist[tile_row][tile_col];
  const int sb_row_in_tile =
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
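  // Unit conversions below (descriptive note): mi units are 4x4 luma samples,
  // so ">> 2" turns an mi count into 16x16 macroblock units, and ">> 4" turns
  // a pixel count into macroblock units; the "+ 2" / "+ 8" terms round the
  // divisions.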
  const int tile_mb_cols =
      (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
  const int num_mb_rows_in_sb =
      ((1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
  tplist[sb_row_in_tile].count =
      (unsigned int)(tok - tplist[sb_row_in_tile].start);

  assert((unsigned int)(tok - tplist[sb_row_in_tile].start) <=
         get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
                         cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
                         num_planes));

  (void)num_planes;
  (void)tile_mb_cols;
  (void)num_mb_rows_in_sb;
}

/*!\brief Encode a superblock row
 *
 * \ingroup partition_search
 */
void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
                       int tile_col, int mi_row) {
  AV1_COMMON *const cm = &cpi->common;
  const int tile_cols = cm->tiles.cols;
  TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
  const TileInfo *const tile_info = &this_tile->tile_info;
  TokenExtra *tok = NULL;

  get_token_start(cpi, tile_info, tile_row, tile_col, mi_row, &tok);

  encode_sb_row(cpi, td, this_tile, mi_row, &tok);

  populate_token_count(cpi, tile_info, tile_row, tile_col, mi_row, tok);
}

/*!\brief Encode a tile
 *
 * \ingroup partition_search
 */
void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
                     int tile_col) {
  AV1_COMMON *const cm = &cpi->common;
  TileDataEnc *const this_tile =
      &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
  const TileInfo *const tile_info = &this_tile->tile_info;

  if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile);

  av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
                         tile_info->mi_col_end, tile_row);
  av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
                         &td->mb.e_mbd);

#if !CONFIG_REALTIME_ONLY
  if (cpi->oxcf.intra_mode_cfg.enable_cfl_intra)
    cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);
#endif

  if (td->mb.txfm_search_info.mb_rd_record != NULL) {
    av1_crc32c_calculator_init(
        &td->mb.txfm_search_info.mb_rd_record->crc_calculator);
  }

  for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
       mi_row += cm->seq_params->mib_size) {
    av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
  }
  this_tile->abs_sum_level = td->abs_sum_level;
}

/*!\brief Break one frame into tiles and encode the tiles
 *
 * \ingroup partition_search
 *
 * \param[in]    cpi    Top-level encoder structure
 */
static inline void encode_tiles(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int tile_cols = cm->tiles.cols;
  const int tile_rows = cm->tiles.rows;
  int tile_col, tile_row;

  MACROBLOCK *const mb = &cpi->td.mb;
  assert(IMPLIES(cpi->tile_data == NULL,
                 cpi->allocated_tiles < tile_cols * tile_rows));
  if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi);

  av1_init_tile_data(cpi);
  av1_alloc_mb_data(cpi, mb);

  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
      TileDataEnc *const this_tile =
          &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
      cpi->td.intrabc_used = 0;
      cpi->td.deltaq_used = 0;
      cpi->td.abs_sum_level = 0;
      cpi->td.rd_counts.seg_tmp_pred_cost[0] = 0;
      cpi->td.rd_counts.seg_tmp_pred_cost[1] = 0;
      cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
      cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
      av1_init_rtc_counters(&cpi->td.mb);
      cpi->td.mb.palette_pixels = 0;
      av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
      if (!frame_is_intra_only(&cpi->common))
        av1_accumulate_rtc_counters(cpi, &cpi->td.mb);
      cpi->palette_pixel_num += cpi->td.mb.palette_pixels;
      cpi->intrabc_used |= cpi->td.intrabc_used;
      cpi->deltaq_used |= cpi->td.deltaq_used;
    }
  }

  av1_dealloc_mb_data(mb, av1_num_planes(cm));
}

// Set the relative distance of a reference frame w.r.t. current frame
static inline void set_rel_frame_dist(
    const AV1_COMMON *const cm, RefFrameDistanceInfo *const ref_frame_dist_info,
    const int ref_frame_flags) {
  MV_REFERENCE_FRAME ref_frame;
  int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX;
  ref_frame_dist_info->nearest_past_ref = NONE_FRAME;
  ref_frame_dist_info->nearest_future_ref = NONE_FRAME;
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = 0;
    if (ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
      int dist = av1_encoder_get_relative_dist(
          cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME],
          cm->current_frame.display_order_hint);
      ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = dist;
      // Get the nearest ref_frame in the past
      if (abs(dist) < min_past_dist && dist < 0) {
        ref_frame_dist_info->nearest_past_ref = ref_frame;
        min_past_dist = abs(dist);
      }
      // Get the nearest ref_frame in the future
      if (dist < min_future_dist && dist > 0) {
        ref_frame_dist_info->nearest_future_ref = ref_frame;
        min_future_dist = dist;
      }
    }
  }
}

static inline int refs_are_one_sided(const AV1_COMMON *cm) {
  assert(!frame_is_intra_only(cm));

  int one_sided_refs = 1;
  const int cur_display_order_hint = cm->current_frame.display_order_hint;
  for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
    const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
    if (buf == NULL) continue;
    if (av1_encoder_get_relative_dist(buf->display_order_hint,
                                      cur_display_order_hint) > 0) {
      one_sided_refs = 0;  // bwd reference
      break;
    }
  }
  return one_sided_refs;
}

static inline void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
                                             int ref_order_hint[2]) {
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
  ref_order_hint[0] = ref_order_hint[1] = 0;
  if (!skip_mode_info->skip_mode_allowed) return;

  const RefCntBuffer *const buf_0 =
      get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
  const RefCntBuffer *const buf_1 =
      get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
  assert(buf_0 != NULL && buf_1 != NULL);

  ref_order_hint[0] = buf_0->order_hint;
  ref_order_hint[1] = buf_1->order_hint;
}

static int check_skip_mode_enabled(AV1_COMP *const cpi) {
  AV1_COMMON *const cm = &cpi->common;

  av1_setup_skip_mode_allowed(cm);
  if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;

  // Turn off skip mode if the temporal distances of the reference pair to the
  // current frame differ by more than 1 frame.
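  // For example (illustrative): with order hints cur = 5, ref0 = 4 (past) and
  // ref1 = 6 (future), cur_to_ref0 = 1 and cur_to_ref1 = |5 - 6| = 1, so the
  // difference is 0 and skip mode remains allowed.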
  const int cur_offset = (int)cm->current_frame.order_hint;
  int ref_offset[2];
  get_skip_mode_ref_offsets(cm, ref_offset);
  const int cur_to_ref0 = get_relative_dist(&cm->seq_params->order_hint_info,
                                            cur_offset, ref_offset[0]);
  const int cur_to_ref1 = abs(get_relative_dist(
      &cm->seq_params->order_hint_info, cur_offset, ref_offset[1]));
  if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;

  // High Latency: Turn off skip mode if all refs are fwd.
  if (cpi->all_one_sided_refs && cpi->oxcf.gf_cfg.lag_in_frames > 0) return 0;

  const int ref_frame[2] = {
    cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME,
    cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME
  };
  if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[0]]) ||
      !(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[1]]))
    return 0;

  return 1;
}

static inline void set_default_interp_skip_flags(
    const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
  const int num_planes = av1_num_planes(cm);
  interp_search_flags->default_interp_skip_flags =
      (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
                        : INTERP_SKIP_LUMA_SKIP_CHROMA;
}

static inline void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
       cpi->sf.inter_sf.disable_onesided_comp) &&
      cpi->all_one_sided_refs) {
    // Disable all compound references
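    // (In the extended reference index space, single references occupy
    // [0, REF_FRAMES) and compound pairs occupy
    // [REF_FRAMES, MODE_CTX_REF_FRAMES), so the mask below sets exactly the
    // compound-pair bits.)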
    cpi->prune_ref_frame_mask = (1 << MODE_CTX_REF_FRAMES) - (1 << REF_FRAMES);
  } else if (!cpi->sf.rt_sf.use_nonrd_pick_mode &&
             cpi->sf.inter_sf.selective_ref_frame >= 2) {
    AV1_COMMON *const cm = &cpi->common;
    const int cur_frame_display_order_hint =
        cm->current_frame.display_order_hint;
    unsigned int *ref_display_order_hint =
        cm->cur_frame->ref_display_order_hint;
    const int arf2_dist = av1_encoder_get_relative_dist(
        ref_display_order_hint[ALTREF2_FRAME - LAST_FRAME],
        cur_frame_display_order_hint);
    const int bwd_dist = av1_encoder_get_relative_dist(
        ref_display_order_hint[BWDREF_FRAME - LAST_FRAME],
        cur_frame_display_order_hint);

    for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
      MV_REFERENCE_FRAME rf[2];
      av1_set_ref_frame(rf, ref_idx);
      if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) ||
          !(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]])) {
        continue;
      }

      if (!cpi->all_one_sided_refs) {
        int ref_dist[2];
        for (int i = 0; i < 2; ++i) {
          ref_dist[i] = av1_encoder_get_relative_dist(
              ref_display_order_hint[rf[i] - LAST_FRAME],
              cur_frame_display_order_hint);
        }

        // One-sided compound is used only when all reference frames are
        // one-sided.
        if ((ref_dist[0] > 0) == (ref_dist[1] > 0)) {
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
        }
      }

      if (cpi->sf.inter_sf.selective_ref_frame >= 4 &&
          (rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME) &&
          (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
        // Check if both ALTREF2_FRAME and BWDREF_FRAME are future references.
        if (arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist) {
          // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
          // reference to the current frame than ALTREF2_FRAME
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
        }
      }
    }
  }
}

static int allow_deltaq_mode(AV1_COMP *cpi) {
#if !CONFIG_REALTIME_ONLY
  AV1_COMMON *const cm = &cpi->common;
  BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  int sbs_wide = mi_size_wide[sb_size];
  int sbs_high = mi_size_high[sb_size];

  int64_t delta_rdcost = 0;
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sbs_high) {
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += sbs_wide) {
      int64_t this_delta_rdcost = 0;
      av1_get_q_for_deltaq_objective(cpi, &cpi->td, &this_delta_rdcost, sb_size,
                                     mi_row, mi_col);
      delta_rdcost += this_delta_rdcost;
    }
  }
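  // Allow objective delta-q only when the accumulated rd cost delta over all
  // superblocks is negative, i.e. delta q is estimated to be a net win.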
  return delta_rdcost < 0;
#else
  (void)cpi;
  return 1;
#endif  // !CONFIG_REALTIME_ONLY
}

#define FORCE_ZMV_SKIP_128X128_BLK_DIFF 10000
#define FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF 4

// Populates block level thresholds for force zeromv-skip decision
static void populate_thresh_to_force_zeromv_skip(AV1_COMP *cpi) {
  if (cpi->sf.rt_sf.part_early_exit_zeromv == 0) return;

  // The threshold for forcing the zeromv-skip decision is set as follows:
  // for 128x128 blocks the threshold is 10000 (per-pixel threshold 0.6103),
  // and for 64x64 blocks it is 5000 (per-pixel threshold 1.221), allowing
  // slightly higher error for smaller blocks.
  //
  //   per-pixel threshold of 64x64 block          ( area of 64x64 block  )
  //   ---------------------------------- = sqrt  ( --------------------- )
  //   per-pixel threshold of 128x128 block        ( area of 128x128 block)
  //
  //                                      = sqrt(1 / 4) = 1 / 2
  //
  // Thus, the per-pixel thresholds for blocks of size 32x32, 16x16, ... can be
  // chosen as 2.442, 4.884, .... As the per-pixel error tends to be higher for
  // small blocks, the value is clipped to 4.
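  // Worked example (illustrative): for BLOCK_64X64, num_block_pix = 4096 and
  // num_128x128_pix = 16384, so thresh = 10000 * sqrt(4096 / 16384) + 0.5 =
  // 5000, i.e. about 5000 / 4096 ~= 1.221 per pixel, matching the figures
  // above.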
  const unsigned int thresh_exit_128x128_part = FORCE_ZMV_SKIP_128X128_BLK_DIFF;
  const int num_128x128_pix =
      block_size_wide[BLOCK_128X128] * block_size_high[BLOCK_128X128];

  for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
    const int num_block_pix = block_size_wide[bsize] * block_size_high[bsize];

    // Calculate the threshold for zeromv-skip decision based on area of the
    // partition
    unsigned int thresh_exit_part_blk =
        (unsigned int)(thresh_exit_128x128_part *
                           sqrt((double)num_block_pix / num_128x128_pix) +
                       0.5);
    thresh_exit_part_blk = AOMMIN(
        thresh_exit_part_blk,
        (unsigned int)(FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF * num_block_pix));
    cpi->zeromv_skip_thresh_exit_part[bsize] = thresh_exit_part_blk;
  }
}

static void free_block_hash_buffers(uint32_t *block_hash_values[2][2],
                                    int8_t *is_block_same[2][3]) {
  for (int k = 0; k < 2; ++k) {
    for (int j = 0; j < 2; ++j) {
      aom_free(block_hash_values[k][j]);
    }

    for (int j = 0; j < 3; ++j) {
      aom_free(is_block_same[k][j]);
    }
  }
}

/*!\brief Encoder setup (only for the current frame), encoding, and
 * reconstruction for a single frame
 *
 * \ingroup high_level_algo
 */
static inline void encode_frame_internal(AV1_COMP *cpi) {
  ThreadData *const td = &cpi->td;
  MACROBLOCK *const x = &td->mb;
  AV1_COMMON *const cm = &cpi->common;
  CommonModeInfoParams *const mi_params = &cm->mi_params;
  FeatureFlags *const features = &cm->features;
  MACROBLOCKD *const xd = &x->e_mbd;
  RD_COUNTS *const rdc = &cpi->td.rd_counts;
#if CONFIG_FPMT_TEST
  FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
  FrameProbInfo *const temp_frame_probs_simulation =
      &cpi->ppi->temp_frame_probs_simulation;
#endif
  FrameProbInfo *const frame_probs = &cpi->ppi->frame_probs;
  IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
  MultiThreadInfo *const mt_info = &cpi->mt_info;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
  const DELTAQ_MODE deltaq_mode = oxcf->q_cfg.deltaq_mode;
  int i;

  if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
    mi_params->setup_mi(mi_params);
  }

  set_mi_offsets(mi_params, xd, 0, 0);

  av1_zero(*td->counts);
  av1_zero(rdc->tx_type_used);
  av1_zero(rdc->obmc_used);
  av1_zero(rdc->warped_used);
  av1_zero(rdc->seg_tmp_pred_cost);

  // Reset the flag.
  cpi->intrabc_used = 0;
  // Need to disable intrabc when superres is selected
  if (av1_superres_scaled(cm)) {
    features->allow_intrabc = 0;
  }

  features->allow_intrabc &= (oxcf->kf_cfg.enable_intrabc);

  if (features->allow_warped_motion &&
      cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    int warped_probability =
#if CONFIG_FPMT_TEST
        cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE
            ? temp_frame_probs->warped_probs[update_type]
            :
#endif  // CONFIG_FPMT_TEST
            frame_probs->warped_probs[update_type];
    if (warped_probability < cpi->sf.inter_sf.prune_warped_prob_thresh)
      features->allow_warped_motion = 0;
  }

  int hash_table_created = 0;
  if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
      !cpi->sf.rt_sf.use_nonrd_pick_mode) {
    // TODO(any): move this outside of the recoding loop to avoid recalculating
    // the hash table.
    // add to hash table
    const int pic_width = cpi->source->y_crop_width;
    const int pic_height = cpi->source->y_crop_height;
    uint32_t *block_hash_values[2][2] = { { NULL } };
    int8_t *is_block_same[2][3] = { { NULL } };
    int k, j;
    bool error = false;

    for (k = 0; k < 2 && !error; ++k) {
      for (j = 0; j < 2; ++j) {
        block_hash_values[k][j] = (uint32_t *)aom_malloc(
            sizeof(*block_hash_values[0][0]) * pic_width * pic_height);
        if (!block_hash_values[k][j]) {
          error = true;
          break;
        }
      }

      for (j = 0; j < 3 && !error; ++j) {
        is_block_same[k][j] = (int8_t *)aom_malloc(
            sizeof(*is_block_same[0][0]) * pic_width * pic_height);
        if (!is_block_same[k][j]) error = true;
      }
    }

    av1_hash_table_init(intrabc_hash_info);
    if (error ||
        !av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table)) {
      free_block_hash_buffers(block_hash_values, is_block_same);
      aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                         "Error allocating intrabc_hash_table and buffers");
    }
    hash_table_created = 1;
    av1_generate_block_2x2_hash_value(intrabc_hash_info, cpi->source,
                                      block_hash_values[0], is_block_same[0]);
    // Hash data generated for screen contents is used for intraBC ME
    const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
    const int max_sb_size =
        (1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2));
    int src_idx = 0;
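    // The two buffer sets are used in ping-pong fashion: each iteration reads
    // the hashes for size-N blocks from src_idx and writes the hashes for
    // size-2N blocks to dst_idx, then the roles swap.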
    for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
      const int dst_idx = !src_idx;
      av1_generate_block_hash_value(
          intrabc_hash_info, cpi->source, size, block_hash_values[src_idx],
          block_hash_values[dst_idx], is_block_same[src_idx],
          is_block_same[dst_idx]);
      if (size >= min_alloc_size) {
        if (!av1_add_to_hash_map_by_row_with_precal_data(
                &intrabc_hash_info->intrabc_hash_table,
                block_hash_values[dst_idx], is_block_same[dst_idx][2],
                pic_width, pic_height, size)) {
          error = true;
          break;
        }
      }
    }

    free_block_hash_buffers(block_hash_values, is_block_same);

    if (error) {
      aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                         "Error adding data to intrabc_hash_table");
    }
  }

  const CommonQuantParams *quant_params = &cm->quant_params;
  for (i = 0; i < MAX_SEGMENTS; ++i) {
    const int qindex =
        cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
                        : quant_params->base_qindex;
    xd->lossless[i] =
        qindex == 0 && quant_params->y_dc_delta_q == 0 &&
        quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
        quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
    if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
    xd->qindex[i] = qindex;
    if (xd->lossless[i]) {
      cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
    } else {
      cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
    }
  }
  features->coded_lossless = is_coded_lossless(cm, xd);
  features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm);

  // Fix delta q resolution for the moment
  cm->delta_q_info.delta_q_res = 0;
  if (cpi->use_ducky_encode) {
    cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_DUCKY_ENCODE;
  } else if (cpi->oxcf.q_cfg.aq_mode != CYCLIC_REFRESH_AQ) {
    if (deltaq_mode == DELTA_Q_OBJECTIVE)
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE;
    else if (deltaq_mode == DELTA_Q_PERCEPTUAL)
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
    else if (deltaq_mode == DELTA_Q_PERCEPTUAL_AI)
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
    else if (deltaq_mode == DELTA_Q_USER_RATING_BASED)
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
    else if (deltaq_mode == DELTA_Q_HDR)
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
    // Set delta_q_present_flag before it is used for the first time
    cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
    cm->delta_q_info.delta_q_present_flag = deltaq_mode != NO_DELTA_Q;

    // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q
    // is used for ineligible frames. That effectively will turn off row_mt
    // usage. Note objective delta_q and tpl eligible frames are only altref
    // frames currently.
    const GF_GROUP *gf_group = &cpi->ppi->gf_group;
    if (cm->delta_q_info.delta_q_present_flag) {
      if (deltaq_mode == DELTA_Q_OBJECTIVE &&
          gf_group->update_type[cpi->gf_frame_index] == LF_UPDATE)
        cm->delta_q_info.delta_q_present_flag = 0;

      if (deltaq_mode == DELTA_Q_OBJECTIVE &&
          cm->delta_q_info.delta_q_present_flag) {
        cm->delta_q_info.delta_q_present_flag &= allow_deltaq_mode(cpi);
      }
    }

    // Reset delta_q_used flag
    cpi->deltaq_used = 0;

    cm->delta_q_info.delta_lf_present_flag =
        cm->delta_q_info.delta_q_present_flag &&
        oxcf->tool_cfg.enable_deltalf_mode;
    cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;

    // update delta_q_present_flag and delta_lf_present_flag based on
    // base_qindex
    cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
    cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
  } else if (cpi->cyclic_refresh->apply_cyclic_refresh ||
             cpi->svc.number_temporal_layers == 1) {
    cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
    cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
  }
  cpi->rc.cnt_zeromv = 0;

  av1_frame_init_quantizer(cpi);
  init_encode_frame_mb_context(cpi);
  set_default_interp_skip_flags(cm, &cpi->interp_search_flags);

  if (cm->prev_frame && cm->prev_frame->seg.enabled)
    cm->last_frame_seg_map = cm->prev_frame->seg_map;
  else
    cm->last_frame_seg_map = NULL;
  if (features->allow_intrabc || features->coded_lossless) {
    av1_set_default_ref_deltas(cm->lf.ref_deltas);
    av1_set_default_mode_deltas(cm->lf.mode_deltas);
  } else if (cm->prev_frame) {
    memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
    memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
  }
  memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
  memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);

  cpi->all_one_sided_refs =
      frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm);

  cpi->prune_ref_frame_mask = 0;
  // Figure out which ref frames can be skipped at frame level.
  setup_prune_ref_frame_mask(cpi);

  x->txfm_search_info.txb_split_count = 0;
#if CONFIG_SPEED_STATS
  x->txfm_search_info.tx_search_count = 0;
#endif  // CONFIG_SPEED_STATS

#if !CONFIG_REALTIME_ONLY
#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, av1_compute_global_motion_time);
#endif
  av1_compute_global_motion_facade(cpi);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, av1_compute_global_motion_time);
#endif
#endif  // !CONFIG_REALTIME_ONLY

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, av1_setup_motion_field_time);
#endif
  av1_calculate_ref_frame_side(cm);
  if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, av1_setup_motion_field_time);
#endif

  cm->current_frame.skip_mode_info.skip_mode_flag =
      check_skip_mode_enabled(cpi);

  // Initialization of skip mode cost depends on the value of
  // 'skip_mode_flag'. This initialization happens in the function
  // av1_fill_mode_rates(), which is in turn called in
  // av1_initialize_rd_consts(). Thus, av1_initialize_rd_consts()
  // has to be called after 'skip_mode_flag' is initialized.
  av1_initialize_rd_consts(cpi);
  av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
  populate_thresh_to_force_zeromv_skip(cpi);

  enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
  enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
  mt_info->row_mt_enabled = 0;
  mt_info->pack_bs_mt_enabled = AOMMIN(mt_info->num_mod_workers[MOD_PACK_BS],
                                       cm->tiles.cols * cm->tiles.rows) > 1;
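  // (Pack-bitstream multi-threading is enabled only when both multiple pack
  // workers and multiple tiles are available.)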

  if (oxcf->row_mt && (mt_info->num_workers > 1)) {
    mt_info->row_mt_enabled = 1;
    enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
    enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
    av1_encode_tiles_row_mt(cpi);
  } else {
    if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1) {
      av1_encode_tiles_mt(cpi);
    } else {
      // Preallocate the pc_tree for realtime coding to reduce the cost of
      // memory allocation.
      const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
      if (use_nonrd_mode) {
        td->pc_root = av1_alloc_pc_tree_node(cm->seq_params->sb_size);
        if (!td->pc_root)
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                             "Failed to allocate PC_TREE");
      } else {
        td->pc_root = NULL;
      }

      encode_tiles(cpi);
      av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
                                 cpi->sf.part_sf.partition_search_type);
      td->pc_root = NULL;
    }
  }

  // If intrabc is allowed but never selected, reset the allow_intrabc flag.
  if (features->allow_intrabc && !cpi->intrabc_used) {
    features->allow_intrabc = 0;
  }
  if (features->allow_intrabc) {
    cm->delta_q_info.delta_lf_present_flag = 0;
  }

  if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
    cm->delta_q_info.delta_q_present_flag = 0;
  }

  // Set the transform size appropriately before bitstream creation
  const MODE_EVAL_TYPE eval_type =
      cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch
          ? WINNER_MODE_EVAL
          : DEFAULT_EVAL;
  const TX_SIZE_SEARCH_METHOD tx_search_type =
      cpi->winner_mode_params.tx_size_search_methods[eval_type];
  assert(oxcf->txfm_cfg.enable_tx64 || tx_search_type != USE_LARGESTALL);
  features->tx_mode = select_tx_mode(cm, tx_search_type);

  // Retain the frame level probability update conditions for parallel frames.
  // These conditions will be consumed during postencode stage to update the
  // probability.
  if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
    cpi->do_update_frame_probs_txtype[cpi->num_frame_recode] =
        cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats;
    cpi->do_update_frame_probs_obmc[cpi->num_frame_recode] =
        (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
         cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX);
    cpi->do_update_frame_probs_warp[cpi->num_frame_recode] =
        (features->allow_warped_motion &&
         cpi->sf.inter_sf.prune_warped_prob_thresh > 0);
    cpi->do_update_frame_probs_interpfilter[cpi->num_frame_recode] =
        (cm->current_frame.frame_type != KEY_FRAME &&
         cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
         features->interp_filter == SWITCHABLE);
  }

  if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats ||
      ((cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh !=
        INT_MAX) &&
       (cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh != 0))) {
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    for (i = 0; i < TX_SIZES_ALL; i++) {
      int sum = 0;
      int j;
      int left = MAX_TX_TYPE_PROB;
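      // 'left' tracks the probability mass not yet assigned; the rounding
      // remainder is folded into index 0 below so that the probabilities for
      // each tx size always sum to MAX_TX_TYPE_PROB.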

      for (j = 0; j < TX_TYPES; j++)
        sum += cpi->td.rd_counts.tx_type_used[i][j];

      for (j = TX_TYPES - 1; j >= 0; j--) {
        int update_txtype_frameprobs = 1;
        const int new_prob =
            sum ? (int)((int64_t)MAX_TX_TYPE_PROB *
                        cpi->td.rd_counts.tx_type_used[i][j] / sum)
                : (j ? 0 : MAX_TX_TYPE_PROB);
#if CONFIG_FPMT_TEST
        if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
          if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
              0) {
            int prob =
                (temp_frame_probs_simulation->tx_type_probs[update_type][i][j] +
                 new_prob) >>
                1;
            left -= prob;
            if (j == 0) prob += left;
            temp_frame_probs_simulation->tx_type_probs[update_type][i][j] =
                prob;
            // Copy temp_frame_probs_simulation to temp_frame_probs
            for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
                 update_type_idx++) {
              temp_frame_probs->tx_type_probs[update_type_idx][i][j] =
                  temp_frame_probs_simulation
                      ->tx_type_probs[update_type_idx][i][j];
            }
          }
          update_txtype_frameprobs = 0;
        }
#endif  // CONFIG_FPMT_TEST
        // Track the frame probabilities of parallel encode frames to update
        // during postencode stage.
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
          update_txtype_frameprobs = 0;
          cpi->frame_new_probs[cpi->num_frame_recode]
              .tx_type_probs[update_type][i][j] = new_prob;
        }
        if (update_txtype_frameprobs) {
          int prob =
              (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
          left -= prob;
          if (j == 0) prob += left;
          frame_probs->tx_type_probs[update_type][i][j] = prob;
        }
      }
    }
  }

  if (cm->seg.enabled) {
    cm->seg.temporal_update = 1;
    if (rdc->seg_tmp_pred_cost[0] < rdc->seg_tmp_pred_cost[1])
      cm->seg.temporal_update = 0;
  }

  if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
      cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);

    for (i = 0; i < BLOCK_SIZES_ALL; i++) {
      int sum = 0;
      int update_obmc_frameprobs = 1;
      for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];

      const int new_prob =
          sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
#if CONFIG_FPMT_TEST
      if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
          temp_frame_probs_simulation->obmc_probs[update_type][i] =
              (temp_frame_probs_simulation->obmc_probs[update_type][i] +
               new_prob) >>
              1;
          // Copy temp_frame_probs_simulation to temp_frame_probs
          for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
               update_type_idx++) {
            temp_frame_probs->obmc_probs[update_type_idx][i] =
                temp_frame_probs_simulation->obmc_probs[update_type_idx][i];
          }
        }
        update_obmc_frameprobs = 0;
      }
#endif  // CONFIG_FPMT_TEST
      // Track the frame probabilities of parallel encode frames to update
      // during postencode stage.
      if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
        update_obmc_frameprobs = 0;
        cpi->frame_new_probs[cpi->num_frame_recode].obmc_probs[update_type][i] =
            new_prob;
      }
      if (update_obmc_frameprobs) {
        frame_probs->obmc_probs[update_type][i] =
            (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
      }
    }
  }

  if (features->allow_warped_motion &&
      cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    int update_warp_frameprobs = 1;
    int sum = 0;
    for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
    const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
#if CONFIG_FPMT_TEST
    if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
      if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
        temp_frame_probs_simulation->warped_probs[update_type] =
            (temp_frame_probs_simulation->warped_probs[update_type] +
             new_prob) >>
            1;
        // Copy temp_frame_probs_simulation to temp_frame_probs
        for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
             update_type_idx++) {
          temp_frame_probs->warped_probs[update_type_idx] =
              temp_frame_probs_simulation->warped_probs[update_type_idx];
        }
      }
      update_warp_frameprobs = 0;
    }
#endif  // CONFIG_FPMT_TEST
    // Track the frame probabilities of parallel encode frames to update
    // during postencode stage.
    if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
      update_warp_frameprobs = 0;
      cpi->frame_new_probs[cpi->num_frame_recode].warped_probs[update_type] =
          new_prob;
    }
    if (update_warp_frameprobs) {
      frame_probs->warped_probs[update_type] =
          (frame_probs->warped_probs[update_type] + new_prob) >> 1;
    }
  }

  if (cm->current_frame.frame_type != KEY_FRAME &&
      cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
      features->interp_filter == SWITCHABLE) {
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);

    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
      int sum = 0;
      int j;
      int left = 1536;
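      // 1536 is the total probability budget shared by the switchable
      // filters; as with the tx-type probabilities above, the rounding
      // remainder is folded into index 0 so the entries sum to 1536.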

      for (j = 0; j < SWITCHABLE_FILTERS; j++) {
        sum += cpi->td.counts->switchable_interp[i][j];
      }

      for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
        int update_interpfilter_frameprobs = 1;
        const int new_prob =
            sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
                : (j ? 0 : 1536);
#if CONFIG_FPMT_TEST
        if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
          if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
              0) {
            int prob = (temp_frame_probs_simulation
                            ->switchable_interp_probs[update_type][i][j] +
                        new_prob) >>
                       1;
            left -= prob;
            if (j == 0) prob += left;
            temp_frame_probs_simulation
                ->switchable_interp_probs[update_type][i][j] = prob;
            // Copy temp_frame_probs_simulation to temp_frame_probs
            for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
                 update_type_idx++) {
              temp_frame_probs->switchable_interp_probs[update_type_idx][i][j] =
                  temp_frame_probs_simulation
                      ->switchable_interp_probs[update_type_idx][i][j];
            }
          }
          update_interpfilter_frameprobs = 0;
        }
#endif  // CONFIG_FPMT_TEST
        // Track the frame probabilities of parallel encode frames to update
        // during postencode stage.
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
          update_interpfilter_frameprobs = 0;
          cpi->frame_new_probs[cpi->num_frame_recode]
              .switchable_interp_probs[update_type][i][j] = new_prob;
        }
        if (update_interpfilter_frameprobs) {
          int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
                      new_prob) >>
                     1;
          left -= prob;
          if (j == 0) prob += left;
          frame_probs->switchable_interp_probs[update_type][i][j] = prob;
        }
      }
    }
  }
  if (hash_table_created) {
    av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
  }
}

/*!\brief Setup reference frame buffers and encode a frame
 *
 * \ingroup high_level_algo
 * \callgraph
 * \callergraph
 *
 * \param[in]    cpi    Top-level encoder structure
 */
void av1_encode_frame(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  CurrentFrame *const current_frame = &cm->current_frame;
  FeatureFlags *const features = &cm->features;
  RD_COUNTS *const rdc = &cpi->td.rd_counts;
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
  // Indicates whether or not to use a default reduced set for ext-tx
  // rather than the potential full set of 16 transforms
  features->reduced_tx_set_used = oxcf->txfm_cfg.reduced_tx_type_set;

  // Make sure segment_id is no larger than last_active_segid.
  if (cm->seg.enabled && cm->seg.update_map) {
    const int mi_rows = cm->mi_params.mi_rows;
    const int mi_cols = cm->mi_params.mi_cols;
    const int last_active_segid = cm->seg.last_active_segid;
    uint8_t *map = cpi->enc_seg.map;
    for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
      for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
        map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
      }
      map += mi_cols;
    }
  }

  av1_setup_frame_buf_refs(cm);
  enforce_max_ref_frames(cpi, &cpi->ref_frame_flags,
                         cm->cur_frame->ref_display_order_hint,
                         cm->current_frame.display_order_hint);
  set_rel_frame_dist(&cpi->common, &cpi->ref_frame_dist_info,
                     cpi->ref_frame_flags);
  av1_setup_frame_sign_bias(cm);

  // If global motion is enabled, then every buffer which is used as either
  // a source or a ref frame should have an image pyramid allocated.
  // Check here so that issues can be caught early in debug mode
#if !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
  if (cpi->alloc_pyramid) {
    assert(cpi->source->y_pyramid);
    for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
      const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
      if (buf != NULL) {
        assert(buf->buf.y_pyramid);
      }
    }
  }
#endif  // !defined(NDEBUG) && !CONFIG_REALTIME_ONLY

#if CONFIG_MISMATCH_DEBUG
  mismatch_reset_frame(av1_num_planes(cm));
#endif

  rdc->newmv_or_intra_blocks = 0;
  cpi->palette_pixel_num = 0;

  if (cpi->sf.hl_sf.frame_parameter_update ||
      cpi->sf.rt_sf.use_comp_ref_nonrd) {
    if (frame_is_intra_only(cm))
      current_frame->reference_mode = SINGLE_REFERENCE;
    else
      current_frame->reference_mode = REFERENCE_MODE_SELECT;

    features->interp_filter = SWITCHABLE;
    if (cm->tiles.large_scale) features->interp_filter = EIGHTTAP_REGULAR;

    features->switchable_motion_mode = is_switchable_motion_mode_allowed(
        features->allow_warped_motion, oxcf->motion_mode_cfg.enable_obmc);

    rdc->compound_ref_used_flag = 0;
    rdc->skip_mode_used_flag = 0;

    encode_frame_internal(cpi);

    if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
      // Use a flag that includes 4x4 blocks
      if (rdc->compound_ref_used_flag == 0) {
        current_frame->reference_mode = SINGLE_REFERENCE;
#if CONFIG_ENTROPY_STATS
        av1_zero(cpi->td.counts->comp_inter);
#endif  // CONFIG_ENTROPY_STATS
      }
    }
    // Re-check the skip mode status, as the reference mode may have been
    // changed.
    SkipModeInfo *const skip_mode_info = &current_frame->skip_mode_info;
    if (frame_is_intra_only(cm) ||
        current_frame->reference_mode == SINGLE_REFERENCE) {
      skip_mode_info->skip_mode_allowed = 0;
      skip_mode_info->skip_mode_flag = 0;
    }
    if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
      skip_mode_info->skip_mode_flag = 0;

    if (!cm->tiles.large_scale) {
      if (features->tx_mode == TX_MODE_SELECT &&
          cpi->td.mb.txfm_search_info.txb_split_count == 0)
        features->tx_mode = TX_MODE_LARGEST;
    }
  } else {
    // This is needed if real-time speed setting is changed on the fly
    // from one using compound prediction to one using single reference.
    if (current_frame->reference_mode == REFERENCE_MODE_SELECT)
      current_frame->reference_mode = SINGLE_REFERENCE;
    encode_frame_internal(cpi);
  }
}
