1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <limits.h>
13
14 #include "av1/common/reconintra.h"
15
16 #include "av1/encoder/encoder.h"
17 #include "av1/encoder/speed_features.h"
18 #include "av1/encoder/rdopt.h"
19
20 #include "aom_dsp/aom_dsp_common.h"
21
// Max speed setting for mesh motion method. The mesh pattern tables in this
// file are declared with MAX_MESH_SPEED + 1 rows.
#define MAX_MESH_SPEED 5
// Max speed setting for tx domain evaluation
#define MAX_TX_DOMAIN_EVAL_SPEED 5
25 static MESH_PATTERN
26 good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
27 { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
28 { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
29 { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
30 { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
31 { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
32 { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
33 };
34
35 // TODO([email protected]): These settings are pretty relaxed, tune them for
36 // each speed setting
37 static MESH_PATTERN intrabc_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
38 { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
39 { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
40 { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
41 { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
42 { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
43 { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
44 };
45
46 // Threshold values to be used for pruning the txfm_domain_distortion
47 // based on block MSE
48 // Index 0: Default mode evaluation, Winner mode processing is not
49 // applicable (Eg : IntraBc). Index 1: Mode evaluation.
50 // Index 2: Winner mode evaluation. Index 1 and 2 are applicable when
51 // enable_winner_mode_for_use_tx_domain_dist speed feature is ON
// TODO(any): Experiment with the threshold logic based on variance metric
53 static unsigned int tx_domain_dist_thresholds[4][MODE_EVAL_TYPES] = {
54 { UINT_MAX, UINT_MAX, UINT_MAX },
55 { 22026, 22026, 22026 },
56 { 1377, 1377, 1377 },
57 { 0, 0, 0 }
58 };
59
// Number of different levels of aggressiveness in using transform domain
// distortion during the R-D evaluation based on the speed feature
// tx_domain_dist_level. Used as the row count of tx_domain_dist_types[][].
#define TX_DOMAIN_DIST_LEVELS 4
64
65 // Transform domain distortion type to be used for default, mode and winner mode
66 // evaluation Index 0: Default mode evaluation, Winner mode processing is not
67 // applicable (Eg : IntraBc). Index 1: Mode evaluation. Index 2: Winner mode
68 // evaluation. Index 1 and 2 are applicable when
69 // enable_winner_mode_for_use_tx_domain_dist speed feature is ON
70 static unsigned int
71 tx_domain_dist_types[TX_DOMAIN_DIST_LEVELS][MODE_EVAL_TYPES] = {
72 { 0, 2, 0 }, { 1, 2, 0 }, { 2, 2, 0 }, { 2, 2, 2 }
73 };
74
75 // Threshold values to be used for disabling coeff RD-optimization
76 // based on block MSE / qstep^2.
// TODO(any): Experiment with the threshold logic based on variance metric.
// Each table entry holds a pair of threshold values: index 0 is the dist
// threshold and index 1 is the satd threshold.
79 // For each row, the indices are as follows.
80 // Index 0: Default mode evaluation, Winner mode processing is not applicable
81 // (Eg : IntraBc)
82 // Index 1: Mode evaluation.
83 // Index 2: Winner mode evaluation.
84 // Index 1 and 2 are applicable when enable_winner_mode_for_coeff_opt speed
85 // feature is ON
// There are 9 levels with increasing speed, mapping to vertical indices.
87 static unsigned int coeff_opt_thresholds[9][MODE_EVAL_TYPES][2] = {
88 { { UINT_MAX, UINT_MAX }, { UINT_MAX, UINT_MAX }, { UINT_MAX, UINT_MAX } },
89 { { 3200, UINT_MAX }, { 250, UINT_MAX }, { UINT_MAX, UINT_MAX } },
90 { { 1728, UINT_MAX }, { 142, UINT_MAX }, { UINT_MAX, UINT_MAX } },
91 { { 864, UINT_MAX }, { 142, UINT_MAX }, { UINT_MAX, UINT_MAX } },
92 { { 432, UINT_MAX }, { 86, UINT_MAX }, { UINT_MAX, UINT_MAX } },
93 { { 864, 97 }, { 142, 16 }, { UINT_MAX, UINT_MAX } },
94 { { 432, 97 }, { 86, 16 }, { UINT_MAX, UINT_MAX } },
95 { { 216, 25 }, { 86, 10 }, { UINT_MAX, UINT_MAX } },
96 { { 216, 25 }, { 0, 10 }, { UINT_MAX, UINT_MAX } }
97 };
98
// Transform size search method to be used for default, mode and winner mode
// evaluation.
100 // Index 0: Default mode evaluation, Winner mode processing is not applicable
101 // (Eg : IntraBc) Index 1: Mode evaluation. Index 2: Winner mode evaluation.
102 // Index 1 and 2 are applicable when enable_winner_mode_for_tx_size_srch speed
103 // feature is ON
104 static TX_SIZE_SEARCH_METHOD tx_size_search_methods[4][MODE_EVAL_TYPES] = {
105 { USE_FULL_RD, USE_LARGESTALL, USE_FULL_RD },
106 { USE_FAST_RD, USE_LARGESTALL, USE_FULL_RD },
107 { USE_LARGESTALL, USE_LARGESTALL, USE_FULL_RD },
108 { USE_LARGESTALL, USE_LARGESTALL, USE_LARGESTALL }
109 };
110
111 // Predict transform skip levels to be used for default, mode and winner mode
112 // evaluation. Index 0: Default mode evaluation, Winner mode processing is not
113 // applicable. Index 1: Mode evaluation, Index 2: Winner mode evaluation
114 // Values indicate the aggressiveness of skip flag prediction.
115 // 0 : no early skip prediction
116 // 1 : conservative early skip prediction using DCT_DCT
117 // 2 : early skip prediction based on SSE
118 static unsigned int predict_skip_levels[3][MODE_EVAL_TYPES] = { { 0, 0, 0 },
119 { 1, 1, 1 },
120 { 1, 2, 1 } };
121
122 // Predict skip or DC block level used during transform type search. It is
123 // indexed using the following:
124 // First index : Speed feature 'dc_blk_pred_level' (0 to 3)
125 // Second index : Mode evaluation type (DEFAULT_EVAL, MODE_EVAL and
126 // WINNER_MODE_EVAL).
127 //
128 // The values of predict_dc_levels[][] indicate the aggressiveness of predicting
129 // a block as transform skip or DC only.
130 // Type 0 : No skip block or DC only block prediction
131 // Type 1 : Prediction of skip block based on residual mean and variance
132 // Type 2 : Prediction of skip block or DC only block based on residual mean and
133 // variance
134 static unsigned int predict_dc_levels[4][MODE_EVAL_TYPES] = {
135 { 0, 0, 0 }, { 1, 1, 0 }, { 2, 2, 0 }, { 2, 2, 2 }
136 };
137
138 #if !CONFIG_FPMT_TEST
139 // This table holds the maximum number of reference frames for global motion.
140 // The table is indexed as per the speed feature 'gm_search_type'.
141 // 0 : All reference frames are allowed.
142 // 1 : All reference frames except L2 and L3 are allowed.
143 // 2 : All reference frames except L2, L3 and ARF2 are allowed.
144 // 3 : No reference frame is allowed.
145 static int gm_available_reference_frames[GM_DISABLE_SEARCH + 1] = {
146 INTER_REFS_PER_FRAME, INTER_REFS_PER_FRAME - 2, INTER_REFS_PER_FRAME - 3, 0
147 };
148 #endif
149
150 // Qindex threshold levels used for selecting full-pel motion search.
// ms_qindex_thresh[i][j][k] indicates the qindex boundary value for the 'k'th
// qindex band for resolution index 'j' for aggressiveness level 'i'.
153 // Aggressiveness increases from i = 0 to 2.
154 // j = 0: lower than 720p resolution, j = 1: 720p or larger resolution.
155 // Currently invoked only for speed 0, 1 and 2.
156 static int ms_qindex_thresh[3][2][2] = { { { 200, 70 }, { MAXQ, 200 } },
157 { { 170, 50 }, { MAXQ, 200 } },
158 { { 170, 40 }, { 200, 40 } } };
159
160 // Full-pel search methods for aggressive search based on qindex.
161 // Index 0 is for resolutions lower than 720p, index 1 for 720p or larger
162 // resolutions. Currently invoked only for speed 1 and 2.
163 static SEARCH_METHODS motion_search_method[2] = { CLAMPED_DIAMOND, DIAMOND };
164
165 // Intra only frames, golden frames (except alt ref overlays) and
166 // alt ref frames tend to be coded at a higher than ambient quality
static int frame_is_boosted(const AV1_COMP *cpi) {
  // A frame is reported as boosted exactly when frame_is_kf_gf_arf() says so.
  const int boosted = frame_is_kf_gf_arf(cpi);
  return boosted;
}
170
171 // Set transform rd gate level for all transform search cases.
set_txfm_rd_gate_level(int txfm_rd_gate_level[TX_SEARCH_CASES],int level)172 static inline void set_txfm_rd_gate_level(
173 int txfm_rd_gate_level[TX_SEARCH_CASES], int level) {
174 assert(level <= MAX_TX_RD_GATE_LEVEL);
175 for (int idx = 0; idx < TX_SEARCH_CASES; idx++)
176 txfm_rd_gate_level[idx] = level;
177 }
178
// Sets the speed features of `sf` that depend on the frame size.
//
// The resolution class is derived from the smaller of the frame width and
// height. Settings are layered: the baseline is always applied, then every
// tier whose minimum speed is <= `speed` is applied in increasing order, so a
// later tier overwrites values written by an earlier one. The function
// returns as soon as the next tier's minimum speed is not reached.
static void set_allintra_speed_feature_framesize_dependent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const int min_dim = AOMMIN(cm->width, cm->height);
  const int is_480p_or_larger = min_dim >= 480;
  const int is_720p_or_larger = min_dim >= 720;
  const int is_1080p_or_larger = min_dim >= 1080;
  const int is_4k_or_larger = min_dim >= 2160;
  const bool use_hbd = cpi->oxcf.use_highbitdepth;

  // ---- Baseline (all speeds) ----
  if (!is_480p_or_larger) {
    sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
    sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
    if (use_hbd) sf->tx_sf.prune_tx_size_level = 1;
  } else {
    sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128;
    sf->part_sf.auto_max_partition_based_on_simple_motion =
        is_720p_or_larger ? ADAPT_PRED : RELAXED_PRED;
  }

  if (is_4k_or_larger) sf->part_sf.default_min_partition_size = BLOCK_8X8;

  if (is_720p_or_larger) {
    // TODO([email protected]): make this speed feature adaptive based on
    // current block's vertical texture instead of hardcoded with resolution
    sf->mv_sf.use_downsampled_sad = 2;
  } else {
    // TODO([email protected]): train models for 720P and above.
    sf->part_sf.ml_partition_search_breakout_thresh[0] = 200;  // BLOCK_8X8
    sf->part_sf.ml_partition_search_breakout_thresh[1] = 250;  // BLOCK_16X16
    sf->part_sf.ml_partition_search_breakout_thresh[2] = 300;  // BLOCK_32X32
    sf->part_sf.ml_partition_search_breakout_thresh[3] = 500;  // BLOCK_64X64
    sf->part_sf.ml_partition_search_breakout_thresh[4] = -1;  // BLOCK_128X128
    sf->part_sf.ml_early_term_after_part_split_level = 1;
  }

  // ---- Speed >= 1 ----
  if (speed < 1) return;

  sf->part_sf.use_square_partition_only_threshold =
      is_720p_or_larger ? BLOCK_128X128
                        : (is_480p_or_larger ? BLOCK_64X64 : BLOCK_32X32);

  if (!is_720p_or_larger) {
    sf->part_sf.ml_partition_search_breakout_thresh[0] = 200;  // BLOCK_8X8
    sf->part_sf.ml_partition_search_breakout_thresh[1] = 250;  // BLOCK_16X16
    sf->part_sf.ml_partition_search_breakout_thresh[2] = 300;  // BLOCK_32X32
    sf->part_sf.ml_partition_search_breakout_thresh[3] = 300;  // BLOCK_64X64
    sf->part_sf.ml_partition_search_breakout_thresh[4] = -1;  // BLOCK_128X128
  }
  sf->part_sf.ml_early_term_after_part_split_level = 2;

  // ---- Speed >= 2 ----
  if (speed < 2) return;

  // Everything below 720p uses BLOCK_32X32 here.
  sf->part_sf.use_square_partition_only_threshold =
      is_720p_or_larger ? BLOCK_64X64 : BLOCK_32X32;

  sf->part_sf.partition_search_breakout_dist_thr =
      is_720p_or_larger ? (1 << 24) : (1 << 22);
  sf->part_sf.partition_search_breakout_rate_thr =
      is_720p_or_larger ? 120 : 100;

  if (is_480p_or_larger) {
    sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 1;
  }
  if (use_hbd) sf->tx_sf.prune_tx_size_level = is_480p_or_larger ? 2 : 3;

  // ---- Speed >= 3 ----
  if (speed < 3) return;

  sf->part_sf.ml_early_term_after_part_split_level = 0;

  if (!is_720p_or_larger) sf->part_sf.max_intra_bsize = BLOCK_32X32;
  sf->part_sf.partition_search_breakout_dist_thr =
      is_720p_or_larger ? (1 << 25) : (1 << 23);
  sf->part_sf.partition_search_breakout_rate_thr =
      is_720p_or_larger ? 200 : 120;
  if (use_hbd) sf->tx_sf.prune_tx_size_level = 3;

  // ---- Speed >= 4 ----
  if (speed < 4) return;

  sf->part_sf.partition_search_breakout_dist_thr =
      is_720p_or_larger ? (1 << 26) : (1 << 24);

  if (is_480p_or_larger) {
    sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 2;
  }

  // ---- Speed >= 6 (speed 5 adds nothing frame-size dependent) ----
  if (speed < 6) return;

  if (is_720p_or_larger) {
    sf->part_sf.auto_max_partition_based_on_simple_motion = NOT_IN_USE;
  } else if (is_480p_or_larger) {
    sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
  }

  if (is_1080p_or_larger) sf->part_sf.default_min_partition_size = BLOCK_8X8;

  sf->part_sf.use_square_partition_only_threshold = BLOCK_16X16;

  // ---- Speed >= 7 ----
  // TODO(kyslov): add more speed features to control speed/quality

  // ---- Speed >= 8 ----
  if (speed < 8) return;

  if (!is_480p_or_larger) sf->rt_sf.nonrd_check_partition_merge_mode = 2;
  if (is_720p_or_larger) sf->rt_sf.force_large_partition_blocks_intra = 1;

  // ---- Speed >= 9 ----
  if (speed < 9) return;

  // TODO(kyslov): add more speed features to control speed/quality
  if (!is_4k_or_larger) {
    // In av1_select_sb_size(), superblock size is set to 64x64 only for
    // resolutions less than 4k in speed>=9, to improve the multithread
    // performance. If cost update levels are set to INTERNAL_COST_UPD_OFF
    // for resolutions >= 4k, the SB size setting can be modified for these
    // resolutions as well.
    sf->inter_sf.coeff_cost_upd_level = INTERNAL_COST_UPD_OFF;
    sf->inter_sf.mode_cost_upd_level = INTERNAL_COST_UPD_OFF;
  }
}
330
// Sets the speed features of `sf` that do not depend on the frame size.
//
// A baseline is written unconditionally, then each `if (speed >= N)` block
// whose condition holds runs in source order. The blocks are cumulative, so
// an assignment in a higher-speed block overwrites the value written to the
// same field by a lower-speed block.
//
// cpi   : encoder instance; read for screen-content flags, bit depth and
//         frame content type.
// sf    : speed feature structure that is updated in place.
// speed : speed setting compared against the tier thresholds below.
static void set_allintra_speed_features_framesize_independent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const int allow_screen_content_tools =
      cm->features.allow_screen_content_tools;
  const int use_hbd = cpi->oxcf.use_highbitdepth;

  // Baseline settings, applied at every speed.
  sf->part_sf.less_rectangular_check_level = 1;
  sf->part_sf.ml_prune_partition = 1;
  sf->part_sf.prune_ext_partition_types_search_level = 1;
  sf->part_sf.prune_part4_search = 2;
  sf->part_sf.simple_motion_search_prune_rect = 1;
  sf->part_sf.ml_predict_breakout_level = use_hbd ? 1 : 3;
  sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
  sf->part_sf.use_best_rd_for_pruning = 1;

  sf->intra_sf.intra_pruning_with_hog = 1;
  sf->intra_sf.prune_luma_palette_size_search_level = 1;
  sf->intra_sf.dv_cost_upd_level = INTERNAL_COST_UPD_OFF;
  sf->intra_sf.early_term_chroma_palette_size_search = 1;

  sf->tx_sf.adaptive_txb_search_level = 1;
  sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.model_based_prune_tx_search_level = 1;
  sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;

  sf->rt_sf.use_nonrd_pick_mode = 0;
  sf->rt_sf.use_real_time_ref_set = 0;

  // Graphics/animation or screen content gets the smaller threshold value.
  if (cpi->twopass_frame.fr_content_type == FC_GRAPHICS_ANIMATION ||
      cpi->use_screen_content_tools) {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 20);
  } else {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 25);
  }

  sf->rd_sf.perform_coeff_opt = 1;
  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_DUAL;

  if (speed >= 1) {
    sf->part_sf.intra_cnn_based_part_prune_level =
        allow_screen_content_tools ? 0 : 2;
    sf->part_sf.simple_motion_search_early_term_none = 1;
    // TODO(Venkat): Clean-up frame type dependency for
    // simple_motion_search_split in partition search function and set the
    // speed feature accordingly
    sf->part_sf.simple_motion_search_split = allow_screen_content_tools ? 1 : 2;
    sf->part_sf.ml_predict_breakout_level = use_hbd ? 2 : 3;
    sf->part_sf.reuse_best_prediction_for_part_ab = 1;

    // Doubles whichever threshold the baseline selected above.
    sf->mv_sf.exhaustive_searches_thresh <<= 1;

    sf->intra_sf.prune_palette_search_level = 1;
    sf->intra_sf.prune_luma_palette_size_search_level = 2;
    sf->intra_sf.top_intra_model_count_allowed = 3;

    sf->tx_sf.adaptive_txb_search_level = 2;
    sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
    sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
    // Resets the baseline value of 1.
    sf->tx_sf.model_based_prune_tx_search_level = 0;
    sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
    sf->tx_sf.tx_type_search.skip_tx_search = 1;

    sf->rd_sf.perform_coeff_opt = 2;
    sf->rd_sf.tx_domain_dist_level = 1;
    sf->rd_sf.tx_domain_dist_thres_level = 1;

    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL1;
    sf->lpf_sf.dual_sgr_penalty_level = 1;
    sf->lpf_sf.enable_sgr_ep_pruning = 1;
  }

  if (speed >= 2) {
    sf->mv_sf.auto_mv_step_size = 1;

    sf->intra_sf.disable_smooth_intra = 1;
    sf->intra_sf.intra_pruning_with_hog = 2;
    sf->intra_sf.prune_filter_intra_level = 1;

    sf->rd_sf.perform_coeff_opt = 3;

    sf->lpf_sf.prune_wiener_based_on_src_var = 1;
    sf->lpf_sf.prune_sgr_based_on_wiener = 1;
  }

  if (speed >= 3) {
    sf->hl_sf.high_precision_mv_usage = CURRENT_Q;
    sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;

    sf->part_sf.less_rectangular_check_level = 2;
    sf->part_sf.simple_motion_search_prune_agg = SIMPLE_AGG_LVL1;
    sf->part_sf.prune_ext_part_using_split_info = 1;

    sf->mv_sf.full_pixel_search_level = 1;
    sf->mv_sf.search_method = DIAMOND;

    // TODO([email protected]): the thresholds chosen for intra hog are
    // inherited directly from luma hog with some minor tweaking. Eventually we
    // should run this with a bayesian optimizer to find the Pareto frontier.
    sf->intra_sf.chroma_intra_pruning_with_hog = 2;
    sf->intra_sf.intra_pruning_with_hog = 3;
    sf->intra_sf.prune_palette_search_level = 2;

    // Same value as the one already written in the speed >= 1 block.
    sf->tx_sf.adaptive_txb_search_level = 2;
    sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
    sf->tx_sf.use_rd_based_breakout_for_intra_tx_search = true;

    // TODO(any): evaluate if these lpf features can be moved to speed 2.
    // For screen content, "prune_sgr_based_on_wiener = 2" cause large quality
    // loss.
    sf->lpf_sf.prune_sgr_based_on_wiener = allow_screen_content_tools ? 1 : 2;
    sf->lpf_sf.disable_loop_restoration_chroma = 0;
    sf->lpf_sf.reduce_wiener_window_size = 1;
    sf->lpf_sf.prune_wiener_based_on_src_var = 2;
  }

  if (speed >= 4) {
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;

    sf->part_sf.simple_motion_search_prune_agg = SIMPLE_AGG_LVL2;
    sf->part_sf.simple_motion_search_reduce_search_steps = 4;
    sf->part_sf.prune_ext_part_using_split_info = 2;
    sf->part_sf.early_term_after_none_split = 1;
    // From here on the level no longer depends on use_hbd.
    sf->part_sf.ml_predict_breakout_level = 3;

    // Setting this also causes chroma_intra_pruning_with_hog to be zeroed at
    // the end of this function.
    sf->intra_sf.prune_chroma_modes_using_luma_winner = 1;

    sf->mv_sf.simple_motion_subpel_force_stop = HALF_PEL;

    sf->tpl_sf.prune_starting_mv = 2;
    sf->tpl_sf.subpel_force_stop = HALF_PEL;
    sf->tpl_sf.search_method = FAST_BIGDIA;

    sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
    sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
    sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 1;

    sf->rd_sf.perform_coeff_opt = 5;
    sf->rd_sf.tx_domain_dist_thres_level = 3;

    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_FULL_IMAGE_NON_DUAL;
    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL3;

    sf->mv_sf.reduce_search_range = 1;

    sf->winner_mode_sf.enable_winner_mode_for_coeff_opt = 1;
    sf->winner_mode_sf.enable_winner_mode_for_use_tx_domain_dist = 1;
    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_DEFAULT;
    sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 1;
  }

  if (speed >= 5) {
    sf->part_sf.simple_motion_search_prune_agg = SIMPLE_AGG_LVL3;
    sf->part_sf.ext_partition_eval_thresh =
        allow_screen_content_tools ? BLOCK_8X8 : BLOCK_16X16;
    // Screen content moves from level 0 (speed >= 1) to level 1 here.
    sf->part_sf.intra_cnn_based_part_prune_level =
        allow_screen_content_tools ? 1 : 2;

    sf->intra_sf.chroma_intra_pruning_with_hog = 3;

    sf->lpf_sf.use_coarse_filter_level_search = 0;
    // Disable Wiener and Self-guided Loop restoration filters.
    sf->lpf_sf.disable_wiener_filter = true;
    sf->lpf_sf.disable_sgr_filter = true;

    sf->mv_sf.prune_mesh_search = PRUNE_MESH_SEARCH_LVL_2;

    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_FAST;
  }

  if (speed >= 6) {
    sf->intra_sf.prune_smooth_intra_mode_for_chroma = 1;
    sf->intra_sf.prune_filter_intra_level = 2;
    sf->intra_sf.chroma_intra_pruning_with_hog = 4;
    sf->intra_sf.intra_pruning_with_hog = 4;
    sf->intra_sf.cfl_search_range = 1;
    sf->intra_sf.top_intra_model_count_allowed = 2;
    sf->intra_sf.adapt_top_model_rd_count_using_neighbors = 1;
    sf->intra_sf.prune_luma_odd_delta_angles_in_intra = 1;

    sf->part_sf.prune_rectangular_split_based_on_qidx =
        allow_screen_content_tools ? 0 : 2;
    sf->part_sf.prune_rect_part_using_4x4_var_deviation = true;
    sf->part_sf.prune_rect_part_using_none_pred_mode = true;
    sf->part_sf.prune_sub_8x8_partition_level =
        allow_screen_content_tools ? 0 : 1;
    sf->part_sf.prune_part4_search = 3;
    // TODO(jingning): This might not be a good trade off if the
    // target image quality is very low.
    sf->part_sf.default_max_partition_size = BLOCK_32X32;

    sf->mv_sf.use_bsize_dependent_search_method = 1;

    sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 3;
    // Resets the value of 1 written in the speed >= 4 block.
    sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 0;
    sf->tx_sf.prune_intra_tx_depths_using_nn = true;

    sf->rd_sf.perform_coeff_opt = 6;
    sf->rd_sf.tx_domain_dist_level = 3;

    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;

    // Overrides MULTI_WINNER_MODE_FAST from the speed >= 5 block.
    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;
    sf->winner_mode_sf.prune_winner_mode_eval_level = 1;
    sf->winner_mode_sf.dc_blk_pred_level = 1;
  }
  // The following should make all-intra mode speed 7 approximately equal
  // to real-time speed 6,
  // all-intra speed 8 close to real-time speed 7, and all-intra speed 9
  // close to real-time speed 8
  if (speed >= 7) {
    sf->part_sf.default_min_partition_size = BLOCK_8X8;
    sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
    sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
    // OR-ed in, so flags that are already set are preserved.
    sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
    sf->rt_sf.var_part_split_threshold_shift = 7;
  }

  if (speed >= 8) {
    sf->rt_sf.hybrid_intra_pickmode = 1;
    // Overrides the baseline value of 0.
    sf->rt_sf.use_nonrd_pick_mode = 1;
    sf->rt_sf.nonrd_check_partition_merge_mode = 1;
    sf->rt_sf.var_part_split_threshold_shift = 8;
    // Set mask for intra modes.
    for (int i = 0; i < BLOCK_SIZES; ++i)
      if (i >= BLOCK_32X32)
        sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
      else
        // Use DC, H, V intra mode for block sizes < 32X32.
        sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
  }

  if (speed >= 9) {
    sf->inter_sf.coeff_cost_upd_level = INTERNAL_COST_UPD_SBROW;
    sf->inter_sf.mode_cost_upd_level = INTERNAL_COST_UPD_SBROW;

    // These two reset the values written in the speed >= 8 block.
    sf->rt_sf.nonrd_check_partition_merge_mode = 0;
    sf->rt_sf.hybrid_intra_pickmode = 0;
    sf->rt_sf.var_part_split_threshold_shift = 9;
    sf->rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var = true;
    sf->rt_sf.prune_h_pred_using_best_mode_so_far = true;
    sf->rt_sf.enable_intra_mode_pruning_using_neighbors = true;
    sf->rt_sf.prune_intra_mode_using_best_sad_so_far = true;
  }

  // As the speed feature prune_chroma_modes_using_luma_winner already
  // constrains the number of chroma directional mode evaluations to a maximum
  // of 1, the HOG computation and the associated pruning logic does not seem to
  // help speed-up the chroma mode evaluations. Hence disable the speed feature
  // chroma_intra_pruning_with_hog when prune_chroma_modes_using_luma_winner is
  // enabled.
  if (sf->intra_sf.prune_chroma_modes_using_luma_winner)
    sf->intra_sf.chroma_intra_pruning_with_hog = 0;
}
589
set_good_speed_feature_framesize_dependent(const AV1_COMP * const cpi,SPEED_FEATURES * const sf,int speed)590 static void set_good_speed_feature_framesize_dependent(
591 const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
592 const AV1_COMMON *const cm = &cpi->common;
593 const int is_480p_or_lesser = AOMMIN(cm->width, cm->height) <= 480;
594 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480;
595 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
596 const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080;
597 const int is_4k_or_larger = AOMMIN(cm->width, cm->height) >= 2160;
598 const bool use_hbd = cpi->oxcf.use_highbitdepth;
599 // Speed features applicable for temporal filtering and tpl modules may be
600 // changed based on frame type at places where the sf is applied (Example :
601 // use_downsampled_sad). This is because temporal filtering and tpl modules
602 // are called before this function (except for the first key frame).
603 // TODO([email protected]): For the speed features applicable to temporal
604 // filtering and tpl modules, modify the sf initialization appropriately
605 // before calling the modules.
606 const int boosted = frame_is_boosted(cpi);
607 const int is_boosted_arf2_bwd_type =
608 boosted ||
609 cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;
610 const int is_lf_frame =
611 cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == LF_UPDATE;
612 const int allow_screen_content_tools =
613 cm->features.allow_screen_content_tools;
614
615 if (is_480p_or_larger) {
616 sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128;
617 if (is_720p_or_larger)
618 sf->part_sf.auto_max_partition_based_on_simple_motion = ADAPT_PRED;
619 else
620 sf->part_sf.auto_max_partition_based_on_simple_motion = RELAXED_PRED;
621 } else {
622 sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
623 sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
624 if (use_hbd) sf->tx_sf.prune_tx_size_level = 1;
625 }
626
627 if (is_4k_or_larger) {
628 sf->part_sf.default_min_partition_size = BLOCK_8X8;
629 }
630
631 // TODO([email protected]): train models for 720P and above.
632 if (!is_720p_or_larger) {
633 sf->part_sf.ml_partition_search_breakout_thresh[0] = 200; // BLOCK_8X8
634 sf->part_sf.ml_partition_search_breakout_thresh[1] = 250; // BLOCK_16X16
635 sf->part_sf.ml_partition_search_breakout_thresh[2] = 300; // BLOCK_32X32
636 sf->part_sf.ml_partition_search_breakout_thresh[3] = 500; // BLOCK_64X64
637 sf->part_sf.ml_partition_search_breakout_thresh[4] = -1; // BLOCK_128X128
638 sf->part_sf.ml_early_term_after_part_split_level = 1;
639 }
640
641 if (is_720p_or_larger) {
642 // TODO([email protected]): make this speed feature adaptive based on
643 // current block's vertical texture instead of hardcoded with resolution
644 sf->mv_sf.use_downsampled_sad = 2;
645 }
646
647 if (!is_720p_or_larger) {
648 const RateControlCfg *const rc_cfg = &cpi->oxcf.rc_cfg;
649 const int rate_tolerance =
650 AOMMIN(rc_cfg->under_shoot_pct, rc_cfg->over_shoot_pct);
651 sf->hl_sf.recode_tolerance = 25 + (rate_tolerance >> 2);
652 }
653
654 if (speed >= 1) {
655 if (is_480p_or_lesser) sf->inter_sf.skip_newmv_in_drl = 1;
656
657 if (is_720p_or_larger) {
658 sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128;
659 } else if (is_480p_or_larger) {
660 sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
661 } else {
662 sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
663 }
664
665 if (!is_720p_or_larger) {
666 sf->part_sf.ml_partition_search_breakout_thresh[0] = 200; // BLOCK_8X8
667 sf->part_sf.ml_partition_search_breakout_thresh[1] = 250; // BLOCK_16X16
668 sf->part_sf.ml_partition_search_breakout_thresh[2] = 300; // BLOCK_32X32
669 sf->part_sf.ml_partition_search_breakout_thresh[3] = 300; // BLOCK_64X64
670 sf->part_sf.ml_partition_search_breakout_thresh[4] = -1; // BLOCK_128X128
671 }
672 sf->part_sf.ml_early_term_after_part_split_level = 2;
673
674 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL1;
675 }
676
677 if (speed >= 2) {
678 if (is_720p_or_larger) {
679 sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
680 } else if (is_480p_or_larger) {
681 sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
682 } else {
683 sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
684 }
685
686 if (is_720p_or_larger) {
687 sf->part_sf.partition_search_breakout_dist_thr = (1 << 24);
688 sf->part_sf.partition_search_breakout_rate_thr = 120;
689 } else {
690 sf->part_sf.partition_search_breakout_dist_thr = (1 << 22);
691 sf->part_sf.partition_search_breakout_rate_thr = 100;
692 }
693
694 if (is_720p_or_larger) {
695 sf->inter_sf.prune_obmc_prob_thresh = 16;
696 } else {
697 sf->inter_sf.prune_obmc_prob_thresh = 8;
698 }
699
700 if (is_480p_or_larger) {
701 sf->inter_sf.disable_interintra_wedge_var_thresh = 100;
702 } else {
703 sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
704 }
705
706 if (is_480p_or_lesser) sf->inter_sf.skip_ext_comp_nearmv_mode = 1;
707
708 if (is_720p_or_larger) {
709 sf->inter_sf.limit_inter_mode_cands = is_lf_frame ? 1 : 0;
710 } else {
711 sf->inter_sf.limit_inter_mode_cands = is_lf_frame ? 2 : 0;
712 }
713
714 if (is_480p_or_larger) {
715 sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 1;
716 if (use_hbd) sf->tx_sf.prune_tx_size_level = 2;
717 } else {
718 if (use_hbd) sf->tx_sf.prune_tx_size_level = 3;
719 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = boosted ? 0 : 1;
720 sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = boosted ? 0 : 1;
721 }
722
723 if (!is_720p_or_larger) {
724 sf->mv_sf.disable_second_mv = 1;
725 sf->mv_sf.auto_mv_step_size = 2;
726 } else {
727 sf->mv_sf.disable_second_mv = boosted ? 0 : 2;
728 sf->mv_sf.auto_mv_step_size = 1;
729 }
730
731 if (!is_720p_or_larger) {
732 sf->hl_sf.recode_tolerance = 50;
733 sf->inter_sf.disable_interinter_wedge_newmv_search =
734 is_boosted_arf2_bwd_type ? 0 : 1;
735 sf->inter_sf.enable_fast_wedge_mask_search = 1;
736 }
737 }
738
739 if (speed >= 3) {
740 sf->inter_sf.enable_fast_wedge_mask_search = 1;
741 sf->inter_sf.skip_newmv_in_drl = 2;
742 sf->inter_sf.skip_ext_comp_nearmv_mode = 1;
743 sf->inter_sf.limit_inter_mode_cands = is_lf_frame ? 3 : 0;
744 sf->inter_sf.disable_interinter_wedge_newmv_search = boosted ? 0 : 1;
745 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 1;
746 sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch =
747 frame_is_intra_only(&cpi->common) ? 0 : 1;
748
749 sf->part_sf.ml_early_term_after_part_split_level = 0;
750
751 if (is_720p_or_larger) {
752 sf->part_sf.partition_search_breakout_dist_thr = (1 << 25);
753 sf->part_sf.partition_search_breakout_rate_thr = 200;
754 sf->part_sf.skip_non_sq_part_based_on_none = is_lf_frame ? 2 : 0;
755 } else {
756 sf->part_sf.max_intra_bsize = BLOCK_32X32;
757 sf->part_sf.partition_search_breakout_dist_thr = (1 << 23);
758 sf->part_sf.partition_search_breakout_rate_thr = 120;
759 sf->part_sf.skip_non_sq_part_based_on_none = is_lf_frame ? 1 : 0;
760 }
761 if (use_hbd) sf->tx_sf.prune_tx_size_level = 3;
762
763 if (is_480p_or_larger) {
764 sf->part_sf.early_term_after_none_split = 1;
765 } else {
766 sf->part_sf.early_term_after_none_split = 0;
767 }
768 if (is_720p_or_larger) {
769 sf->intra_sf.skip_intra_in_interframe = boosted ? 1 : 2;
770 } else {
771 sf->intra_sf.skip_intra_in_interframe = boosted ? 1 : 3;
772 }
773
774 if (is_720p_or_larger) {
775 sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
776 sf->inter_sf.limit_txfm_eval_per_mode = boosted ? 0 : 1;
777 } else {
778 sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX;
779 sf->inter_sf.limit_txfm_eval_per_mode = boosted ? 0 : 2;
780 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL2;
781 }
782
783 sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
784 }
785
786 if (speed >= 4) {
787 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
788 sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 1;
789 if (is_720p_or_larger) {
790 sf->part_sf.partition_search_breakout_dist_thr = (1 << 26);
791 } else {
792 sf->part_sf.partition_search_breakout_dist_thr = (1 << 24);
793 }
794 sf->part_sf.early_term_after_none_split = 1;
795
796 if (is_480p_or_larger) {
797 sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 2;
798 } else {
799 sf->mv_sf.skip_fullpel_search_using_startmv = boosted ? 0 : 1;
800 }
801
802 sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX;
803 sf->inter_sf.prune_obmc_prob_thresh = INT_MAX;
804 sf->inter_sf.limit_txfm_eval_per_mode = boosted ? 0 : 2;
805 if (is_480p_or_lesser) sf->inter_sf.skip_newmv_in_drl = 3;
806
807 if (is_720p_or_larger) {
808 sf->inter_sf.prune_comp_ref_frames = 1;
809 } else if (is_480p_or_larger) {
810 sf->inter_sf.prune_comp_ref_frames = is_boosted_arf2_bwd_type ? 0 : 1;
811 }
812
813 if (is_720p_or_larger)
814 sf->hl_sf.recode_tolerance = 32;
815 else
816 sf->hl_sf.recode_tolerance = 55;
817
818 sf->intra_sf.skip_intra_in_interframe = 4;
819
820 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL3;
821 }
822
823 if (speed >= 5) {
824 if (is_720p_or_larger) {
825 sf->inter_sf.prune_warped_prob_thresh = 16;
826 } else if (is_480p_or_larger) {
827 sf->inter_sf.prune_warped_prob_thresh = 8;
828 }
829 if (is_720p_or_larger) sf->hl_sf.recode_tolerance = 40;
830
831 sf->inter_sf.skip_newmv_in_drl = 4;
832 sf->inter_sf.prune_comp_ref_frames = 1;
833 sf->mv_sf.skip_fullpel_search_using_startmv = boosted ? 0 : 1;
834
835 if (!is_720p_or_larger) {
836 sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW_SET;
837 sf->inter_sf.prune_nearest_near_mv_using_refmv_weight =
838 (boosted || allow_screen_content_tools) ? 0 : 1;
839 sf->mv_sf.use_downsampled_sad = 1;
840 }
841
842 if (!is_480p_or_larger) {
843 sf->part_sf.partition_search_breakout_dist_thr = (1 << 26);
844 }
845
846 if (is_480p_or_lesser) {
847 sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL1;
848 } else {
849 sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL2;
850 }
851
852 if (is_720p_or_larger)
853 sf->part_sf.ext_part_eval_based_on_cur_best =
854 (allow_screen_content_tools || frame_is_intra_only(cm)) ? 0 : 1;
855
856 if (is_480p_or_larger) {
857 sf->tpl_sf.reduce_num_frames = 1;
858 }
859 }
860
861 if (speed >= 6) {
862 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 4;
863 sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL3;
864 sf->inter_sf.prune_comp_ref_frames = 2;
865 sf->inter_sf.prune_nearest_near_mv_using_refmv_weight =
866 (boosted || allow_screen_content_tools) ? 0 : 1;
867 sf->mv_sf.skip_fullpel_search_using_startmv = boosted ? 0 : 2;
868
869 if (is_720p_or_larger) {
870 sf->part_sf.auto_max_partition_based_on_simple_motion = NOT_IN_USE;
871 } else if (is_480p_or_larger) {
872 sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
873 }
874
875 if (is_480p_or_larger) {
876 sf->hl_sf.allow_sub_blk_me_in_tf = 1;
877 }
878
879 if (is_1080p_or_larger) {
880 sf->part_sf.default_min_partition_size = BLOCK_8X8;
881 }
882
883 if (is_720p_or_larger) {
884 sf->inter_sf.disable_masked_comp = 1;
885 }
886
887 if (!is_720p_or_larger) {
888 sf->inter_sf.coeff_cost_upd_level = INTERNAL_COST_UPD_SBROW;
889 sf->inter_sf.mode_cost_upd_level = INTERNAL_COST_UPD_SBROW;
890 }
891
892 if (is_720p_or_larger) {
893 sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
894 sf->part_sf.partition_search_breakout_dist_thr = (1 << 28);
895 } else {
896 sf->part_sf.use_square_partition_only_threshold = BLOCK_16X16;
897 sf->part_sf.partition_search_breakout_dist_thr = (1 << 26);
898 }
899
900 if (is_720p_or_larger) {
901 sf->inter_sf.prune_ref_mv_idx_search = 2;
902 } else {
903 sf->inter_sf.prune_ref_mv_idx_search = 1;
904 }
905
906 if (!is_720p_or_larger) {
907 sf->tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh =
908 is_boosted_arf2_bwd_type ? 450 : 150;
909 }
910
911 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
912
913 sf->hl_sf.recode_tolerance = 55;
914 }
915 }
916
// Fills in the speed-feature settings in `sf` that are chosen without reading
// the frame width or height.
//
// Parameters:
//   cpi   - encoder instance; only read here (frame/GF-group state, encoder
//           configuration, rate-control state).
//   sf    - speed feature structure that is written.
//   speed - speed level. The unconditional assignments at the top always run;
//           each `if (speed >= N)` block (N = 1..6) is then applied in
//           increasing order, so a higher speed inherits every lower tier and
//           overrides any field that is assigned again.
//
// NOTE: statement order matters. Many fields are written in more than one
// tier and the last write wins, so blocks must not be reordered.
static void set_good_speed_features_framesize_independent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  // Result of frame_is_boosted() for the current frame.
  const int boosted = frame_is_boosted(cpi);
  // Non-zero for boosted frames and for frames whose update type in the GF
  // group is INTNL_ARF_UPDATE.
  const int is_boosted_arf2_bwd_type =
      boosted || gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;
  // Non-zero when the GF group marks the current frame as INTER_FRAME.
  const int is_inter_frame =
      gf_group->frame_type[cpi->gf_frame_index] == INTER_FRAME;
  const int allow_screen_content_tools =
      cm->features.allow_screen_content_tools;
  // High bit-depth flag taken from the encoder configuration.
  const int use_hbd = cpi->oxcf.use_highbitdepth;
  // Only written when large-scale tile is not enabled; otherwise whatever
  // value `sf` already holds is kept (speed >= 3 overwrites it in all cases).
  if (!cpi->oxcf.tile_cfg.enable_large_scale_tile) {
    sf->hl_sf.high_precision_mv_usage = LAST_MV_DATA;
  }

  // Speed 0 for all speed features that give neutral coding performance change.
  sf->gm_sf.gm_search_type = boosted ? GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2
                                     : GM_SEARCH_CLOSEST_REFS_ONLY;
  sf->gm_sf.prune_ref_frame_for_gm_search = boosted ? 0 : 1;
  sf->gm_sf.disable_gm_search_based_on_stats = 1;

  sf->part_sf.less_rectangular_check_level = 1;
  sf->part_sf.ml_prune_partition = 1;
  sf->part_sf.prune_ext_partition_types_search_level = 1;
  sf->part_sf.prune_part4_search = 2;
  sf->part_sf.simple_motion_search_prune_rect = 1;
  sf->part_sf.ml_predict_breakout_level = use_hbd ? 1 : 3;
  sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
  sf->part_sf.use_best_rd_for_pruning = 1;
  sf->part_sf.simple_motion_search_prune_agg =
      allow_screen_content_tools ? NO_PRUNING : SIMPLE_AGG_LVL0;

  // TODO(debargha): Test, tweak and turn on either 1 or 2
  sf->inter_sf.inter_mode_rd_model_estimation = 1;
  sf->inter_sf.model_based_post_interp_filter_breakout = 1;
  sf->inter_sf.prune_compound_using_single_ref = 1;
  sf->inter_sf.prune_mode_search_simple_translation = 1;
  // 0 for boosted frames or screen content, 1 for INTNL_ARF_UPDATE frames
  // (the only non-boosted case where is_boosted_arf2_bwd_type is set), else 2.
  sf->inter_sf.prune_ref_frame_for_rect_partitions =
      (boosted || (allow_screen_content_tools))
          ? 0
          : (is_boosted_arf2_bwd_type ? 1 : 2);
  sf->inter_sf.reduce_inter_modes = boosted ? 1 : 2;
  sf->inter_sf.selective_ref_frame = 1;
  sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH;

  sf->interp_sf.use_fast_interpolation_filter_search = 1;

  sf->intra_sf.intra_pruning_with_hog = 1;

  sf->tx_sf.adaptive_txb_search_level = 1;
  sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.model_based_prune_tx_search_level = 1;
  sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;

  sf->tpl_sf.search_method = NSTEP_8PT;

  // Both real-time (rt_sf) settings below are set to 0 at every speed handled
  // by this function; no later tier touches them.
  sf->rt_sf.use_nonrd_pick_mode = 0;
  sf->rt_sf.use_real_time_ref_set = 0;

  // A smaller threshold (1 << 20 instead of 1 << 25) is used for
  // graphics/animation content or when screen content tools are in use.
  if (cpi->twopass_frame.fr_content_type == FC_GRAPHICS_ANIMATION ||
      cpi->use_screen_content_tools) {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 20);
  } else {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 25);
  }

  sf->rd_sf.perform_coeff_opt = 1;
  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_DUAL;

  if (speed >= 1) {
    sf->hl_sf.adjust_num_frames_for_arf_filtering =
        allow_screen_content_tools ? 0 : 1;

    sf->part_sf.intra_cnn_based_part_prune_level =
        allow_screen_content_tools ? 0 : 2;
    sf->part_sf.simple_motion_search_early_term_none = 1;
    // TODO(Venkat): Clean-up frame type dependency for
    // simple_motion_search_split in partition search function and set the
    // speed feature accordingly
    sf->part_sf.simple_motion_search_split = allow_screen_content_tools ? 1 : 2;
    sf->part_sf.ml_predict_breakout_level = use_hbd ? 2 : 3;

    // Doubles the threshold selected above (1 << 20 or 1 << 25).
    sf->mv_sf.exhaustive_searches_thresh <<= 1;
    sf->mv_sf.obmc_full_pixel_search_level = 1;
    sf->mv_sf.use_accurate_subpel_search = USE_4_TAPS;
    sf->mv_sf.disable_extensive_joint_motion_search = 1;

    sf->inter_sf.prune_comp_search_by_single_result = boosted ? 2 : 1;
    sf->inter_sf.prune_comp_type_by_comp_avg = 1;
    sf->inter_sf.prune_comp_type_by_model_rd = boosted ? 0 : 1;
    // Replaces the speed-0 choice: 0 now requires an intra-only frame (or
    // screen content), boosted frames get 1 and everything else 2.
    sf->inter_sf.prune_ref_frame_for_rect_partitions =
        (frame_is_intra_only(&cpi->common) || (allow_screen_content_tools))
            ? 0
            : (boosted ? 1 : 2);
    sf->inter_sf.reduce_inter_modes = boosted ? 1 : 3;
    sf->inter_sf.reuse_inter_intra_mode = 1;
    sf->inter_sf.selective_ref_frame = 2;
    sf->inter_sf.skip_arf_compound = 1;

    sf->interp_sf.use_interp_filter = 1;

    sf->intra_sf.prune_palette_search_level = 1;

    sf->tx_sf.adaptive_txb_search_level = 2;
    sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
    sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
    // Note: this resets the level from the 1 assigned at speed 0 back to 0.
    sf->tx_sf.model_based_prune_tx_search_level = 0;
    sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
    sf->tx_sf.tx_type_search.skip_tx_search = 1;

    sf->rd_sf.perform_coeff_opt = boosted ? 2 : 3;
    sf->rd_sf.tx_domain_dist_level = boosted ? 1 : 2;
    sf->rd_sf.tx_domain_dist_thres_level = 1;

    sf->lpf_sf.dual_sgr_penalty_level = 1;
    sf->lpf_sf.enable_sgr_ep_pruning = 1;

    // TODO(any, yunqing): move this feature to speed 0.
    sf->tpl_sf.skip_alike_starting_mv = 1;
  }

  if (speed >= 2) {
    sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;

    sf->fp_sf.skip_motion_search_threshold = 25;

    sf->gm_sf.num_refinement_steps = 2;

    // 1 for every frame that is not intra-only, 0 otherwise.
    sf->part_sf.reuse_best_prediction_for_part_ab =
        !frame_is_intra_only(&cpi->common);

    sf->mv_sf.simple_motion_subpel_force_stop = QUARTER_PEL;
    sf->mv_sf.subpel_iters_per_step = 1;
    sf->mv_sf.reduce_search_range = 1;

    // TODO([email protected]): We can get 10% speed up if we move
    // adaptive_rd_thresh to speed 1. But currently it performs poorly on some
    // clips (e.g. 5% loss on dinner_1080p). We need to examine the sequence a
    // bit more closely to figure out why.
    sf->inter_sf.adaptive_rd_thresh = 1;
    sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
    sf->inter_sf.fast_interintra_wedge_search = 1;
    sf->inter_sf.prune_comp_search_by_single_result = boosted ? 4 : 1;
    sf->inter_sf.prune_ext_comp_using_neighbors = 1;
    sf->inter_sf.prune_comp_using_best_single_mode_ref = 2;
    sf->inter_sf.prune_comp_type_by_comp_avg = 2;
    sf->inter_sf.selective_ref_frame = 3;
    // Overrides DIST_WTD_COMP_SKIP_MV_SEARCH from speed 0.
    sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
    sf->inter_sf.enable_fast_compound_mode_search = 1;
    sf->inter_sf.reuse_mask_search_results = 1;
    // set_txfm_rd_gate_level() is defined outside this function; presumably
    // it applies the given level to the entries of the array - confirm there.
    set_txfm_rd_gate_level(sf->inter_sf.txfm_rd_gate_level, boosted ? 0 : 1);
    sf->inter_sf.inter_mode_txfm_breakout = boosted ? 0 : 1;
    sf->inter_sf.alt_ref_search_fp = 1;

    sf->interp_sf.adaptive_interp_filter_search = 1;
    sf->interp_sf.disable_dual_filter = 1;

    // Non-zero for every frame that is not intra-only; for intra-only frames
    // it is non-zero only when cpi->rc.frames_to_key > 1.
    sf->intra_sf.disable_smooth_intra =
        !frame_is_intra_only(&cpi->common) || (cpi->rc.frames_to_key > 1);
    sf->intra_sf.intra_pruning_with_hog = 2;
    sf->intra_sf.skip_intra_in_interframe = is_inter_frame ? 2 : 1;
    sf->intra_sf.skip_filter_intra_in_inter_frames = 1;

    sf->tpl_sf.prune_starting_mv = 1;
    sf->tpl_sf.search_method = DIAMOND;

    sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 3 : 4;
    sf->rd_sf.use_mb_rd_hash = 1;

    sf->lpf_sf.prune_wiener_based_on_src_var = 1;
    sf->lpf_sf.prune_sgr_based_on_wiener = 1;
    sf->lpf_sf.disable_loop_restoration_chroma = boosted ? 0 : 1;
    sf->lpf_sf.reduce_wiener_window_size = boosted ? 0 : 1;

    // TODO(any): Re-evaluate this feature set to 1 in speed 2.
    sf->tpl_sf.allow_compound_pred = 0;
    sf->tpl_sf.prune_ref_frames_in_tpl = 1;
  }

  if (speed >= 3) {
    // Unconditional, unlike the LAST_MV_DATA assignment at the top of the
    // function which depends on the large-scale-tile setting.
    sf->hl_sf.high_precision_mv_usage = CURRENT_Q;

    // Speed 0 used boosted ? 0 : 1; from here on it is 1 for all frames.
    sf->gm_sf.prune_ref_frame_for_gm_search = 1;
    sf->gm_sf.prune_zero_mv_with_sse = 1;
    // Overrides the 2 assigned at speed 2.
    sf->gm_sf.num_refinement_steps = 0;

    sf->part_sf.less_rectangular_check_level = 2;
    sf->part_sf.simple_motion_search_prune_agg =
        allow_screen_content_tools
            ? SIMPLE_AGG_LVL0
            : (boosted ? SIMPLE_AGG_LVL1 : QIDX_BASED_AGG_LVL1);
    sf->part_sf.prune_ext_part_using_split_info = 1;
    sf->part_sf.simple_motion_search_rect_split = 1;

    sf->mv_sf.full_pixel_search_level = 1;
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
    sf->mv_sf.search_method = DIAMOND;
    sf->mv_sf.disable_second_mv = 2;
    sf->mv_sf.prune_mesh_search = PRUNE_MESH_SEARCH_LVL_1;
    sf->mv_sf.use_intrabc = 0;

    sf->inter_sf.disable_interinter_wedge_newmv_search = boosted ? 0 : 1;
    sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW;
    sf->inter_sf.disable_onesided_comp = 1;
    sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
    // TODO(any): Experiment with the early exit mechanism for speeds 0, 1 and 2
    // and clean-up the speed feature
    sf->inter_sf.perform_best_rd_based_gating_for_chroma = 1;
    sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 0 : 1;
    sf->inter_sf.prune_comp_search_by_single_result = boosted ? 4 : 2;
    sf->inter_sf.selective_ref_frame = 5;
    sf->inter_sf.reuse_compound_type_decision = 1;
    // Level passed: 0 for boosted frames, 1 for INTNL_ARF_UPDATE frames, 2
    // for all remaining frames.
    set_txfm_rd_gate_level(sf->inter_sf.txfm_rd_gate_level,
                           boosted ? 0 : (is_boosted_arf2_bwd_type ? 1 : 2));
    sf->inter_sf.inter_mode_txfm_breakout = boosted ? 0 : 2;

    sf->interp_sf.adaptive_interp_filter_search = 2;

    // TODO([email protected]): the thresholds chosen for intra hog are
    // inherited directly from luma hog with some minor tweaking. Eventually we
    // should run this with a bayesian optimizer to find the Pareto frontier.
    sf->intra_sf.chroma_intra_pruning_with_hog = 2;
    sf->intra_sf.intra_pruning_with_hog = 3;
    sf->intra_sf.prune_palette_search_level = 2;
    sf->intra_sf.top_intra_model_count_allowed = 2;

    sf->tpl_sf.prune_starting_mv = 2;
    sf->tpl_sf.skip_alike_starting_mv = 2;
    sf->tpl_sf.prune_intra_modes = 1;
    sf->tpl_sf.reduce_first_step_size = 6;
    sf->tpl_sf.subpel_force_stop = QUARTER_PEL;
    sf->tpl_sf.gop_length_decision_method = 1;

    sf->tx_sf.adaptive_txb_search_level = boosted ? 2 : 3;
    sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;

    // TODO(any): Refactor the code related to following winner mode speed
    // features
    sf->winner_mode_sf.enable_winner_mode_for_coeff_opt = 1;
    sf->winner_mode_sf.enable_winner_mode_for_use_tx_domain_dist = 1;
    // The conditional operator is right-associative and `==` binds tighter,
    // so this reads: boosted ? 0 : (update type is INTNL_ARF_UPDATE ? 1 : 2).
    sf->winner_mode_sf.motion_mode_for_winner_cand =
        boosted ? 0
        : gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE ? 1
                                                                         : 2;
    sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 4;

    // For screen content, "prune_sgr_based_on_wiener = 2" cause large quality
    // loss.
    sf->lpf_sf.prune_sgr_based_on_wiener = allow_screen_content_tools ? 1 : 2;
    sf->lpf_sf.prune_wiener_based_on_src_var = 2;
    sf->lpf_sf.use_coarse_filter_level_search =
        frame_is_intra_only(&cpi->common) ? 0 : 1;
    sf->lpf_sf.use_downsampled_wiener_stats = 1;
  }

  if (speed >= 4) {
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;

    sf->gm_sf.prune_zero_mv_with_sse = 2;
    sf->gm_sf.downsample_level = 1;

    sf->part_sf.simple_motion_search_prune_agg =
        allow_screen_content_tools ? SIMPLE_AGG_LVL0 : SIMPLE_AGG_LVL2;
    sf->part_sf.simple_motion_search_reduce_search_steps = 4;
    sf->part_sf.prune_ext_part_using_split_info = 2;
    // From this speed on the level is 3 regardless of use_hbd.
    sf->part_sf.ml_predict_breakout_level = 3;
    sf->part_sf.prune_rectangular_split_based_on_qidx =
        (allow_screen_content_tools || frame_is_intra_only(&cpi->common)) ? 0
                                                                          : 1;

    sf->inter_sf.alt_ref_search_fp = 2;
    // The gate levels are written per entry here instead of through
    // set_txfm_rd_gate_level(), so each search type can get its own value.
    sf->inter_sf.txfm_rd_gate_level[TX_SEARCH_DEFAULT] = boosted ? 0 : 3;
    sf->inter_sf.txfm_rd_gate_level[TX_SEARCH_MOTION_MODE] = boosted ? 0 : 5;
    sf->inter_sf.txfm_rd_gate_level[TX_SEARCH_COMP_TYPE_MODE] = boosted ? 0 : 3;

    sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 0 : 2;
    sf->inter_sf.prune_ext_comp_using_neighbors = 2;
    sf->inter_sf.prune_obmc_prob_thresh = INT_MAX;
    // Raises the threshold from the 100 assigned at speed 2 to UINT_MAX.
    sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX;

    sf->interp_sf.cb_pred_filter_search = 1;
    sf->interp_sf.skip_sharp_interp_filter_search = 1;
    sf->interp_sf.use_interp_filter = 2;

    sf->intra_sf.intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
    sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
    sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
    // TODO(any): "intra_y_mode_mask" doesn't help much at speed 4.
    // sf->intra_sf.intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
    // sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
    // sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
    // Fixed at 4 from here on; speed 2 used is_inter_frame ? 2 : 1.
    sf->intra_sf.skip_intra_in_interframe = 4;

    sf->mv_sf.simple_motion_subpel_force_stop = HALF_PEL;
    sf->mv_sf.prune_mesh_search = PRUNE_MESH_SEARCH_LVL_2;

    sf->tpl_sf.subpel_force_stop = HALF_PEL;
    sf->tpl_sf.search_method = FAST_BIGDIA;
    sf->tpl_sf.use_sad_for_mode_decision = 1;

    sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;

    sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 5 : 7;

    // TODO(any): Extend multi-winner mode processing support for inter frames
    sf->winner_mode_sf.multi_winner_mode_type =
        frame_is_intra_only(&cpi->common) ? MULTI_WINNER_MODE_DEFAULT
                                          : MULTI_WINNER_MODE_OFF;
    sf->winner_mode_sf.dc_blk_pred_level = boosted ? 0 : 2;

    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_FULL_IMAGE_NON_DUAL;
  }

  if (speed >= 5) {
    sf->hl_sf.weight_calc_level_in_tf = 1;
    sf->hl_sf.adjust_num_frames_for_arf_filtering =
        allow_screen_content_tools ? 0 : 2;

    sf->fp_sf.reduce_mv_step_param = 4;

    sf->part_sf.simple_motion_search_prune_agg =
        allow_screen_content_tools ? SIMPLE_AGG_LVL0 : SIMPLE_AGG_LVL3;
    sf->part_sf.ext_partition_eval_thresh =
        allow_screen_content_tools ? BLOCK_8X8 : BLOCK_16X16;
    sf->part_sf.prune_sub_8x8_partition_level =
        allow_screen_content_tools ? 1 : 2;

    sf->mv_sf.warp_search_method = WARP_SEARCH_DIAMOND;

    sf->inter_sf.prune_inter_modes_if_skippable = 1;
    sf->inter_sf.prune_single_ref = is_boosted_arf2_bwd_type ? 0 : 1;
    // Only the DEFAULT and COMP_TYPE_MODE entries are raised here; the
    // TX_SEARCH_MOTION_MODE entry keeps its speed-4 value.
    sf->inter_sf.txfm_rd_gate_level[TX_SEARCH_DEFAULT] = boosted ? 0 : 4;
    sf->inter_sf.txfm_rd_gate_level[TX_SEARCH_COMP_TYPE_MODE] = boosted ? 0 : 5;
    sf->inter_sf.enable_fast_compound_mode_search = 2;

    sf->interp_sf.skip_interp_filter_search = boosted ? 0 : 1;

    sf->intra_sf.chroma_intra_pruning_with_hog = 3;

    // TODO(any): Extend multi-winner mode processing support for inter frames
    sf->winner_mode_sf.multi_winner_mode_type =
        frame_is_intra_only(&cpi->common) ? MULTI_WINNER_MODE_FAST
                                          : MULTI_WINNER_MODE_OFF;

    // Disable Self-guided Loop restoration filter.
    sf->lpf_sf.disable_sgr_filter = true;
    sf->lpf_sf.disable_wiener_coeff_refine_search = true;

    sf->tpl_sf.prune_starting_mv = 3;
    sf->tpl_sf.use_y_only_rate_distortion = 1;
    sf->tpl_sf.subpel_force_stop = FULL_PEL;
    sf->tpl_sf.gop_length_decision_method = 2;
    sf->tpl_sf.use_sad_for_mode_decision = 2;

    // Level 2 for all frames now; speed 4 kept boosted frames at 0.
    sf->winner_mode_sf.dc_blk_pred_level = 2;

    sf->fp_sf.disable_recon = 1;
  }

  if (speed >= 6) {
    sf->hl_sf.disable_extra_sc_testing = 1;
    sf->hl_sf.second_alt_ref_filtering = 0;

    sf->gm_sf.downsample_level = 2;

    sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 0 : 3;
    sf->inter_sf.selective_ref_frame = 6;
    sf->inter_sf.prune_single_ref = is_boosted_arf2_bwd_type ? 0 : 2;
    sf->inter_sf.prune_ext_comp_using_neighbors = 3;

    sf->intra_sf.chroma_intra_pruning_with_hog = 4;
    sf->intra_sf.intra_pruning_with_hog = 4;
    // The 32x32 and 64x64 masks are narrowed to DC only; the TX_16X16 uv mask
    // keeps the value assigned at speed 4.
    sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC;
    sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC;
    sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC;
    sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC;
    sf->intra_sf.early_term_chroma_palette_size_search = 1;

    // `||` binds tighter than `?:`, so this is
    // (boosted || allow_screen_content_tools) ? 0 : 2.
    sf->part_sf.prune_rectangular_split_based_on_qidx =
        boosted || allow_screen_content_tools ? 0 : 2;

    sf->part_sf.prune_part4_search = 3;

    sf->mv_sf.simple_motion_subpel_force_stop = FULL_PEL;
    sf->mv_sf.use_bsize_dependent_search_method = 1;

    sf->tpl_sf.gop_length_decision_method = 3;

    sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 6 : 8;

    sf->winner_mode_sf.dc_blk_pred_level = 3;
    // Multi-winner mode is off for all frame types here, overriding the
    // intra-only settings made at speeds 4 and 5.
    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;

    sf->fp_sf.skip_zeromv_motion_search = 1;
  }
}
1317
set_rt_speed_feature_framesize_dependent(const AV1_COMP * const cpi,SPEED_FEATURES * const sf,int speed)1318 static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
1319 SPEED_FEATURES *const sf,
1320 int speed) {
1321 const AV1_COMMON *const cm = &cpi->common;
1322 const int boosted = frame_is_boosted(cpi);
1323 const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080;
1324 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
1325 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480;
1326 const int is_360p_or_larger = AOMMIN(cm->width, cm->height) >= 360;
1327
1328 if (!is_360p_or_larger) {
1329 sf->rt_sf.prune_intra_mode_based_on_mv_range = 1;
1330 sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad = 1;
1331 if (speed >= 6)
1332 sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 2;
1333 if (speed == 7) sf->rt_sf.prefer_large_partition_blocks = 2;
1334 if (speed >= 7) {
1335 sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
1336 sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
1337 sf->rt_sf.use_rtc_tf = 2;
1338 }
1339 if (speed == 8) sf->rt_sf.prefer_large_partition_blocks = 1;
1340 if (speed >= 8) {
1341 sf->rt_sf.use_nonrd_filter_search = 1;
1342 sf->rt_sf.tx_size_level_based_on_qstep = 1;
1343 }
1344 if (speed >= 9) {
1345 sf->rt_sf.use_comp_ref_nonrd = 0;
1346 sf->rt_sf.nonrd_aggressive_skip = 1;
1347 sf->rt_sf.skip_intra_pred = 1;
1348 // Only turn on enable_ref_short_signaling for low resolution when only
1349 // LAST and GOLDEN ref frames are used.
1350 sf->rt_sf.enable_ref_short_signaling =
1351 (!sf->rt_sf.use_nonrd_altref_frame &&
1352 (!sf->rt_sf.use_comp_ref_nonrd ||
1353 (!sf->rt_sf.ref_frame_comp_nonrd[1] &&
1354 !sf->rt_sf.ref_frame_comp_nonrd[2])));
1355
1356 // TODO(kyslov) Re-enable when AV1 models are trained
1357 #if 0
1358 #if CONFIG_RT_ML_PARTITIONING
1359 if (!frame_is_intra_only(cm)) {
1360 sf->part_sf.partition_search_type = ML_BASED_PARTITION;
1361 sf->rt_sf.reuse_inter_pred_nonrd = 0;
1362 }
1363 #endif
1364 #endif
1365 sf->rt_sf.use_adaptive_subpel_search = false;
1366 }
1367 if (speed >= 10) {
1368 // TODO([email protected]): To be conservative, disable
1369 // sf->rt_sf.estimate_motion_for_var_based_partition = 3 for speed 10/qvga
1370 // for now. May enable it in the future.
1371 sf->rt_sf.estimate_motion_for_var_based_partition = 0;
1372 sf->rt_sf.skip_intra_pred = 2;
1373 sf->rt_sf.hybrid_intra_pickmode = 3;
1374 sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 1;
1375 sf->rt_sf.reduce_mv_pel_precision_highmotion = 2;
1376 sf->rt_sf.use_nonrd_filter_search = 0;
1377 }
1378 } else {
1379 sf->rt_sf.prune_intra_mode_based_on_mv_range = 2;
1380 sf->intra_sf.skip_filter_intra_in_inter_frames = 1;
1381 if (speed <= 5) {
1382 sf->tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh =
1383 boosted ? INT_MAX : 350;
1384 sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 2;
1385 }
1386 if (speed == 6) sf->part_sf.disable_8x8_part_based_on_qidx = 1;
1387 if (speed >= 6) sf->rt_sf.skip_newmv_mode_based_on_sse = 2;
1388 if (speed == 7) {
1389 sf->rt_sf.prefer_large_partition_blocks = 1;
1390 // Enable this feature for [360p, 720p] resolution range initially.
1391 // Only enable for low bitdepth to mitigate issue: b/303023614.
1392 if (!cpi->rc.rtc_external_ratectrl &&
1393 AOMMIN(cm->width, cm->height) <= 720 && !cpi->oxcf.use_highbitdepth)
1394 sf->hl_sf.accurate_bit_estimate = cpi->oxcf.q_cfg.aq_mode == NO_AQ;
1395 }
1396 if (speed >= 7) {
1397 sf->rt_sf.use_rtc_tf = 1;
1398 }
1399 if (speed == 8 && !cpi->ppi->use_svc) {
1400 sf->rt_sf.short_circuit_low_temp_var = 0;
1401 sf->rt_sf.use_nonrd_altref_frame = 1;
1402 }
1403 if (speed >= 8) sf->rt_sf.tx_size_level_based_on_qstep = 2;
1404 if (speed >= 9) {
1405 sf->rt_sf.gf_length_lvl = 1;
1406 sf->rt_sf.skip_cdef_sb = 1;
1407 sf->rt_sf.sad_based_adp_altref_lag = 2;
1408 sf->rt_sf.reduce_mv_pel_precision_highmotion = 2;
1409 sf->rt_sf.use_adaptive_subpel_search = true;
1410 sf->interp_sf.cb_pred_filter_search = 1;
1411 }
1412 if (speed >= 10) {
1413 sf->rt_sf.hybrid_intra_pickmode = 2;
1414 sf->rt_sf.sad_based_adp_altref_lag = 4;
1415 sf->rt_sf.tx_size_level_based_on_qstep = 0;
1416 sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
1417 sf->rt_sf.use_adaptive_subpel_search = false;
1418 sf->interp_sf.cb_pred_filter_search = 2;
1419 }
1420 }
1421 if (!is_480p_or_larger) {
1422 if (speed == 7) {
1423 sf->rt_sf.nonrd_check_partition_merge_mode = 2;
1424 }
1425 }
1426 if (!is_720p_or_larger) {
1427 if (speed >= 9) {
1428 sf->rt_sf.force_large_partition_blocks_intra = 1;
1429 }
1430 } else {
1431 if (speed >= 6) sf->rt_sf.skip_newmv_mode_based_on_sse = 3;
1432 if (speed == 7) sf->rt_sf.prefer_large_partition_blocks = 0;
1433 if (speed >= 7) {
1434 sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 2;
1435 sf->rt_sf.reduce_mv_pel_precision_highmotion = 1;
1436 }
1437 if (speed >= 9) {
1438 sf->rt_sf.sad_based_adp_altref_lag = 1;
1439 sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 0;
1440 sf->rt_sf.reduce_mv_pel_precision_highmotion = 2;
1441 }
1442 if (speed >= 10) {
1443 sf->rt_sf.sad_based_adp_altref_lag = 3;
1444 sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
1445 }
1446 }
1447 // TODO(Any): Check/Tune settings of other sfs for 1080p.
1448 if (is_1080p_or_larger) {
1449 if (speed >= 7) {
1450 sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
1451 sf->rt_sf.use_adaptive_subpel_search = 0;
1452 }
1453 if (speed >= 9) sf->interp_sf.cb_pred_filter_search = 0;
1454 } else {
1455 if (speed >= 9) sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
1456 if (speed >= 10) sf->rt_sf.nonrd_aggressive_skip = 1;
1457 }
1458 // TODO(marpan): Tune settings for speed 11 video mode,
1459 // for resolutions below 720p.
1460 if (speed >= 11 && !is_720p_or_larger &&
1461 cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN) {
1462 sf->rt_sf.skip_cdef_sb = 1;
1463 sf->rt_sf.force_only_last_ref = 1;
1464 sf->rt_sf.selective_cdf_update = 1;
1465 sf->rt_sf.use_nonrd_filter_search = 0;
1466 if (is_360p_or_larger) {
1467 sf->part_sf.fixed_partition_size = BLOCK_32X32;
1468 sf->rt_sf.use_fast_fixed_part = 1;
1469 sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 2;
1470 }
1471 sf->rt_sf.increase_source_sad_thresh = 1;
1472 sf->rt_sf.part_early_exit_zeromv = 2;
1473 sf->rt_sf.set_zeromv_skip_based_on_source_sad = 2;
1474 for (int i = 0; i < BLOCK_SIZES; ++i) {
1475 sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
1476 }
1477 sf->rt_sf.hybrid_intra_pickmode = 0;
1478 }
1479 // Setting for SVC, or when the ref_frame_config control is
1480 // used to set the reference structure.
1481 if (cpi->ppi->use_svc || cpi->ppi->rtc_ref.set_ref_frame_config) {
1482 const RTC_REF *const rtc_ref = &cpi->ppi->rtc_ref;
1483 // For SVC: for greater than 2 temporal layers, use better mv search on
1484 // base temporal layers, and only on base spatial layer if highest
1485 // resolution is above 640x360.
1486 if (cpi->svc.number_temporal_layers >= 2 &&
1487 cpi->svc.temporal_layer_id == 0 &&
1488 (cpi->svc.spatial_layer_id == 0 ||
1489 cpi->oxcf.frm_dim_cfg.width * cpi->oxcf.frm_dim_cfg.height <=
1490 640 * 360)) {
1491 sf->mv_sf.search_method = NSTEP;
1492 sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
1493 sf->rt_sf.fullpel_search_step_param = 10;
1494 sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
1495 if (cm->width * cm->height <= 352 * 288)
1496 sf->rt_sf.nonrd_prune_ref_frame_search = 2;
1497 sf->rt_sf.force_large_partition_blocks_intra = 0;
1498 }
1499 if (speed >= 8) {
1500 if (cpi->svc.number_temporal_layers > 2)
1501 sf->rt_sf.disable_cdf_update_non_reference_frame = true;
1502 sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
1503 if (rtc_ref->non_reference_frame) {
1504 sf->rt_sf.nonrd_aggressive_skip = 1;
1505 sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
1506 }
1507 }
1508 if (speed <= 9 && cpi->svc.number_temporal_layers > 2 &&
1509 cpi->svc.temporal_layer_id == 0)
1510 sf->rt_sf.check_only_zero_zeromv_on_large_blocks = false;
1511 else
1512 sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
1513 sf->rt_sf.frame_level_mode_cost_update = false;
1514
1515 // Compound mode enabling.
1516 if (rtc_ref->ref_frame_comp[0] || rtc_ref->ref_frame_comp[1] ||
1517 rtc_ref->ref_frame_comp[2]) {
1518 sf->rt_sf.use_comp_ref_nonrd = 1;
1519 sf->rt_sf.ref_frame_comp_nonrd[0] =
1520 rtc_ref->ref_frame_comp[0] && rtc_ref->reference[GOLDEN_FRAME - 1];
1521 sf->rt_sf.ref_frame_comp_nonrd[1] =
1522 rtc_ref->ref_frame_comp[1] && rtc_ref->reference[LAST2_FRAME - 1];
1523 sf->rt_sf.ref_frame_comp_nonrd[2] =
1524 rtc_ref->ref_frame_comp[2] && rtc_ref->reference[ALTREF_FRAME - 1];
1525 } else {
1526 sf->rt_sf.use_comp_ref_nonrd = 0;
1527 }
1528
1529 if (cpi->svc.number_spatial_layers > 1 ||
1530 cpi->svc.number_temporal_layers > 1)
1531 sf->hl_sf.accurate_bit_estimate = 0;
1532
1533 sf->rt_sf.estimate_motion_for_var_based_partition = 1;
1534
1535 // For single layers RPS: bias/adjustment for recovery frame.
1536 if (cpi->ppi->rtc_ref.bias_recovery_frame) {
1537 sf->mv_sf.search_method = NSTEP;
1538 sf->mv_sf.subpel_search_method = SUBPEL_TREE;
1539 sf->rt_sf.fullpel_search_step_param = 8;
1540 sf->rt_sf.nonrd_aggressive_skip = 0;
1541 }
1542 }
1543 // Screen settings.
1544 if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
1545 // TODO(marpan): Check settings for speed 7 and 8.
1546 if (speed >= 7) {
1547 sf->rt_sf.reduce_mv_pel_precision_highmotion = 1;
1548 sf->mv_sf.use_bsize_dependent_search_method = 0;
1549 sf->rt_sf.skip_cdef_sb = 1;
1550 sf->rt_sf.increase_color_thresh_palette = 1;
1551 if (!frame_is_intra_only(cm)) sf->rt_sf.dct_only_palette_nonrd = 1;
1552 }
1553 if (speed >= 8) {
1554 sf->rt_sf.nonrd_check_partition_merge_mode = 3;
1555 sf->rt_sf.nonrd_prune_ref_frame_search = 1;
1556 sf->rt_sf.use_nonrd_filter_search = 0;
1557 sf->rt_sf.prune_hv_pred_modes_using_src_sad = false;
1558 }
1559 if (speed >= 9) {
1560 sf->rt_sf.prune_idtx_nonrd = 1;
1561 sf->rt_sf.part_early_exit_zeromv = 2;
1562 sf->rt_sf.skip_lf_screen = 1;
1563 sf->rt_sf.nonrd_prune_ref_frame_search = 3;
1564 sf->rt_sf.var_part_split_threshold_shift = 10;
1565 sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
1566 sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
1567 sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 1;
1568 sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
1569 sf->rt_sf.nonrd_check_partition_merge_mode = 0;
1570 sf->interp_sf.cb_pred_filter_search = 0;
1571 }
1572 if (speed >= 10) {
1573 if (cm->width * cm->height > 1920 * 1080)
1574 sf->part_sf.disable_8x8_part_based_on_qidx = 1;
1575 sf->rt_sf.screen_content_cdef_filter_qindex_thresh = 80;
1576 sf->rt_sf.part_early_exit_zeromv = 1;
1577 sf->rt_sf.nonrd_aggressive_skip = 1;
1578 sf->rt_sf.thresh_active_maps_skip_lf_cdef = 90;
1579 sf->rt_sf.hybrid_intra_pickmode = 0;
1580 sf->rt_sf.dct_only_palette_nonrd = 1;
1581 sf->rt_sf.prune_palette_search_nonrd = 1;
1582 sf->rt_sf.prune_intra_mode_using_best_sad_so_far = true;
1583 sf->rt_sf.rc_faster_convergence_static = 1;
1584 sf->rt_sf.rc_compute_spatial_var_sc = 1;
1585 }
1586 if (speed >= 11) {
1587 sf->rt_sf.skip_lf_screen = 2;
1588 sf->rt_sf.skip_cdef_sb = 2;
1589 sf->rt_sf.prune_palette_search_nonrd = 2;
1590 sf->rt_sf.increase_color_thresh_palette = 0;
1591 sf->rt_sf.prune_h_pred_using_best_mode_so_far = true;
1592 sf->rt_sf.enable_intra_mode_pruning_using_neighbors = true;
1593 }
1594 sf->rt_sf.skip_encoding_non_reference_slide_change =
1595 cpi->oxcf.rc_cfg.drop_frames_water_mark > 0 ? 1 : 0;
1596 sf->rt_sf.skip_newmv_flat_blocks_screen = 1;
1597 sf->rt_sf.use_idtx_nonrd = 1;
1598 sf->rt_sf.higher_thresh_scene_detection = 0;
1599 sf->rt_sf.use_nonrd_altref_frame = 0;
1600 sf->rt_sf.use_rtc_tf = 0;
1601 sf->rt_sf.use_comp_ref_nonrd = 0;
1602 sf->rt_sf.source_metrics_sb_nonrd = 1;
1603 if (cpi->rc.high_source_sad == 1) {
1604 sf->rt_sf.prefer_large_partition_blocks = 0;
1605 sf->part_sf.max_intra_bsize = BLOCK_128X128;
1606 for (int i = 0; i < BLOCK_SIZES; ++i) {
1607 if (i > BLOCK_32X32)
1608 sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
1609 else
1610 sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
1611 }
1612 }
1613 if (speed >= 11 && cpi->rc.high_motion_content_screen_rtc) {
1614 sf->rt_sf.higher_thresh_scene_detection = 1;
1615 sf->rt_sf.force_only_last_ref = 1;
1616 sf->rt_sf.use_nonrd_filter_search = 0;
1617 sf->part_sf.fixed_partition_size = BLOCK_32X32;
1618 sf->rt_sf.use_fast_fixed_part = 1;
1619 sf->rt_sf.increase_source_sad_thresh = 1;
1620 sf->rt_sf.selective_cdf_update = 1;
1621 sf->mv_sf.search_method = FAST_DIAMOND;
1622 } else if (cpi->rc.max_block_source_sad > 20000 &&
1623 cpi->rc.frame_source_sad > 100 && speed >= 6 &&
1624 (cpi->rc.percent_blocks_with_motion > 1 ||
1625 cpi->svc.last_layer_dropped[0])) {
1626 sf->mv_sf.search_method = NSTEP;
1627 sf->rt_sf.fullpel_search_step_param = 2;
1628 }
1629 if (cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) {
1630 sf->rt_sf.use_idtx_nonrd = 0;
1631 sf->rt_sf.prefer_large_partition_blocks = 1;
1632 sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
1633 sf->rt_sf.fullpel_search_step_param = 10;
1634 }
1635 sf->rt_sf.partition_direct_merging = 0;
1636 sf->hl_sf.accurate_bit_estimate = 0;
1637 // This feature is for nonrd_pickmode.
1638 if (sf->rt_sf.use_nonrd_pick_mode)
1639 sf->rt_sf.estimate_motion_for_var_based_partition = 1;
1640 else
1641 sf->rt_sf.estimate_motion_for_var_based_partition = 0;
1642 }
1643 if (is_lossless_requested(&cpi->oxcf.rc_cfg)) {
1644 sf->rt_sf.use_rtc_tf = 0;
1645 // TODO(aomedia:3412): The setting accurate_bit_estimate = 0
1646 // can be removed once it's fixed for lossless mode.
1647 sf->hl_sf.accurate_bit_estimate = 0;
1648 }
1649 if (cpi->oxcf.use_highbitdepth) {
1650 // Disable for use_highbitdepth = 1 to mitigate issue: b/303023614.
1651 sf->rt_sf.estimate_motion_for_var_based_partition = 0;
1652 }
1653 if (cpi->oxcf.superres_cfg.enable_superres) {
1654 sf->rt_sf.use_rtc_tf = 0;
1655 sf->rt_sf.nonrd_prune_ref_frame_search = 1;
1656 }
1657 // rtc_tf feature allocates new source because of possible
1658 // temporal filtering which may change the input source during encoding:
1659 // this causes an issue on resized frames when psnr is calculated,
1660 // so disable it here for frames that are resized (encoding width/height
1661 // different from configured width/height).
1662 if (is_psnr_calc_enabled(cpi) && (cpi->oxcf.frm_dim_cfg.width != cm->width ||
1663 cpi->oxcf.frm_dim_cfg.height != cm->height))
1664 sf->rt_sf.use_rtc_tf = 0;
1665 }
1666
// TODO(kyslov): now this is very similar to
// set_good_speed_features_framesize_independent
// except it enables the non-rd pick mode (use_nonrd_pick_mode) from speed 7
// onwards. This function will likely be modified in the future with
// RT-specific speed features.
// Applies the real-time speed feature settings that do not depend on the
// frame size.
//
// Statements run top to bottom: a base set of values is written first and the
// `speed >= N` sections that follow overwrite selected fields, so the order of
// the sections matters.
//
//   cpi    Encoder instance. Read for the frame type, boost state and encoder
//          configuration; not written here.
//   sf     Speed feature set that receives the settings.
//   speed  Speed preset. Thresholds 6, 7, 8, 9, 10 and 11 are tested below.
static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
                                                        SPEED_FEATURES *sf,
                                                        int speed) {
  AV1_COMMON *const cm = &cpi->common;
  const int boosted = frame_is_boosted(cpi);
  // Computed in a wider type so that the product of two int dimensions cannot
  // overflow (signed overflow is undefined behavior).
  const long long frame_area = (long long)cm->width * cm->height;

  // Currently, rt speed 0, 1, 2, 3, 4, 5 are the same.
  // Following set of speed features are not impacting encoder's decisions as
  // the relevant tools are disabled by default.
  sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH;
  sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;
  sf->inter_sf.reuse_inter_intra_mode = 1;
  sf->inter_sf.prune_compound_using_single_ref = 0;
  sf->inter_sf.prune_comp_search_by_single_result = 2;
  sf->inter_sf.prune_comp_type_by_comp_avg = 2;
  sf->inter_sf.fast_wedge_sign_estimate = 1;
  sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
  sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW;
  sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
  sf->interp_sf.cb_pred_filter_search = 0;
  sf->interp_sf.skip_interp_filter_search = 1;
  sf->part_sf.ml_prune_partition = 1;
  sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
  sf->part_sf.prune_ext_partition_types_search_level = 2;
  sf->part_sf.less_rectangular_check_level = 2;
  sf->mv_sf.obmc_full_pixel_search_level = 1;
  sf->intra_sf.dv_cost_upd_level = INTERNAL_COST_UPD_OFF;
  sf->tx_sf.model_based_prune_tx_search_level = 0;
  sf->lpf_sf.dual_sgr_penalty_level = 1;
  // Disable Wiener and Self-guided Loop restoration filters.
  sf->lpf_sf.disable_wiener_filter = true;
  sf->lpf_sf.disable_sgr_filter = true;
  sf->intra_sf.prune_palette_search_level = 2;
  sf->intra_sf.prune_luma_palette_size_search_level = 2;
  sf->intra_sf.early_term_chroma_palette_size_search = 1;

  // End of set

  // TODO(any, yunqing): tune these features for real-time use cases.
  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_SOLO;
  sf->hl_sf.frame_parameter_update = 0;

  sf->inter_sf.model_based_post_interp_filter_breakout = 1;
  // TODO(any): As per the experiments, this speed feature is doing redundant
  // computation since the model rd based pruning logic is similar to model rd
  // based gating when inter_mode_rd_model_estimation = 2. Enable this SF if
  // either of the condition becomes true.
  //    (1) inter_mode_rd_model_estimation != 2
  //    (2) skip_interp_filter_search == 0
  //    (3) Motion mode or compound mode is enabled
  sf->inter_sf.prune_mode_search_simple_translation = 0;
  sf->inter_sf.prune_ref_frame_for_rect_partitions = !boosted;
  sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
  sf->inter_sf.selective_ref_frame = 4;
  sf->inter_sf.alt_ref_search_fp = 2;
  set_txfm_rd_gate_level(sf->inter_sf.txfm_rd_gate_level, boosted ? 0 : 4);
  sf->inter_sf.limit_txfm_eval_per_mode = 3;

  sf->inter_sf.adaptive_rd_thresh = 4;
  sf->inter_sf.inter_mode_rd_model_estimation = 2;
  sf->inter_sf.prune_inter_modes_if_skippable = 1;
  sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL3;
  sf->inter_sf.reduce_inter_modes = boosted ? 1 : 3;
  sf->inter_sf.skip_newmv_in_drl = 4;

  sf->interp_sf.use_fast_interpolation_filter_search = 1;
  sf->interp_sf.use_interp_filter = 1;
  sf->interp_sf.adaptive_interp_filter_search = 1;
  sf->interp_sf.disable_dual_filter = 1;

  sf->part_sf.default_max_partition_size = BLOCK_128X128;
  sf->part_sf.default_min_partition_size = BLOCK_8X8;
  sf->part_sf.use_best_rd_for_pruning = 1;
  sf->part_sf.early_term_after_none_split = 1;
  sf->part_sf.partition_search_breakout_dist_thr = (1 << 25);
  sf->part_sf.max_intra_bsize = BLOCK_16X16;
  sf->part_sf.partition_search_breakout_rate_thr = 500;
  sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
  sf->part_sf.adjust_var_based_rd_partitioning = 2;

  sf->mv_sf.full_pixel_search_level = 1;
  sf->mv_sf.exhaustive_searches_thresh = INT_MAX;
  sf->mv_sf.auto_mv_step_size = 1;
  sf->mv_sf.subpel_iters_per_step = 1;
  sf->mv_sf.use_accurate_subpel_search = USE_2_TAPS;
  sf->mv_sf.search_method = FAST_DIAMOND;
  sf->mv_sf.subpel_force_stop = EIGHTH_PEL;
  sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;

  for (int i = 0; i < TX_SIZES; ++i) {
    sf->intra_sf.intra_y_mode_mask[i] = INTRA_DC;
    sf->intra_sf.intra_uv_mode_mask[i] = UV_INTRA_DC_CFL;
  }
  sf->intra_sf.skip_intra_in_interframe = 5;
  sf->intra_sf.disable_smooth_intra = 1;
  sf->intra_sf.skip_filter_intra_in_inter_frames = 1;

  sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;
  sf->tx_sf.adaptive_txb_search_level = 2;
  sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
  sf->tx_sf.tx_size_search_lgr_block = 1;
  sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
  sf->tx_sf.tx_type_search.skip_tx_search = 1;
  sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
  sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
  sf->tx_sf.refine_fast_tx_search_results = 0;
  sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
  sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
  sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 4;

  sf->rd_sf.optimize_coefficients = NO_TRELLIS_OPT;
  sf->rd_sf.simple_model_rd_from_var = 1;
  sf->rd_sf.tx_domain_dist_level = 2;
  sf->rd_sf.tx_domain_dist_thres_level = 2;

  sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
  sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;

  sf->winner_mode_sf.dc_blk_pred_level = frame_is_intra_only(cm) ? 0 : 3;
  sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 1;
  sf->winner_mode_sf.tx_size_search_level = 1;
  sf->winner_mode_sf.winner_mode_ifs = 1;

  sf->rt_sf.check_intra_pred_nonrd = 1;
  sf->rt_sf.estimate_motion_for_var_based_partition = 2;
  sf->rt_sf.hybrid_intra_pickmode = 1;
  sf->rt_sf.use_comp_ref_nonrd = 0;
  sf->rt_sf.ref_frame_comp_nonrd[0] = 0;
  sf->rt_sf.ref_frame_comp_nonrd[1] = 0;
  sf->rt_sf.ref_frame_comp_nonrd[2] = 0;
  sf->rt_sf.use_nonrd_filter_search = 1;
  // mode_search_skip_flags is assigned in full further down, so it is not
  // touched here.
  sf->rt_sf.num_inter_modes_for_tx_search = 5;
  sf->rt_sf.prune_inter_modes_using_temp_var = 1;
  sf->rt_sf.use_real_time_ref_set = 1;
  sf->rt_sf.use_simple_rd_model = 1;
  sf->rt_sf.prune_inter_modes_with_golden_ref = boosted ? 0 : 1;
  // TODO(any): This sf could be removed.
  sf->rt_sf.short_circuit_low_temp_var = 1;
  sf->rt_sf.check_scene_detection = 1;
  if (cpi->rc.rtc_external_ratectrl) sf->rt_sf.check_scene_detection = 0;
  if (cm->current_frame.frame_type != KEY_FRAME &&
      cpi->oxcf.rc_cfg.mode == AOM_CBR)
    sf->rt_sf.overshoot_detection_cbr = FAST_DETECTION_MAXQ;
  // Enable noise estimation only for high resolutions for now.
  //
  // Since use_temporal_noise_estimate has no effect for all-intra frame
  // encoding, it is disabled for this case.
  if (cpi->oxcf.kf_cfg.key_freq_max != 0 && frame_area > 640 * 480)
    sf->rt_sf.use_temporal_noise_estimate = 1;
  sf->rt_sf.skip_tx_no_split_var_based_partition = 1;
  sf->rt_sf.skip_newmv_mode_based_on_sse = 1;
  // Key frames start with no skip flags; all other frames get the full set.
  sf->rt_sf.mode_search_skip_flags =
      (cm->current_frame.frame_type == KEY_FRAME)
          ? 0
          : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
                FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR |
                FLAG_EARLY_TERMINATE;
  sf->rt_sf.var_part_split_threshold_shift = 5;
  if (!frame_is_intra_only(cm)) sf->rt_sf.var_part_based_on_qidx = 1;
  sf->rt_sf.use_fast_fixed_part = 0;
  sf->rt_sf.increase_source_sad_thresh = 0;

  if (speed >= 6) {
    sf->mv_sf.use_fullpel_costlist = 1;

    sf->rd_sf.tx_domain_dist_thres_level = 3;

    sf->tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh = 0;
    sf->inter_sf.limit_inter_mode_cands = 4;
    sf->inter_sf.prune_warped_prob_thresh = 8;
    sf->inter_sf.extra_prune_warped = 1;

    sf->rt_sf.gf_refresh_based_on_qp = 1;
    sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad = 1;
    sf->rt_sf.var_part_split_threshold_shift = 7;
    if (!frame_is_intra_only(cm)) sf->rt_sf.var_part_based_on_qidx = 2;

    sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 3;
  }

  if (speed >= 7) {
    sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_1;
    sf->rt_sf.use_comp_ref_nonrd = 1;
    sf->rt_sf.ref_frame_comp_nonrd[2] = 1;  // LAST_ALTREF
    sf->tx_sf.intra_tx_size_search_init_depth_sqr = 2;
    sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
    sf->part_sf.max_intra_bsize = BLOCK_32X32;

    sf->mv_sf.search_method = FAST_DIAMOND;
    sf->mv_sf.subpel_force_stop = QUARTER_PEL;

    sf->inter_sf.inter_mode_rd_model_estimation = 2;
    // This sf is not applicable in non-rd path.
    sf->inter_sf.skip_newmv_in_drl = 0;

    sf->interp_sf.skip_interp_filter_search = 0;

    // Disable intra_y_mode_mask pruning since the performance at speed 7 isn't
    // good. May need more study.
    for (int i = 0; i < TX_SIZES; ++i) {
      sf->intra_sf.intra_y_mode_mask[i] = INTRA_ALL;
    }

    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;
    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL5;

    // On key frames the flags were set to 0 above, so this adds the
    // direction-mismatch flag there; on other frames it is already set.
    sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
    sf->rt_sf.nonrd_prune_ref_frame_search = 1;
    // This is for rd path only.
    sf->rt_sf.prune_inter_modes_using_temp_var = 0;
    sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad = 0;
    sf->rt_sf.prune_intra_mode_based_on_mv_range = 0;
#if !CONFIG_REALTIME_ONLY
    sf->rt_sf.reuse_inter_pred_nonrd =
        (cpi->oxcf.motion_mode_cfg.enable_warped_motion == 0);
#else
    sf->rt_sf.reuse_inter_pred_nonrd = 1;
#endif
#if CONFIG_AV1_TEMPORAL_DENOISING
    sf->rt_sf.reuse_inter_pred_nonrd = (cpi->oxcf.noise_sensitivity == 0);
#endif
    sf->rt_sf.short_circuit_low_temp_var = 0;
    // For spatial layers, only LAST and GOLDEN are currently used in the SVC
    // for nonrd. The flag use_nonrd_altref_frame can disable GOLDEN in the
    // get_ref_frame_flags() for some patterns, so disable it here for
    // spatial layers.
    sf->rt_sf.use_nonrd_altref_frame =
        (cpi->svc.number_spatial_layers > 1) ? 0 : 1;
    sf->rt_sf.use_nonrd_pick_mode = 1;
    sf->rt_sf.nonrd_check_partition_merge_mode = 3;
    sf->rt_sf.skip_intra_pred = 1;
    sf->rt_sf.source_metrics_sb_nonrd = 1;
    // Set mask for intra modes.
    for (int i = 0; i < BLOCK_SIZES; ++i) {
      if (i >= BLOCK_32X32) {
        sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
      } else {
        // Use DC, H, V intra mode for block sizes < 32X32.
        sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
      }
    }

    sf->winner_mode_sf.dc_blk_pred_level = 0;
    sf->rt_sf.var_part_based_on_qidx = 3;
    sf->rt_sf.prune_compoundmode_with_singlecompound_var = true;
    sf->rt_sf.prune_compoundmode_with_singlemode_var = true;
    sf->rt_sf.skip_compound_based_on_var = true;
    sf->rt_sf.use_adaptive_subpel_search = true;
  }

  if (speed >= 8) {
    sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_2;
    sf->intra_sf.intra_pruning_with_hog = 1;
    sf->rt_sf.short_circuit_low_temp_var = 1;
    sf->rt_sf.use_nonrd_altref_frame = 0;
    sf->rt_sf.nonrd_prune_ref_frame_search = 2;
    sf->rt_sf.nonrd_check_partition_merge_mode = 0;
    sf->rt_sf.var_part_split_threshold_shift = 8;
    sf->rt_sf.var_part_based_on_qidx = 4;
    sf->rt_sf.partition_direct_merging = 1;
    sf->rt_sf.prune_compoundmode_with_singlemode_var = false;
    sf->mv_sf.use_bsize_dependent_search_method = 2;
    sf->rt_sf.prune_hv_pred_modes_using_src_sad = true;
  }
  if (speed >= 9) {
    sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_3;
    sf->rt_sf.estimate_motion_for_var_based_partition = 3;
    sf->rt_sf.prefer_large_partition_blocks = 3;
    sf->rt_sf.skip_intra_pred = 2;
    sf->rt_sf.var_part_split_threshold_shift = 9;
    for (int i = 0; i < BLOCK_SIZES; ++i) {
      sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
    }
    sf->rt_sf.var_part_based_on_qidx = 0;
    sf->rt_sf.frame_level_mode_cost_update = true;
    sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
    sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
    sf->rt_sf.use_adaptive_subpel_search = true;
    sf->mv_sf.use_bsize_dependent_search_method = 0;
  }
  if (speed >= 10) {
    sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_4;
    sf->rt_sf.nonrd_prune_ref_frame_search = 3;
    sf->rt_sf.var_part_split_threshold_shift = 10;
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
  }
  if (speed >= 11 && !frame_is_intra_only(cm) &&
      cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
    sf->winner_mode_sf.dc_blk_pred_level = 3;
  }
}
1963
// Writes the initial (best quality) values of the high-level speed features
// into `hl_sf`.
static inline void init_hl_sf(HIGH_LEVEL_SPEED_FEATURES *hl_sf) {
  // Recode loop; the tolerance is a percentage.
  hl_sf->recode_loop = ALLOW_RECODE;
  hl_sf->recode_tolerance = 25;

  // Fields whose names refer to alt-ref / temporal filtering (tf).
  hl_sf->second_alt_ref_filtering = 1;
  hl_sf->adjust_num_frames_for_arf_filtering = 0;
  hl_sf->weight_calc_level_in_tf = 0;
  hl_sf->allow_sub_blk_me_in_tf = 0;

  // Remaining frame-level fields.
  hl_sf->frame_parameter_update = 1;
  hl_sf->high_precision_mv_usage = CURRENT_Q;
  hl_sf->superres_auto_search_type = SUPERRES_AUTO_ALL;
  hl_sf->disable_extra_sc_testing = 0;
  hl_sf->accurate_bit_estimate = 0;
}
1979
// Writes the initial values of the first-pass speed features into `fp_sf`.
// Only reduce_mv_step_param starts non-zero.
static inline void init_fp_sf(FIRST_PASS_SPEED_FEATURES *fp_sf) {
  fp_sf->disable_recon = 0;
  fp_sf->skip_zeromv_motion_search = 0;
  fp_sf->skip_motion_search_threshold = 0;
  fp_sf->reduce_mv_step_param = 3;
}
1986
// Writes the initial values of the TPL speed features into `tpl_sf`.
static inline void init_tpl_sf(TPL_SPEED_FEATURES *tpl_sf) {
  // Motion search fields.
  tpl_sf->search_method = NSTEP;
  tpl_sf->subpel_force_stop = EIGHTH_PEL;
  tpl_sf->reduce_first_step_size = 0;
  tpl_sf->prune_starting_mv = 0;
  tpl_sf->skip_alike_starting_mv = 0;

  // Mode and reference selection fields.
  tpl_sf->allow_compound_pred = 1;
  tpl_sf->prune_intra_modes = 0;
  tpl_sf->prune_ref_frames_in_tpl = 0;
  tpl_sf->use_sad_for_mode_decision = 0;
  tpl_sf->use_y_only_rate_distortion = 0;

  // Remaining fields.
  tpl_sf->gop_length_decision_method = 0;
  tpl_sf->reduce_num_frames = 0;
}
2001
// Writes the initial values of the global motion speed features into `gm_sf`:
// full search, the maximum number of refinement steps, and every other field
// set to 0.
static inline void init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES *gm_sf) {
  gm_sf->gm_search_type = GM_FULL_SEARCH;
  gm_sf->num_refinement_steps = GM_MAX_REFINEMENT_STEPS;

  gm_sf->downsample_level = 0;
  gm_sf->disable_gm_search_based_on_stats = 0;
  gm_sf->prune_zero_mv_with_sse = 0;
  gm_sf->prune_ref_frame_for_gm_search = 0;
}
2010
// Writes the initial values of the partition search speed features into
// `part_sf`.
static inline void init_part_sf(PARTITION_SPEED_FEATURES *part_sf) {
  // Search type and block-size limits.
  part_sf->partition_search_type = SEARCH_PARTITION;
  part_sf->default_max_partition_size = BLOCK_LARGEST;
  part_sf->default_min_partition_size = BLOCK_4X4;
  part_sf->max_intra_bsize = BLOCK_LARGEST;
  part_sf->use_square_partition_only_threshold = BLOCK_128X128;
  part_sf->rect_partition_eval_thresh = BLOCK_128X128;
  part_sf->ext_partition_eval_thresh = BLOCK_8X8;
  part_sf->auto_max_partition_based_on_simple_motion = NOT_IN_USE;
  // This setting only takes effect when partition_search_type is set
  // to FIXED_PARTITION.
  part_sf->fixed_partition_size = BLOCK_16X16;
  part_sf->adjust_var_based_rd_partitioning = 0;

  // Partition search breakout fields.
  part_sf->partition_search_breakout_dist_thr = 0;
  part_sf->partition_search_breakout_rate_thr = 0;
  part_sf->ml_predict_breakout_level = 0;
  for (int bsize_idx = 0; bsize_idx < PARTITION_BLOCK_SIZES; ++bsize_idx) {
    // -1 means not enabled.
    part_sf->ml_partition_search_breakout_thresh[bsize_idx] = -1;
  }

  // ML / CNN based fields.
  part_sf->ml_prune_partition = 0;
  part_sf->ml_early_term_after_part_split_level = 0;
  part_sf->intra_cnn_based_part_prune_level = 0;

  // Simple motion search based fields.
  part_sf->simple_motion_search_prune_agg = SIMPLE_AGG_LVL0;
  part_sf->simple_motion_search_split = 0;
  part_sf->simple_motion_search_prune_rect = 0;
  part_sf->simple_motion_search_early_term_none = 0;
  part_sf->simple_motion_search_reduce_search_steps = 0;
  part_sf->simple_motion_search_rect_split = 0;

  // Pruning and early termination fields.
  part_sf->less_rectangular_check_level = 0;
  part_sf->prune_ext_partition_types_search_level = 0;
  part_sf->prune_part4_search = 0;
  part_sf->ext_part_eval_based_on_cur_best = 0;
  part_sf->prune_ext_part_using_split_info = 0;
  part_sf->prune_rectangular_split_based_on_qidx = 0;
  part_sf->prune_rect_part_using_4x4_var_deviation = false;
  part_sf->prune_rect_part_using_none_pred_mode = false;
  part_sf->early_term_after_none_split = 0;
  part_sf->prune_sub_8x8_partition_level = 0;
  part_sf->use_best_rd_for_pruning = 0;
  part_sf->skip_non_sq_part_based_on_none = 0;
  part_sf->disable_8x8_part_based_on_qidx = 0;

  // Result reuse fields.
  part_sf->reuse_prev_rd_results_for_part_ab = 0;
  part_sf->reuse_best_prediction_for_part_ab = 0;
}
2057
// Writes the initial values of the motion vector search speed features into
// `mv_sf`.
static inline void init_mv_sf(MV_SPEED_FEATURES *mv_sf) {
  // Full-pel search fields.
  mv_sf->search_method = NSTEP;
  mv_sf->full_pixel_search_level = 0;
  mv_sf->obmc_full_pixel_search_level = 0;
  mv_sf->auto_mv_step_size = 0;
  mv_sf->exhaustive_searches_thresh = 0;
  mv_sf->prune_mesh_search = PRUNE_MESH_SEARCH_DISABLED;
  mv_sf->reduce_search_range = 0;
  mv_sf->use_bsize_dependent_search_method = 0;
  mv_sf->use_fullpel_costlist = 0;
  mv_sf->use_downsampled_sad = 0;
  mv_sf->skip_fullpel_search_using_startmv = 0;

  // Sub-pel search fields.
  mv_sf->subpel_search_method = SUBPEL_TREE;
  mv_sf->subpel_iters_per_step = 2;
  mv_sf->subpel_force_stop = EIGHTH_PEL;
  mv_sf->simple_motion_subpel_force_stop = EIGHTH_PEL;
  mv_sf->use_accurate_subpel_search = USE_8_TAPS;

  // Joint / second MV, warp and intrabc fields.
  mv_sf->disable_extensive_joint_motion_search = 0;
  mv_sf->disable_second_mv = 0;
  mv_sf->warp_search_method = WARP_SEARCH_SQUARE;
  mv_sf->warp_search_iters = 8;
  mv_sf->use_intrabc = 1;
}
2081
// Writes the initial values of the inter mode search speed features into
// `inter_sf`. Apart from the three cost update levels and the dist-wtd
// compound flag, every scalar field starts at 0 (or PRUNE_NEARMV_OFF), and the
// txfm rd gate levels are set through set_txfm_rd_gate_level() with level 0.
static inline void init_inter_sf(INTER_MODE_SPEED_FEATURES *inter_sf) {
  // Cost update levels.
  inter_sf->mv_cost_upd_level = INTERNAL_COST_UPD_SB;
  inter_sf->coeff_cost_upd_level = INTERNAL_COST_UPD_SB;
  inter_sf->mode_cost_upd_level = INTERNAL_COST_UPD_SB;

  // Reference-frame related fields.
  inter_sf->alt_ref_search_fp = 0;
  inter_sf->prune_single_ref = 0;
  inter_sf->prune_comp_ref_frames = 0;
  inter_sf->selective_ref_frame = 0;
  inter_sf->prune_ref_frame_for_rect_partitions = 0;
  inter_sf->disable_onesided_comp = 0;
  inter_sf->skip_arf_compound = 0;

  // Compound, wedge, masked and inter-intra related fields.
  inter_sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_ENABLED;
  inter_sf->fast_wedge_sign_estimate = 0;
  inter_sf->reuse_inter_intra_mode = 0;
  inter_sf->prune_comp_search_by_single_result = 0;
  inter_sf->prune_compound_using_single_ref = 0;
  inter_sf->prune_ext_comp_using_neighbors = 0;
  inter_sf->skip_ext_comp_nearmv_mode = 0;
  inter_sf->prune_comp_using_best_single_mode_ref = 0;
  inter_sf->prune_comp_type_by_comp_avg = 0;
  inter_sf->prune_comp_type_by_model_rd = 0;
  inter_sf->disable_interinter_wedge_newmv_search = 0;
  inter_sf->fast_interintra_wedge_search = 0;
  inter_sf->disable_interinter_wedge_var_thresh = 0;
  inter_sf->disable_interintra_wedge_var_thresh = 0;
  inter_sf->reuse_compound_type_decision = 0;
  inter_sf->disable_masked_comp = 0;
  inter_sf->enable_fast_compound_mode_search = 0;
  inter_sf->reuse_mask_search_results = 0;
  inter_sf->enable_fast_wedge_mask_search = 0;

  // Remaining inter mode search fields.
  inter_sf->adaptive_rd_thresh = 0;
  inter_sf->model_based_post_interp_filter_breakout = 0;
  inter_sf->reduce_inter_modes = 0;
  inter_sf->prune_inter_modes_based_on_tpl = 0;
  inter_sf->prune_nearmv_using_neighbors = PRUNE_NEARMV_OFF;
  inter_sf->prune_nearest_near_mv_using_refmv_weight = 0;
  inter_sf->skip_repeated_ref_mv = 0;
  inter_sf->skip_newmv_in_drl = 0;
  inter_sf->prune_ref_mv_idx_search = 0;
  inter_sf->inter_mode_rd_model_estimation = 0;
  inter_sf->prune_mode_search_simple_translation = 0;
  inter_sf->perform_best_rd_based_gating_for_chroma = 0;
  inter_sf->prune_obmc_prob_thresh = 0;
  inter_sf->prune_warped_prob_thresh = 0;
  inter_sf->prune_inter_modes_if_skippable = 0;
  inter_sf->inter_mode_txfm_breakout = 0;
  inter_sf->limit_inter_mode_cands = 0;
  inter_sf->limit_txfm_eval_per_mode = 0;

  set_txfm_rd_gate_level(inter_sf->txfm_rd_gate_level, 0);
}
2132
// Writes the initial values of the interpolation filter speed features into
// `interp_sf`; every field set here starts at 0.
static inline void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) {
  interp_sf->use_interp_filter = 0;
  interp_sf->use_fast_interpolation_filter_search = 0;
  interp_sf->adaptive_interp_filter_search = 0;
  interp_sf->cb_pred_filter_search = 0;
  interp_sf->skip_interp_filter_search = 0;
  interp_sf->skip_sharp_interp_filter_search = 0;
  interp_sf->disable_dual_filter = 0;
}
2142
// Writes the initial values of the intra mode search speed features into
// `intra_sf`.
static inline void init_intra_sf(INTRA_MODE_SPEED_FEATURES *intra_sf) {
  // All luma and chroma intra modes are allowed for every transform size.
  for (int tx_idx = 0; tx_idx < TX_SIZES; ++tx_idx) {
    intra_sf->intra_y_mode_mask[tx_idx] = INTRA_ALL;
    intra_sf->intra_uv_mode_mask[tx_idx] = UV_INTRA_ALL;
  }

  // Fields that start at a non-zero value.
  intra_sf->dv_cost_upd_level = INTERNAL_COST_UPD_SB;
  intra_sf->skip_intra_in_interframe = 1;
  intra_sf->cfl_search_range = 3;
  intra_sf->top_intra_model_count_allowed = TOP_INTRA_MODEL_COUNT;

  // Palette search fields.
  intra_sf->prune_palette_search_level = 0;
  intra_sf->prune_luma_palette_size_search_level = 0;
  intra_sf->early_term_chroma_palette_size_search = 0;

  // Remaining fields, all starting at 0.
  intra_sf->intra_pruning_with_hog = 0;
  intra_sf->chroma_intra_pruning_with_hog = 0;
  intra_sf->disable_smooth_intra = 0;
  intra_sf->prune_smooth_intra_mode_for_chroma = 0;
  intra_sf->prune_filter_intra_level = 0;
  intra_sf->skip_filter_intra_in_inter_frames = 0;
  intra_sf->prune_chroma_modes_using_luma_winner = 0;
  intra_sf->adapt_top_model_rd_count_using_neighbors = 0;
  intra_sf->prune_luma_odd_delta_angles_in_intra = 0;
}
2166
// Writes the initial values of the transform search speed features into
// `tx_sf`, including the nested tx_type_search settings.
static inline void init_tx_sf(TX_SPEED_FEATURES *tx_sf) {
  // Transform size search fields.
  tx_sf->intra_tx_size_search_init_depth_sqr = 0;
  tx_sf->intra_tx_size_search_init_depth_rect = 0;
  tx_sf->inter_tx_size_search_init_depth_sqr = 0;
  tx_sf->inter_tx_size_search_init_depth_rect = 0;
  tx_sf->tx_size_search_lgr_block = 0;
  tx_sf->prune_tx_size_level = 0;
  tx_sf->prune_intra_tx_depths_using_nn = false;
  tx_sf->use_rd_based_breakout_for_intra_tx_search = false;

  // Transform type search fields.
  tx_sf->tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_1;
  tx_sf->tx_type_search.ml_tx_split_thresh = 8500;
  tx_sf->tx_type_search.fast_inter_tx_type_prob_thresh = INT_MAX;
  tx_sf->tx_type_search.use_skip_flag_prediction = 1;
  tx_sf->tx_type_search.use_reduced_intra_txset = 0;
  tx_sf->tx_type_search.fast_intra_tx_type_search = 0;
  tx_sf->tx_type_search.skip_tx_search = 0;
  tx_sf->tx_type_search.prune_tx_type_using_stats = 0;
  tx_sf->tx_type_search.prune_tx_type_est_rd = 0;
  tx_sf->tx_type_search.winner_mode_tx_type_pruning = 0;

  // Remaining fields.
  tx_sf->txb_split_cap = 1;
  tx_sf->refine_fast_tx_search_results = 1;
  tx_sf->model_based_prune_tx_search_level = 0;
  tx_sf->adaptive_txb_search_level = 0;
}
2191
// Writes the initial values of the RD calculation speed features into `rd_sf`.
//
// optimize_coefficients is derived from oxcf->algo_cfg.disable_trellis_quant:
//   0 -> FULL_TRELLIS_OPT
//   1 -> NO_TRELLIS_OPT
//   2 -> FINAL_PASS_TRELLIS_OPT
//   3 -> NO_ESTIMATE_YRD_TRELLIS_OPT
// For 0, 2 and 3 a lossless request (is_lossless_requested()) selects
// NO_TRELLIS_OPT instead. Any other value trips the assert and leaves
// optimize_coefficients unassigned.
static inline void init_rd_sf(RD_CALC_SPEED_FEATURES *rd_sf,
                              const AV1EncoderConfig *oxcf) {
  const int disable_trellis_quant = oxcf->algo_cfg.disable_trellis_quant;

  rd_sf->use_mb_rd_hash = 0;
  rd_sf->simple_model_rd_from_var = 0;
  rd_sf->tx_domain_dist_level = 0;
  rd_sf->tx_domain_dist_thres_level = 0;
  rd_sf->perform_coeff_opt = 0;

  switch (disable_trellis_quant) {
    case 0:
      rd_sf->optimize_coefficients = is_lossless_requested(&oxcf->rc_cfg)
                                         ? NO_TRELLIS_OPT
                                         : FULL_TRELLIS_OPT;
      break;
    case 1: rd_sf->optimize_coefficients = NO_TRELLIS_OPT; break;
    case 2:
      rd_sf->optimize_coefficients = is_lossless_requested(&oxcf->rc_cfg)
                                         ? NO_TRELLIS_OPT
                                         : FINAL_PASS_TRELLIS_OPT;
      break;
    case 3:
      rd_sf->optimize_coefficients = is_lossless_requested(&oxcf->rc_cfg)
                                         ? NO_TRELLIS_OPT
                                         : NO_ESTIMATE_YRD_TRELLIS_OPT;
      break;
    default: assert(0 && "Invalid disable_trellis_quant value"); break;
  }
}
2220
// Writes the initial values of the winner mode speed features into
// `winner_mode_sf`; every field set here starts at 0.
static inline void init_winner_mode_sf(
    WINNER_MODE_SPEED_FEATURES *winner_mode_sf) {
  // Winner-mode processing switches.
  winner_mode_sf->enable_winner_mode_for_coeff_opt = 0;
  winner_mode_sf->enable_winner_mode_for_tx_size_srch = 0;
  winner_mode_sf->enable_winner_mode_for_use_tx_domain_dist = 0;

  // Set this at the appropriate speed levels
  winner_mode_sf->tx_size_search_level = 0;

  winner_mode_sf->multi_winner_mode_type = 0;
  winner_mode_sf->motion_mode_for_winner_cand = 0;
  winner_mode_sf->dc_blk_pred_level = 0;
  winner_mode_sf->winner_mode_ifs = 0;
  winner_mode_sf->prune_winner_mode_eval_level = 0;
}
2234
// Writes the initial values of the loop filter speed features into `lpf_sf`
// (fields for the deblocking filter level pick, CDEF and loop restoration).
static inline void init_lpf_sf(LOOP_FILTER_SPEED_FEATURES *lpf_sf) {
  // Filter level and CDEF selection.
  lpf_sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
  lpf_sf->use_coarse_filter_level_search = 0;
  lpf_sf->cdef_pick_method = CDEF_FULL_SEARCH;

  // Loop restoration: unit size range and per-plane switches.
  lpf_sf->min_lr_unit_size = RESTORATION_PROC_UNIT_SIZE;
  lpf_sf->max_lr_unit_size = RESTORATION_UNITSIZE_MAX;
  lpf_sf->disable_loop_restoration_luma = 0;
  lpf_sf->disable_loop_restoration_chroma = 0;

  // Enable Wiener and Self-guided Loop restoration filters by default.
  lpf_sf->disable_wiener_filter = false;
  lpf_sf->disable_sgr_filter = false;

  // Wiener filter search fields.
  lpf_sf->prune_wiener_based_on_src_var = 0;
  lpf_sf->reduce_wiener_window_size = 0;
  lpf_sf->disable_wiener_coeff_refine_search = false;
  lpf_sf->use_downsampled_wiener_stats = 0;

  // Self-guided (sgr) filter search fields.
  lpf_sf->prune_sgr_based_on_wiener = 0;
  lpf_sf->enable_sgr_ep_pruning = 0;
  // Set decoder side speed feature to use less dual sgr modes
  lpf_sf->dual_sgr_penalty_level = 0;
}
2255
// Resets the real-time speed features to their defaults.
//
// Most settings default to 0 / false. The handful that default to a non-zero
// value or to a named constant are grouped first so they stand out.
static inline void init_rt_sf(REAL_TIME_SPEED_FEATURES *rt_sf) {
  // --- Defaults that are non-zero or use a named constant. ---
  rt_sf->num_inter_modes_for_tx_search = INT_MAX;
  rt_sf->overshoot_detection_cbr = NO_DETECTION;
  rt_sf->sse_early_term_inter_search = EARLY_TERM_DISABLED;
  rt_sf->var_part_split_threshold_shift = 7;
  rt_sf->thresh_active_maps_skip_lf_cdef = 100;
  rt_sf->set_zeromv_skip_based_on_source_sad = 1;
  rt_sf->higher_thresh_scene_detection = 1;
  rt_sf->check_globalmv_on_single_ref = true;
  // Every block size starts with the INTRA_ALL luma intra mode mask.
  for (int bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
    rt_sf->intra_y_mode_bsize_mask_nrd[bsize] = INTRA_ALL;
  }

  // --- Flags that default to false. ---
  rt_sf->prune_hv_pred_modes_using_src_sad = false;
  rt_sf->vbp_prune_16x16_split_using_min_max_sub_blk_var = false;
  rt_sf->prune_compoundmode_with_singlecompound_var = false;
  rt_sf->frame_level_mode_cost_update = false;
  rt_sf->prune_h_pred_using_best_mode_so_far = false;
  rt_sf->enable_intra_mode_pruning_using_neighbors = false;
  rt_sf->prune_intra_mode_using_best_sad_so_far = false;
  rt_sf->check_only_zero_zeromv_on_large_blocks = false;
  rt_sf->disable_cdf_update_non_reference_frame = false;
  rt_sf->prune_compoundmode_with_singlemode_var = false;
  rt_sf->skip_compound_based_on_var = false;
  rt_sf->use_adaptive_subpel_search = false;
  rt_sf->enable_ref_short_signaling = false;
  rt_sf->increase_color_thresh_palette = false;

  // --- Settings that default to 0. ---
  rt_sf->check_intra_pred_nonrd = 0;
  rt_sf->skip_intra_pred = 0;
  rt_sf->estimate_motion_for_var_based_partition = 0;
  rt_sf->nonrd_check_partition_merge_mode = 0;
  rt_sf->nonrd_check_partition_split = 0;
  rt_sf->mode_search_skip_flags = 0;
  rt_sf->nonrd_prune_ref_frame_search = 0;
  rt_sf->use_nonrd_pick_mode = 0;
  rt_sf->use_nonrd_altref_frame = 0;
  rt_sf->use_comp_ref_nonrd = 0;
  rt_sf->use_real_time_ref_set = 0;
  rt_sf->short_circuit_low_temp_var = 0;
  rt_sf->reuse_inter_pred_nonrd = 0;
  rt_sf->use_nonrd_filter_search = 0;
  rt_sf->use_simple_rd_model = 0;
  rt_sf->hybrid_intra_pickmode = 0;
  rt_sf->prune_palette_search_nonrd = 0;
  rt_sf->source_metrics_sb_nonrd = 0;
  rt_sf->check_scene_detection = 0;
  rt_sf->rc_adjust_keyframe = 0;
  rt_sf->rc_compute_spatial_var_sc = 0;
  rt_sf->prefer_large_partition_blocks = 0;
  rt_sf->use_temporal_noise_estimate = 0;
  rt_sf->fullpel_search_step_param = 0;
  rt_sf->nonrd_aggressive_skip = 0;
  rt_sf->skip_cdef_sb = 0;
  rt_sf->force_large_partition_blocks_intra = 0;
  rt_sf->skip_tx_no_split_var_based_partition = 0;
  rt_sf->skip_newmv_mode_based_on_sse = 0;
  rt_sf->gf_length_lvl = 0;
  rt_sf->prune_inter_modes_with_golden_ref = 0;
  rt_sf->prune_inter_modes_wrt_gf_arf_based_on_sad = 0;
  rt_sf->prune_inter_modes_using_temp_var = 0;
  rt_sf->reduce_mv_pel_precision_highmotion = 0;
  rt_sf->reduce_mv_pel_precision_lowcomplex = 0;
  rt_sf->prune_intra_mode_based_on_mv_range = 0;
  rt_sf->gf_refresh_based_on_qp = 0;
  rt_sf->use_rtc_tf = 0;
  rt_sf->use_idtx_nonrd = 0;
  rt_sf->prune_idtx_nonrd = 0;
  rt_sf->dct_only_palette_nonrd = 0;
  rt_sf->part_early_exit_zeromv = 0;
  rt_sf->skip_lf_screen = 0;
  rt_sf->sad_based_adp_altref_lag = 0;
  rt_sf->partition_direct_merging = 0;
  rt_sf->var_part_based_on_qidx = 0;
  rt_sf->tx_size_level_based_on_qstep = 0;
  rt_sf->screen_content_cdef_filter_qindex_thresh = 0;
  rt_sf->selective_cdf_update = 0;
  rt_sf->force_only_last_ref = 0;
  rt_sf->skip_newmv_flat_blocks_screen = 0;
  rt_sf->skip_encoding_non_reference_slide_change = 0;
  rt_sf->rc_faster_convergence_static = 0;
}
2335
2336 static fractional_mv_step_fp
2337 *const fractional_mv_search[SUBPEL_SEARCH_METHODS] = {
2338 av1_find_best_sub_pixel_tree, // SUBPEL_TREE = 0
2339 av1_find_best_sub_pixel_tree_pruned, // SUBPEL_TREE_PRUNED = 1
2340 av1_find_best_sub_pixel_tree_pruned_more // SUBPEL_TREE_PRUNED_MORE = 2
2341 };
2342
2343 // Populate appropriate sub-pel search method based on speed feature and user
2344 // specified settings
set_subpel_search_method(MotionVectorSearchParams * mv_search_params,unsigned int motion_vector_unit_test,SUBPEL_SEARCH_METHOD subpel_search_method)2345 static void set_subpel_search_method(
2346 MotionVectorSearchParams *mv_search_params,
2347 unsigned int motion_vector_unit_test,
2348 SUBPEL_SEARCH_METHOD subpel_search_method) {
2349 assert(subpel_search_method <= SUBPEL_TREE_PRUNED_MORE);
2350 mv_search_params->find_fractional_mv_step =
2351 fractional_mv_search[subpel_search_method];
2352
2353 // This is only used in motion vector unit test.
2354 if (motion_vector_unit_test == 1)
2355 mv_search_params->find_fractional_mv_step = av1_return_max_sub_pixel_mv;
2356 else if (motion_vector_unit_test == 2)
2357 mv_search_params->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
2358 }
2359
// Applies the frame-size dependent speed features for the configured encoding
// mode and then propagates their consequences:
//  - while the sequence parameters are not locked, sequence-level tools are
//    switched off when the speed features disable them;
//  - the sub-pel search routine is (re)selected;
//  - with row-based multi-threading on more than one worker, the MV cost
//    update level is lowered from "set of SB rows" to "SB row".
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
  const AV1EncoderConfig *const enc_cfg = &cpi->oxcf;
  SPEED_FEATURES *const speed_feat = &cpi->sf;

  // Dispatch to the setter that matches the encoding mode.
  switch (enc_cfg->mode) {
    case REALTIME:
      set_rt_speed_feature_framesize_dependent(cpi, speed_feat, speed);
      break;
    case ALLINTRA:
      set_allintra_speed_feature_framesize_dependent(cpi, speed_feat, speed);
      break;
    case GOOD:
      set_good_speed_feature_framesize_dependent(cpi, speed_feat, speed);
      break;
  }

  if (!cpi->ppi->seq_params_locked) {
    // A tool stays enabled only if it was enabled before AND the speed
    // features did not turn it off.
    const int keep_masked_compound = !speed_feat->inter_sf.disable_masked_comp;
    const int keep_interintra_compound =
        (speed_feat->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX);

    cpi->common.seq_params->enable_masked_compound &= keep_masked_compound;
    cpi->common.seq_params->enable_interintra_compound &=
        keep_interintra_compound;
  }

  set_subpel_search_method(&cpi->mv_search_params,
                           enc_cfg->unit_test_cfg.motion_vector_unit_test,
                           speed_feat->mv_sf.subpel_search_method);

  // For the multi-thread use case with row_mt enabled, a cost update for a set
  // of SB rows is not desirable, so fall back to the row level update.
  if (enc_cfg->row_mt == 1 && cpi->mt_info.num_workers > 1 &&
      speed_feat->inter_sf.mv_cost_upd_level == INTERNAL_COST_UPD_SBROW_SET) {
    speed_feat->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW;
  }
}
2397
// Sets all speed features that do not depend on the frame dimensions.
//
// Every speed-feature group in cpi->sf is first reset through its init_*_sf()
// helper and then overridden by the setter for the configured encoding mode
// (GOOD / ALLINTRA / REALTIME) at the requested speed. Afterwards:
//  - encoder configuration and threading constraints adjust individual
//    features;
//  - while the sequence parameters are not locked, sequence-level tool flags
//    are cleared for tools the speed features disable;
//  - the mesh search patterns and the lookup tables selected by the speed
//    features are copied into cpi->sf.mv_sf and cpi->winner_mode_params.
//
// Each table lookup below is preceded by an assert on its index so that an
// out-of-range speed-feature value is caught in debug builds.
void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
  SPEED_FEATURES *const sf = &cpi->sf;
  WinnerModeParams *const winner_mode_params = &cpi->winner_mode_params;
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;

  init_hl_sf(&sf->hl_sf);
  init_fp_sf(&sf->fp_sf);
  init_tpl_sf(&sf->tpl_sf);
  init_gm_sf(&sf->gm_sf);
  init_part_sf(&sf->part_sf);
  init_mv_sf(&sf->mv_sf);
  init_inter_sf(&sf->inter_sf);
  init_interp_sf(&sf->interp_sf);
  init_intra_sf(&sf->intra_sf);
  init_tx_sf(&sf->tx_sf);
  init_rd_sf(&sf->rd_sf, oxcf);
  init_winner_mode_sf(&sf->winner_mode_sf);
  init_lpf_sf(&sf->lpf_sf);
  init_rt_sf(&sf->rt_sf);

  switch (oxcf->mode) {
    case GOOD:
      set_good_speed_features_framesize_independent(cpi, sf, speed);
      break;
    case ALLINTRA:
      set_allintra_speed_features_framesize_independent(cpi, sf, speed);
      break;
    case REALTIME:
      set_rt_speed_features_framesize_independent(cpi, sf, speed);
      break;
  }

  // Note: when use_nonrd_pick_mode is true, the transform size is the
  // minimum of 16x16 and the largest possible size of the current block,
  // which conflicts with the speed feature "enable_tx_size_search".
  if (!oxcf->txfm_cfg.enable_tx_size_search &&
      sf->rt_sf.use_nonrd_pick_mode == 0) {
    sf->winner_mode_sf.tx_size_search_level = 3;
  }

  if (cpi->mt_info.num_workers > 1) {
    // Loop restoration stage is conditionally disabled for speed 5, 6 when
    // num_workers > 1. Since av1_pick_filter_restoration() is not
    // multi-threaded, enabling the Loop restoration stage will cause an
    // increase in encode time (3% to 7% increase depends on frame
    // resolution).
    // TODO(aomedia:3446): Implement multi-threading of
    // av1_pick_filter_restoration() and enable Wiener filter for speed 5, 6
    // similar to single thread encoding path.
    if (speed >= 5) {
      sf->lpf_sf.disable_sgr_filter = true;
      sf->lpf_sf.disable_wiener_filter = true;
    }
  }

  if (!cpi->ppi->seq_params_locked) {
    cpi->common.seq_params->order_hint_info.enable_dist_wtd_comp &=
        (sf->inter_sf.use_dist_wtd_comp_flag != DIST_WTD_COMP_DISABLED);
    cpi->common.seq_params->enable_dual_filter &=
        !sf->interp_sf.disable_dual_filter;
    // Keep the flag 'enable_restoration' set only if at least one of the Loop
    // restoration filters (i.e., Wiener or Self-guided) is enabled.
    cpi->common.seq_params->enable_restoration &=
        (!sf->lpf_sf.disable_wiener_filter || !sf->lpf_sf.disable_sgr_filter);

    cpi->common.seq_params->enable_interintra_compound &=
        (sf->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX);
  }

  // Populate the mesh patterns of the exhaustive motion search. Speeds above
  // MAX_MESH_SPEED reuse the last table row.
  // Though the intraBC mesh pattern is populated for all frame types, it is
  // used only for intra frames of screen contents.
  const int mesh_speed = AOMMIN(speed, MAX_MESH_SPEED);
  for (int i = 0; i < MAX_MESH_STEP; ++i) {
    sf->mv_sf.mesh_patterns[i].range =
        good_quality_mesh_patterns[mesh_speed][i].range;
    sf->mv_sf.mesh_patterns[i].interval =
        good_quality_mesh_patterns[mesh_speed][i].interval;

    sf->mv_sf.intrabc_mesh_patterns[i].range =
        intrabc_mesh_patterns[mesh_speed][i].range;
    sf->mv_sf.intrabc_mesh_patterns[i].interval =
        intrabc_mesh_patterns[mesh_speed][i].interval;
  }

  // Slow quant, dct and trellis not worthwhile for first pass
  // so make sure they are always turned off.
  if (is_stat_generation_stage(cpi))
    sf->rd_sf.optimize_coefficients = NO_TRELLIS_OPT;

  // No recode for 1 pass.
  if (oxcf->pass == AOM_RC_ONE_PASS && has_no_stats_stage(cpi))
    sf->hl_sf.recode_loop = DISALLOW_RECODE;

  set_subpel_search_method(&cpi->mv_search_params,
                           cpi->oxcf.unit_test_cfg.motion_vector_unit_test,
                           sf->mv_sf.subpel_search_method);

  // assert ensures that tx_domain_dist_thresholds is accessed correctly
  assert(cpi->sf.rd_sf.tx_domain_dist_thres_level >= 0 &&
         cpi->sf.rd_sf.tx_domain_dist_thres_level < 4);
  memcpy(winner_mode_params->tx_domain_dist_threshold,
         tx_domain_dist_thresholds[cpi->sf.rd_sf.tx_domain_dist_thres_level],
         sizeof(winner_mode_params->tx_domain_dist_threshold));

  // assert ensures that tx_domain_dist_types is accessed correctly
  assert(cpi->sf.rd_sf.tx_domain_dist_level >= 0 &&
         cpi->sf.rd_sf.tx_domain_dist_level < TX_DOMAIN_DIST_LEVELS);
  memcpy(winner_mode_params->use_transform_domain_distortion,
         tx_domain_dist_types[cpi->sf.rd_sf.tx_domain_dist_level],
         sizeof(winner_mode_params->use_transform_domain_distortion));

  // assert ensures that coeff_opt_thresholds is accessed correctly
  assert(cpi->sf.rd_sf.perform_coeff_opt >= 0 &&
         cpi->sf.rd_sf.perform_coeff_opt < 9);
  memcpy(winner_mode_params->coeff_opt_thresholds,
         &coeff_opt_thresholds[cpi->sf.rd_sf.perform_coeff_opt],
         sizeof(winner_mode_params->coeff_opt_thresholds));

  // assert ensures that predict_skip_levels is accessed correctly
  assert(cpi->sf.tx_sf.tx_type_search.use_skip_flag_prediction >= 0 &&
         cpi->sf.tx_sf.tx_type_search.use_skip_flag_prediction < 3);
  memcpy(winner_mode_params->skip_txfm_level,
         predict_skip_levels[cpi->sf.tx_sf.tx_type_search
                                 .use_skip_flag_prediction],
         sizeof(winner_mode_params->skip_txfm_level));

  // assert ensures that tx_size_search_methods is accessed correctly
  assert(cpi->sf.winner_mode_sf.tx_size_search_level >= 0 &&
         cpi->sf.winner_mode_sf.tx_size_search_level <= 3);
  memcpy(winner_mode_params->tx_size_search_methods,
         tx_size_search_methods[cpi->sf.winner_mode_sf.tx_size_search_level],
         sizeof(winner_mode_params->tx_size_search_methods));

  // assert ensures that predict_dc_levels is accessed correctly. The bound is
  // derived from the table itself so it follows any change to its row count.
  assert(cpi->sf.winner_mode_sf.dc_blk_pred_level >= 0 &&
         cpi->sf.winner_mode_sf.dc_blk_pred_level <
             (int)(sizeof(predict_dc_levels) / sizeof(predict_dc_levels[0])));
  memcpy(winner_mode_params->predict_dc_level,
         predict_dc_levels[cpi->sf.winner_mode_sf.dc_blk_pred_level],
         sizeof(winner_mode_params->predict_dc_level));

  if (cpi->oxcf.row_mt == 1 && (cpi->mt_info.num_workers > 1)) {
    if (sf->inter_sf.inter_mode_rd_model_estimation == 1) {
      // Revert to type 2
      sf->inter_sf.inter_mode_rd_model_estimation = 2;
    }

#if !CONFIG_FPMT_TEST
    // Disable the speed feature 'prune_ref_frame_for_gm_search' to achieve
    // better parallelism when number of threads available are greater than or
    // equal to maximum number of reference frames allowed for global motion.
    if (sf->gm_sf.gm_search_type != GM_DISABLE_SEARCH &&
        (cpi->mt_info.num_workers >=
         gm_available_reference_frames[sf->gm_sf.gm_search_type]))
      sf->gm_sf.prune_ref_frame_for_gm_search = 0;
#endif
  }

  // This only applies to the real time mode. Adaptive gf refresh is disabled if
  // gf_cbr_boost_pct that is set by the user is larger than 0.
  if (cpi->oxcf.rc_cfg.gf_cbr_boost_pct > 0)
    sf->rt_sf.gf_refresh_based_on_qp = 0;
}
2559
2560 // Override some speed features based on qindex
av1_set_speed_features_qindex_dependent(AV1_COMP * cpi,int speed)2561 void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) {
2562 AV1_COMMON *const cm = &cpi->common;
2563 SPEED_FEATURES *const sf = &cpi->sf;
2564 WinnerModeParams *const winner_mode_params = &cpi->winner_mode_params;
2565 const int boosted = frame_is_boosted(cpi);
2566 const int is_480p_or_lesser = AOMMIN(cm->width, cm->height) <= 480;
2567 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480;
2568 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
2569 const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080;
2570 const int is_1440p_or_larger = AOMMIN(cm->width, cm->height) >= 1440;
2571 const int is_arf2_bwd_type =
2572 cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;
2573
2574 if (cpi->oxcf.mode == REALTIME) {
2575 if (speed >= 6) {
2576 const int qindex_thresh = boosted ? 190 : (is_720p_or_larger ? 120 : 150);
2577 sf->part_sf.adjust_var_based_rd_partitioning =
2578 frame_is_intra_only(cm)
2579 ? 0
2580 : cm->quant_params.base_qindex > qindex_thresh;
2581 }
2582 return;
2583 }
2584
2585 if (speed == 0) {
2586 // qindex_thresh for resolution < 720p
2587 const int qindex_thresh = boosted ? 70 : (is_arf2_bwd_type ? 110 : 140);
2588 if (!is_720p_or_larger && cm->quant_params.base_qindex <= qindex_thresh) {
2589 sf->part_sf.simple_motion_search_split =
2590 cm->features.allow_screen_content_tools ? 1 : 2;
2591 sf->part_sf.simple_motion_search_early_term_none = 1;
2592 sf->tx_sf.model_based_prune_tx_search_level = 0;
2593 }
2594
2595 if (is_720p_or_larger && cm->quant_params.base_qindex <= 128) {
2596 sf->rd_sf.perform_coeff_opt = 2 + is_1080p_or_larger;
2597 memcpy(winner_mode_params->coeff_opt_thresholds,
2598 &coeff_opt_thresholds[sf->rd_sf.perform_coeff_opt],
2599 sizeof(winner_mode_params->coeff_opt_thresholds));
2600 sf->part_sf.simple_motion_search_split =
2601 cm->features.allow_screen_content_tools ? 1 : 2;
2602 sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
2603 sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
2604 sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
2605 sf->tx_sf.model_based_prune_tx_search_level = 0;
2606
2607 if (is_1080p_or_larger && cm->quant_params.base_qindex <= 108) {
2608 sf->inter_sf.selective_ref_frame = 2;
2609 sf->rd_sf.tx_domain_dist_level = boosted ? 1 : 2;
2610 sf->rd_sf.tx_domain_dist_thres_level = 1;
2611 sf->part_sf.simple_motion_search_early_term_none = 1;
2612 sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
2613 sf->interp_sf.cb_pred_filter_search = 0;
2614 sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
2615 sf->tx_sf.tx_type_search.skip_tx_search = 1;
2616 }
2617 }
2618 }
2619
2620 if (speed >= 2) {
2621 // Disable extended partitions for lower quantizers
2622 const int aggr = AOMMIN(4, speed - 2);
2623 const int qindex_thresh1[4] = { 50, 50, 80, 100 };
2624 const int qindex_thresh2[4] = { 80, 100, 120, 160 };
2625 int qindex_thresh;
2626 if (aggr <= 1) {
2627 const int qthresh2 =
2628 (!aggr && !is_480p_or_larger) ? 70 : qindex_thresh2[aggr];
2629 qindex_thresh = cm->features.allow_screen_content_tools
2630 ? qindex_thresh1[aggr]
2631 : qthresh2;
2632 if (cm->quant_params.base_qindex <= qindex_thresh && !boosted)
2633 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
2634 } else if (aggr <= 2) {
2635 qindex_thresh = boosted ? qindex_thresh1[aggr] : qindex_thresh2[aggr];
2636 if (cm->quant_params.base_qindex <= qindex_thresh &&
2637 !frame_is_intra_only(cm))
2638 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
2639 } else if (aggr <= 3) {
2640 if (!is_480p_or_larger) {
2641 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
2642 } else if (!is_720p_or_larger && !frame_is_intra_only(cm) &&
2643 !cm->features.allow_screen_content_tools) {
2644 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
2645 } else {
2646 qindex_thresh = boosted ? qindex_thresh1[aggr] : qindex_thresh2[aggr];
2647 if (cm->quant_params.base_qindex <= qindex_thresh &&
2648 !frame_is_intra_only(cm))
2649 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
2650 }
2651 } else {
2652 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
2653 }
2654 }
2655
2656 if (speed >= 4) {
2657 // Disable rectangular partitions for lower quantizers
2658 const int aggr = AOMMIN(1, speed - 4);
2659 const int qindex_thresh[2] = { 65, 80 };
2660 int disable_rect_part;
2661 disable_rect_part = !boosted;
2662 if (cm->quant_params.base_qindex <= qindex_thresh[aggr] &&
2663 disable_rect_part && is_480p_or_larger) {
2664 sf->part_sf.rect_partition_eval_thresh = BLOCK_8X8;
2665 }
2666 }
2667
2668 if (speed <= 2) {
2669 if (!is_stat_generation_stage(cpi)) {
2670 // Use faster full-pel motion search for high quantizers.
2671 // Also use reduced total search range for low resolutions at high
2672 // quantizers.
2673 const int aggr = speed;
2674 const int qindex_thresh1 = ms_qindex_thresh[aggr][is_720p_or_larger][0];
2675 const int qindex_thresh2 = ms_qindex_thresh[aggr][is_720p_or_larger][1];
2676 const SEARCH_METHODS search_method =
2677 motion_search_method[is_720p_or_larger];
2678 if (cm->quant_params.base_qindex > qindex_thresh1) {
2679 sf->mv_sf.search_method = search_method;
2680 sf->tpl_sf.search_method = search_method;
2681 } else if (cm->quant_params.base_qindex > qindex_thresh2) {
2682 sf->mv_sf.search_method = NSTEP_8PT;
2683 }
2684 }
2685 }
2686
2687 if (speed >= 4) {
2688 // Disable LR search at low and high quantizers and enable only for
2689 // mid-quantizer range.
2690 if (!boosted && !is_arf2_bwd_type) {
2691 const int qindex_low[2] = { 100, 60 };
2692 const int qindex_high[2] = { 180, 160 };
2693 if (cm->quant_params.base_qindex <= qindex_low[is_720p_or_larger] ||
2694 cm->quant_params.base_qindex > qindex_high[is_720p_or_larger]) {
2695 sf->lpf_sf.disable_loop_restoration_luma = 1;
2696 }
2697 }
2698 }
2699
2700 if (speed == 1) {
2701 // Reuse interinter wedge mask search from first search for non-boosted
2702 // non-internal-arf frames, except at very high quantizers.
2703 if (cm->quant_params.base_qindex <= 200) {
2704 if (!boosted && !is_arf2_bwd_type)
2705 sf->inter_sf.reuse_mask_search_results = 1;
2706 }
2707 }
2708
2709 if (speed == 5) {
2710 if (!(frame_is_intra_only(&cpi->common) ||
2711 cm->features.allow_screen_content_tools)) {
2712 const int qindex[2] = { 256, 128 };
2713 // Set the sf value as 3 for low resolution and
2714 // for higher resolutions with low quantizers.
2715 if (cm->quant_params.base_qindex < qindex[is_480p_or_larger])
2716 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 3;
2717 }
2718 }
2719
2720 if (speed >= 5) {
2721 // Disable the sf for low quantizers in case of low resolution screen
2722 // contents.
2723 if (cm->features.allow_screen_content_tools &&
2724 cm->quant_params.base_qindex < 128 && is_480p_or_lesser) {
2725 sf->part_sf.prune_sub_8x8_partition_level = 0;
2726 }
2727 }
2728
2729 // Loop restoration size search
2730 // At speed 0, always search all available sizes for the maximum possible gain
2731 sf->lpf_sf.min_lr_unit_size = RESTORATION_PROC_UNIT_SIZE;
2732 sf->lpf_sf.max_lr_unit_size = RESTORATION_UNITSIZE_MAX;
2733
2734 if (speed >= 1) {
2735 // For large frames, small restoration units are almost never useful,
2736 // so prune them away
2737 if (is_1440p_or_larger) {
2738 sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX;
2739 } else if (is_720p_or_larger) {
2740 sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX >> 1;
2741 }
2742 }
2743
2744 if (speed >= 3 || (cpi->oxcf.mode == ALLINTRA && speed >= 1)) {
2745 // At this speed, a full search is too expensive. Instead, pick a single
2746 // size based on size and qindex. Note that, in general, higher quantizers
2747 // (== lower quality) and larger frames generally want to use larger
2748 // restoration units.
2749 int qindex_thresh = 96;
2750 if (cm->quant_params.base_qindex <= qindex_thresh && !is_1440p_or_larger) {
2751 sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX >> 1;
2752 sf->lpf_sf.max_lr_unit_size = RESTORATION_UNITSIZE_MAX >> 1;
2753 } else {
2754 sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX;
2755 sf->lpf_sf.max_lr_unit_size = RESTORATION_UNITSIZE_MAX;
2756 }
2757 }
2758
2759 set_subpel_search_method(&cpi->mv_search_params,
2760 cpi->oxcf.unit_test_cfg.motion_vector_unit_test,
2761 sf->mv_sf.subpel_search_method);
2762 }
2763