1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include <math.h>
14 #include <stdbool.h>
15
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/av1_rtcd.h"
19
20 #include "aom_dsp/aom_dsp_common.h"
21 #include "aom_dsp/blend.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25
26 #include "av1/common/av1_common_int.h"
27 #include "av1/common/cfl.h"
28 #include "av1/common/blockd.h"
29 #include "av1/common/common.h"
30 #include "av1/common/common_data.h"
31 #include "av1/common/entropy.h"
32 #include "av1/common/entropymode.h"
33 #include "av1/common/idct.h"
34 #include "av1/common/mvref_common.h"
35 #include "av1/common/obmc.h"
36 #include "av1/common/pred_common.h"
37 #include "av1/common/quant_common.h"
38 #include "av1/common/reconinter.h"
39 #include "av1/common/reconintra.h"
40 #include "av1/common/scan.h"
41 #include "av1/common/seg_common.h"
42 #include "av1/common/txb_common.h"
43 #include "av1/common/warped_motion.h"
44
45 #include "av1/encoder/aq_variance.h"
46 #include "av1/encoder/av1_quantize.h"
47 #include "av1/encoder/cost.h"
48 #include "av1/encoder/compound_type.h"
49 #include "av1/encoder/encodemb.h"
50 #include "av1/encoder/encodemv.h"
51 #include "av1/encoder/encoder.h"
52 #include "av1/encoder/encodetxb.h"
53 #include "av1/encoder/hybrid_fwd_txfm.h"
54 #include "av1/encoder/interp_search.h"
55 #include "av1/encoder/intra_mode_search.h"
56 #include "av1/encoder/intra_mode_search_utils.h"
57 #include "av1/encoder/mcomp.h"
58 #include "av1/encoder/ml.h"
59 #include "av1/encoder/mode_prune_model_weights.h"
60 #include "av1/encoder/model_rd.h"
61 #include "av1/encoder/motion_search_facade.h"
62 #include "av1/encoder/palette.h"
63 #include "av1/encoder/pustats.h"
64 #include "av1/encoder/random.h"
65 #include "av1/encoder/ratectrl.h"
66 #include "av1/encoder/rd.h"
67 #include "av1/encoder/rdopt.h"
68 #include "av1/encoder/reconinter_enc.h"
69 #include "av1/encoder/tokenize.h"
70 #include "av1/encoder/tpl_model.h"
71 #include "av1/encoder/tx_search.h"
72 #include "av1/encoder/var_based_part.h"
73
74 #define LAST_NEW_MV_INDEX 6
75
76 // Mode_threshold multiplication factor table for prune_inter_modes_if_skippable
77 // The values are kept in Q12 format and equation used to derive is
78 // (2.5 - ((float)x->qindex / MAXQ) * 1.5)
79 #define MODE_THRESH_QBITS 12
// Indexed by qindex. Values run linearly from 2.5 * 4096 (= 10240) at
// qindex 0 down to 1.0 * 4096 (= 4096) at MAXQ, per the Q12 equation above.
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
  4144,  4120,  4096
};
106
// Default evaluation order of the prediction modes in the inter mode search.
// Layout: single-reference inter modes first (grouped by prediction mode),
// then compound NEAREST_NEAREST modes, then the remaining compound modes
// grouped by reference-frame pair, and finally the intra modes.
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
  // Single-reference NEARESTMV, one entry per reference frame.
  THR_NEARESTMV,
  THR_NEARESTL2,
  THR_NEARESTL3,
  THR_NEARESTB,
  THR_NEARESTA2,
  THR_NEARESTA,
  THR_NEARESTG,

  // Single-reference NEWMV.
  THR_NEWMV,
  THR_NEWL2,
  THR_NEWL3,
  THR_NEWB,
  THR_NEWA2,
  THR_NEWA,
  THR_NEWG,

  // Single-reference NEARMV.
  THR_NEARMV,
  THR_NEARL2,
  THR_NEARL3,
  THR_NEARB,
  THR_NEARA2,
  THR_NEARA,
  THR_NEARG,

  // Single-reference GLOBALMV.
  THR_GLOBALMV,
  THR_GLOBALL2,
  THR_GLOBALL3,
  THR_GLOBALB,
  THR_GLOBALA2,
  THR_GLOBALA,
  THR_GLOBALG,

  // Compound NEAREST_NEAREST modes, one per reference-frame pair.
  THR_COMP_NEAREST_NEARESTLA,
  THR_COMP_NEAREST_NEARESTL2A,
  THR_COMP_NEAREST_NEARESTL3A,
  THR_COMP_NEAREST_NEARESTGA,
  THR_COMP_NEAREST_NEARESTLB,
  THR_COMP_NEAREST_NEARESTL2B,
  THR_COMP_NEAREST_NEARESTL3B,
  THR_COMP_NEAREST_NEARESTGB,
  THR_COMP_NEAREST_NEARESTLA2,
  THR_COMP_NEAREST_NEARESTL2A2,
  THR_COMP_NEAREST_NEARESTL3A2,
  THR_COMP_NEAREST_NEARESTGA2,
  THR_COMP_NEAREST_NEARESTLL2,
  THR_COMP_NEAREST_NEARESTLL3,
  THR_COMP_NEAREST_NEARESTLG,
  THR_COMP_NEAREST_NEARESTBA,

  // Remaining compound modes, grouped by reference-frame pair:
  // (LAST, BWDREF)
  THR_COMP_NEAR_NEARLB,
  THR_COMP_NEW_NEWLB,
  THR_COMP_NEW_NEARESTLB,
  THR_COMP_NEAREST_NEWLB,
  THR_COMP_NEW_NEARLB,
  THR_COMP_NEAR_NEWLB,
  THR_COMP_GLOBAL_GLOBALLB,

  // (LAST, ALTREF)
  THR_COMP_NEAR_NEARLA,
  THR_COMP_NEW_NEWLA,
  THR_COMP_NEW_NEARESTLA,
  THR_COMP_NEAREST_NEWLA,
  THR_COMP_NEW_NEARLA,
  THR_COMP_NEAR_NEWLA,
  THR_COMP_GLOBAL_GLOBALLA,

  // (LAST2, ALTREF)
  THR_COMP_NEAR_NEARL2A,
  THR_COMP_NEW_NEWL2A,
  THR_COMP_NEW_NEARESTL2A,
  THR_COMP_NEAREST_NEWL2A,
  THR_COMP_NEW_NEARL2A,
  THR_COMP_NEAR_NEWL2A,
  THR_COMP_GLOBAL_GLOBALL2A,

  // (LAST3, ALTREF)
  THR_COMP_NEAR_NEARL3A,
  THR_COMP_NEW_NEWL3A,
  THR_COMP_NEW_NEARESTL3A,
  THR_COMP_NEAREST_NEWL3A,
  THR_COMP_NEW_NEARL3A,
  THR_COMP_NEAR_NEWL3A,
  THR_COMP_GLOBAL_GLOBALL3A,

  // (GOLDEN, ALTREF)
  THR_COMP_NEAR_NEARGA,
  THR_COMP_NEW_NEWGA,
  THR_COMP_NEW_NEARESTGA,
  THR_COMP_NEAREST_NEWGA,
  THR_COMP_NEW_NEARGA,
  THR_COMP_NEAR_NEWGA,
  THR_COMP_GLOBAL_GLOBALGA,

  // (LAST2, BWDREF)
  THR_COMP_NEAR_NEARL2B,
  THR_COMP_NEW_NEWL2B,
  THR_COMP_NEW_NEARESTL2B,
  THR_COMP_NEAREST_NEWL2B,
  THR_COMP_NEW_NEARL2B,
  THR_COMP_NEAR_NEWL2B,
  THR_COMP_GLOBAL_GLOBALL2B,

  // (LAST3, BWDREF)
  THR_COMP_NEAR_NEARL3B,
  THR_COMP_NEW_NEWL3B,
  THR_COMP_NEW_NEARESTL3B,
  THR_COMP_NEAREST_NEWL3B,
  THR_COMP_NEW_NEARL3B,
  THR_COMP_NEAR_NEWL3B,
  THR_COMP_GLOBAL_GLOBALL3B,

  // (GOLDEN, BWDREF)
  THR_COMP_NEAR_NEARGB,
  THR_COMP_NEW_NEWGB,
  THR_COMP_NEW_NEARESTGB,
  THR_COMP_NEAREST_NEWGB,
  THR_COMP_NEW_NEARGB,
  THR_COMP_NEAR_NEWGB,
  THR_COMP_GLOBAL_GLOBALGB,

  // (LAST, ALTREF2)
  THR_COMP_NEAR_NEARLA2,
  THR_COMP_NEW_NEWLA2,
  THR_COMP_NEW_NEARESTLA2,
  THR_COMP_NEAREST_NEWLA2,
  THR_COMP_NEW_NEARLA2,
  THR_COMP_NEAR_NEWLA2,
  THR_COMP_GLOBAL_GLOBALLA2,

  // (LAST2, ALTREF2)
  THR_COMP_NEAR_NEARL2A2,
  THR_COMP_NEW_NEWL2A2,
  THR_COMP_NEW_NEARESTL2A2,
  THR_COMP_NEAREST_NEWL2A2,
  THR_COMP_NEW_NEARL2A2,
  THR_COMP_NEAR_NEWL2A2,
  THR_COMP_GLOBAL_GLOBALL2A2,

  // (LAST3, ALTREF2)
  THR_COMP_NEAR_NEARL3A2,
  THR_COMP_NEW_NEWL3A2,
  THR_COMP_NEW_NEARESTL3A2,
  THR_COMP_NEAREST_NEWL3A2,
  THR_COMP_NEW_NEARL3A2,
  THR_COMP_NEAR_NEWL3A2,
  THR_COMP_GLOBAL_GLOBALL3A2,

  // (GOLDEN, ALTREF2)
  THR_COMP_NEAR_NEARGA2,
  THR_COMP_NEW_NEWGA2,
  THR_COMP_NEW_NEARESTGA2,
  THR_COMP_NEAREST_NEWGA2,
  THR_COMP_NEW_NEARGA2,
  THR_COMP_NEAR_NEWGA2,
  THR_COMP_GLOBAL_GLOBALGA2,

  // Unidirectional compound: (LAST, LAST2)
  THR_COMP_NEAR_NEARLL2,
  THR_COMP_NEW_NEWLL2,
  THR_COMP_NEW_NEARESTLL2,
  THR_COMP_NEAREST_NEWLL2,
  THR_COMP_NEW_NEARLL2,
  THR_COMP_NEAR_NEWLL2,
  THR_COMP_GLOBAL_GLOBALLL2,

  // Unidirectional compound: (LAST, LAST3)
  THR_COMP_NEAR_NEARLL3,
  THR_COMP_NEW_NEWLL3,
  THR_COMP_NEW_NEARESTLL3,
  THR_COMP_NEAREST_NEWLL3,
  THR_COMP_NEW_NEARLL3,
  THR_COMP_NEAR_NEWLL3,
  THR_COMP_GLOBAL_GLOBALLL3,

  // Unidirectional compound: (LAST, GOLDEN)
  THR_COMP_NEAR_NEARLG,
  THR_COMP_NEW_NEWLG,
  THR_COMP_NEW_NEARESTLG,
  THR_COMP_NEAREST_NEWLG,
  THR_COMP_NEW_NEARLG,
  THR_COMP_NEAR_NEWLG,
  THR_COMP_GLOBAL_GLOBALLG,

  // Unidirectional compound: (BWDREF, ALTREF)
  THR_COMP_NEAR_NEARBA,
  THR_COMP_NEW_NEWBA,
  THR_COMP_NEW_NEARESTBA,
  THR_COMP_NEAREST_NEWBA,
  THR_COMP_NEW_NEARBA,
  THR_COMP_NEAR_NEWBA,
  THR_COMP_GLOBAL_GLOBALBA,

  // Intra modes, evaluated last.
  THR_DC,
  THR_PAETH,
  THR_SMOOTH,
  THR_SMOOTH_V,
  THR_SMOOTH_H,
  THR_H_PRED,
  THR_V_PRED,
  THR_D135_PRED,
  THR_D203_PRED,
  THR_D157_PRED,
  THR_D67_PRED,
  THR_D113_PRED,
  THR_D45_PRED,
};
299
/*!\cond */
// Result of evaluating one single-reference inter mode.
typedef struct SingleInterModeState {
  int64_t rd;                    // rd cost measured for this mode
  MV_REFERENCE_FRAME ref_frame;  // reference frame the rd was measured with
  int valid;                     // nonzero once rd/ref_frame are populated
} SingleInterModeState;

// Running state of the inter mode search for one block: the best candidate
// found so far plus per-mode / per-reference statistics used for pruning.
typedef struct InterModeSearchState {
  int64_t best_rd;            // best overall rd found so far
  int64_t best_skip_rd[2];    // best rd for skip decisions (two variants)
  MB_MODE_INFO best_mbmode;   // mode info of the current best candidate
  int best_rate_y;            // luma rate of the best candidate
  int best_rate_uv;           // chroma rate of the best candidate
  int best_mode_skippable;    // whether the best mode can skip the txfm
  int best_skip2;             // skip flag carried with the best candidate
  THR_MODES best_mode_index;  // index of the best mode in the mode order
  int num_available_refs;
  int64_t dist_refs[REF_FRAMES];
  int dist_order_refs[REF_FRAMES];
  int64_t mode_threshold[MAX_MODES];  // rd thresholds used for mode pruning
  int64_t best_intra_rd;              // best rd among intra modes
  unsigned int best_pred_sse;

  /*!
   * \brief Keep track of best intra rd for use in compound mode.
   */
  int64_t best_pred_rd[REFERENCE_MODES];
  // Save a set of single_newmv for each checked ref_mv.
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  // The rd of simple translation in single inter modes
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  int64_t best_single_rd[REF_FRAMES];
  PREDICTION_MODE best_single_mode[REF_FRAMES];

  // Single search results by [directions][modes][reference frames]
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
                                            [FWD_REFS];
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  IntraModeSearchState intra_search_state;  // state of the intra mode search
  RD_STATS best_y_rdcost;                   // luma rd stats of best candidate
} InterModeSearchState;
/*!\endcond */
348
// Resets the per-block-size inter mode rd models of a tile so a fresh round
// of statistics gathering can begin.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    InterModeRdModel *const model = &tile_data->inter_mode_rd_models[bsize];
    model->ready = 0;
    model->num = 0;
    model->dist_sum = 0;
    model->ld_sum = 0;
    model->sse_sum = 0;
    model->sse_sse_sum = 0;
    model->sse_ld_sum = 0;
  }
}
361
// Estimates residue rate and distortion for a block of the given size and
// sse, using the tile's fitted linear model ld = a * sse + b.
// Returns 1 and fills the outputs when the model is ready, 0 otherwise.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  const InterModeRdModel *const model = &tile_data->inter_mode_rd_models[bsize];
  if (!model->ready) return 0;

  if (sse < model->dist_mean) {
    // Below the mean distortion: predict a zero-rate, lossless-enough result.
    *est_residue_cost = 0;
    *est_dist = sse;
    return 1;
  }

  *est_dist = (int64_t)round(model->dist_mean);
  const double est_ld = model->a * sse + model->b;
  // Clamp estimated rate cost by INT_MAX / 2.
  // TODO([email protected]): find better solution than clamping.
  if (fabs(est_ld) < 1e-2) {
    *est_residue_cost = INT_MAX / 2;
  } else {
    const double cost_dbl = (sse - model->dist_mean) / est_ld;
    if (cost_dbl < 0) {
      *est_residue_cost = 0;
    } else {
      *est_residue_cost =
          (int)AOMMIN((int64_t)round(cost_dbl), INT_MAX / 2);
    }
  }
  if (*est_residue_cost <= 0) {
    // A non-positive rate estimate means the model predicts a free block;
    // fall back to reporting the raw sse as distortion.
    *est_residue_cost = 0;
    *est_dist = sse;
  }
  return 1;
}
395
// Refits, per block size, the linear model ld = a * sse + b from the
// statistics accumulated since the last fit, then resets the accumulators.
// A first fit requires at least 200 samples; later refinements require 64.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const int block_idx = inter_mode_data_block_idx(bsize);
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
    // Only block sizes tracked by the rd-model machinery are fitted.
    if (block_idx == -1) continue;
    if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64)) {
      continue;
    } else {
      if (md->ready == 0) {
        // First fit: means come straight from the accumulated sums.
        md->dist_mean = md->dist_sum / md->num;
        md->ld_mean = md->ld_sum / md->num;
        md->sse_mean = md->sse_sum / md->num;
        md->sse_sse_mean = md->sse_sse_sum / md->num;
        md->sse_ld_mean = md->sse_ld_sum / md->num;
      } else {
        // Refit: blend old means with the new batch 3:1 (exponential
        // moving average) so the model adapts smoothly.
        const double factor = 3;
        md->dist_mean =
            (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
        md->ld_mean =
            (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
        md->sse_mean =
            (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
        md->sse_sse_mean =
            (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
            (factor + 1);
        md->sse_ld_mean =
            (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
            (factor + 1);
      }

      // Least-squares line through (sse, ld) pairs:
      // slope a = cov(sse, ld) / var(sse), intercept b = E[ld] - a * E[sse].
      const double my = md->ld_mean;
      const double mx = md->sse_mean;
      const double dx = sqrt(md->sse_sse_mean);
      const double dxy = md->sse_ld_mean;

      // NOTE(review): divides by var(sse); assumes the gathered samples are
      // not all identical in sse — verify this cannot produce a 0 divisor.
      md->a = (dxy - mx * my) / (dx * dx - mx * mx);
      md->b = my - md->a * mx;
      md->ready = 1;

      // Restart accumulation for the next fitting window.
      md->num = 0;
      md->dist_sum = 0;
      md->ld_sum = 0;
      md->sse_sum = 0;
      md->sse_sse_sum = 0;
      md->sse_ld_sum = 0;
    }
    (void)rdmult;  // currently unused; kept for interface stability
  }
}
445
// Accumulates one (sse, dist, rate) observation into the tile's rd model
// statistics for the given block size. Observations with zero residue cost
// or identical sse and dist carry no rate/distortion slope information and
// are skipped, as are block sizes the model does not track.
static inline void inter_mode_data_push(TileDataEnc *tile_data,
                                        BLOCK_SIZE bsize, int64_t sse,
                                        int64_t dist, int residue_cost) {
  if (residue_cost == 0 || sse == dist) return;
  if (inter_mode_data_block_idx(bsize) == -1) return;
  InterModeRdModel *const model = &tile_data->inter_mode_rd_models[bsize];
  if (model->num >= INTER_MODE_RD_DATA_OVERALL_SIZE) return;

  const double ld = (double)(sse - dist) / residue_cost;
  model->num += 1;
  model->dist_sum += dist;
  model->ld_sum += ld;
  model->sse_sum += sse;
  model->sse_sse_sum += (double)sse * (double)sse;
  model->sse_ld_sum += sse * ld;
}
463
// Appends one candidate inter mode (mode info, rate, sse, estimated rd and
// the per-plane rd stats) to the inter_modes_info arrays.
static inline void inter_modes_info_push(InterModesInfo *inter_modes_info,
                                         int mode_rate, int64_t sse, int64_t rd,
                                         RD_STATS *rd_cost, RD_STATS *rd_cost_y,
                                         RD_STATS *rd_cost_uv,
                                         const MB_MODE_INFO *mbmi) {
  const int slot = inter_modes_info->num;
  assert(slot < MAX_INTER_MODES);
  inter_modes_info->mbmi_arr[slot] = *mbmi;
  inter_modes_info->mode_rate_arr[slot] = mode_rate;
  inter_modes_info->sse_arr[slot] = sse;
  inter_modes_info->est_rd_arr[slot] = rd;
  inter_modes_info->rd_cost_arr[slot] = *rd_cost;
  inter_modes_info->rd_cost_y_arr[slot] = *rd_cost_y;
  inter_modes_info->rd_cost_uv_arr[slot] = *rd_cost_uv;
  inter_modes_info->num = slot + 1;
}
480
compare_rd_idx_pair(const void * a,const void * b)481 static int compare_rd_idx_pair(const void *a, const void *b) {
482 if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
483 // To avoid inconsistency in qsort() ordering when two elements are equal,
484 // using idx as tie breaker. Refer aomedia:2928
485 if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
486 return 0;
487 else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
488 return 1;
489 else
490 return -1;
491 } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
492 return 1;
493 } else {
494 return -1;
495 }
496 }
497
// Fills rd_idx_pair_arr with (index, estimated rd) pairs for every stored
// candidate and sorts them ascending by rd via compare_rd_idx_pair().
static inline void inter_modes_info_sort(const InterModesInfo *inter_modes_info,
                                         RdIdxPair *rd_idx_pair_arr) {
  const int count = inter_modes_info->num;
  if (count == 0) return;

  for (int i = 0; i < count; ++i) {
    rd_idx_pair_arr[i].idx = i;
    rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
  }
  qsort(rd_idx_pair_arr, count, sizeof(rd_idx_pair_arr[0]),
        compare_rd_idx_pair);
}
510
511 // Similar to get_horver_correlation, but also takes into account first
512 // row/column, when computing horizontal/vertical correlation.
av1_get_horver_correlation_full_c(const int16_t * diff,int stride,int width,int height,float * hcorr,float * vcorr)513 void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
514 int width, int height, float *hcorr,
515 float *vcorr) {
516 // The following notation is used:
517 // x - current pixel
518 // y - left neighbor pixel
519 // z - top neighbor pixel
520 int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
521 int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
522 int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
523
524 // First, process horizontal correlation on just the first row
525 x_sum += diff[0];
526 x2_sum += diff[0] * diff[0];
527 x_firstrow += diff[0];
528 x2_firstrow += diff[0] * diff[0];
529 for (int j = 1; j < width; ++j) {
530 const int16_t x = diff[j];
531 const int16_t y = diff[j - 1];
532 x_sum += x;
533 x_firstrow += x;
534 x2_sum += x * x;
535 x2_firstrow += x * x;
536 xy_sum += x * y;
537 }
538
539 // Process vertical correlation in the first column
540 x_firstcol += diff[0];
541 x2_firstcol += diff[0] * diff[0];
542 for (int i = 1; i < height; ++i) {
543 const int16_t x = diff[i * stride];
544 const int16_t z = diff[(i - 1) * stride];
545 x_sum += x;
546 x_firstcol += x;
547 x2_sum += x * x;
548 x2_firstcol += x * x;
549 xz_sum += x * z;
550 }
551
552 // Now process horiz and vert correlation through the rest unit
553 for (int i = 1; i < height; ++i) {
554 for (int j = 1; j < width; ++j) {
555 const int16_t x = diff[i * stride + j];
556 const int16_t y = diff[i * stride + j - 1];
557 const int16_t z = diff[(i - 1) * stride + j];
558 x_sum += x;
559 x2_sum += x * x;
560 xy_sum += x * y;
561 xz_sum += x * z;
562 }
563 }
564
565 for (int j = 0; j < width; ++j) {
566 x_finalrow += diff[(height - 1) * stride + j];
567 x2_finalrow +=
568 diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
569 }
570 for (int i = 0; i < height; ++i) {
571 x_finalcol += diff[i * stride + width - 1];
572 x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
573 }
574
575 int64_t xhor_sum = x_sum - x_finalcol;
576 int64_t xver_sum = x_sum - x_finalrow;
577 int64_t y_sum = x_sum - x_firstcol;
578 int64_t z_sum = x_sum - x_firstrow;
579 int64_t x2hor_sum = x2_sum - x2_finalcol;
580 int64_t x2ver_sum = x2_sum - x2_finalrow;
581 int64_t y2_sum = x2_sum - x2_firstcol;
582 int64_t z2_sum = x2_sum - x2_firstrow;
583
584 const float num_hor = (float)(height * (width - 1));
585 const float num_ver = (float)((height - 1) * width);
586
587 const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
588 const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
589
590 const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
591 const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
592
593 const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
594 const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
595
596 if (xhor_var_n > 0 && y_var_n > 0) {
597 *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
598 *hcorr = *hcorr < 0 ? 0 : *hcorr;
599 } else {
600 *hcorr = 1.0;
601 }
602 if (xver_var_n > 0 && z_var_n > 0) {
603 *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
604 *vcorr = *vcorr < 0 ? 0 : *vcorr;
605 } else {
606 *vcorr = 1.0;
607 }
608 }
609
get_sse(const AV1_COMP * cpi,const MACROBLOCK * x,int64_t * sse_y)610 static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
611 int64_t *sse_y) {
612 const AV1_COMMON *cm = &cpi->common;
613 const int num_planes = av1_num_planes(cm);
614 const MACROBLOCKD *xd = &x->e_mbd;
615 const MB_MODE_INFO *mbmi = xd->mi[0];
616 int64_t total_sse = 0;
617 for (int plane = 0; plane < num_planes; ++plane) {
618 if (plane && !xd->is_chroma_ref) break;
619 const struct macroblock_plane *const p = &x->plane[plane];
620 const struct macroblockd_plane *const pd = &xd->plane[plane];
621 const BLOCK_SIZE bs =
622 get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
623 unsigned int sse;
624
625 cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
626 pd->dst.stride, &sse);
627 total_sse += sse;
628 if (!plane && sse_y) *sse_y = sse;
629 }
630 total_sse <<= 4;
631 return total_sse;
632 }
633
// Computes the squared error between quantize-roundtripped coefficients
// (dqcoeff) and the originals (coeff), and reports the sum of squared
// original coefficients through ssz.
// Fix: widen the per-element products to int64 before accumulating. The
// previous int-width `diff * diff` / `coeff[i] * coeff[i]` could overflow
// signed int (UB) for large transform coefficients; the highbd variant
// below already widens this way.
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t error = 0, sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
648
// Low-precision (int16 coefficient) variant of av1_block_error_c: returns
// the sum of squared differences between coeff and dqcoeff.
// Fix: accumulate the square in int64. `diff` can reach +/-65535, so the
// previous int-width `diff * diff` could overflow signed int (UB).
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t error = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += (int64_t)diff * diff;
  }

  return error;
}
660
661 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth block error: like av1_block_error_c, but the accumulated
// error and squared-coefficient sums are rounded and right-shifted by
// 2 * (bd - 8) so results are comparable across bit depths.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  int64_t error = 0, sqcoeff = 0;
  const int shift = 2 * (bd - 8);
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);

  *ssz = (sqcoeff + rounding) >> shift;
  return (error + rounding) >> shift;
}
682 #endif
683
// Returns 1 when the given diagonal intra mode can be skipped: each of the
// four "offshoot" directions is only searched when the best intra mode so
// far is one of its two neighboring base directions.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
700
// Returns the cost of signalling the given inter prediction mode under
// mode_context. Compound modes are coded with a single symbol; single-ref
// modes follow the NEWMV -> GLOBALMV -> NEAREST/NEAR decision tree, with a
// separate context extracted from mode_context at each level.
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  // Level 1: is this NEWMV?
  int16_t ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[ctx][0];

  int cost = mode_costs->newmv_mode_cost[ctx][1];

  // Level 2: is this GLOBALMV?
  ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV) return cost + mode_costs->zeromv_mode_cost[ctx][0];

  cost += mode_costs->zeromv_mode_cost[ctx][1];

  // Level 3: NEARESTMV vs NEARMV.
  ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  return cost + mode_costs->refmv_mode_cost[ctx][mode != NEARESTMV];
}
731
// Extracts the single-reference prediction mode for reference list ref_idx
// (0 or 1) from a possibly-compound mode.
static inline PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
                                              int ref_idx) {
  if (ref_idx) return compound_ref1_mode(this_mode);
  return compound_ref0_mode(this_mode);
}
737
// Fills ref_costs_single[ref] with the cost of signalling each single
// reference frame, and ref_costs_comp[ref0][ref1] with the cost of each
// compound reference pair, for the current block's contexts. When the
// segment pins the reference frame, all costs are zero.
static inline void estimate_ref_frame_costs(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
    int segment_id, unsigned int *ref_costs_single,
    unsigned int (*ref_costs_comp)[REF_FRAMES]) {
  int seg_ref_active =
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    // Reference frame is dictated by the segment: nothing to signal.
    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
    int ref_frame;
    for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
      memset(ref_costs_comp[ref_frame], 0,
             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
  } else {
    // Every inter ref first pays the intra/inter signalling cost.
    int intra_inter_ctx = av1_get_intra_inter_context(xd);
    ref_costs_single[INTRA_FRAME] =
        mode_costs->intra_inter_cost[intra_inter_ctx][0];
    unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];

    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
      ref_costs_single[i] = base_cost;

    const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
    const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
    const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
    const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
    const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
    const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);

    // Determine cost of a single ref frame, where frame types are represented
    // by a tree:
    // Level 0: add cost whether this ref is a forward or backward ref
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];

    // Level 1: if this ref is forward ref,
    // add cost whether it is last/last2 or last3/golden
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];

    // Level 1: if this ref is backward ref
    // then add cost whether this ref is altref or backward ref
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];

    // Level 2: further add cost whether this ref is last or last2
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];

    // Level 2: last3 or golden
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];

    // Level 2: bwdref or altref2
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p6][5][1];

    if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
      // Similar to single ref, determine cost of compound ref frames.
      // cost_compound_refs = cost_first_ref + cost_second_ref
      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);

      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };

      // Forward refs carry the "bidirectional compound" type cost; backward
      // refs only accumulate the per-ref costs added below.
      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
              base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
      ref_bicomp_costs[ALTREF_FRAME] = 0;

      // cost of first ref frame
      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];

      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];

      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];

      // cost of second ref frame
      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];

      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];

      // cost: if one ref frame is forward ref, the other ref is backward ref
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
          ref_costs_comp[ref0][ref1] =
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
        }
      }

      // cost: if both ref frames are the same side.
      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
    } else {
      // Compound prediction disallowed for this frame: fill the compound
      // entries with a fixed placeholder cost.
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
          ref_costs_comp[ref0][ref1] = 512;
      }
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
    }
  }
}
899
static inline void store_coding_context(
#if CONFIG_INTERNAL_STATS
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
#else
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
#endif  // CONFIG_INTERNAL_STATS
    int skippable) {
  // Snapshot the current coding decisions into ctx so they can be restored
  // later if this mode ends up being the one actually encoded.
  MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mi = xd->mi[0];

  ctx->skippable = skippable;
  ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
#if CONFIG_INTERNAL_STATS
  ctx->best_mode_index = mode_index;
#endif  // CONFIG_INTERNAL_STATS
  ctx->mic = *mi;
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(mi->ref_frame));
}
920
static inline void setup_buffer_ref_mvs_inter(
    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
    BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
  // Sets up the prediction buffers for ref_frame and gathers/refines the
  // candidate motion vectors used as search centres for this block.
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int num_planes = av1_num_planes(cm);
  const YV12_BUFFER_CONFIG *const unscaled_buf =
      get_ref_frame_yv12_buf(cm, ref_frame);
  const YV12_BUFFER_CONFIG *const scaled_buf =
      av1_get_scaled_ref_frame(cpi, ref_frame);
  const struct scale_factors *const sf =
      get_ref_scale_factors_const(cm, ref_frame);
  assert(unscaled_buf != NULL);

  // av1_mv_pred() cannot handle scaling, so temporarily point the prediction
  // block at the pre-scaled reference when one exists.
  if (scaled_buf != NULL) {
    av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_buf, NULL, NULL,
                         num_planes);
  } else {
    av1_setup_pred_block(xd, yv12_mb[ref_frame], unscaled_buf, sf, sf,
                         num_planes);
  }

  // Build the ordered candidate MV list from neighbouring blocks.
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  // Encoder-only refinement: test the top candidates in full and pick the
  // best as the centre point for subsequent searches.
  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
              ref_frame, block_size);

  // Restore the unscaled reference for all subsequent use.
  if (scaled_buf != NULL) {
    av1_setup_pred_block(xd, yv12_mb[ref_frame], unscaled_buf, sf, sf,
                         num_planes);
  }
}
965
966 #define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
967 #define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
968
969 // TODO(jingning): this mv clamping function should be block size dependent.
clamp_mv2(MV * mv,const MACROBLOCKD * xd)970 static inline void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
971 const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
972 xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
973 xd->mb_to_top_edge - LEFT_TOP_MARGIN,
974 xd->mb_to_bottom_edge +
975 RIGHT_BOTTOM_MARGIN };
976 clamp_mv(mv, &mv_limits);
977 }
978
979 /* If the current mode shares the same mv with other modes with higher cost,
980 * skip this mode. */
skip_repeated_mv(const AV1_COMMON * const cm,const MACROBLOCK * const x,PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME ref_frames[2],InterModeSearchState * search_state)981 static int skip_repeated_mv(const AV1_COMMON *const cm,
982 const MACROBLOCK *const x,
983 PREDICTION_MODE this_mode,
984 const MV_REFERENCE_FRAME ref_frames[2],
985 InterModeSearchState *search_state) {
986 const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
987 const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
988 const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
989 const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
990 PREDICTION_MODE compare_mode = MB_MODE_COUNT;
991 if (!is_comp_pred) {
992 if (this_mode == NEARMV) {
993 if (ref_mv_count == 0) {
994 // NEARMV has the same motion vector as NEARESTMV
995 compare_mode = NEARESTMV;
996 }
997 if (ref_mv_count == 1 &&
998 cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
999 // NEARMV has the same motion vector as GLOBALMV
1000 compare_mode = GLOBALMV;
1001 }
1002 }
1003 if (this_mode == GLOBALMV) {
1004 if (ref_mv_count == 0 &&
1005 cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1006 // GLOBALMV has the same motion vector as NEARESTMV
1007 compare_mode = NEARESTMV;
1008 }
1009 if (ref_mv_count == 1) {
1010 // GLOBALMV has the same motion vector as NEARMV
1011 compare_mode = NEARMV;
1012 }
1013 }
1014
1015 if (compare_mode != MB_MODE_COUNT) {
1016 // Use modelled_rd to check whether compare mode was searched
1017 if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
1018 INT64_MAX) {
1019 const int16_t mode_ctx =
1020 av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
1021 const int compare_cost =
1022 cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
1023 const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);
1024
1025 // Only skip if the mode cost is larger than compare mode cost
1026 if (this_cost > compare_cost) {
1027 search_state->modelled_rd[this_mode][0][ref_frames[0]] =
1028 search_state->modelled_rd[compare_mode][0][ref_frames[0]];
1029 return 1;
1030 }
1031 }
1032 }
1033 }
1034 return 0;
1035 }
1036
static inline int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
                                     const AV1_COMMON *cm,
                                     const MACROBLOCK *x) {
  // Copy in_mv, reduce its precision per the frame settings, clamp it to the
  // border, and report whether the full-pel part still lies inside the
  // block's motion search limits.
  const MACROBLOCKD *const xd = &x->e_mbd;
  *out_mv = in_mv;
  lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
                     cm->features.cur_frame_force_integer_mv);
  clamp_mv2(&out_mv->as_mv, xd);
  const FULLPEL_MV fullmv = get_fullmv_from_mv(&out_mv->as_mv);
  return av1_is_fullmv_in_range(&x->mv_limits, fullmv);
}
1048
1049 // To use single newmv directly for compound modes, need to clamp the mv to the
1050 // valid mv range. Without this, encoder would generate out of range mv, and
1051 // this is seen in 8k encoding.
clamp_mv_in_range(MACROBLOCK * const x,int_mv * mv,int ref_idx)1052 static inline void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1053 int ref_idx) {
1054 const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1055 SubpelMvLimits mv_limits;
1056
1057 av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1058 clamp_mv(&mv->as_mv, &mv_limits);
1059 }
1060
// Computes the NEWMV component(s) of the current inter mode and their rate.
// For compound modes, previously cached single-reference NEWMV results are
// reused (clamped into range) instead of re-searching; for single-reference
// modes a motion search is run and its result is cached in args.
// Returns INT64_MAX when no valid MV was found (caller should skip this
// mode), 0 on success. On success cur_mv and *rate_mv are filled in.
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
                            int *const rate_mv, HandleInterModeArgs *const args,
                            inter_mode_info *mode_info) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // refs[1] is forced to 0 when there is no second reference.
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  const int ref_mv_idx = mbmi->ref_mv_idx;

  if (is_comp_pred) {
    // Compound case: pull each NEWMV component from the cached
    // single-reference search results where valid, clamping into range.
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
    if (this_mode == NEW_NEWMV) {
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      // Both components are NEWMV: sum the bit cost of each MV.
      *rate_mv = 0;
      for (int i = 0; i < 2; ++i) {
        const int_mv ref_mv = av1_get_ref_mv(x, i);
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
                                    x->mv_costs->nmv_joint_cost,
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only the second component is NEWMV.
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    } else {
      // Only the first component is NEWMV.
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    }
  } else {
    // Single ref case.
    const int ref_idx = 0;
    int search_range = INT_MAX;

    // Speed feature: for ref_mv_idx > 0, bound the search range using the
    // closest previously searched ref MV and its cached search result.
    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
      int min_mv_diff = INT_MAX;
      int best_match = -1;
      MV prev_ref_mv[2] = { { 0 } };
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
                                                     idx, &x->mbmi_ext)
                               .as_mv;
        // Chebyshev (max-component) distance between the two ref MVs.
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
                                       abs(ref_mv.col - prev_ref_mv[idx].col));

        if (min_mv_diff > ref_mv_diff) {
          min_mv_diff = ref_mv_diff;
          best_match = idx;
        }
      }

      // Only restrict the range when a previous ref MV is within 16 pels.
      if (min_mv_diff < (16 << 3)) {
        if (args->single_newmv_valid[best_match][refs[0]]) {
          search_range = min_mv_diff;
          // Widen by how far the cached best MV strayed from its ref MV.
          search_range +=
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
                         prev_ref_mv[best_match].row),
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
                         prev_ref_mv[best_match].col));
          // Get full pixel search range.
          search_range = (search_range + 4) >> 3;
        }
      }
    }

    int_mv best_mv;
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
                             mode_info, &best_mv, args);
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;

    // Cache the result so later compound modes can reuse it.
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
    cur_mv[0].as_int = best_mv.as_int;

    // Return after single_newmv is set.
    if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
  }

  return 0;
}
1165
update_mode_start_end_index(const AV1_COMP * const cpi,const MB_MODE_INFO * const mbmi,int * mode_index_start,int * mode_index_end,int last_motion_mode_allowed,int interintra_allowed,int eval_motion_mode)1166 static inline void update_mode_start_end_index(
1167 const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
1168 int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
1169 int interintra_allowed, int eval_motion_mode) {
1170 *mode_index_start = (int)SIMPLE_TRANSLATION;
1171 *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1172 if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1173 if (!eval_motion_mode) {
1174 *mode_index_end = (int)SIMPLE_TRANSLATION;
1175 } else {
1176 // Set the start index appropriately to process motion modes other than
1177 // simple translation
1178 *mode_index_start = 1;
1179 }
1180 }
1181 if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
1182 *mode_index_end = SIMPLE_TRANSLATION;
1183 }
1184
1185 /*!\brief AV1 motion mode search
1186 *
1187 * \ingroup inter_mode_search
1188 * Function to search over and determine the motion mode. It will update
1189 * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1190 * WARPED_CAUSAL and determine any necessary side information for the selected
1191 * motion mode. It will also perform the full transform search, unless the
1192 * input parameter do_tx_search indicates to do an estimation of the RD rather
1193 * than an RD corresponding to a full transform search. It will return the
1194 * RD for the final motion_mode.
1195 * Do the RD search for a given inter mode and compute all information relevant
1196 * to the input mode. It will compute the best MV,
1197 * compound parameters (if the mode is a compound mode) and interpolation filter
1198 * parameters.
1199 *
1200 * \param[in] cpi Top-level encoder structure.
1201 * \param[in] tile_data Pointer to struct holding adaptive
1202 * data/contexts/models for the tile during
1203 * encoding.
1204 * \param[in] x Pointer to struct holding all the data for
1205 * the current macroblock.
1206 * \param[in] bsize Current block size.
1207 * \param[in,out] rd_stats Struct to keep track of the overall RD
1208 * information.
1209 * \param[in,out] rd_stats_y Struct to keep track of the RD information
1210 * for only the Y plane.
1211 * \param[in,out] rd_stats_uv Struct to keep track of the RD information
1212 * for only the UV planes.
1213 * \param[in] args HandleInterModeArgs struct holding
1214 * miscellaneous arguments for inter mode
1215 * search. See the documentation for this
1216 * struct for a description of each member.
1217 * \param[in] ref_best_rd Best RD found so far for this block.
1218 * It is used for early termination of this
1219 * search if the RD exceeds this value.
1220 * \param[in,out] ref_skip_rd A length 2 array, where skip_rd[0] is the
1221 * best total RD for a skip mode so far, and
1222 * skip_rd[1] is the best RD for a skip mode so
1223 * far in luma. This is used as a speed feature
1224 * to skip the transform search if the computed
1225 * skip RD for the current mode is not better
1226 * than the best skip_rd so far.
1227 * \param[in,out] rate_mv The rate associated with the motion vectors.
1228 * This will be modified if a motion search is
1229 * done in the motion mode search.
1230 * \param[in,out] orig_dst A prediction buffer to hold a computed
1231 * prediction. This will eventually hold the
1232 * final prediction, and the tmp_dst info will
1233 * be copied here.
1234 * \param[in,out] best_est_rd Estimated RD for motion mode search if
1235 * do_tx_search (see below) is 0.
1236 * \param[in] do_tx_search Parameter to indicate whether or not to do
1237 * a full transform search. This will compute
1238 * an estimated RD for the modes without the
1239 * transform search and later perform the full
1240 * transform search on the best candidates.
1241 * \param[in] inter_modes_info InterModesInfo struct to hold inter mode
1242 * information to perform a full transform
1243 * search only on winning candidates searched
1244 * with an estimate for transform coding RD.
 * \param[in]     eval_motion_mode  Boolean whether or not to evaluate
 *                                  motion modes other than SIMPLE_TRANSLATION.
1247 * \param[out] yrd Stores the rdcost corresponding to encoding
1248 * the luma plane.
1249 * \return Returns INT64_MAX if the determined motion mode is invalid and the
1250 * current motion mode being tested should be skipped. It returns 0 if the
1251 * motion mode search is a success.
1252 */
static int64_t motion_mode_rd(
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
    int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
    int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
    int eval_motion_mode, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // Mode signalling rate as passed in, i.e. excluding residual coefficients.
  const int rate2_nocoeff = rd_stats->rate;
  int best_xskip_txfm = 0;
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  // MV rate as passed in; restored into tmp_rate_mv for each motion mode.
  const int rate_mv0 = *rate_mv;
  const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
                                 is_interintra_allowed(mbmi) &&
                                 mbmi->compound_idx;
  WARP_SAMPLE_INFO *const warp_sample_info =
      &x->warp_sample_info[mbmi->ref_frame[0]];
  int *pts0 = warp_sample_info->pts;
  int *pts_inref0 = warp_sample_info->pts_inref;

  assert(mbmi->ref_frame[1] != INTRA_FRAME);
  const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
  av1_invalid_rd_stats(&best_rd_stats);
  mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
  *yrd = INT64_MAX;
  if (features->switchable_motion_mode) {
    // Determine which motion modes to search if more than SIMPLE_TRANSLATION
    // is allowed.
    last_motion_mode_allowed = motion_mode_allowed(
        xd->global_motion, xd, mbmi, features->allow_warped_motion);
  }

  if (last_motion_mode_allowed == WARPED_CAUSAL) {
    // Collect projection samples used in least squares approximation of
    // the warped motion parameters if WARPED_CAUSAL is going to be searched.
    // warp_sample_info->num < 0 means the samples have not been gathered yet.
    if (warp_sample_info->num < 0) {
      warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
    }
    mbmi->num_proj_ref = warp_sample_info->num;
  }
  const int total_samples = mbmi->num_proj_ref;
  if (total_samples == 0) {
    // Do not search WARPED_CAUSAL if there are no samples to use to determine
    // warped parameters.
    last_motion_mode_allowed = OBMC_CAUSAL;
  }

  // base_mbmi preserves the incoming mode info; it is re-applied at the top
  // of every loop iteration before the motion mode is overridden.
  const MB_MODE_INFO base_mbmi = *mbmi;
  MB_MODE_INFO best_mbmi;
  const int interp_filter = features->interp_filter;
  const int switchable_rate =
      av1_is_interp_needed(xd)
          ? av1_get_switchable_rate(x, xd, interp_filter,
                                    cm->seq_params->enable_dual_filter)
          : 0;
  int64_t best_rd = INT64_MAX;
  int best_rate_mv = rate_mv0;
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int mode_index_start, mode_index_end;
  const int txfm_rd_gate_level =
      get_txfm_rd_gate_level(cm->seq_params->enable_masked_compound,
                             cpi->sf.inter_sf.txfm_rd_gate_level, bsize,
                             TX_SEARCH_MOTION_MODE, eval_motion_mode);

  // Modify the start and end index according to speed features. For example,
  // if SIMPLE_TRANSLATION has already been searched according to
  // the motion_mode_for_winner_cand speed feature, update the mode_index_start
  // to avoid searching it again.
  update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
                              last_motion_mode_allowed, interintra_allowed,
                              eval_motion_mode);
  // Main function loop. This loops over all of the possible motion modes and
  // computes RD to determine the best one. This process includes computing
  // any necessary side information for the motion mode and performing the
  // transform search.
  for (int mode_index = mode_index_start; mode_index <= mode_index_end;
       mode_index++) {
    if (args->skip_motion_mode && mode_index) continue;
    int tmp_rate2 = rate2_nocoeff;
    // Indices past last_motion_mode_allowed encode the interintra variant.
    const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
    int tmp_rate_mv = rate_mv0;

    *mbmi = base_mbmi;
    if (is_interintra_mode) {
      // Only use SIMPLE_TRANSLATION for interintra
      mbmi->motion_mode = SIMPLE_TRANSLATION;
    } else {
      mbmi->motion_mode = (MOTION_MODE)mode_index;
      assert(mbmi->ref_frame[1] != INTRA_FRAME);
    }

    // Do not search OBMC if the probability of selecting it is below a
    // predetermined threshold for this update_type and block size.
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    int use_actual_frame_probs = 1;
    int prune_obmc;
#if CONFIG_FPMT_TEST
    // In the FPMT parallel-simulation test config, use the temp frame probs
    // instead of the actual ones.
    use_actual_frame_probs =
        (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
    if (!use_actual_frame_probs) {
      prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
#endif
    if (use_actual_frame_probs) {
      prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
    if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
        mbmi->motion_mode == OBMC_CAUSAL)
      continue;

    if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
      // SIMPLE_TRANSLATION mode: no need to recalculate.
      // The prediction is calculated before motion_mode_rd() is called in
      // handle_inter_mode()
    } else if (mbmi->motion_mode == OBMC_CAUSAL) {
      const uint32_t cur_mv = mbmi->mv[0].as_int;
      // OBMC_CAUSAL not allowed for compound prediction
      assert(!is_comp_pred);
      if (have_newmv_in_inter_mode(this_mode)) {
        // Re-run the single motion search for the OBMC case.
        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
                                 &mbmi->mv[0], NULL);
        tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
      }
      if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
        // Build the predictor according to the current motion vector if it has
        // not already been built
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                      0, av1_num_planes(cm) - 1);
      }
      // Build the inter predictor by blending the predictor corresponding to
      // this MV, and the neighboring blocks using the OBMC model
      av1_build_obmc_inter_prediction(
          cm, xd, args->above_pred_buf, args->above_pred_stride,
          args->left_pred_buf, args->left_pred_stride);
#if !CONFIG_REALTIME_ONLY
    } else if (mbmi->motion_mode == WARPED_CAUSAL) {
      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
      mbmi->motion_mode = WARPED_CAUSAL;
      mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
      mbmi->interp_filters =
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));

      // Work on local copies so the cached samples stay intact.
      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
      // Select the samples according to motion vector difference
      if (mbmi->num_proj_ref > 1) {
        mbmi->num_proj_ref = av1_selectSamples(
            &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
      }

      // Compute the warped motion parameters with a least squares fit
      // using the collected samples
      if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
                               mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
                               &mbmi->wm_params, mi_row, mi_col)) {
        assert(!is_comp_pred);
        if (have_newmv_in_inter_mode(this_mode)) {
          // Refine MV for NEWMV mode
          const int_mv mv0 = mbmi->mv[0];
          const WarpedMotionParams wm_params0 = mbmi->wm_params;
          const int num_proj_ref0 = mbmi->num_proj_ref;

          const int_mv ref_mv = av1_get_ref_mv(x, 0);
          SUBPEL_MOTION_SEARCH_PARAMS ms_params;
          av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
                                            &ref_mv.as_mv, NULL);

          // Refine MV in a small range.
          av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
                               total_samples, cpi->sf.mv_sf.warp_search_method,
                               cpi->sf.mv_sf.warp_search_iters);

          if (mv0.as_int != mbmi->mv[0].as_int) {
            // Keep the refined MV and WM parameters.
            tmp_rate_mv = av1_mv_bit_cost(
                &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
                x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
          } else {
            // Restore the old MV and WM parameters.
            mbmi->mv[0] = mv0;
            mbmi->wm_params = wm_params0;
            mbmi->num_proj_ref = num_proj_ref0;
          }
        }

        // Build the warped predictor
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                      av1_num_planes(cm) - 1);
      } else {
        // Least squares fit failed; WARPED_CAUSAL is not viable here.
        continue;
      }
#endif  // !CONFIG_REALTIME_ONLY
    } else if (is_interintra_mode) {
      const int ret =
          av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
                                      &tmp_rate_mv, &tmp_rate2, orig_dst);
      if (ret < 0) continue;
    }

    // If we are searching newmv and the mv is the same as refmv, skip the
    // current mode
    if (!av1_check_newmv_joint_nonzero(cm, x)) continue;

    // Update rd_stats for the current motion mode
    txfm_info->skip_txfm = 0;
    rd_stats->dist = 0;
    rd_stats->sse = 0;
    rd_stats->skip_txfm = 1;
    rd_stats->rate = tmp_rate2;
    const ModeCosts *mode_costs = &x->mode_costs;
    if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
    if (interintra_allowed) {
      rd_stats->rate +=
          mode_costs->interintra_cost[size_group_lookup[bsize]]
                                     [mbmi->ref_frame[1] == INTRA_FRAME];
    }
    if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
        (mbmi->ref_frame[1] != INTRA_FRAME)) {
      if (last_motion_mode_allowed == WARPED_CAUSAL) {
        rd_stats->rate +=
            mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
      } else {
        rd_stats->rate +=
            mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
      }
    }

    int64_t this_yrd = INT64_MAX;

    if (!do_tx_search) {
      // Avoid doing a transform search here to speed up the overall mode
      // search. It will be done later in the mode search if the current
      // motion mode seems promising.
      int64_t curr_sse = -1;
      int64_t sse_y = -1;
      int est_residue_cost = 0;
      int64_t est_dist = 0;
      int64_t est_rd = 0;
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        curr_sse = get_sse(cpi, x, &sse_y);
        const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
                                                 &est_residue_cost, &est_dist);
        (void)has_est_rd;
        assert(has_est_rd);
      } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
                 cpi->sf.rt_sf.use_nonrd_pick_mode) {
        model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
            cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
            NULL, &curr_sse, NULL, NULL, NULL);
        sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
      }
      est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
      // Prune if the estimated RD is more than 25% worse than the best
      // estimate so far (est_rd * 0.8 > best  <=>  est_rd > 1.25 * best).
      if (est_rd * 0.80 > *best_est_rd) {
        mbmi->ref_frame[1] = ref_frame_1;
        continue;
      }
      const int mode_rate = rd_stats->rate;
      rd_stats->rate += est_residue_cost;
      rd_stats->dist = est_dist;
      rd_stats->rdcost = est_rd;
      if (rd_stats->rdcost < *best_est_rd) {
        *best_est_rd = rd_stats->rdcost;
        assert(sse_y >= 0);
        // sse_y is scaled by << 4 to match the skip_rdy computation in the
        // do_tx_search branch below, keeping the two values comparable.
        ref_skip_rd[1] = txfm_rd_gate_level
                             ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
                             : INT64_MAX;
      }
      if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
        if (!is_comp_pred) {
          assert(curr_sse >= 0);
          inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                                rd_stats->rdcost, rd_stats, rd_stats_y,
                                rd_stats_uv, mbmi);
        }
      } else {
        assert(curr_sse >= 0);
        inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                              rd_stats->rdcost, rd_stats, rd_stats_y,
                              rd_stats_uv, mbmi);
      }
      mbmi->skip_txfm = 0;
    } else {
      // Perform full transform search
      int64_t skip_rd = INT64_MAX;
      int64_t skip_rdy = INT64_MAX;
      if (txfm_rd_gate_level) {
        // Check if the mode is good enough based on skip RD
        int64_t sse_y = INT64_MAX;
        int64_t curr_sse = get_sse(cpi, x, &sse_y);
        skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
        skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
        int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
                                        txfm_rd_gate_level, 0);
        if (!eval_txfm) continue;
      }

      // Do transform search
      const int mode_rate = rd_stats->rate;
      if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
                           rd_stats->rate, ref_best_rd)) {
        // If even the first mode fails the luma search, the whole call fails.
        if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
          return INT64_MAX;
        }
        continue;
      }
      const int skip_ctx = av1_get_skip_txfm_context(xd);
      const int y_rate =
          rd_stats->skip_txfm
              ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);

      const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      if (curr_rd < ref_best_rd) {
        ref_best_rd = curr_rd;
        ref_skip_rd[0] = skip_rd;
        ref_skip_rd[1] = skip_rdy;
      }
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        // Feed the observed rate/distortion back into the RD model.
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
            rd_stats_y->rate + rd_stats_uv->rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }

    if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
      if (is_nontrans_global_motion(xd, xd->mi[0])) {
        mbmi->interp_filters =
            av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
      }
    }

    const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
    if (mode_index == 0) {
      args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
    }
    if (mode_index == 0 || tmp_rd < best_rd) {
      // Update best_rd data if this is the best motion mode so far
      best_mbmi = *mbmi;
      best_rd = tmp_rd;
      best_rd_stats = *rd_stats;
      best_rd_stats_y = *rd_stats_y;
      best_rate_mv = tmp_rate_mv;
      *yrd = this_yrd;
      if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
      best_xskip_txfm = mbmi->skip_txfm;
    }
  }
  // Update RD and mbmi stats for selected motion mode
  mbmi->ref_frame[1] = ref_frame_1;
  *rate_mv = best_rate_mv;
  if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
    av1_invalid_rd_stats(rd_stats);
    restore_dst_buf(xd, *orig_dst, num_planes);
    return INT64_MAX;
  }
  *mbmi = best_mbmi;
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
  txfm_info->skip_txfm = best_xskip_txfm;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1640
// Computes the RD cost of coding the current block in skip mode. The inter
// prediction is built one plane at a time and, since skip mode codes no
// residual, the accumulated prediction SSE serves directly as the distortion.
// Always returns 0; results are reported through rd_stats.
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
                            const BUFFER_SET *const orig_dst, int64_t best_rd) {
  assert(bsize < BLOCK_SIZES_ALL);
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int64_t total_sse = 0;
  int64_t this_rd = INT64_MAX;
  // The only rate is the cost of signaling skip mode = 1 in this context.
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];

  for (int plane = 0; plane < num_planes; ++plane) {
    // Call av1_enc_build_inter_predictor() for one plane at a time.
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                  plane, plane);
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);

    av1_subtract_plane(x, plane_bsize, plane);

    int64_t sse =
        av1_pixel_diff_dist(x, plane, 0, 0, plane_bsize, plane_bsize, NULL);
    // Normalize high-bit-depth SSE back to an 8-bit scale.
    if (is_cur_buf_hbd(xd)) sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
    // NOTE(review): sse is scaled by 16 here, presumably to match the
    // distortion precision expected by RDCOST — confirm against RDCOST users.
    sse <<= 4;
    total_sse += sse;
    // When current rd cost is more than the best rd, skip evaluation of
    // remaining planes.
    this_rd = RDCOST(x->rdmult, rd_stats->rate, total_sse);
    if (this_rd > best_rd) break;
  }

  // No residual is coded, so distortion equals the prediction SSE.
  rd_stats->dist = rd_stats->sse = total_sse;
  rd_stats->rdcost = this_rd;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1682
1683 // Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the relevant
1684 // mode
1685 // Note(rachelbarker): This speed feature currently does not interact correctly
1686 // with global motion. The issue is that, when global motion is used, GLOBALMV
1687 // produces a different prediction to NEARESTMV/NEARMV even if the motion
1688 // vectors are the same. Thus GLOBALMV should not be pruned in this case.
static inline int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
                                      int ref_idx,
                                      const MV_REFERENCE_FRAME *ref_frame,
                                      PREDICTION_MODE single_mode) {
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  assert(single_mode != NEWMV);

  switch (single_mode) {
    case NEARESTMV:
      // NEARESTMV is never treated as a duplicate.
      return 0;
    case NEARMV:
      // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
      // when ref_mv_count = 1, NEARMV is same as GLOBALMV
      return ref_mv_count < 2;
    case GLOBALMV: {
      // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
      if (ref_mv_count == 0) return 1;
      // when ref_mv_count == 1, NEARMV is same as GLOBALMV
      if (ref_mv_count == 1) return 0;

      // Check GLOBALMV is matching with any mv in ref_mv_stack
      const int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
      for (int idx = 0; idx < stack_size; ++idx) {
        const int_mv stack_mv =
            (ref_idx == 0)
                ? mbmi_ext->ref_mv_stack[ref_frame_type][idx].this_mv
                : mbmi_ext->ref_mv_stack[ref_frame_type][idx].comp_mv;
        if (stack_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
          return 1;
      }
      return 0;
    }
    default: return 0;
  }
}
1725
// Fills this_mv for the given reference index of this_mode. Returns 0 when
// the mv duplicates an already-evaluated candidate and may be skipped,
// 1 otherwise.
static inline int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
                              int ref_idx, int ref_mv_idx,
                              int skip_repeated_ref_mv,
                              const MV_REFERENCE_FRAME *ref_frame,
                              const MB_MODE_INFO_EXT *mbmi_ext) {
  const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
  assert(is_inter_singleref_mode(single_mode));

  if (single_mode == NEWMV) {
    // NEWMV is determined later by the motion search.
    this_mv->as_int = INVALID_MV;
    return 1;
  }

  if (single_mode == NEARESTMV || single_mode == NEARMV) {
    const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
    const int stack_idx = (single_mode == NEARESTMV) ? 0 : ref_mv_idx + 1;
    assert(stack_idx >= 0);
    if (stack_idx < mbmi_ext->ref_mv_count[ref_frame_type]) {
      *this_mv =
          (ref_idx == 0)
              ? mbmi_ext->ref_mv_stack[ref_frame_type][stack_idx].this_mv
              : mbmi_ext->ref_mv_stack[ref_frame_type][stack_idx].comp_mv;
      return 1;
    }
    // Stack too short: fall through to the global mv, as GLOBALMV would.
  }

  // GLOBALMV, or a NEAREST/NEAR mode falling back to the global mv.
  if (skip_repeated_ref_mv &&
      check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
    return 0;
  *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
  return 1;
}
1762
1763 // Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
1764 // population
// Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv
// list population.
static inline int skip_nearest_near_mv_using_refmv_weight(
    const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
    const int8_t ref_frame_type, PREDICTION_MODE best_mode) {
  if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
  // Do not prune until the block has found at least one valid inter mode.
  if (!is_inter_mode(best_mode)) return 0;

  const MACROBLOCKD *xd = &x->e_mbd;
  // Both spatial neighbors must exist for the statistics to be meaningful.
  if (!xd->left_available || !xd->up_available) return 0;

  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const uint16_t *const weights = mbmi_ext->weight[ref_frame_type];
  const int mv_count =
      AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
  if (mv_count == 0) return 0;

  // If ref mv list has at least one nearest candidate do not prune NEARESTMV.
  if (this_mode == NEARESTMV && weights[0] >= REF_CAT_LEVEL) return 0;

  // Count the ref mvs populated from nearest candidates.
  int num_nearest = 0;
  for (int i = 0; i < mv_count; ++i) {
    num_nearest += (weights[i] >= REF_CAT_LEVEL);
  }

  // A small nearest count relative to mv_count means the block's motion is
  // weakly correlated with its spatial neighbors, so NEAREST/NEAR modes are
  // unlikely to win: prune NEARMV when the nearest count is relatively small
  // and NEARESTMV when no candidate came from a nearest neighbor.
  const int prune_thresh = 1 + (mv_count >= 2);
  return num_nearest < prune_thresh;
}
1804
// This function updates the non-NEWMV motion vectors for the current
// prediction mode.
// Fills cur_mv with the non-NEWMV motion vectors of this_mode (NEWMV slots
// take the mv predictor from the stack). Returns 0 if any mv should be
// skipped or fails the clamp check, 1 otherwise.
static inline int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
                               const AV1_COMMON *cm, const MACROBLOCK *x,
                               int skip_repeated_ref_mv) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int num_refs = has_second_ref(mbmi) + 1;

  int ok = 1;
  for (int ref = 0; ref < num_refs; ++ref) {
    int_mv ref_mv;
    ref_mv.as_int = INVALID_MV;
    if (!get_this_mv(&ref_mv, this_mode, ref, mbmi->ref_mv_idx,
                     skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext))
      return 0;
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref);
    if (single_mode == NEWMV) {
      // NEWMV: seed with the DRL stack entry; the search refines it later.
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
      cur_mv[ref] =
          (ref == 0)
              ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                    .this_mv
              : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                    .comp_mv;
    } else {
      ok &= clamp_and_check_mv(cur_mv + ref, ref_mv, cm, x);
    }
  }
  return ok;
}
1834
get_drl_cost(const MB_MODE_INFO * mbmi,const MB_MODE_INFO_EXT * mbmi_ext,const int (* const drl_mode_cost0)[2],int8_t ref_frame_type)1835 static inline int get_drl_cost(const MB_MODE_INFO *mbmi,
1836 const MB_MODE_INFO_EXT *mbmi_ext,
1837 const int (*const drl_mode_cost0)[2],
1838 int8_t ref_frame_type) {
1839 int cost = 0;
1840 if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
1841 for (int idx = 0; idx < 2; ++idx) {
1842 if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1843 uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1844 cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
1845 if (mbmi->ref_mv_idx == idx) return cost;
1846 }
1847 }
1848 return cost;
1849 }
1850
1851 if (have_nearmv_in_inter_mode(mbmi->mode)) {
1852 for (int idx = 1; idx < 3; ++idx) {
1853 if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1854 uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1855 cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
1856 if (mbmi->ref_mv_idx == (idx - 1)) return cost;
1857 }
1858 }
1859 return cost;
1860 }
1861 return cost;
1862 }
1863
// For a compound mode, checks that every NEWMV component has a valid motion
// vector from the earlier single-reference search.
static inline int is_single_newmv_valid(const HandleInterModeArgs *const args,
                                        const MB_MODE_INFO *const mbmi,
                                        PREDICTION_MODE this_mode) {
  for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
    if (get_single_mode(this_mode, ref_idx) != NEWMV) continue;
    const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
    if (!args->single_newmv_valid[mbmi->ref_mv_idx][ref]) return 0;
  }
  return 1;
}
1877
// Returns how many DRL ref mv indices should be searched for this mode.
static int get_drl_refmv_count(const MACROBLOCK *const x,
                               const MV_REFERENCE_FRAME *ref_frame,
                               PREDICTION_MODE mode) {
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;

  // A DRL index is only signaled when enough candidates exist: NEAR modes
  // need more than two, pure NEW modes more than one.
  const int has_drl = has_nearmv
                          ? (ref_mv_count > 2)
                          : ((mode == NEWMV || mode == NEW_NEWMV) &&
                             ref_mv_count > 1);
  if (!has_drl) return 1;
  return AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv);
}
1893
1894 // Checks if particular ref_mv_idx should be pruned.
prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,const int qindex,const int ref_mv_idx)1895 static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
1896 const int qindex,
1897 const int ref_mv_idx) {
1898 if (reduce_inter_modes >= 3) return 1;
1899 // Q-index logic based pruning is enabled only for
1900 // reduce_inter_modes = 2.
1901 assert(reduce_inter_modes == 2);
1902 // When reduce_inter_modes=2, pruning happens as below based on q index.
1903 // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
1904 // For q index range between 86 and 170: prune if ref_mv_idx == 2.
1905 // For q index range between 171 and 255: no pruning.
1906 const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
1907 return (ref_mv_idx >= min_prune_ref_mv_idx);
1908 }
1909
1910 // Whether this reference motion vector can be skipped, based on initial
1911 // heuristics.
static bool ref_mv_idx_early_breakout(
    const SPEED_FEATURES *const sf,
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
    int ref_mv_idx) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const int is_comp_pred = has_second_ref(mbmi);
  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
    // Prune low-weight candidates when a LAST2/LAST3 reference is involved.
    if (mbmi->ref_frame[0] == LAST2_FRAME ||
        mbmi->ref_frame[0] == LAST3_FRAME ||
        mbmi->ref_frame[1] == LAST2_FRAME ||
        mbmi->ref_frame[1] == LAST3_FRAME) {
      const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
      if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
          REF_CAT_LEVEL) {
        return true;
      }
    }
    // TODO(any): Experiment with reduce_inter_modes for compound prediction
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
        have_newmv_in_inter_mode(mbmi->mode)) {
      // Only prune references that are not the temporally nearest ones.
      if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
          mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
        const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
        const int do_prune = prune_ref_mv_idx_using_qindex(
            sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
        if (do_prune &&
            (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
             REF_CAT_LEVEL)) {
          return true;
        }
      }
    }
  }

  mbmi->ref_mv_idx = ref_mv_idx;
  // For compound NEW modes, both single-reference motion searches must have
  // produced valid vectors for this ref_mv_idx.
  if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
    return true;
  }
  // Lower-bound rate from signaling costs alone; if even that exceeds the
  // best RD, skip this index. NEAREST modes are exempt from this breakout.
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost = get_drl_cost(
      mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
  est_rd_rate += drl_cost;
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
    return true;
  }
  return false;
}
1964
1965 // Compute the estimated RD cost for the motion vector with simple translation.
// Compute the estimated RD cost for the motion vector with simple
// translation: the mode signaling rate is accumulated, a luma-only
// prediction is built, and a model (instead of a full transform search)
// supplies the rate/distortion estimate. Returns INT64_MAX when the mv is
// invalid or the signaling cost alone exceeds ref_best_rd.
static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
                                          RD_STATS *rd_stats,
                                          HandleInterModeArgs *args,
                                          int ref_mv_idx, int64_t ref_best_rd,
                                          BLOCK_SIZE bsize) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const AV1_COMMON *cm = &cpi->common;
  const int is_comp_pred = has_second_ref(mbmi);
  const ModeCosts *mode_costs = &x->mode_costs;

  // Snapshot the destination buffers so the caller's state can be restored.
  struct macroblockd_plane *p = xd->plane;
  const BUFFER_SET orig_dst = {
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
  };
  av1_init_rd_stats(rd_stats);

  // Restrict the evaluation to simple translation with plain compound
  // averaging; interintra is disabled. (These fields were previously
  // re-assigned identical values a second time further down; the redundant
  // duplicate assignments have been removed.)
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->comp_group_idx = 0;
  mbmi->compound_idx = 1;
  if (mbmi->ref_frame[1] == INTRA_FRAME) {
    mbmi->ref_frame[1] = NONE_FRAME;
  }
  int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);

  mbmi->num_proj_ref = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->ref_mv_idx = ref_mv_idx;

  // Mode signaling cost: reference frame + compound flag + DRL index.
  rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost =
      get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
  rd_stats->rate += drl_cost;

  int_mv cur_mv[2];
  if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
    return INT64_MAX;
  }
  assert(have_nearmv_in_inter_mode(mbmi->mode));
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    mbmi->mv[i].as_int = cur_mv[i].as_int;
  }
  const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
  rd_stats->rate += ref_mv_cost;

  // Early exit when the signaling rate alone already exceeds the best RD.
  if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
    return INT64_MAX;
  }

  set_default_interp_filters(mbmi, cm->features.interp_filter);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // Build the luma prediction only and estimate RD with the curve-fit model.
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
                                AOM_PLANE_Y, AOM_PLANE_Y);
  int est_rate;
  int64_t est_dist;
  model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
                                  NULL, NULL, NULL, NULL, NULL);
  return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
}
2039
2040 // Represents a set of integers, from 0 to sizeof(int) * 8, as bits in
2041 // an integer. 0 for the i-th bit means that integer is excluded, 1 means
2042 // it is included.
// Inserts 'index' into the bit-mask based integer set.
static inline void mask_set_bit(int *mask, int index) {
  *mask = *mask | (1 << index);
}
2044
// Returns true iff 'index' is present in the bit-mask based integer set.
static inline bool mask_check_bit(int mask, int index) {
  return ((mask >> index) & 1) != 0;
}
2048
2049 // Before performing the full MV search in handle_inter_mode, do a simple
2050 // translation search and see if we can eliminate any motion vectors.
2051 // Returns an integer where, if the i-th bit is set, it means that the i-th
2052 // motion vector should be searched. This is only set for NEAR_MV.
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
                                RD_STATS *rd_stats,
                                HandleInterModeArgs *const args,
                                int64_t ref_best_rd, BLOCK_SIZE bsize,
                                const int ref_set) {
  // If the number of ref mv count is equal to 1, do not prune the same. It
  // is better to evaluate the same than to prune it.
  if (ref_set == 1) return 1;
  AV1_COMMON *const cm = &cpi->common;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const PREDICTION_MODE this_mode = mbmi->mode;

  // Stage 1: keep only indices that survive the cheap early-breakout checks.
  int good_indices = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
                                  ref_best_rd, i)) {
      continue;
    }
    mask_set_bit(&good_indices, i);
  }

  // Only prune in NEARMV mode, if the speed feature is set, and the block size
  // is large enough. If these conditions are not met, return all good indices
  // found so far.
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
    return good_indices;
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
  // so b/2384 can be resolved.
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
      (mbmi->ref_frame[1] > 0 &&
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
    return good_indices;
  }

  // Stage 2: calculate the RD cost for the surviving motion vectors using
  // simple translation.
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    // If this index is bad, ignore it.
    if (!mask_check_bit(good_indices, ref_mv_idx)) {
      continue;
    }
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
        cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize);
  }
  // Find the index with the best RD cost.
  int best_idx = 0;
  for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
    if (idx_rdcost[i] < idx_rdcost[best_idx]) {
      best_idx = i;
    }
  }
  // Only include indices that are good and within a % of the best.
  // A looser ratio is allowed for compound prediction.
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
  // If the simple translation cost is not within this multiple of the
  // best RD, skip it. Note that the cutoff is derived experimentally.
  const double ref_dth = 5;
  int result = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (mask_check_bit(good_indices, i) &&
        (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
        (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
      mask_set_bit(&result, i);
    }
  }
  return result;
}
2123
2124 /*!\brief Motion mode information for inter mode search speedup.
2125 *
2126 * Used in a speed feature to search motion modes other than
2127 * SIMPLE_TRANSLATION only on winning candidates.
2128 */
typedef struct motion_mode_candidate {
  /*!
   * Mode info for the motion mode candidate.
   */
  MB_MODE_INFO mbmi;
  /*!
   * Rate describing the cost of the motion vectors for this candidate.
   */
  int rate_mv;
  /*!
   * Rate before motion mode search and transform coding is applied.
   */
  int rate2_nocoeff;
  /*!
   * An integer value 0 or 1 which indicates whether or not to skip the motion
   * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
   * candidate.
   */
  int skip_motion_mode;
  /*!
   * Total RD cost for this candidate.
   */
  int64_t rd_cost;
} motion_mode_candidate;
2153
2154 /*!\cond */
typedef struct motion_mode_best_st_candidate {
  // Best simple-translation candidates retained for a later motion mode
  // re-search.
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
  // Number of valid entries in motion_mode_cand.
  int num_motion_mode_cand;
} motion_mode_best_st_candidate;
2159
2160 // Checks if the current reference frame matches with neighbouring block's
2161 // (top/left) reference frames
// Checks if the current reference frame matches with neighbouring block's
// (top/left) reference frames.
static inline int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
                                               MB_MODE_INFO *nb_mbmi) {
  const int num_cur_refs = has_second_ref(cur_mbmi) + 1;
  for (int i = 0; i < num_cur_refs; i++) {
    const MV_REFERENCE_FRAME cur_ref = cur_mbmi->ref_frame[i];
    if (cur_ref == nb_mbmi->ref_frame[0] || cur_ref == nb_mbmi->ref_frame[1])
      return 1;
  }
  return 0;
}
2178
// Returns 1 if any inter-coded block in the row above shares a reference
// frame with the current block. Also returns 1 when the above row is not
// available, so the caller treats that case as a match.
static inline int find_ref_match_in_above_nbs(const int total_mi_cols,
                                              MACROBLOCKD *xd) {
  if (!xd->up_available) return 1;
  const int mi_col = xd->mi_col;
  MB_MODE_INFO **cur_mbmi = xd->mi;
  // prev_row_mi points into the mi array, starting at the beginning of the
  // previous row.
  MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
  const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
  uint8_t mi_step;
  // Walk the above row one neighboring block at a time; mi_step is each
  // neighbor's width in mi units.
  for (int above_mi_col = mi_col; above_mi_col < end_col;
       above_mi_col += mi_step) {
    MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
    mi_step = mi_size_wide[above_mi[0]->bsize];
    int match_found = 0;
    if (is_inter_block(*above_mi))
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
    if (match_found) return 1;
  }
  return 0;
}
2200
// Returns 1 if any inter-coded block in the column to the left shares a
// reference frame with the current block. Also returns 1 when the left
// column is not available, so the caller treats that case as a match.
static inline int find_ref_match_in_left_nbs(const int total_mi_rows,
                                             MACROBLOCKD *xd) {
  if (!xd->left_available) return 1;
  const int mi_row = xd->mi_row;
  MB_MODE_INFO **cur_mbmi = xd->mi;
  // prev_col_mi points into the mi array, starting at the top of the
  // previous column
  MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
  const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
  uint8_t mi_step;
  // Walk the left column one neighboring block at a time; mi_step is each
  // neighbor's height in mi units.
  for (int left_mi_row = mi_row; left_mi_row < end_row;
       left_mi_row += mi_step) {
    MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
    mi_step = mi_size_high[left_mi[0]->bsize];
    int match_found = 0;
    if (is_inter_block(*left_mi))
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
    if (match_found) return 1;
  }
  return 0;
}
2222 /*!\endcond */
2223
2224 /*! \brief Struct used to hold TPL data to
2225 * narrow down parts of the inter mode search.
2226 */
typedef struct {
  /*!
   * The best inter cost out of all of the reference frames.
   */
  int64_t best_inter_cost;
  /*!
   * The inter cost for each reference frame, accumulated from TPL stats.
   * Indexed by ref_frame - 1 (i.e. offset from LAST_FRAME), judging by the
   * refs[0] - 1 lookups in prune_modes_based_on_tpl_stats().
   */
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
} PruneInfoFromTpl;
2237
2238 #if !CONFIG_REALTIME_ONLY
2239 // TODO(Remya): Check if get_tpl_stats_b() can be reused
// Accumulates, per reference frame, the TPL prediction errors of all TPL
// units covered by the current block, and records the smallest valid
// per-reference total in inter_cost_info_from_tpl->best_inter_cost.
static inline void get_block_level_tpl_stats(
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
  AV1_COMMON *const cm = &cpi->common;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int tpl_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  // Nothing to do when TPL stats were not computed for this frame.
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  const int tpl_stride = tpl_frame->stride;
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
  // Column coordinates are converted to the superres (upscaled) domain in
  // which the TPL stats are laid out.
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);

  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  // Visit every TPL unit covering this block, clamped to the frame bounds.
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
       row += row_step) {
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
         col += col_step_sr) {
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];

      // Sums up the inter cost of corresponding ref frames
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
            this_stats->pred_error[ref_idx];
      }
    }
  }

  // Computes the best inter cost (minimum inter_cost)
  int64_t best_inter_cost = INT64_MAX;
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
    const int64_t cur_inter_cost =
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
    // calculating the minimum inter_cost
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
        valid_refs[ref_idx])
      best_inter_cost = cur_inter_cost;
  }
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
}
2293 #endif
2294
// Returns 1 if this mode should be pruned because its TPL-derived inter cost
// is too large relative to the best reference's cost, 0 otherwise.
static inline int prune_modes_based_on_tpl_stats(
    PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
    const PREDICTION_MODE this_mode, int prune_mode_level) {
  // NEWMV-class modes are only pruned at the more aggressive levels.
  if (prune_mode_level < 2 && have_newmv_in_inter_mode(this_mode)) return 0;

  const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
  // No valid TPL statistics available for this block.
  if (best_inter_cost == INT64_MAX) return 0;

  const int prune_level = prune_mode_level - 1;
  const int is_globalmv =
      (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
  // Column 0..2 corresponds to ref_mv indices 0..2; the last column is
  // shared by GLOBALMV/GLOBAL_GLOBALMV.
  const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;

  // Pruning thresholds in quarter units: lower values prune more
  // aggressively; set based on ref_mv_idx and speed feature level.
  static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
    { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
  };

  int64_t cur_inter_cost;
  if (refs[1] > INTRA_FRAME) {
    // Compound prediction: take the larger of the two reference costs for
    // more aggressive pruning.
    const int64_t cost0 = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
    const int64_t cost1 = inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
    cur_inter_cost = AOMMAX(cost0, cost1);
  } else {
    cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
  }

  // Prune when the mode's cost exceeds (factor / 4) * best_inter_cost.
  return cur_inter_cost >
         ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
           best_inter_cost) >>
          2);
}
2342
2343 /*!\brief High level function to select parameters for compound mode.
2344 *
2345 * \ingroup inter_mode_search
2346 * The main search functionality is done in the call to av1_compound_type_rd().
2347 *
2348 * \param[in] cpi Top-level encoder structure.
2349 * \param[in] x Pointer to struct holding all the data for
2350 * the current macroblock.
2351 * \param[in] args HandleInterModeArgs struct holding
2352 * miscellaneous arguments for inter mode
2353 * search. See the documentation for this
2354 * struct for a description of each member.
2355 * \param[in] ref_best_rd Best RD found so far for this block.
2356 * It is used for early termination of this
2357 * search if the RD exceeds this value.
2358 * \param[in,out] cur_mv Current motion vector.
2359 * \param[in] bsize Current block size.
 * \param[in,out] compmode_interinter_cost RD of the selected interinter
 *                                         compound mode.
2362 * \param[in,out] rd_buffers CompoundTypeRdBuffers struct to hold all
2363 * allocated buffers for the compound
2364 * predictors and masks in the compound type
2365 * search.
2366 * \param[in,out] orig_dst A prediction buffer to hold a computed
2367 * prediction. This will eventually hold the
2368 * final prediction, and the tmp_dst info will
2369 * be copied here.
2370 * \param[in] tmp_dst A temporary prediction buffer to hold a
2371 * computed prediction.
2372 * \param[in,out] rate_mv The rate associated with the motion vectors.
2373 * This will be modified if a motion search is
2374 * done in the motion mode search.
2375 * \param[in,out] rd_stats Struct to keep track of the overall RD
2376 * information.
2377 * \param[in,out] skip_rd An array of length 2 where skip_rd[0] is the
2378 * best total RD for a skip mode so far, and
2379 * skip_rd[1] is the best RD for a skip mode so
2380 * far in luma. This is used as a speed feature
2381 * to skip the transform search if the computed
2382 * skip RD for the current mode is not better
2383 * than the best skip_rd so far.
2384 * \param[in,out] skip_build_pred Indicates whether or not to build the inter
2385 * predictor. If this is 0, the inter predictor
2386 * has already been built and thus we can avoid
2387 * repeating computation.
2388 * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2389 * a viable candidate.
2390 */
static int process_compound_inter_mode(
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const AV1_COMMON *cm = &cpi->common;
  // Masked compound types (wedge / diff-weighted) are searched only when both
  // the block size and the sequence header allow them.
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
                                   cm->seq_params->enable_masked_compound;
  // Candidate set: all four inter-inter compound types are considered;
  // av1_compound_type_rd narrows this down internally.
  int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
                         (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);

  const int num_planes = av1_num_planes(cm);
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // Set by av1_compound_type_rd when it has already built the luma predictor
  // into the destination buffer.
  int is_luma_interp_done = 0;
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  int64_t best_rd_compound;
  int64_t rd_thresh;
  const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
  const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
  // Derive an early-termination threshold for the compound-type search from
  // the best RD seen so far.
  rd_thresh = get_rd_thresh_from_best_rd(ref_best_rd, (1 << comp_type_rd_shift),
                                         comp_type_rd_scale);
  // Select compound type and any parameters related to that type
  // (for example, the mask parameters if it is a masked mode) and compute
  // the RD
  *compmode_interinter_cost = av1_compound_type_rd(
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
  // Bail out (returning 1 = "not a viable candidate") if even the best
  // compound RD is clearly worse than the best mode found so far. The
  // shift/scale gives the comparison some slack.
  if (ref_best_rd < INT64_MAX &&
      (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
          ref_best_rd) {
    restore_dst_buf(xd, *orig_dst, num_planes);
    return 1;
  }

  // Build only uv predictor for COMPOUND_AVERAGE.
  // Note there is no need to call av1_enc_build_inter_predictor
  // for luma if COMPOUND_AVERAGE is selected because it is the first
  // candidate in av1_compound_type_rd, which means it used the dst_buf
  // rather than the tmp_buf.
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
    if (num_planes > 1) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                    AOM_PLANE_U, num_planes - 1);
    }
    // Tell the caller the predictor is already built.
    *skip_build_pred = 1;
  }
  return 0;
}
2445
2446 // Speed feature to prune out MVs that are similar to previous MVs if they
2447 // don't achieve the best RD advantage.
// Speed feature: skip a ref_mv_idx whose MV(s) nearly duplicate those of an
// earlier index, unless some index has already produced the best mode.
// Returns 1 to prune the current index, 0 to keep evaluating it. As a side
// effect, records the current MV(s) in save_mv for later comparisons.
static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
                                   int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
                                   MB_MODE_INFO *mbmi, int pruning_factor) {
  const int num_mvs = has_second_ref(mbmi) ? 2 : 1;
  // Similarity threshold (sum of absolute row/col differences); a higher
  // pruning_factor makes the pruning more aggressive.
  const int threshold = num_mvs << (pruning_factor + 1);

  // Compare the current MV(s) against those saved for earlier indices.
  // (For ref_mv_idx == 0 this loop body never runs.)
  for (int prev = 0; prev < ref_mv_idx; ++prev) {
    if (save_mv[prev][0].as_int == INVALID_MV) continue;

    int sad = 0;
    for (int k = 0; k < num_mvs; ++k) {
      sad += abs(save_mv[prev][k].as_mv.row - mbmi->mv[k].as_mv.row) +
             abs(save_mv[prev][k].as_mv.col - mbmi->mv[k].as_mv.col);
    }

    // If no index is the best one yet and the current MV is similar to a
    // previously stored MV, terminate this ref_mv_idx evaluation.
    if (best_ref_mv_idx == -1 && sad <= threshold) return 1;
  }

  // Stash the current MV(s) so subsequent indices can compare against them.
  if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
    for (int k = 0; k < num_mvs; ++k)
      save_mv[ref_mv_idx][k].as_int = mbmi->mv[k].as_int;
  }

  return 0;
}
2479
2480 /*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2481 *
2482 * \ingroup inter_mode_search
2483 *
2484 * Compares the sse of zero mv and the best sse found in single new_mv. If the
2485 * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
2486 * Else returns 0.
2487 *
 * Note that the SSE used here comes from single_motion_search. So it is
2489 * interpolated with the filter in motion search, not the actual interpolation
2490 * filter used in encoding.
2491 *
2492 * \param[in] fn_ptr A table of function pointers to compute SSE.
2493 * \param[in] x Pointer to struct holding all the data for
2494 * the current macroblock.
2495 * \param[in] bsize The current block_size.
2496 * \param[in] args The args to handle_inter_mode, used to track
2497 * the best SSE.
2498 * \param[in] prune_zero_mv_with_sse The argument holds speed feature
2499 * prune_zero_mv_with_sse value
2500 * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2501 */
static inline int prune_zero_mv_with_sse(const aom_variance_fn_ptr_t *fn_ptr,
                                         const MACROBLOCK *x, BLOCK_SIZE bsize,
                                         const HandleInterModeArgs *args,
                                         int prune_zero_mv_with_sse) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];

  const int num_refs = has_second_ref(mbmi) ? 2 : 1;
  const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;

  // Verify pruning preconditions for every reference used by this mode.
  for (int ref = 0; ref < num_refs; ++ref) {
    if (xd->global_motion[refs[ref]].wmtype != IDENTITY) {
      // Pruning logic only works for IDENTITY type models.
      // Note: In theory we could apply similar logic for TRANSLATION
      // type models, but we do not code these due to a spec bug
      // (see comments in gm_get_motion_vector() in av1/common/mv.h)
      assert(xd->global_motion[refs[ref]].wmtype != TRANSLATION);
      return 0;
    }

    // Don't prune if we have invalid data.
    assert(mbmi->mv[ref].as_int == 0);
    if (args->best_single_sse_in_refs[refs[ref]] == INT32_MAX) {
      return 0;
    }
  }

  // Accumulate the SSE of the zero-MV prediction and of the best single
  // NEWMV prediction across the reference(s).
  unsigned int zeromv_sse_sum = 0;
  unsigned int newmv_sse_sum = 0;
  for (int ref = 0; ref < num_refs; ++ref) {
    const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
    const struct macroblockd_plane *pd = xd->plane;
    const struct buf_2d *src_buf = &p->src;
    const struct buf_2d *pred_buf = &pd->pre[ref];

    unsigned int sse;
    fn_ptr[bsize].vf(pred_buf->buf, pred_buf->stride, src_buf->buf,
                     src_buf->stride, &sse);
    zeromv_sse_sum += sse;
    newmv_sse_sum += args->best_single_sse_in_refs[refs[ref]];
  }

  // A more aggressive speed-feature level uses a tighter tolerance.
  const double tolerance = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25;
  return (double)zeromv_sse_sum > tolerance * (double)newmv_sse_sum;
}
2557
2558 /*!\brief Searches for interpolation filter in realtime mode during winner eval
2559 *
2560 * \ingroup inter_mode_search
2561 *
2562 * Does a simple interpolation filter search during winner mode evaluation. This
2563 * is currently only used by realtime mode as \ref
2564 * av1_interpolation_filter_search is not called during realtime encoding.
2565 *
 * This function only searches over two possible filters. EIGHTTAP_REGULAR is
 * always searched. For lowres clips (<= 240p), MULTITAP_SHARP is also
 * searched. For higher-resolution clips (> 240p), EIGHTTAP_SMOOTH is also
 * searched.
 *
2570 * \param[in] cpi Pointer to the compressor. Used for feature
2571 * flags.
2572 * \param[in,out] x Pointer to macroblock. This is primarily
2573 * used to access the buffers.
2574 * \param[in] mi_row The current row in mi unit (4X4 pixels).
2575 * \param[in] mi_col The current col in mi unit (4X4 pixels).
2576 * \param[in] bsize The current block_size.
2577 * \return Returns true if a predictor is built in xd->dst, false otherwise.
2578 */
static inline bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
                                      int mi_row, int mi_col,
                                      BLOCK_SIZE bsize) {
  // Candidate filter pairs. Only two of the three are actually evaluated per
  // block, chosen by the resolution check inside the loop below.
  static const InterpFilters filters_ref_set[3] = {
    { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
    { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
    { MULTITAP_SHARP, MULTITAP_SHARP }
  };

  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mi = xd->mi[0];
  int64_t best_cost = INT64_MAX;
  int best_filter_index = -1;
  // dst_bufs[0] stores the new predictor, and dst_bufs[1] stores the best one.
  const int num_planes = av1_num_planes(cm);
  const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
  assert(is_inter_mode(mi->mode));
  assert(mi->motion_mode == SIMPLE_TRANSLATION);
  assert(!is_inter_compound_mode(mi->mode));

  // If the MV needs no sub-pel interpolation there is nothing to search and
  // no predictor is built here.
  if (!av1_is_interp_needed(xd)) {
    return false;
  }

  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
  const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };

  for (int i = 0; i < 3; ++i) {
    // Resolution-dependent candidate selection: skip EIGHTTAP_SMOOTH for
    // low-res clips and MULTITAP_SHARP for high-res clips.
    if (is_240p_or_lesser) {
      if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
        continue;
      }
    } else {
      if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
        continue;
      }
    }
    int64_t cost;
    RD_STATS tmp_rd = { 0 };

    // Build the luma predictor with this candidate filter, then estimate its
    // RD cost with the model (no transform search).
    mi->interp_filters.as_filters = filters_ref_set[i];
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);

    model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
                       ? MODELRD_LEGACY
                       : MODELRD_TYPE_INTERP_FILTER](
        cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
        &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);

    // Add the cost of signalling the filter choice itself.
    tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
                                           cm->seq_params->enable_dual_filter);
    cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
    if (cost < best_cost) {
      best_filter_index = i;
      best_cost = cost;
      // Keep the best predictor in dst_bufs[1]; the next candidate will be
      // written into dst_bufs[0].
      swap_dst_buf(xd, dst_bufs, num_planes);
    }
  }
  assert(best_filter_index >= 0);

  mi->interp_filters.as_filters = filters_ref_set[best_filter_index];

  const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];

  if (is_best_pred_in_orig) {
    // Best predictor already lives in the original dst; just point xd back
    // at it.
    swap_dst_buf(xd, dst_bufs, num_planes);
  } else {
    // Note that xd->pd's buffers are kept in sync with dst_bufs[0]. So if
    // is_best_pred_in_orig is false, that means the current buffer is the
    // original one, and the best predictor must be copied from tmp_dst.
    assert(&orig_dst == dst_bufs[0]);
    assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
    const int width = block_size_wide[bsize];
    const int height = block_size_high[bsize];
#if CONFIG_AV1_HIGHBITDEPTH
    const bool is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd) {
      aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
                               tmp_dst.stride[AOM_PLANE_Y],
                               CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
                               orig_dst.stride[AOM_PLANE_Y], width, height);
    } else {
      aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                        orig_dst.plane[AOM_PLANE_Y],
                        orig_dst.stride[AOM_PLANE_Y], width, height);
    }
#else
    aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                      orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
                      width, height);
#endif
  }

  // Build the chroma part of the predictor with the chosen filter.
  if (num_planes > 1) {
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
                                  AOM_PLANE_U, AOM_PLANE_V);
  }

  return true;
}
2689
2690 /*!\brief AV1 inter mode RD computation
2691 *
2692 * \ingroup inter_mode_search
2693 * Do the RD search for a given inter mode and compute all information relevant
2694 * to the input mode. It will compute the best MV,
2695 * compound parameters (if the mode is a compound mode) and interpolation filter
2696 * parameters.
2697 *
2698 * \param[in] cpi Top-level encoder structure.
2699 * \param[in] tile_data Pointer to struct holding adaptive
2700 * data/contexts/models for the tile during
2701 * encoding.
2702 * \param[in] x Pointer to structure holding all the data
2703 * for the current macroblock.
2704 * \param[in] bsize Current block size.
2705 * \param[in,out] rd_stats Struct to keep track of the overall RD
2706 * information.
2707 * \param[in,out] rd_stats_y Struct to keep track of the RD information
2708 * for only the Y plane.
2709 * \param[in,out] rd_stats_uv Struct to keep track of the RD information
2710 * for only the UV planes.
2711 * \param[in] args HandleInterModeArgs struct holding
2712 * miscellaneous arguments for inter mode
2713 * search. See the documentation for this
2714 * struct for a description of each member.
2715 * \param[in] ref_best_rd Best RD found so far for this block.
2716 * It is used for early termination of this
2717 * search if the RD exceeds this value.
2718 * \param[in] tmp_buf Temporary buffer used to hold predictors
2719 * built in this search.
2720 * \param[in,out] rd_buffers CompoundTypeRdBuffers struct to hold all
2721 * allocated buffers for the compound
2722 * predictors and masks in the compound type
2723 * search.
2724 * \param[in,out] best_est_rd Estimated RD for motion mode search if
2725 * do_tx_search (see below) is 0.
2726 * \param[in] do_tx_search Parameter to indicate whether or not to do
2727 * a full transform search. This will compute
2728 * an estimated RD for the modes without the
2729 * transform search and later perform the full
2730 * transform search on the best candidates.
2731 * \param[in,out] inter_modes_info InterModesInfo struct to hold inter mode
2732 * information to perform a full transform
2733 * search only on winning candidates searched
2734 * with an estimate for transform coding RD.
2735 * \param[in,out] motion_mode_cand A motion_mode_candidate struct to store
2736 * motion mode information used in a speed
2737 * feature to search motion modes other than
2738 * SIMPLE_TRANSLATION only on winning
2739 * candidates.
2740 * \param[in,out] skip_rd A length 2 array, where skip_rd[0] is the
2741 * best total RD for a skip mode so far, and
2742 * skip_rd[1] is the best RD for a skip mode so
2743 * far in luma. This is used as a speed feature
2744 * to skip the transform search if the computed
2745 * skip RD for the current mode is not better
2746 * than the best skip_rd so far.
2747 * \param[in] inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
2748 * narrow down the search based on data
2749 * collected in the TPL model.
2750 * \param[out] yrd Stores the rdcost corresponding to encoding
2751 * the luma plane.
2752 *
2753 * \return The RD cost for the mode being searched.
2754 */
static int64_t handle_inter_mode(
    AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
    uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
    int64_t *best_est_rd, const int do_tx_search,
    InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
    int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
    int64_t *yrd) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;

#if CONFIG_REALTIME_ONLY
  // TPL-based pruning is unavailable in realtime-only builds.
  const int prune_modes_based_on_tpl = 0;
#else   // CONFIG_REALTIME_ONLY
  const TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const int prune_modes_based_on_tpl =
      cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
      av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
#endif  // CONFIG_REALTIME_ONLY
  int i;
  // Reference frames for this mode
  const int refs[2] = { mbmi->ref_frame[0],
                        (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  int rate_mv = 0;
  int64_t rd = INT64_MAX;
  // Do first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };

  int64_t ret_val = INT64_MAX;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  // Best stats found across all ref_mv_idx iterations of the main loop.
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  int64_t best_rd = INT64_MAX;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  int64_t best_yrd = INT64_MAX;
  MB_MODE_INFO best_mbmi = *mbmi;
  int best_xskip_txfm = 0;
  int64_t newmv_ret_val = INT64_MAX;
  inter_mode_info mode_info[MAX_REF_MV_SEARCH];

  // Do not prune the mode based on inter cost from tpl if the current ref
  // frame is the winner ref in neighbouring blocks.
  int ref_match_found_in_above_nb = 0;
  int ref_match_found_in_left_nb = 0;
  if (prune_modes_based_on_tpl) {
    ref_match_found_in_above_nb =
        find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
    ref_match_found_in_left_nb =
        find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
  }

  // First, perform a simple translation search for each of the indices. If
  // an index performs well, it will be fully searched in the main loop
  // of this function.
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
  // Save MV results from first 2 ref_mv_idx.
  int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
  int best_ref_mv_idx = -1;
  // Bitmask of ref_mv indices worth a full search (from the quick pass).
  const int idx_mask =
      ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set);
  const int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
  const ModeCosts *mode_costs = &x->mode_costs;
  const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
  // Rate that is common to every ref_mv_idx for this mode.
  const int base_rate =
      args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;

  for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
    save_mv[i][0].as_int = INVALID_MV;
    save_mv[i][1].as_int = INVALID_MV;
  }
  args->start_mv_cnt = 0;

  // Main loop of this function. This will iterate over all of the ref mvs
  // in the dynamic reference list and do the following:
  //    1.) Get the current MV. Create newmv MV if necessary
  //    2.) Search compound type and parameters if applicable
  //    3.) Do interpolation filter search
  //    4.) Build the inter predictor
  //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
  //        WARPED_CAUSAL)
  //    6.) Update stats if best so far
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    mbmi->ref_mv_idx = ref_mv_idx;

    mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
    mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
    const int drl_cost = get_drl_cost(
        mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
    mode_info[ref_mv_idx].drl_cost = drl_cost;
    mode_info[ref_mv_idx].skip = 0;

    if (!mask_check_bit(idx_mask, ref_mv_idx)) {
      // MV did not perform well in simple translation search. Skip it.
      continue;
    }
    if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
        !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
      // Skip mode if TPL model indicates it will not be beneficial.
      if (prune_modes_based_on_tpl_stats(
              inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
              cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
        continue;
    }
    av1_init_rd_stats(rd_stats);

    // Initialize compound mode data
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
    if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;

    mbmi->num_proj_ref = 0;
    mbmi->motion_mode = SIMPLE_TRANSLATION;

    // Compute cost for signalling this DRL index
    rd_stats->rate = base_rate;
    rd_stats->rate += drl_cost;

    int rs = 0;
    int compmode_interinter_cost = 0;

    int_mv cur_mv[2];

    // TODO(Cherma): Extend this speed feature to support compound mode
    int skip_repeated_ref_mv =
        is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
    // Generate the current mv according to the prediction mode
    if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
      continue;
    }

    // The above call to build_cur_mv does not handle NEWMV modes. Build
    // the mv here if we have NEWMV for any predictors.
    if (have_newmv_in_inter_mode(this_mode)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, handle_newmv_time);
#endif
      newmv_ret_val =
          handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, handle_newmv_time);
#endif

      if (newmv_ret_val != 0) continue;

      if (is_inter_singleref_mode(this_mode) &&
          cur_mv[0].as_int != INVALID_MV) {
        const MV_REFERENCE_FRAME ref = refs[0];
        // Track the best single-ref NEWMV SSE per reference; used later by
        // prune_zero_mv_with_sse().
        const unsigned int this_sse = x->pred_sse[ref];
        if (this_sse < args->best_single_sse_in_refs[ref]) {
          args->best_single_sse_in_refs[ref] = this_sse;
        }

        if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
          const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1;
          const int pix_idx = num_pels_log2_lookup[bsize] - 4;
          // Per-threshold-level, per-block-size scale factors for the SSE
          // comparison below.
          const double scale_factor[3][11] = {
            { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
            { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
            { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
          };
          assert(pix_idx >= 0);
          assert(th_idx <= 2);
          if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse)
            continue;
        }
      }

      rd_stats->rate += rate_mv;
    }
    // Copy the motion vector for this mode into mbmi struct
    for (i = 0; i < is_comp_pred + 1; ++i) {
      mbmi->mv[i].as_int = cur_mv[i].as_int;
    }

    // Early exit when even the rate alone already exceeds the best RD.
    // NEAREST modes are exempt so at least one candidate survives.
    if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
        mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
      continue;
    }

    // Skip the rest of the search if prune_ref_mv_idx_search speed feature
    // is enabled, and the current MV is similar to a previous one.
    if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
        prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
                                cpi->sf.inter_sf.prune_ref_mv_idx_search))
      continue;

    if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
        (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
      if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args,
                                 cpi->sf.gm_sf.prune_zero_mv_with_sse)) {
        continue;
      }
    }

    int skip_build_pred = 0;
    const int mi_row = xd->mi_row;
    const int mi_col = xd->mi_col;

    // Handle a compound predictor, continue if it is determined this
    // cannot be the best compound mode
    if (is_comp_pred) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, compound_type_rd_time);
#endif
      const int not_best_mode = process_compound_inter_mode(
          cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
          rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
          &skip_build_pred);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, compound_type_rd_time);
#endif
      if (not_best_mode) continue;
    }

    if (!args->skip_ifs) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, interpolation_filter_search_time);
#endif
      // Determine the interpolation filter for this mode
      ret_val = av1_interpolation_filter_search(
          x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
          &skip_build_pred, args, ref_best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, interpolation_filter_search_time);
#endif
      // Record the single-ref modelled RD so compound modes can later be
      // pruned against the component single-ref modes.
      if (args->modelled_rd != NULL && !is_comp_pred) {
        args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
      }
      if (ret_val != 0) {
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
                 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      }

      // Compute modelled RD if enabled
      if (args->modelled_rd != NULL) {
        if (is_comp_pred) {
          const int mode0 = compound_ref0_mode(this_mode);
          const int mode1 = compound_ref1_mode(this_mode);
          const int64_t mrd =
              AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
                     args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
          // Prune the compound mode if it is much worse than the better of
          // its two single-ref components.
          if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
            restore_dst_buf(xd, orig_dst, num_planes);
            continue;
          }
        }
      }
    }

    rd_stats->rate += compmode_interinter_cost;
    if (skip_build_pred != 1) {
      // Build this inter predictor if it has not been previously built
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, motion_mode_rd_time);
#endif
    int rate2_nocoeff = rd_stats->rate;
    // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
    // OBMC_CAUSAL or WARPED_CAUSAL
    int64_t this_yrd;
    ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
                             rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
                             &orig_dst, best_est_rd, do_tx_search,
                             inter_modes_info, 0, &this_yrd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, motion_mode_rd_time);
#endif
    assert(
        IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));

    if (ret_val != INT64_MAX) {
      int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
                              rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
                              cpi->sf.winner_mode_sf.multi_winner_mode_type,
                              do_tx_search);
      if (tmp_rd < best_rd) {
        best_yrd = this_yrd;
        // Update the best rd stats if we found the best mode so far
        best_rd_stats = *rd_stats;
        best_rd_stats_y = *rd_stats_y;
        best_rd_stats_uv = *rd_stats_uv;
        best_rd = tmp_rd;
        best_mbmi = *mbmi;
        best_xskip_txfm = txfm_info->skip_txfm;
        memcpy(best_blk_skip, txfm_info->blk_skip,
               sizeof(best_blk_skip[0]) * xd->height * xd->width);
        av1_copy_array(best_tx_type_map, xd->tx_type_map,
                       xd->height * xd->width);
        motion_mode_cand->rate_mv = rate_mv;
        motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
      }

      // Tighten the pruning threshold for subsequent iterations.
      if (tmp_rd < ref_best_rd) {
        ref_best_rd = tmp_rd;
        best_ref_mv_idx = ref_mv_idx;
      }
    }
    restore_dst_buf(xd, orig_dst, num_planes);
  }

  // No ref_mv_idx produced a valid result for this mode.
  if (best_rd == INT64_MAX) return INT64_MAX;

  // re-instate status of the best choice
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  *rd_stats_uv = best_rd_stats_uv;
  *yrd = best_yrd;
  *mbmi = best_mbmi;
  txfm_info->skip_txfm = best_xskip_txfm;
  assert(IMPLIES(mbmi->comp_group_idx == 1,
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);

  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  return rd_stats->rdcost;
}
3105
3106 /*!\brief Search for the best intrabc predictor
3107 *
3108 * \ingroup intra_mode_search
3109 * \callergraph
3110 * This function performs a motion search to find the best intrabc predictor.
3111 *
3112 * \returns Returns the best overall rdcost (including the non-intrabc modes
3113 * search before this function).
3114 */
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                       int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  // Bail out early when intrabc is not allowed for this frame or is disabled
  // by the encoder configuration / speed features.
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
      !cpi->sf.mv_sf.use_intrabc || cpi->sf.rt_sf.use_nonrd_pick_mode)
    return INT64_MAX;
  const int num_planes = av1_num_planes(cm);

  MACROBLOCKD *const xd = &x->e_mbd;
  const TileInfo *tile = &xd->tile;
  MB_MODE_INFO *mbmi = xd->mi[0];
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  const int w = block_size_wide[bsize];
  const int h = block_size_high[bsize];
  const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
  const int sb_col = mi_col >> cm->seq_params->mib_size_log2;

  // Derive the reference DV (displacement vector) from the MV reference
  // stack of the current block.
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  int_mv nearestmv, nearmv;
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
                                   0);

  if (nearestmv.as_int == INVALID_MV) {
    nearestmv.as_int = 0;
  }
  if (nearmv.as_int == INVALID_MV) {
    nearmv.as_int = 0;
  }

  // Prefer the nearest MV; fall back to near, then to the default ref DV.
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
  if (dv_ref.as_int == 0) {
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
  }
  // Ref DV should not have sub-pel.
  assert((dv_ref.as_mv.col & 7) == 0);
  assert((dv_ref.as_mv.row & 7) == 0);
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;

  // Intrabc predicts from already-reconstructed pixels of the current frame,
  // so point the prediction buffers at the current frame's buffer.
  struct buf_2d yv12_mb[MAX_MB_PLANE];
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
  for (int i = 0; i < num_planes; ++i) {
    xd->plane[i].pre[0] = yv12_mb[i];
  }

  enum IntrabcMotionDirection {
    IBC_MOTION_ABOVE,
    IBC_MOTION_LEFT,
    IBC_MOTION_DIRECTIONS
  };

  MB_MODE_INFO best_mbmi = *mbmi;
  RD_STATS best_rdstats = *rd_stats;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);

  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
  const SEARCH_METHODS search_method =
      av1_get_default_mv_search_method(x, &cpi->sf.mv_sf, bsize);
  const search_site_config *lookahead_search_sites =
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
  const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
                                     &dv_ref.as_mv, start_mv,
                                     lookahead_search_sites, search_method,
                                     /*fine_search_interval=*/0);
  const IntraBCMVCosts *const dv_costs = x->dv_costs;
  av1_set_ms_to_intra_mode(&fullms_params, dv_costs);

  // Search the already-coded area above the current superblock row, then the
  // already-coded area to the left within the superblock row.
  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
       dir < IBC_MOTION_DIRECTIONS; ++dir) {
    switch (dir) {
      case IBC_MOTION_ABOVE:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (tile->mi_col_end - mi_col) * MI_SIZE - w;
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        fullms_params.mv_limits.row_max =
            (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
        break;
      case IBC_MOTION_LEFT:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
        // TODO([email protected]): Minimize the overlap between above and
        // left areas.
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        int bottom_coded_mi_edge =
            AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
        fullms_params.mv_limits.row_max =
            (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
        break;
      default: assert(0);
    }

    // Snapshot the per-direction limits so we can verify below that
    // av1_set_mv_search_range() only ever shrinks the search window.
    // (The previous asserts here compared each limit against itself and
    // were no-ops.)
    const FullMvLimits dir_limits = fullms_params.mv_limits;
    (void)dir_limits;  // Only referenced by the asserts below.

    av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);
    assert(fullms_params.mv_limits.col_min >= dir_limits.col_min);
    assert(fullms_params.mv_limits.col_max <= dir_limits.col_max);
    assert(fullms_params.mv_limits.row_min >= dir_limits.row_min);
    assert(fullms_params.mv_limits.row_max <= dir_limits.row_max);

    // Empty search window for this direction.
    if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
        fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
      continue;
    }

    const int step_param = cpi->mv_search_params.mv_step_param;
    IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
    int_mv best_mv, best_hash_mv;
    FULLPEL_MV_STATS best_mv_stats;

    // Regular full-pel search followed by a hash-based search; keep
    // whichever yields the lower error.
    int bestsme =
        av1_full_pixel_search(start_mv, &fullms_params, step_param, NULL,
                              &best_mv.as_fullmv, &best_mv_stats, NULL);
    const int hashsme = av1_intrabc_hash_search(
        cpi, xd, &fullms_params, intrabc_hash_info, &best_hash_mv.as_fullmv);
    if (hashsme < bestsme) {
      best_mv = best_hash_mv;
      bestsme = hashsme;
    }

    if (bestsme == INT_MAX) continue;
    const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
    if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
                                get_fullmv_from_mv(&dv)))
      continue;
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
                         cm->seq_params->mib_size_log2))
      continue;

    // DV should not have sub-pel.
    assert((dv.col & 7) == 0);
    assert((dv.row & 7) == 0);
    // Configure mbmi as an intrabc block and build the prediction.
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
    mbmi->use_intrabc = 1;
    mbmi->mode = DC_PRED;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->motion_mode = SIMPLE_TRANSLATION;
    mbmi->mv[0].as_mv = dv;
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
    mbmi->skip_txfm = 0;
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);

    // TODO([email protected]): The full motion field defining discount
    // in MV_COST_WEIGHT is too large. Explore other values.
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
                                        dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
    const int rate_mode = x->mode_costs.intrabc_cost[1];
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
                         &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
      continue;
    rd_stats_yuv.rdcost =
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
    if (rd_stats_yuv.rdcost < best_rd) {
      best_rd = rd_stats_yuv.rdcost;
      best_mbmi = *mbmi;
      best_rdstats = rd_stats_yuv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
    }
  }
  // Re-instate the best configuration found (possibly the incoming one, in
  // which case 'best_rd' is returned unchanged).
  *mbmi = best_mbmi;
  *rd_stats = best_rdstats;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
#if CONFIG_RD_DEBUG
  mbmi->rd_stats = *rd_stats;
#endif
  return best_rd;
}
3306
// TODO([email protected]): We are using struct names (e.g. `struct AV1_COMP`)
// instead of their typedefs here because Doxygen doesn't know about the
// typedefs yet. So using the typedefs would prevent Doxygen from finding this
// function and generating the callgraph. Once documents for AV1_COMP and
// MACROBLOCK are added to doxygen, we can revert back to using the typedefs.
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int luma_rate = 0, chroma_rate = 0;
  int luma_rate_tokenonly = 0, chroma_rate_tokenonly = 0;
  uint8_t luma_skip_txfm = 0, chroma_skip_txfm = 0;
  int64_t luma_dist = 0, chroma_dist = 0;

  // Start from a clean intra configuration for this block.
  ctx->rd_stats.skip_txfm = 0;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->skip_mode = 0;

  // Luma intra mode search.
  const int64_t intra_yrd =
      av1_rd_pick_intra_sby_mode(cpi, x, &luma_rate, &luma_rate_tokenonly,
                                 &luma_dist, &luma_skip_txfm, bsize, best_rd,
                                 ctx);

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  if (intra_yrd >= best_rd) {
    // The luma search could not beat the incoming best rd.
    rd_cost->rate = INT_MAX;
  } else {
    // Search intra modes for the uv planes if needed.
    if (num_planes > 1) {
      // Set up the tx variables for reproducing the y predictions in case we
      // need them for chroma-from-luma.
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
        memcpy(txfm_info->blk_skip, ctx->blk_skip,
               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
      }
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      av1_rd_pick_intra_sbuv_mode(cpi, x, &chroma_rate, &chroma_rate_tokenonly,
                                  &chroma_dist, &chroma_skip_txfm, bsize,
                                  max_uv_tx_size);
    }

    // An intra block is always coded as non-skip.
    rd_cost->rate =
        luma_rate + chroma_rate +
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
    rd_cost->dist = luma_dist + chroma_dist;
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
    rd_cost->skip_txfm = 0;
  }

  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  // Finally evaluate intra block copy; on success it overwrites rd_cost and
  // the block's transform bookkeeping.
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
    memcpy(ctx->blk_skip, txfm_info->blk_skip,
           sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
    assert(rd_cost->rate != INT_MAX);
  }
  if (rd_cost->rate == INT_MAX) return;

  // Record the winning mode info into the pick-mode context.
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
3380
3381 static inline void calc_target_weighted_pred(
3382 const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
3383 const uint8_t *above, int above_stride, const uint8_t *left,
3384 int left_stride);
3385
// Evaluates "skip mode" — a compound NEAREST_NEARESTMV prediction with
// COMPOUND_AVERAGE and no coded residual — against the best intra/inter mode
// found so far, and updates 'rd_cost' and 'search_state' if skip mode wins
// the rd comparison.
static inline void rd_pick_skip_mode(
    RD_STATS *rd_cost, InterModeSearchState *search_state,
    const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];

  x->compound_idx = 1;  // COMPOUND_AVERAGE
  RD_STATS skip_mode_rd_stats;
  av1_invalid_rd_stats(&skip_mode_rd_stats);

  // Skip mode requires both frame-level reference indices to be valid.
  if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
      skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
    return;
  }

  const MV_REFERENCE_FRAME ref_frame =
      LAST_FRAME + skip_mode_info->ref_frame_idx_0;
  const MV_REFERENCE_FRAME second_ref_frame =
      LAST_FRAME + skip_mode_info->ref_frame_idx_1;
  const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
  const THR_MODES mode_index =
      get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);

  if (mode_index == THR_INVALID) {
    return;
  }

  // Honor configuration / speed features that disable one-sided compound
  // prediction when all references are on the same side.
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
       cpi->sf.inter_sf.disable_onesided_comp) &&
      cpi->all_one_sided_refs) {
    return;
  }

  mbmi->mode = this_mode;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->ref_frame[0] = ref_frame;
  mbmi->ref_frame[1] = second_ref_frame;
  const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  // Lazily populate the MV reference stack for this compound pair if the
  // earlier mode search has not already done so (UINT8_MAX marks "unset").
  if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
    MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
    if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
        mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
      return;
    }
    av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                     mbmi_ext->mode_context);
    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
  }

  assert(this_mode == NEAREST_NEARESTMV);
  if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
    return;
  }

  // Fill in the remaining fields skip mode implies.
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
  mbmi->comp_group_idx = 0;
  mbmi->compound_idx = x->compound_idx;
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->ref_mv_idx = 0;
  mbmi->skip_mode = mbmi->skip_txfm = 1;
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;

  set_default_interp_filters(mbmi, cm->features.interp_filter);

  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
  for (int i = 0; i < num_planes; i++) {
    xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
    xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
  }

  BUFFER_SET orig_dst;
  for (int i = 0; i < num_planes; i++) {
    orig_dst.plane[i] = xd->plane[i].dst.buf;
    orig_dst.stride[i] = xd->plane[i].dst.stride;
  }

  // Compare the use of skip_mode with the best intra/inter mode obtained.
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
  int64_t best_intra_inter_mode_cost = INT64_MAX;
  if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
    const ModeCosts *mode_costs = &x->mode_costs;
    best_intra_inter_mode_cost = RDCOST(
        x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
        rd_cost->dist);
    // Account for non-skip mode rate in total rd stats
    rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
    av1_rd_cost_update(x->rdmult, rd_cost);
  }

  // Obtain the rdcost for skip_mode.
  skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst,
               best_intra_inter_mode_cost);

  // In lossless segments skip mode may only win with zero distortion.
  if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
      (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
    assert(mode_index != THR_INVALID);
    // mbmi->skip_mode is already 1 at this point, so the struct copy carries
    // it; the previous separate store of best_mbmode.skip_mode before this
    // copy was a dead write and has been removed.
    search_state->best_mbmode = *mbmi;
    memset(search_state->best_mbmode.inter_tx_size,
           search_state->best_mbmode.tx_size,
           sizeof(search_state->best_mbmode.inter_tx_size));
    set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
                  search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
                  xd);
    search_state->best_mode_index = mode_index;

    // Update rd_cost
    rd_cost->rate = skip_mode_rd_stats.rate;
    rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
    rd_cost->rdcost = skip_mode_rd_stats.rdcost;

    search_state->best_rd = rd_cost->rdcost;
    search_state->best_skip2 = 1;
    search_state->best_mode_skippable = 1;

    x->txfm_search_info.skip_txfm = 1;
  }
}
3514
3515 // Get winner mode stats of given mode index
get_winner_mode_stats(MACROBLOCK * x,MB_MODE_INFO * best_mbmode,RD_STATS * best_rd_cost,int best_rate_y,int best_rate_uv,THR_MODES * best_mode_index,RD_STATS ** winner_rd_cost,int * winner_rate_y,int * winner_rate_uv,THR_MODES * winner_mode_index,MULTI_WINNER_MODE_TYPE multi_winner_mode_type,int mode_idx)3516 static inline MB_MODE_INFO *get_winner_mode_stats(
3517 MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3518 int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3519 RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3520 THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3521 int mode_idx) {
3522 MB_MODE_INFO *winner_mbmi;
3523 if (multi_winner_mode_type) {
3524 assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3525 WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3526 winner_mbmi = &winner_mode_stat->mbmi;
3527
3528 *winner_rd_cost = &winner_mode_stat->rd_cost;
3529 *winner_rate_y = winner_mode_stat->rate_y;
3530 *winner_rate_uv = winner_mode_stat->rate_uv;
3531 *winner_mode_index = winner_mode_stat->mode_index;
3532 } else {
3533 winner_mbmi = best_mbmode;
3534 *winner_rd_cost = best_rd_cost;
3535 *winner_rate_y = best_rate_y;
3536 *winner_rate_uv = best_rate_uv;
3537 *winner_mode_index = *best_mode_index;
3538 }
3539 return winner_mbmi;
3540 }
3541
// speed feature: fast intra/inter transform type search
// Used for speed >= 2
// When this speed feature is on, in rd mode search, only DCT is used.
// After the mode is determined, this function is called, to select
// transform types and get accurate rdcost.
// Re-evaluates up to 'winner_mode_count' winner candidates with full
// transform-type search, and updates best_mbmode / rd_cost / ctx when a
// candidate's refined rd beats the current best.
static inline void refine_winner_mode_tx(
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int64_t best_rd;
  const int num_planes = av1_num_planes(cm);

  if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
                                         rd_cost->skip_txfm))
    return;

  // Set params for winner mode evaluation
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);

  // No best mode identified so far
  if (*best_mode_index == THR_INVALID) return;

  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
    RD_STATS *winner_rd_stats = NULL;
    int winner_rate_y = 0, winner_rate_uv = 0;
    THR_MODES winner_mode_index = 0;

    // TODO(any): Combine best mode and multi-winner mode processing paths
    // Get winner mode stats for current mode index
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);

    // Only refine valid, non-lossless candidates that the speed features
    // allow to be reprocessed.
    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
        winner_mode_index != THR_INVALID &&
        is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
                                          rd_cost->skip_txfm)) {
      RD_STATS rd_stats = *winner_rd_stats;
      int skip_blk = 0;
      RD_STATS rd_stats_y, rd_stats_uv;
      const int skip_ctx = av1_get_skip_txfm_context(xd);

      // Adopt the winner candidate as the current mode info for re-search.
      *mbmi = *winner_mbmi;

      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

      // Select prediction reference frames.
      for (int i = 0; i < num_planes; i++) {
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
        if (has_second_ref(mbmi))
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
      }

      if (is_inter_mode(mbmi->mode)) {
        const int mi_row = xd->mi_row;
        const int mi_col = xd->mi_col;
        bool is_predictor_built = false;
        const PREDICTION_MODE prediction_mode = mbmi->mode;
        // Do interpolation filter search for realtime mode if applicable.
        if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
            cpi->oxcf.mode == REALTIME &&
            cm->current_frame.reference_mode == SINGLE_REFERENCE &&
            is_inter_mode(prediction_mode) &&
            mbmi->motion_mode == SIMPLE_TRANSLATION &&
            !is_inter_compound_mode(prediction_mode)) {
          is_predictor_built =
              fast_interp_search(cpi, x, mi_row, mi_col, bsize);
        }
        if (!is_predictor_built) {
          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                        av1_num_planes(cm) - 1);
        }
        if (mbmi->motion_mode == OBMC_CAUSAL)
          av1_build_obmc_inter_predictors_sb(cm, xd);

        av1_subtract_plane(x, bsize, 0);
        // Recursive tx-size search when TX_MODE_SELECT is on; otherwise a
        // uniform tx size with per-block skip flags propagated.
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
            !xd->lossless[mbmi->segment_id]) {
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                              INT64_MAX);
          assert(rd_stats_y.rate != INT_MAX);
        } else {
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                            INT64_MAX);
          memset(mbmi->inter_tx_size, mbmi->tx_size,
                 sizeof(mbmi->inter_tx_size));
          for (int i = 0; i < xd->height * xd->width; ++i)
            set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
        }
      } else {
        // Intra candidates only need the uniform tx-size luma search.
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                          INT64_MAX);
      }

      if (num_planes > 1) {
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
      } else {
        av1_init_rd_stats(&rd_stats_uv);
      }

      // For inter modes, decide between coding the residual and signalling
      // skip, whichever has the lower rd cost.
      const ModeCosts *mode_costs = &x->mode_costs;
      if (is_inter_mode(mbmi->mode) &&
          RDCOST(x->rdmult,
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
                     rd_stats_uv.rate,
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
        skip_blk = 1;
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
        rd_stats_uv.rate = 0;
        rd_stats_y.dist = rd_stats_y.sse;
        rd_stats_uv.dist = rd_stats_uv.sse;
      } else {
        skip_blk = 0;
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
      }
      // Total rate: replace the winner's old luma/chroma rates with the
      // refined ones.
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
                      winner_rate_y - winner_rate_uv;
      int64_t this_rd =
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
      if (best_rd > this_rd) {
        // Refined candidate wins: promote it to the overall best.
        *best_mbmode = *mbmi;
        *best_mode_index = winner_mode_index;
        av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
        rd_cost->rate = this_rate;
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        *best_skip2 = skip_blk;
      }
    }
  }
}
3686
/*!\cond */
// Collection of masks describing which prediction modes and reference-frame
// combinations the mode search should skip.
typedef struct {
  // Mask for each reference frame, specifying which prediction modes to NOT try
  // during search.
  uint32_t pred_modes[REF_FRAMES];
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
  // reference frames (i, j).
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
  // (NONE_FRAME).
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
} mode_skip_mask_t;
/*!\endcond */
3699
3700 // Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
disable_reference(MV_REFERENCE_FRAME ref,bool ref_combo[REF_FRAMES][REF_FRAMES+1])3701 static inline void disable_reference(
3702 MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3703 for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3704 ref_combo[ref][ref2 + 1] = true;
3705 }
3706 }
3707
3708 // Update 'ref_combo' mask to disable all inter references except ALTREF.
disable_inter_references_except_altref(bool ref_combo[REF_FRAMES][REF_FRAMES+1])3709 static inline void disable_inter_references_except_altref(
3710 bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3711 disable_reference(LAST_FRAME, ref_combo);
3712 disable_reference(LAST2_FRAME, ref_combo);
3713 disable_reference(LAST3_FRAME, ref_combo);
3714 disable_reference(GOLDEN_FRAME, ref_combo);
3715 disable_reference(BWDREF_FRAME, ref_combo);
3716 disable_reference(ALTREF2_FRAME, ref_combo);
3717 }
3718
// Reduced set of reference combinations tried when the reduced reference set
// is enabled. Each entry is {ref0, ref1}; NONE_FRAME as ref1 marks a
// single-reference combination.
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
};

// Which pool of reference combinations the mode search draws from.
typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
3731
// Initializes 'mask' so that nothing is skipped (REF_SET_FULL), or so that
// only an explicitly enabled subset of reference combinations is searched
// (REF_SET_REDUCED / REF_SET_REALTIME).
static inline void default_skip_mask(mode_skip_mask_t *mask, REF_SET ref_set) {
  if (ref_set == REF_SET_FULL) {
    // Everything available by default.
    memset(mask, 0, sizeof(*mask));
  } else {
    // All modes available by default.
    memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
    // All references disabled first.
    for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
      for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
        mask->ref_combo[ref1][ref2 + 1] = true;
      }
    }
    // Initialize both locals so that an invalid 'ref_set' cannot leave
    // 'ref_set_combos' indeterminate in release builds (the assert in the
    // default case compiles out under NDEBUG); with num_ref_combos == 0 the
    // enabling loop below simply does nothing.
    const MV_REFERENCE_FRAME(*ref_set_combos)[2] = NULL;
    int num_ref_combos = 0;

    // Then enable reduced set of references explicitly.
    switch (ref_set) {
      case REF_SET_REDUCED:
        ref_set_combos = reduced_ref_combos;
        num_ref_combos =
            (int)(sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]));
        break;
      case REF_SET_REALTIME:
        ref_set_combos = real_time_ref_combos;
        num_ref_combos = (int)(sizeof(real_time_ref_combos) /
                               sizeof(real_time_ref_combos[0]));
        break;
      default: assert(0);
    }

    for (int i = 0; i < num_ref_combos; ++i) {
      const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
      mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
    }
  }
}
3769
init_mode_skip_mask(mode_skip_mask_t * mask,const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize)3770 static inline void init_mode_skip_mask(mode_skip_mask_t *mask,
3771 const AV1_COMP *cpi, MACROBLOCK *x,
3772 BLOCK_SIZE bsize) {
3773 const AV1_COMMON *const cm = &cpi->common;
3774 const struct segmentation *const seg = &cm->seg;
3775 MACROBLOCKD *const xd = &x->e_mbd;
3776 MB_MODE_INFO *const mbmi = xd->mi[0];
3777 unsigned char segment_id = mbmi->segment_id;
3778 const SPEED_FEATURES *const sf = &cpi->sf;
3779 const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
3780 REF_SET ref_set = REF_SET_FULL;
3781
3782 if (sf->rt_sf.use_real_time_ref_set)
3783 ref_set = REF_SET_REALTIME;
3784 else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
3785 ref_set = REF_SET_REDUCED;
3786
3787 default_skip_mask(mask, ref_set);
3788
3789 int min_pred_mv_sad = INT_MAX;
3790 MV_REFERENCE_FRAME ref_frame;
3791 if (ref_set == REF_SET_REALTIME) {
3792 // For real-time encoding, we only look at a subset of ref frames. So the
3793 // threshold for pruning should be computed from this subset as well.
3794 const int num_rt_refs =
3795 sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
3796 for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
3797 const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
3798 if (ref != INTRA_FRAME) {
3799 min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
3800 }
3801 }
3802 } else {
3803 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
3804 min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
3805 }
3806
3807 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3808 if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
3809 // Skip checking missing reference in both single and compound reference
3810 // modes.
3811 disable_reference(ref_frame, mask->ref_combo);
3812 } else {
3813 // Skip fixed mv modes for poor references
3814 if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
3815 mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3816 }
3817 }
3818 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3819 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3820 // Reference not used for the segment.
3821 disable_reference(ref_frame, mask->ref_combo);
3822 }
3823 }
3824 // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
3825 // is disabled for this segment. This is to prevent the possibility that we
3826 // end up unable to pick any mode.
3827 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3828 // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
3829 // unless ARNR filtering is enabled in which case we want
3830 // an unfiltered alternative. We allow near/nearest as well
3831 // because they may result in zero-zero MVs but be cheaper.
3832 if (cpi->rc.is_src_frame_alt_ref &&
3833 (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
3834 disable_inter_references_except_altref(mask->ref_combo);
3835
3836 mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
3837 const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
3838 int_mv near_mv, nearest_mv, global_mv;
3839 get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
3840 &x->mbmi_ext);
3841 get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
3842 get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
3843
3844 if (near_mv.as_int != global_mv.as_int)
3845 mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
3846 if (nearest_mv.as_int != global_mv.as_int)
3847 mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
3848 }
3849 }
3850
3851 if (cpi->rc.is_src_frame_alt_ref) {
3852 if (inter_sf->alt_ref_search_fp &&
3853 (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
3854 mask->pred_modes[ALTREF_FRAME] = 0;
3855 disable_inter_references_except_altref(mask->ref_combo);
3856 disable_reference(INTRA_FRAME, mask->ref_combo);
3857 }
3858 }
3859
3860 if (inter_sf->alt_ref_search_fp) {
3861 if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
3862 int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
3863 // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
3864 // those are past frames
3865 MV_REFERENCE_FRAME start_frame =
3866 inter_sf->alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
3867 for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
3868 if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
3869 0) {
3870 // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
3871 // to the relative dist of LAST_FRAME.
3872 if (inter_sf->alt_ref_search_fp == 1 &&
3873 (abs(cpi->ref_frame_dist_info
3874 .ref_relative_dist[ref_frame - LAST_FRAME]) >
3875 1.5 * abs(cpi->ref_frame_dist_info
3876 .ref_relative_dist[LAST_FRAME - LAST_FRAME]))) {
3877 continue;
3878 }
3879 if (x->pred_mv_sad[ref_frame] > sad_thresh)
3880 mask->pred_modes[ref_frame] |= INTER_ALL;
3881 }
3882 }
3883 }
3884 }
3885
3886 if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
3887 if (x->best_pred_mv_sad[0] < INT_MAX) {
3888 int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
3889 const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };
3890
3891 // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
3892 for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
3893 ref_frame = prune_ref_list[ref_idx];
3894 if (x->pred_mv_sad[ref_frame] > sad_thresh)
3895 mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3896 }
3897 }
3898 }
3899
3900 if (bsize > sf->part_sf.max_intra_bsize) {
3901 disable_reference(INTRA_FRAME, mask->ref_combo);
3902 }
3903
3904 if (!cpi->oxcf.tool_cfg.enable_global_motion) {
3905 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3906 mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
3907 mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
3908 }
3909 }
3910
3911 mask->pred_modes[INTRA_FRAME] |=
3912 ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
3913
3914 // Prune reference frames which are not the closest to the current
3915 // frame and with large pred_mv_sad.
3916 if (inter_sf->prune_single_ref) {
3917 assert(inter_sf->prune_single_ref > 0 && inter_sf->prune_single_ref < 3);
3918 const double prune_threshes[2] = { 1.20, 1.05 };
3919
3920 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3921 const RefFrameDistanceInfo *const ref_frame_dist_info =
3922 &cpi->ref_frame_dist_info;
3923 const int is_closest_ref =
3924 (ref_frame == ref_frame_dist_info->nearest_past_ref) ||
3925 (ref_frame == ref_frame_dist_info->nearest_future_ref);
3926
3927 if (!is_closest_ref) {
3928 const int dir =
3929 (ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] < 0)
3930 ? 0
3931 : 1;
3932 if (x->best_pred_mv_sad[dir] < INT_MAX &&
3933 x->pred_mv_sad[ref_frame] >
3934 prune_threshes[inter_sf->prune_single_ref - 1] *
3935 x->best_pred_mv_sad[dir])
3936 mask->pred_modes[ref_frame] |= INTER_SINGLE_ALL;
3937 }
3938 }
3939 }
3940 }
3941
// Splits the OBMC above/left scratch buffers into three per-plane pointers.
// For high bit depth the buffers hold uint16_t samples, so byte offsets are
// scaled by sizeof(uint16_t) and wrapped with CONVERT_TO_BYTEPTR.
static inline void init_neighbor_pred_buf(const OBMCBuffer *const obmc_buffer,
                                          HandleInterModeArgs *const args,
                                          int is_hbd) {
  // Element offsets of the three planes inside each scratch buffer.
  const int plane_offset[3] = { 0, MAX_SB_SQUARE >> 1, MAX_SB_SQUARE };
  for (int plane = 0; plane < 3; ++plane) {
    if (is_hbd) {
      const int byte_offset = plane_offset[plane] * (int)sizeof(uint16_t);
      args->above_pred_buf[plane] =
          CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + byte_offset);
      args->left_pred_buf[plane] =
          CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + byte_offset);
    } else {
      args->above_pred_buf[plane] = obmc_buffer->above_pred + plane_offset[plane];
      args->left_pred_buf[plane] = obmc_buffer->left_pred + plane_offset[plane];
    }
  }
}
3966
// Returns 1 if 'ref_frame' (a single or compound reference type) should be
// pruned for this block, 0 otherwise.
static inline int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
                                  MV_REFERENCE_FRAME ref_frame) {
  // Encoder-level mask: a set bit disables the reference outright.
  if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;

  // Expand the reference type into its underlying pair and apply the
  // selective-reference-frame speed feature.
  const AV1_COMMON *const cm = &cpi->common;
  MV_REFERENCE_FRAME rf[2];
  av1_set_ref_frame(rf, ref_frame);
  return prune_ref_by_selective_ref_frame(
             cpi, x, rf, cm->cur_frame->ref_display_order_hint) != 0;
}
3982
is_ref_frame_used_by_compound_ref(int ref_frame,int skip_ref_frame_mask)3983 static inline int is_ref_frame_used_by_compound_ref(int ref_frame,
3984 int skip_ref_frame_mask) {
3985 for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
3986 if (!(skip_ref_frame_mask & (1 << r))) {
3987 const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
3988 if (rf[0] == ref_frame || rf[1] == ref_frame) {
3989 return 1;
3990 }
3991 }
3992 }
3993 return 0;
3994 }
3995
// Returns 1 if the cached mode info (if any) references 'ref_frame'.
// For a single reference, either slot of the cached pair may match; for a
// compound reference type, the whole cached compound type must match.
static inline int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
                                             const MB_MODE_INFO *mi_cache) {
  // No cached block => nothing can match.
  if (mi_cache == NULL) return 0;

  if (ref_frame < REF_FRAMES) {
    // Single-reference query.
    return ref_frame == mi_cache->ref_frame[0] ||
           ref_frame == mi_cache->ref_frame[1];
  }

  // Compound-reference query: compare against the cached compound type.
  const MV_REFERENCE_FRAME cached_type =
      av1_ref_frame_type(mi_cache->ref_frame);
  return ref_frame == cached_type;
}
4011
// Sets up all per-block state needed before the inter-mode RD search:
// OBMC scratch buffers, reference-frame signalling costs, per-reference MV
// candidates and SADs, OBMC neighbor predictions, and the mode-skip mask.
// Please add/modify parameter setting in this function, making it consistent
// and easy to read and maintain.
static inline void set_params_rd_pick_inter_mode(
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  unsigned char segment_id = mbmi->segment_id;

  // Prepare the OBMC above/left prediction buffers and estimate the bit cost
  // of signalling each single/compound reference for this segment.
  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
  av1_collect_neighbors_ref_counts(xd);
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // best_pred_mv_sad[0] tracks the smallest pred-MV SAD over past references,
  // [1] over future references; both start invalid.
  x->best_pred_mv_sad[0] = INT_MAX;
  x->best_pred_mv_sad[1] = INT_MAX;

  // Initialize per-reference state for all single references, and populate
  // MV candidates / pred_mv_sad for the ones that will actually be searched.
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
       ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    mbmi_ext->mode_context[ref_frame] = 0;
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
      // Skip the ref frame if the mask says skip and the ref is not used by
      // compound ref.
      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
    }
    // Only needed by speed features that prune based on pred-MV SAD.
    if (cpi->sf.inter_sf.alt_ref_search_fp ||
        cpi->sf.inter_sf.prune_single_ref ||
        cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
      // Store the best pred_mv_sad across all past frames
      if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
          0)
        x->best_pred_mv_sad[0] =
            AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
      else
        // Store the best pred_mv_sad across all future frames
        x->best_pred_mv_sad[1] =
            AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
    }
  }

  // Populate MV candidates for compound reference types when compound
  // prediction is possible for this block.
  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
    // No second reference on RT ref set, so no need to initialize
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
      mbmi_ext->mode_context[ref_frame] = 0;
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
      // Both underlying references must be available this frame.
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
        continue;
      }

      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      // Ref mv list population is not required, when compound references are
      // pruned.
      if (prune_ref_frame(cpi, x, ref_frame)) continue;

      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                       mbmi_ext->mode_context);
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
    }
  }

  // Decide whether OBMC should be searched, based on (possibly simulated)
  // frame-level OBMC probabilities, and if so build the neighbor predictions.
  av1_count_overlappable_neighbors(cm, xd);
  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  int use_actual_frame_probs = 1;
  int prune_obmc;
#if CONFIG_FPMT_TEST
  // In parallel-simulation test mode, use the snapshotted probabilities.
  use_actual_frame_probs =
      (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
  if (!use_actual_frame_probs) {
    prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
  }
#endif
  if (use_actual_frame_probs) {
    prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
  }
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
    if (check_num_overlappable_neighbors(mbmi) &&
        is_motion_variation_allowed_bsize(bsize)) {
      // Build the above (full width, half height) and left (half width, full
      // height) neighbor predictions used by OBMC.
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                       MAX_SB_SIZE >> 1 };
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                        MAX_SB_SIZE >> 1 };
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
                                          dst_width1, dst_height1,
                                          args->above_pred_stride);
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
                                         dst_width2, dst_height2,
                                         args->left_pred_stride);
      const int num_planes = av1_num_planes(cm);
      // Restore the destination planes clobbered by the neighbor predictions.
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
                           mi_col, 0, num_planes);
      calc_target_weighted_pred(
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
          args->left_pred_buf[0], args->left_pred_stride[0]);
    }
  }

  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);

  // Set params for mode evaluation
  set_mode_eval_params(cpi, x, MODE_EVAL);

  x->comp_rd_stats_idx = 0;

  // No single-reference SSE results yet.
  for (int idx = 0; idx < REF_FRAMES; idx++) {
    args->best_single_sse_in_refs[idx] = INT32_MAX;
  }
}
4147
// Resets the single-inter-mode bookkeeping tables (simple rd, modelled rd,
// merged ordering, per-reference bests and counters) to their invalid state.
static inline void init_single_inter_mode_search_state(
    InterModeSearchState *search_state) {
  for (int dir = 0; dir < 2; ++dir) {
    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (int ref = 0; ref < FWD_REFS; ++ref) {
        SingleInterModeState *const simple =
            &search_state->single_state[dir][mode][ref];
        SingleInterModeState *const modelled =
            &search_state->single_state_modelled[dir][mode][ref];
        simple->ref_frame = NONE_FRAME;
        simple->rd = INT64_MAX;
        modelled->ref_frame = NONE_FRAME;
        modelled->rd = INT64_MAX;
        search_state->single_rd_order[dir][mode][ref] = NONE_FRAME;
      }
    }
  }

  // No best single-reference result recorded for any reference yet.
  for (int ref = 0; ref < REF_FRAMES; ++ref) {
    search_state->best_single_rd[ref] = INT64_MAX;
    search_state->best_single_mode[ref] = PRED_MODE_INVALID;
  }
  av1_zero(search_state->single_state_cnt);
  av1_zero(search_state->single_state_modelled_cnt);
}
4175
// Initializes the inter-mode search bookkeeping: best-so-far rd values, mode
// thresholds derived from per-segment rd thresholds and adaptive frequency
// factors, and the modelled/simple rd tables (compound entries only when
// compound prediction is enabled for this frame).
static inline void init_inter_mode_search_state(
    InterModeSearchState *search_state, const AV1_COMP *cpi,
    const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
  init_intra_mode_search_state(&search_state->intra_search_state);
  av1_invalid_rd_stats(&search_state->best_y_rdcost);

  // Seed the search with the caller-provided best rd; everything else starts
  // at its invalid/worst value.
  search_state->best_rd = best_rd_so_far;
  search_state->best_skip_rd[0] = INT64_MAX;
  search_state->best_skip_rd[1] = INT64_MAX;

  av1_zero(search_state->best_mbmode);

  search_state->best_rate_y = INT_MAX;

  search_state->best_rate_uv = INT_MAX;

  search_state->best_mode_skippable = 0;

  search_state->best_skip2 = 0;

  search_state->best_mode_index = THR_INVALID;

  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const unsigned char segment_id = mbmi->segment_id;

  search_state->num_available_refs = 0;
  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
  memset(search_state->dist_order_refs, -1,
         sizeof(search_state->dist_order_refs));

  // NEWMV modes and earlier are never thresholded; the remaining single-ref
  // modes get a threshold scaled by the adaptive frequency factor.
  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
    search_state->mode_threshold[i] = 0;
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
  for (int i = LAST_NEW_MV_INDEX + 1; i < SINGLE_REF_MODE_END; ++i)
    search_state->mode_threshold[i] =
        ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
        RD_THRESH_FAC_FRAC_BITS;

  search_state->best_intra_rd = INT64_MAX;

  search_state->best_pred_sse = UINT_MAX;

  // Invalidate the single-inter-mode NEWMV caches and rd tables.
  av1_zero(search_state->single_newmv);
  av1_zero(search_state->single_newmv_rate);
  av1_zero(search_state->single_newmv_valid);
  for (int i = SINGLE_INTER_MODE_START; i < SINGLE_INTER_MODE_END; ++i) {
    for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
      for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
        search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
        search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
      }
    }
  }

  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] = INT64_MAX;
  }

  // Compound-mode state is only needed when this frame allows more than
  // single-reference prediction.
  if (cpi->common.current_frame.reference_mode != SINGLE_REFERENCE) {
    for (int i = SINGLE_REF_MODE_END; i < THR_INTER_MODE_END; ++i)
      search_state->mode_threshold[i] =
          ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
          RD_THRESH_FAC_FRAC_BITS;

    for (int i = COMP_INTER_MODE_START; i < COMP_INTER_MODE_END; ++i) {
      for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
        for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
          search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
          search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
        }
      }
    }

    init_single_inter_mode_search_state(search_state);
  }
}
4253
mask_says_skip(const mode_skip_mask_t * mode_skip_mask,const MV_REFERENCE_FRAME * ref_frame,const PREDICTION_MODE this_mode)4254 static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
4255 const MV_REFERENCE_FRAME *ref_frame,
4256 const PREDICTION_MODE this_mode) {
4257 if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
4258 return true;
4259 }
4260
4261 return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
4262 }
4263
// Returns 1 if the (mode, reference pair) combination is illegal or useless
// for this block — e.g. compound prediction where it is disallowed, a missing
// second reference, or an inter-intra combination the block/mode does not
// support — and 0 otherwise.
static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
                                      BLOCK_SIZE bsize,
                                      PREDICTION_MODE curr_mode,
                                      const MV_REFERENCE_FRAME *ref_frames) {
  const int comp_pred = ref_frames[1] > INTRA_FRAME;
  if (comp_pred) {
    // The block size must permit compound prediction.
    if (!is_comp_ref_allowed(bsize)) return 1;
    // The second reference must be available for this frame.
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]])) {
      return 1;
    }

    const AV1_COMMON *const cm = &cpi->common;
    // Intra-only frames have no references to combine.
    if (frame_is_intra_only(cm)) return 1;

    const CurrentFrame *const current_frame = &cm->current_frame;
    if (current_frame->reference_mode == SINGLE_REFERENCE) return 1;

    const struct segmentation *const seg = &cm->seg;
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
    // Do not allow compound prediction if the segment level reference frame
    // feature is in use as in this case there can only be one reference.
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
  }

  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
    // Mode must be compatible with inter-intra prediction.
    if (!is_interintra_allowed_bsize(bsize)) return 1;
    if (!is_interintra_allowed_mode(curr_mode)) return 1;
  }

  return 0;
}
4296
// Returns the union of the reference-frame masks recorded for every mi unit
// covered by this block within its superblock.
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
                                        BLOCK_SIZE bsize, int mib_size) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  // Position of the block inside its superblock, in mi units.
  const int sb_mask = mib_size - 1;
  const int row_start = xd->mi_row & sb_mask;
  const int col_start = xd->mi_col & sb_mask;
  const int num_rows = mi_size_high[bsize];
  const int num_cols = mi_size_wide[bsize];
  int mask = 0;
  // NOTE(review): picked_ref_frames_mask appears to use a row stride of 32 mi
  // units (presumably the max superblock width in mi) — confirm the constant.
  for (int r = row_start; r < row_start + num_rows; ++r) {
    for (int c = col_start; c < col_start + num_cols; ++c) {
      mask |= x->picked_ref_frames_mask[r * 32 + c];
    }
  }
  return mask;
}
4315
4316 // Check if reference frame pair of the current block matches with the given
4317 // block.
match_ref_frame_pair(const MB_MODE_INFO * mbmi,const MV_REFERENCE_FRAME * ref_frames)4318 static inline int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4319 const MV_REFERENCE_FRAME *ref_frames) {
4320 return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4321 (ref_frames[1] == mbmi->ref_frame[1]));
4322 }
4323
// Order-independent pruning of a candidate (mode, reference pair).
// Case 1: return 0, means don't skip this mode
// Case 2: return 1, means skip this mode completely
// Case 3: return 2, means skip compound only, but still try single motion modes
static int inter_mode_search_order_independent_skip(
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
    InterModeSearchState *search_state, int skip_ref_frame_mask,
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
  // Hard skip if the precomputed mode/reference masks rule this out.
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
    return 1;
  }

  const int ref_type = av1_ref_frame_type(ref_frame);
  if (!cpi->sf.rt_sf.use_real_time_ref_set)
    if (prune_ref_frame(cpi, x, ref_type)) return 1;

  // This is only used in motion vector unit test.
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
      ref_frame[0] == INTRA_FRAME)
    return 1;

  const AV1_COMMON *const cm = &cpi->common;
  // Skip modes whose resulting MVs duplicate an already-searched candidate.
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
    return 1;
  }

  // Reuse the prediction mode in cache: only search candidates compatible
  // with the cached winning mode.
  if (x->use_mb_mode_cache) {
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
    const PREDICTION_MODE cached_mode = cached_mi->mode;
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;

    // If the cached mode is intra, then we just need to match the mode.
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
      return 1;
    }

    // If the cached mode is single inter mode, then we match the mode and
    // reference frame.
    if (cached_mode_is_single) {
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
        return 1;
      }
    } else {
      // If the cached mode is compound, then we need to consider several cases.
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
      if (mode_is_single) {
        // If the mode is single, we know the modes can't match. But we might
        // still want to search it if compound mode depends on the current mode.
        int skip_motion_mode_only = 0;
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
        } else if (cached_mode == NEW_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
                                   ref_frame[0] == cached_frame[1]);
        }

        // Return 2 when only the motion-mode search should be skipped.
        return 1 + skip_motion_mode_only;
      } else {
        // If both modes are compound, then everything must match.
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
            ref_frame[1] != cached_frame[1]) {
          return 1;
        }
      }
    }
  }

  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
  // If no valid mode has been found so far in PARTITION_NONE when finding a
  // valid partition is required, do not skip mode.
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
      x->must_find_valid_partition)
    return 0;

  const SPEED_FEATURES *const sf = &cpi->sf;
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
  // frames
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
      (mode == NEAR_NEARMV || mode == NEARMV)) {
    const MACROBLOCKD *const xd = &x->e_mbd;
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
        xd->up_available) {
      // Rows: aggressiveness level; columns: qindex sub-range (low/mid/high).
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
                                                    { 1, 1, 0 },
                                                    { 2, 1, 0 } };
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;

      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
             qindex_sub_range < 3);
      const int num_ref_frame_pair_match_thresh =
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
                    [qindex_sub_range];

      assert(num_ref_frame_pair_match_thresh <= 2 &&
             num_ref_frame_pair_match_thresh >= 0);
      int num_ref_frame_pair_match = 0;

      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
      num_ref_frame_pair_match +=
          match_ref_frame_pair(xd->above_mbmi, ref_frame);

      // Pruning based on ref frame pair match with neighbors.
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
    }
  }

  int skip_motion_mode = 0;
  if (mbmi->partition != PARTITION_NONE) {
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
    if (ref_type <= ALTREF_FRAME && skip_ref) {
      // Since the compound ref modes depends on the motion estimation result of
      // two single ref modes (best mv of single ref modes as the start point),
      // if current single ref mode is marked skip, we need to check if it will
      // be used in compound ref modes.
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
        // Found a not skipped compound ref mode which contains current
        // single ref. So this single ref can't be skipped completely
        // Just skip its motion mode search, still try its simple
        // transition mode.
        skip_motion_mode = 1;
        skip_ref = 0;
      }
    }
    // If we are reusing the prediction from cache, and the current frame is
    // required by the cache, then we cannot prune it.
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
      skip_ref = 0;
      // If the cache only needs the current reference type for compound
      // prediction, then we can skip motion mode search.
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
    }
    if (skip_ref) return 1;
  }

  if (ref_frame[0] == INTRA_FRAME) {
    if (mode != DC_PRED) {
      // Disable intra modes other than DC_PRED for blocks with low variance
      // Threshold for intra skipping based on source variance
      // TODO(debargha): Specialize the threshold for super block sizes
      const unsigned int skip_intra_var_thresh = 64;
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
          x->source_variance < skip_intra_var_thresh)
        return 1;
    }
  }

  if (skip_motion_mode) return 2;

  return 0;
}
4478
// Resets the block's mode info to a clean starting state for the given
// prediction mode and reference pair: no palette, no filter-intra, zero MVs,
// simple translation, default interpolation filters.
static inline void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
                             const MV_REFERENCE_FRAME *ref_frames,
                             const AV1_COMMON *cm) {
  mbmi->mode = curr_mode;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->ref_mv_idx = 0;
  mbmi->ref_frame[0] = ref_frames[0];
  mbmi->ref_frame[1] = ref_frames[1];
  PALETTE_MODE_INFO *const palette = &mbmi->palette_mode_info;
  palette->palette_size[0] = 0;
  palette->palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->mv[1].as_int = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  // One below the first valid inter-intra mode marks "unset".
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
  set_default_interp_filters(mbmi, cm->features.interp_filter);
}
4496
// Records the just-searched single-reference mode into the per-direction,
// per-mode state lists, keeping both the simple-rd and modelled-rd lists
// sorted in increasing rd order.
static inline void collect_single_states(MACROBLOCK *x,
                                         InterModeSearchState *search_state,
                                         const MB_MODE_INFO *const mbmi) {
  const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
  const PREDICTION_MODE this_mode = mbmi->mode;
  // Past references go to direction 0, future references to direction 1.
  const int dir = (ref_frame <= GOLDEN_FRAME) ? 0 : 1;
  const int mode_offset = INTER_OFFSET(this_mode);
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);

  // Best simple rd across all searched ref-mv candidates.
  int64_t best_simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
  for (int idx = 1; idx < ref_set; ++idx) {
    best_simple_rd = AOMMIN(best_simple_rd,
                            search_state->simple_rd[this_mode][idx][ref_frame]);
  }

  // Sorted insert into the simple-rd state list.
  SingleInterModeState *const list_s =
      search_state->single_state[dir][mode_offset];
  int pos = search_state->single_state_cnt[dir][mode_offset];
  while (pos > 0 && list_s[pos - 1].rd > best_simple_rd) {
    list_s[pos] = list_s[pos - 1];
    --pos;
  }
  list_s[pos].rd = best_simple_rd;
  list_s[pos].ref_frame = ref_frame;
  list_s[pos].valid = 1;
  search_state->single_state_cnt[dir][mode_offset]++;

  // Best modelled rd across all searched ref-mv candidates.
  int64_t best_modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
  for (int idx = 1; idx < ref_set; ++idx) {
    best_modelled_rd = AOMMIN(
        best_modelled_rd, search_state->modelled_rd[this_mode][idx][ref_frame]);
  }

  // Sorted insert into the modelled-rd state list.
  SingleInterModeState *const list_m =
      search_state->single_state_modelled[dir][mode_offset];
  pos = search_state->single_state_modelled_cnt[dir][mode_offset];
  while (pos > 0 && list_m[pos - 1].rd > best_modelled_rd) {
    list_m[pos] = list_m[pos - 1];
    --pos;
  }
  list_m[pos].rd = best_modelled_rd;
  list_m[pos].ref_frame = ref_frame;
  list_m[pos].valid = 1;
  search_state->single_state_modelled_cnt[dir][mode_offset]++;
}
4542
// Post-processes the collected single-reference results: invalidates
// references whose rd is far worse than the best NEWMV/GLOBALMV rd, then
// builds a merged reference ordering per (direction, mode) — simple-rd
// entries first, then modelled-rd entries not already present.
static inline void analyze_single_states(const AV1_COMP *cpi,
                                         InterModeSearchState *search_state) {
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
  assert(prune_level >= 1);
  int i, j, dir, mode;

  for (dir = 0; dir < 2; ++dir) {
    int64_t best_rd;
    SingleInterModeState(*state)[FWD_REFS];
    // Higher prune levels use a stricter (larger) rd-ratio cutoff.
    const int prune_factor = prune_level >= 2 ? 6 : 5;

    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
    // reference frames for all the modes (NEARESTMV and NEARMV may not
    // have same motion vectors). Always keep the best of each mode
    // because it might form the best possible combination with other mode.
    state = search_state->single_state[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      // Entry 0 (the best) is always kept; only later entries can be pruned.
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }

    // Apply the same pruning to the modelled-rd lists.
    state = search_state->single_state_modelled[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }
  }

  // Ordering by simple rd first, then by modelled rd
  for (dir = 0; dir < 2; ++dir) {
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
      const int state_cnt_m =
          search_state->single_state_modelled_cnt[dir][mode];
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
      SingleInterModeState *state_m =
          search_state->single_state_modelled[dir][mode];
      int count = 0;
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
      // First take the valid simple-rd entries, best first.
      for (i = 0; i < state_cnt_s; ++i) {
        if (state_s[i].rd == INT64_MAX) break;
        if (state_s[i].valid) {
          search_state->single_rd_order[dir][mode][count++] =
              state_s[i].ref_frame;
        }
      }
      if (count >= max_candidates) continue;

      // Then fill remaining slots with modelled-rd entries that are new and
      // were not invalidated on the simple-rd side.
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
        if (state_m[i].rd == INT64_MAX) break;
        if (!state_m[i].valid) continue;
        const int ref_frame = state_m[i].ref_frame;
        int match = 0;
        // Check if existing already
        for (j = 0; j < count; ++j) {
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
            match = 1;
            break;
          }
        }
        if (match) continue;
        // Check if this ref_frame is removed in simple rd
        int valid = 1;
        for (j = 0; j < state_cnt_s; ++j) {
          if (ref_frame == state_s[j].ref_frame) {
            valid = state_s[j].valid;
            break;
          }
        }
        if (valid) {
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
        }
      }
    }
  }
}
4631
compound_skip_get_candidates(const AV1_COMP * cpi,const InterModeSearchState * search_state,const int dir,const PREDICTION_MODE mode)4632 static int compound_skip_get_candidates(
4633 const AV1_COMP *cpi, const InterModeSearchState *search_state,
4634 const int dir, const PREDICTION_MODE mode) {
4635 const int mode_offset = INTER_OFFSET(mode);
4636 const SingleInterModeState *state =
4637 search_state->single_state[dir][mode_offset];
4638 const SingleInterModeState *state_modelled =
4639 search_state->single_state_modelled[dir][mode_offset];
4640
4641 int max_candidates = 0;
4642 for (int i = 0; i < FWD_REFS; ++i) {
4643 if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4644 max_candidates++;
4645 }
4646
4647 int candidates = max_candidates;
4648 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4649 candidates = AOMMIN(2, max_candidates);
4650 }
4651 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4652 if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4653 state[0].ref_frame == state_modelled[0].ref_frame)
4654 candidates = 1;
4655 if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4656 }
4657
4658 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4659 // Limit the number of candidates to 1 in each direction for compound
4660 // prediction
4661 candidates = AOMMIN(1, candidates);
4662 }
4663 return candidates;
4664 }
4665
// Decides whether a compound mode can be skipped based on the statistics
// gathered while searching its two constituent single-reference modes.
// Returns 1 to skip the compound mode, 0 to keep it.
static int compound_skip_by_single_states(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
  // Single-reference modes making up each half of the compound mode.
  const int mode[2] = { compound_ref0_mode(this_mode),
                        compound_ref1_mode(this_mode) };
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
  // Direction index per reference: 0 for refs up to GOLDEN_FRAME, else 1.
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
  int ref_searched[2] = { 0, 0 };
  int ref_mv_match[2] = { 1, 1 };
  int i, j;

  // Determine whether each reference frame was evaluated during the
  // single-reference mode search (i.e. appears in its state list).
  for (i = 0; i < 2; ++i) {
    const SingleInterModeState *state =
        search_state->single_state[mode_dir[i]][mode_offset[i]];
    const int state_cnt =
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
    for (j = 0; j < state_cnt; ++j) {
      if (state[j].ref_frame == refs[i]) {
        ref_searched[i] = 1;
        break;
      }
    }
  }

  // For NEARESTMV/NEARMV halves, verify that the MV the compound mode uses
  // matches the MV the single-reference mode would use for every ref_mv_idx.
  // The single-mode statistics are only predictive when the MVs agree.
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
      continue;
    }
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
      int_mv single_mv;
      int_mv comp_mv;
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
                  &x->mbmi_ext);
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
      if (single_mv.as_int != comp_mv.as_int) {
        ref_mv_match[i] = 0;
        break;
      }
    }
  }

  // Skip the compound mode if a searched, MV-matching reference does not
  // appear among the top candidates of the single-mode rd ordering.
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
    const int candidates =
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
    const MV_REFERENCE_FRAME *ref_order =
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
    int match = 0;
    for (j = 0; j < candidates; ++j) {
      if (refs[i] == ref_order[j]) {
        match = 1;
        break;
      }
    }
    if (!match) return 1;
  }

  return 0;
}
4730
4731 // Check if ref frames of current block matches with given block.
match_ref_frame(const MB_MODE_INFO * const mbmi,const MV_REFERENCE_FRAME * ref_frames,int * const is_ref_match)4732 static inline void match_ref_frame(const MB_MODE_INFO *const mbmi,
4733 const MV_REFERENCE_FRAME *ref_frames,
4734 int *const is_ref_match) {
4735 if (is_inter_block(mbmi)) {
4736 is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
4737 is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
4738 if (has_second_ref(mbmi)) {
4739 is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
4740 is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
4741 }
4742 }
4743 }
4744
4745 // Prune compound mode using ref frames of neighbor blocks.
compound_skip_using_neighbor_refs(MACROBLOCKD * const xd,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,int prune_ext_comp_using_neighbors)4746 static inline int compound_skip_using_neighbor_refs(
4747 MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
4748 const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
4749 // Exclude non-extended compound modes from pruning
4750 if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4751 this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4752 return 0;
4753
4754 if (prune_ext_comp_using_neighbors >= 3) return 1;
4755
4756 int is_ref_match[2] = { 0 }; // 0 - match for forward refs
4757 // 1 - match for backward refs
4758 // Check if ref frames of this block matches with left neighbor.
4759 if (xd->left_available)
4760 match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
4761
4762 // Check if ref frames of this block matches with above neighbor.
4763 if (xd->up_available)
4764 match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
4765
4766 // Combine ref frame match with neighbors in forward and backward refs.
4767 const int track_ref_match = is_ref_match[0] + is_ref_match[1];
4768
4769 // Pruning based on ref frame match with neighbors.
4770 if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
4771 return 1;
4772 }
4773
4774 // Update best single mode for the given reference frame based on simple rd.
update_best_single_mode(InterModeSearchState * search_state,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME ref_frame,int64_t this_rd)4775 static inline void update_best_single_mode(InterModeSearchState *search_state,
4776 const PREDICTION_MODE this_mode,
4777 const MV_REFERENCE_FRAME ref_frame,
4778 int64_t this_rd) {
4779 if (this_rd < search_state->best_single_rd[ref_frame]) {
4780 search_state->best_single_rd[ref_frame] = this_rd;
4781 search_state->best_single_mode[ref_frame] = this_mode;
4782 }
4783 }
4784
4785 // Prune compound mode using best single mode for the same reference.
skip_compound_using_best_single_mode_ref(const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,const PREDICTION_MODE * best_single_mode,int prune_comp_using_best_single_mode_ref)4786 static inline int skip_compound_using_best_single_mode_ref(
4787 const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
4788 const PREDICTION_MODE *best_single_mode,
4789 int prune_comp_using_best_single_mode_ref) {
4790 // Exclude non-extended compound modes from pruning
4791 if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4792 this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4793 return 0;
4794
4795 assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
4796 const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
4797 // Get ref frame direction corresponding to NEWMV
4798 // 0 - NEWMV corresponding to forward direction
4799 // 1 - NEWMV corresponding to backward direction
4800 const int newmv_dir = comp_mode_ref0 != NEWMV;
4801
4802 // Avoid pruning the compound mode when ref frame corresponding to NEWMV
4803 // have NEWMV as single mode winner.
4804 // Example: For an extended-compound mode,
4805 // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
4806 // - Ref frame corresponding to NEWMV is ALTREF_FRAME
4807 // - Avoid pruning this mode, if best single mode corresponding to ref frame
4808 // ALTREF_FRAME is NEWMV
4809 const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
4810 if (single_mode == NEWMV) return 0;
4811
4812 // Avoid pruning the compound mode when best single mode is not available
4813 if (prune_comp_using_best_single_mode_ref == 1)
4814 if (single_mode == MB_MODE_COUNT) return 0;
4815 return 1;
4816 }
4817
// qsort comparator for int64_t values, ascending. Returns -1, 0, or 1 via a
// branchless three-way compare.
static int compare_int64(const void *a, const void *b) {
  const int64_t lhs = *(const int64_t *)a;
  const int64_t rhs = *(const int64_t *)b;
  return (lhs > rhs) - (lhs < rhs);
}
4829
// Copies the stats of the new best mode (rd stats, mbmi, skip flags and
// block-level transform info) into the search state and into ctx.
static inline void update_search_state(
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  // skip_txfm is only honored for inter modes; intra modes never skip.
  const int skip_txfm =
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
  const TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  search_state->best_rd = new_best_rd_stats->rdcost;
  search_state->best_mode_index = new_best_mode;
  *best_rd_stats_dst = *new_best_rd_stats;
  search_state->best_mbmode = *mbmi;
  search_state->best_skip2 = skip_txfm;
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
  // These two values will be updated when av1_txfm_search is called.
  if (txfm_search_done) {
    // Include the cost of signaling the skip-txfm flag in the luma rate.
    search_state->best_rate_y =
        new_best_rd_stats_y->rate +
        x->mode_costs.skip_txfm_cost[skip_ctx]
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
  }
  search_state->best_y_rdcost = *new_best_rd_stats_y;
  // Preserve per-4x4 skip decisions and tx types for the winning mode.
  memcpy(ctx->blk_skip, txfm_info->blk_skip,
         sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
4864
4865 // Find the best RD for a reference frame (among single reference modes)
4866 // and store +10% of it in the 0-th element in ref_frame_rd.
find_top_ref(int64_t ref_frame_rd[REF_FRAMES])4867 static inline void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
4868 assert(ref_frame_rd[0] == INT64_MAX);
4869 int64_t ref_copy[REF_FRAMES - 1];
4870 memcpy(ref_copy, ref_frame_rd + 1,
4871 sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
4872 qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
4873
4874 int64_t cutoff = ref_copy[0];
4875 // The cut-off is within 10% of the best.
4876 if (cutoff != INT64_MAX) {
4877 assert(cutoff < INT64_MAX / 200);
4878 cutoff = (110 * cutoff) / 100;
4879 }
4880 ref_frame_rd[0] = cutoff;
4881 }
4882
4883 // Check if either frame is within the cutoff.
in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],MV_REFERENCE_FRAME frame1,MV_REFERENCE_FRAME frame2)4884 static inline bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
4885 MV_REFERENCE_FRAME frame1,
4886 MV_REFERENCE_FRAME frame2) {
4887 assert(frame2 > 0);
4888 return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
4889 ref_frame_rd[frame2] <= ref_frame_rd[0];
4890 }
4891
// Runs the motion mode search (via motion_mode_rd) for each winner candidate
// collected during the initial mode search (which evaluated only
// SIMPLE_TRANSLATION), and updates the search state when a candidate beats
// the current best rd. Compound candidates are skipped.
static inline void evaluate_motion_mode_for_winner_candidates(
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
    PICK_MODE_CONTEXT *const ctx,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
    InterModeSearchState *const search_state, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;

  for (int cand = 0; cand < num_best_cand; cand++) {
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    av1_init_rd_stats(&rd_stats);
    av1_init_rd_stats(&rd_stats_y);
    av1_init_rd_stats(&rd_stats_uv);
    int rate_mv;

    // Restore the candidate's mode info and rates saved during the initial
    // search.
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
    args->skip_motion_mode =
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
    rd_stats.rate =
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;

    // Continue if the best candidate is compound.
    if (!is_inter_singleref_mode(mbmi->mode)) continue;

    x->txfm_search_info.skip_txfm = 0;
    struct macroblockd_plane *pd = xd->plane;
    // Snapshot the destination buffers so motion_mode_rd can restore them.
    const BUFFER_SET orig_dst = {
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
    };

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    // Initialize motion mode to simple translation
    // Calculation of switchable rate depends on it.
    mbmi->motion_mode = 0;
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
                           search_state->best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
    int64_t ret_value = motion_mode_rd(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
        do_tx_search, inter_modes_info, 1, &this_yrd);

    // ret_value == INT64_MAX indicates the candidate was not viable.
    if (ret_value != INT64_MAX) {
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
          mode_enum, NULL, bsize, rd_stats.rdcost,
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
      if (rd_stats.rdcost < search_state->best_rd) {
        *yrd = this_yrd;
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                            &rd_stats_uv, mode_enum, x, do_tx_search);
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
      }
    }
  }
}
4969
/*!\cond */
// Arguments for speed feature pruning of inter mode search
typedef struct {
  // Output flag: set when only the motion mode search should be bypassed.
  int *skip_motion_mode;
  // Masks of modes/reference frames excluded from the search.
  mode_skip_mask_t *mode_skip_mask;
  // Running state of the inter mode search (best mode, rd, single-ref stats).
  InterModeSearchState *search_state;
  // Bitmask of reference frames to be skipped.
  int skip_ref_frame_mask;
  // Set once the first compound mode in the mode order has been reached.
  int reach_first_comp_mode;
  // Multiplier applied to mode thresholds when the best mode is skippable.
  int mode_thresh_mul_fact;
  // Number of single-reference modes processed so far.
  int num_single_modes_processed;
  // Set when ref_frame_rd[0] holds the single-ref pruning cut-off
  // (see find_top_ref).
  int prune_cpd_using_sr_stats_ready;
} InterModeSFArgs;
/*!\endcond */
4983
// Returns 1 if the inter mode at position 'midx' in av1_default_mode_order
// should be skipped for the current block, based on speed features and the
// statistics gathered so far; returns 0 otherwise. Side effects: sets
// *args->skip_motion_mode, may trigger analyze_single_states() when the
// first compound mode is reached, and may finalize the single-ref pruning
// cut-off via find_top_ref().
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
                           int64_t *ref_frame_rd, int midx,
                           InterModeSFArgs *args, int is_low_temp_var) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Get the actual prediction mode we are trying in this iteration
  const THR_MODES mode_enum = av1_default_mode_order[midx];
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
  const PREDICTION_MODE this_mode = mode_def->mode;
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
  const int comp_pred = second_ref_frame > INTRA_FRAME;

  // Intra modes are handled elsewhere; skip them here.
  if (ref_frame == INTRA_FRAME) return 1;

  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
      comp_pred) {
    return 1;
  }

  // This is for real time encoding.
  if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
      this_mode != NEARESTMV)
    return 1;

  // Check if this mode should be skipped because it is incompatible with the
  // current frame
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
    return 1;
  // ret == 1: skip the mode entirely; ret == 2: evaluate the mode but skip
  // its motion mode search.
  const int ret = inter_mode_search_order_independent_skip(
      cpi, x, args->mode_skip_mask, args->search_state,
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
  if (ret == 1) return 1;
  *(args->skip_motion_mode) = (ret == 2);

  // We've reached the first compound prediction mode, get stats from the
  // single reference predictors to help with pruning.
  // Disable this pruning logic if interpolation filter search was skipped for
  // single prediction modes as it can result in aggressive pruning of compound
  // prediction modes due to the absence of modelled_rd populated by
  // av1_interpolation_filter_search().
  // TODO(Remya): Check the impact of the sf
  // 'prune_comp_search_by_single_result' if compound prediction modes are
  // enabled in future for REALTIME encode.
  if (!sf->interp_sf.skip_interp_filter_search &&
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
      args->reach_first_comp_mode == 0) {
    analyze_single_states(cpi, args->search_state);
    args->reach_first_comp_mode = 1;
  }

  // Prune aggressively when best mode is skippable.
  int mul_fact = args->search_state->best_mode_skippable
                     ? args->mode_thresh_mul_fact
                     : (1 << MODE_THRESH_QBITS);
  int64_t mode_threshold =
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
      MODE_THRESH_QBITS;

  // If the best rd so far already beats this mode's threshold, skip it.
  if (args->search_state->best_rd < mode_threshold) return 1;

  // Skip this compound mode based on the RD results from the single prediction
  // modes
  if (!sf->interp_sf.skip_interp_filter_search &&
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
                                       ref_frame, second_ref_frame, x))
      return 1;
  }

  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
    // After we done with single reference modes, find the 2nd best RD
    // for a reference frame. Only search compound modes that have a reference
    // frame at least as good as the 2nd best.
    if (!args->prune_cpd_using_sr_stats_ready &&
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
      find_top_ref(ref_frame_rd);
      args->prune_cpd_using_sr_stats_ready = 1;
    }
    if (args->prune_cpd_using_sr_stats_ready &&
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
      return 1;
  }

  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
    return 1;
  }

  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
    if (compound_skip_using_neighbor_refs(
            xd, this_mode, ref_frames,
            sf->inter_sf.prune_ext_comp_using_neighbors))
      return 1;
  }

  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
    if (skip_compound_using_best_single_mode_ref(
            this_mode, ref_frames, args->search_state->best_single_mode,
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
      return 1;
  }

  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
    if (skip_nearest_near_mv_using_refmv_weight(
            x, this_mode, ref_frame_type,
            args->search_state->best_mbmode.mode)) {
      // Ensure the mode is pruned only when the current block has obtained a
      // valid inter mode.
      assert(is_inter_mode(args->search_state->best_mbmode.mode));
      return 1;
    }
  }

  if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
      ref_frame == GOLDEN_FRAME && !comp_pred) {
    // Prune stale GOLDEN-based modes when the current best mode does not use
    // GOLDEN and the golden frame is old relative to the sub-GOP length.
    const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
    if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
        args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
      if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
        return 1;
    }
  }

  return 0;
}
5115
// Updates the best per-reference-mode rd costs (single, compound and
// reference-mode-select) with the rd of the current mode.
static void record_best_compound(REFERENCE_MODE reference_mode,
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
                                 InterModeSearchState *search_state,
                                 int compmode_cost) {
  // Derive the rate with and without the compound-mode signaling cost.
  int64_t rate_single, rate_hybrid;
  if (reference_mode == REFERENCE_MODE_SELECT) {
    rate_single = rd_stats->rate - compmode_cost;
    rate_hybrid = rd_stats->rate;
  } else {
    rate_single = rd_stats->rate;
    rate_hybrid = rd_stats->rate + compmode_cost;
  }

  const int64_t rd_single = RDCOST(rdmult, rate_single, rd_stats->dist);
  const int64_t rd_hybrid = RDCOST(rdmult, rate_hybrid, rd_stats->dist);

  // Track the best single/compound rd under its own bucket, and the best
  // hybrid rd under REFERENCE_MODE_SELECT.
  const int ref_mode_bucket =
      comp_pred ? COMPOUND_REFERENCE : SINGLE_REFERENCE;
  if (rd_single < search_state->best_pred_rd[ref_mode_bucket])
    search_state->best_pred_rd[ref_mode_bucket] = rd_single;
  if (rd_hybrid < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = rd_hybrid;
}
5143
// Does a transform search over a list of the best inter mode candidates.
// This is called if the original mode search computed an RD estimate
// for the transform search rather than doing a full search.
// On return, rd_cost / ctx / search_state hold the best mode found and *yrd
// holds the luma rd cost of the best candidate in this partition.
static void tx_search_best_inter_candidates(
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
    InterModeSearchState *search_state, RD_STATS *rd_cost,
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int num_planes = av1_num_planes(cm);
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  // Sort candidates by their estimated rd so the most promising come first.
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
  search_state->best_rd = best_rd_so_far;
  search_state->best_mode_index = THR_INVALID;
  // Initialize best mode stats for winner mode processing
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
  // Cap the candidate count per the real-time speed feature.
  inter_modes_info->num =
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
          ? inter_modes_info->num
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
  // Estimated rd of the best-ranked candidate; used for early termination.
  const int64_t top_est_rd =
      inter_modes_info->num > 0
          ? inter_modes_info
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
          : INT64_MAX;
  *yrd = INT64_MAX;
  int64_t best_rd_in_this_partition = INT64_MAX;
  int num_inter_mode_cands = inter_modes_info->num;
  int newmv_mode_evaled = 0;
  int max_allowed_cands = INT_MAX;
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
    // The bound on the no. of inter mode candidates, beyond which the
    // candidates are limited if a newmv mode got evaluated, is set as
    // max_allowed_cands + 1.
    const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
    max_allowed_cands =
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
  }

  int num_mode_thresh = INT_MAX;
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
    // Bound the no. of transform searches per prediction mode beyond a
    // threshold.
    const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
    num_mode_thresh =
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
  }

  int num_tx_cands = 0;
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
  // Iterate over best inter mode candidates and perform tx search
  for (int j = 0; j < num_inter_mode_cands; ++j) {
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
    const PREDICTION_MODE prediction_mode = mbmi->mode;
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
    // Stop once the estimated rd falls too far behind the top candidate.
    if (curr_est_rd * 0.80 > top_est_rd) break;

    // Beyond the per-mode threshold, allow at most one extra search per mode
    // (two for NEARESTMV).
    if (num_tx_cands > num_mode_thresh) {
      if ((prediction_mode != NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
          (prediction_mode == NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
        continue;
    }

    txfm_info->skip_txfm = 0;
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select prediction reference frames.
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    bool is_predictor_built = false;

    // Initialize RD stats
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
    int64_t skip_rd = INT64_MAX;
    const int txfm_rd_gate_level = get_txfm_rd_gate_level(
        cm->seq_params->enable_masked_compound,
        cpi->sf.inter_sf.txfm_rd_gate_level, bsize, TX_SEARCH_DEFAULT,
        /*eval_motion_mode=*/0);
    if (txfm_rd_gate_level) {
      // Check if the mode is good enough based on skip RD
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
      int eval_txfm = check_txfm_eval(x, bsize, search_state->best_skip_rd[0],
                                      skip_rd, txfm_rd_gate_level, 0);
      if (!eval_txfm) continue;
    }

    // Build the prediction for this mode
    if (!is_predictor_built) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      av1_build_obmc_inter_predictors_sb(cm, xd);
    }

    num_tx_cands++;
    if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
    num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
    int64_t this_yrd = INT64_MAX;
    // Do the transform search
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
                         mode_rate, search_state->best_rd)) {
      continue;
    } else {
      // Luma rate includes the skip-txfm signaling cost.
      const int y_rate =
          rd_stats.skip_txfm
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);

      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
            rd_stats_y.rate + rd_stats_uv.rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }
    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
    if (rd_stats.rdcost < best_rd_in_this_partition) {
      best_rd_in_this_partition = rd_stats.rdcost;
      *yrd = this_yrd;
    }

    const THR_MODES mode_enum = get_prediction_mode_idx(
        prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Collect mode stats for multiwinner mode processing
    const int txfm_search_done = 1;
    store_winner_mode_stats(
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
        NULL, bsize, rd_stats.rdcost,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);

    if (rd_stats.rdcost < search_state->best_rd) {
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
      search_state->best_skip_rd[0] = skip_rd;
      // Limit the total number of modes to be evaluated if the first is valid
      // and transform skip or compound
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
          // Evaluate more candidates at high quantizers where occurrence of
          // transform skip is high.
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands =
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
          // Evaluate more candidates at low quantizers where occurrence of
          // single reference mode is high.
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
                                                { 10, 7, 5, 3 } };
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands = AOMMIN(
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
        }
      }
    }
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
    // a newmv mode was evaluated already.
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
  }
}
5330
// Number of winner simple-translation modes retained for the later motion
// mode search, indexed by the motion_mode_for_winner_cand speed feature
// level (0 disables the list).
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
5333
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
// speed feature. This list consists of modes that have only searched
// SIMPLE_TRANSLATION. The final list will be used to search other motion
// modes after the initial RD search.
// The list is kept sorted by ascending rd_cost and capped at
// max_winner_motion_mode_cand entries.
static void handle_winner_cand(
    MB_MODE_INFO *const mbmi,
    motion_mode_best_st_candidate *best_motion_mode_cands,
    int max_winner_motion_mode_cand, int64_t this_rd,
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
  // Number of current motion mode candidates in list
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
  int valid_motion_mode_cand_loc = num_motion_mode_cand;

  // find the best location to insert new motion mode candidate
  for (int j = 0; j < num_motion_mode_cand; j++) {
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
      valid_motion_mode_cand_loc = j;
      break;
    }
  }

  // Insert motion mode if location is found
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
    // Shift existing entries right to make room; the last entry is dropped
    // when the list is full.
    if (num_motion_mode_cand > 0 &&
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
      memmove(
          &best_motion_mode_cands
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
           valid_motion_mode_cand_loc) *
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
    motion_mode_cand->mbmi = *mbmi;
    motion_mode_cand->rd_cost = this_rd;
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
        *motion_mode_cand;
    best_motion_mode_cands->num_motion_mode_cand =
        AOMMIN(max_winner_motion_mode_cand,
               best_motion_mode_cands->num_motion_mode_cand + 1);
  }
}
5376
5377 /*!\brief Search intra modes in interframes
5378 *
5379 * \ingroup intra_mode_search
5380 *
5381 * This function searches for the best intra mode when the current frame is an
5382 * interframe. This function however does *not* handle luma palette mode.
5383 * Palette mode is currently handled by \ref av1_search_palette_mode.
5384 *
5385 * This function will first iterate through the luma mode candidates to find the
 * best luma intra mode. Once the best luma mode is found, it will then search
5387 * for the best chroma mode. Because palette mode is currently not handled by
5388 * here, a cache of uv mode is stored in
5389 * InterModeSearchState::intra_search_state so it can be reused later by \ref
5390 * av1_search_palette_mode.
5391 *
5392 * \param[in,out] search_state Struct keep track of the prediction mode
5393 * search state in interframe.
5394 *
5395 * \param[in] cpi Top-level encoder structure.
5396 * \param[in,out] x Pointer to struct holding all the data for
5397 * the current prediction block.
5398 * \param[out] rd_cost Stores the best rd_cost among all the
5399 * prediction modes searched.
5400 * \param[in] bsize Current block size.
 * \param[in,out]  ctx                  Structure to hold the number of 4x4
 *                                      blks to copy the tx_type and txfm_skip
 *                                      arrays, for only the Y plane.
5404 * \param[in] sf_args Stores the list of intra mode candidates
5405 * to be searched.
5406 * \param[in] intra_ref_frame_cost The entropy cost for signaling that the
5407 * current ref frame is an intra frame.
5408 * \param[in] yrd_threshold The rdcost threshold for luma intra mode to
5409 * terminate chroma intra mode search.
5410 *
5411 * \remark If a new best mode is found, search_state and rd_costs are updated
5412 * correspondingly. While x is also modified, it is only used as a temporary
5413 * buffer, and the final decisions are stored in search_state.
5414 */
static inline void search_intra_modes_in_interframe(
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
    const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
    int64_t yrd_threshold) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;

  // Best-so-far luma intra results; only committed back to mbmi/x if some
  // luma mode beats yrd_threshold.
  int is_best_y_mode_intra = 0;
  RD_STATS best_intra_rd_stats_y;
  int64_t best_rd_y = INT64_MAX;
  int best_mode_cost_y = -1;
  MB_MODE_INFO best_mbmi = *xd->mi[0];
  THR_MODES best_mode_enum = THR_INVALID;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int num_4x4 = bsize_to_num_blk(bsize);

  // Performs luma search
  int64_t best_model_rd = INT64_MAX;
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
    top_intra_model_rd[i] = INT64_MAX;
  }
  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
    // skip_intra_modes may be set mid-loop by av1_handle_intra_y_mode's
    // internal pruning; honor it on the next iteration.
    if (sf->intra_sf.skip_intra_in_interframe &&
        search_state->intra_search_state.skip_intra_modes)
      break;
    set_y_mode_and_delta_angle(
        mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
    assert(mbmi->mode < INTRA_MODE_END);

    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
      continue;

    const THR_MODES mode_enum =
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
    // Skip mode families disabled by configuration or speed features.
    if ((!intra_mode_cfg->enable_smooth_intra ||
         cpi->sf.intra_sf.disable_smooth_intra) &&
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
         mbmi->mode == SMOOTH_V_PRED))
      continue;
    if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
      continue;
    if (av1_is_directional_mode(mbmi->mode) &&
        !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
        mbmi->angle_delta[PLANE_TYPE_Y] != 0)
      continue;
    const PREDICTION_MODE this_mode = mbmi->mode;

    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
    x->txfm_search_info.skip_txfm = 0;

    if (this_mode != DC_PRED) {
      // Only search the oblique modes if the best so far is
      // one of the neighboring directional modes
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
        if (search_state->best_mode_index != THR_INVALID &&
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
          continue;
      }
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(
                this_mode, search_state->intra_search_state.best_intra_mode))
          continue;
      }
    }

    RD_STATS intra_rd_stats_y;
    int mode_cost_y;
    int64_t intra_rd_y = INT64_MAX;
    const int is_luma_result_valid = av1_handle_intra_y_mode(
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
        &best_model_rd, top_intra_model_rd);
    // Only continue to chroma search if luma beats the inter-derived
    // threshold; otherwise intra cannot win overall.
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
      is_best_y_mode_intra = 1;
      if (intra_rd_y < best_rd_y) {
        best_intra_rd_stats_y = intra_rd_stats_y;
        best_mode_cost_y = mode_cost_y;
        best_rd_y = intra_rd_y;
        best_mbmi = *mbmi;
        best_mode_enum = mode_enum;
        // Collect mode info for best intra mode seen so far.
        memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
               sizeof(best_blk_skip[0]) * num_4x4);
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
      }
    }
  }

  if (!is_best_y_mode_intra) {
    return;
  }

  assert(best_rd_y < INT64_MAX);

  // Restores the best luma mode
  *mbmi = best_mbmi;
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * num_4x4);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);

  // Performs chroma search
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
  av1_init_rd_stats(&intra_rd_stats);
  av1_init_rd_stats(&intra_rd_stats_uv);
  const int num_planes = av1_num_planes(cm);
  if (num_planes > 1) {
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);

    if (!intra_uv_mode_valid) {
      return;
    }
  }

  // Merge the luma and chroma rd stats
  assert(best_mode_cost_y >= 0);
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate.
    // NOTE: this adjusts best_intra_rd_stats_y (later stored with the winner
    // stats), not intra_rd_stats.rate which was already summed above.
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }

  const ModeCosts *mode_costs = &x->mode_costs;
  const PREDICTION_MODE mode = mbmi->mode;
  if (num_planes > 1 && xd->is_chroma_ref) {
    const int uv_mode_cost =
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
    intra_rd_stats.rate +=
        intra_rd_stats_uv.rate +
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
  }

  // Intra block is always coded as non-skip
  intra_rd_stats.skip_txfm = 0;
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
  // Add in the cost of the no skip flag.
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
  // Calculate the final RD estimate for this mode.
  const int64_t this_rd =
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
  // Keep record of best intra rd
  if (this_rd < search_state->best_intra_rd) {
    search_state->best_intra_rd = this_rd;
    intra_search_state->best_intra_mode = mode;
  }

  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] =
        AOMMIN(search_state->best_pred_rd[i], this_rd);
  }

  intra_rd_stats.rdcost = this_rd;

  // Collect mode stats for multiwinner mode processing
  const int txfm_search_done = 1;
  store_winner_mode_stats(
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
  if (intra_rd_stats.rdcost < search_state->best_rd) {
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
                        best_mode_enum, x, txfm_search_done);
  }
}
5595
5596 #if !CONFIG_REALTIME_ONLY
5597 // Prepare inter_cost and intra_cost from TPL stats, which are used as ML
5598 // features in intra mode pruning.
static inline void calculate_cost_from_tpl_data(const AV1_COMP *cpi,
                                                MACROBLOCK *x, BLOCK_SIZE bsize,
                                                int mi_row, int mi_col,
                                                int64_t *inter_cost,
                                                int64_t *intra_cost) {
  const AV1_COMMON *const cm = &cpi->common;
  // Only consider full SB: bail out unless TPL stats cover the entire
  // superblock.
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
  const int expected_count = (block_size_wide[sb_size] / tpl_bsize_1d) *
                             (block_size_high[sb_size] / tpl_bsize_1d);
  SuperBlockEnc *sb_enc = &x->sb_enc;
  if (sb_enc->tpl_data_count != expected_count) return;

  const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
  const int tpl_stride = sb_enc->tpl_stride;
  // TPL unit dimensions in mi units, and how many units this block spans.
  const int unit_mi_w = mi_size_wide[tpl_bsize];
  const int unit_mi_h = mi_size_high[tpl_bsize];
  const int units_wide = mi_size_wide[bsize] / unit_mi_w;
  const int units_high = mi_size_high[bsize] / unit_mi_h;
  if (units_wide < 1 || units_high < 1) return;

  // Offset of this block within its superblock, converted to a TPL-grid
  // starting index.
  const int off_h = mi_row % mi_size_high[sb_size];
  const int off_w = mi_col % mi_size_wide[sb_size];
  const int base = off_h / unit_mi_h * tpl_stride + off_w / unit_mi_w;

  // Accumulate the per-unit TPL costs, then average over the covered units.
  for (int r = 0; r < units_high; r++) {
    for (int c = 0; c < units_wide; c++) {
      const int pos = base + r * tpl_stride + c;
      *inter_cost += sb_enc->tpl_inter_cost[pos];
      *intra_cost += sb_enc->tpl_intra_cost[pos];
    }
  }
  *inter_cost /= units_wide * units_high;
  *intra_cost /= units_wide * units_high;
}
5634 #endif // !CONFIG_REALTIME_ONLY
5635
// When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
// intra mode search. Sets search_state->intra_search_state.skip_intra_modes
// when intra evaluation is judged unlikely to beat the best inter mode.
static inline void skip_intra_modes_in_interframe(
    AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
    InterModeSearchState *search_state, const SPEED_FEATURES *const sf,
    int64_t inter_cost, int64_t intra_cost) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME;
  // For large single-ref blocks whose best inter MV is small and whose source
  // is not flat, skip intra search based on the MV-range speed feature.
  if (sf->rt_sf.prune_intra_mode_based_on_mv_range &&
      bsize > sf->part_sf.max_intra_bsize && !comp_pred) {
    const MV best_mv = search_state->best_mbmode.mv[0].as_mv;
    const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range;
    if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh &&
        x->source_variance > 128) {
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    }
  }

  // The remaining pruning only applies when the feature is enabled and the
  // source has some variance.
  const unsigned int src_var_thresh_intra_skip = 1;
  const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
  if (!(skip_intra_in_interframe &&
        (x->source_variance > src_var_thresh_intra_skip)))
    return;

  // Prune intra search based on best inter mode being transform skip.
  if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
    const int qindex_thresh[2] = { 200, MAXQ };
    const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
    if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
        (x->qindex <= qindex_thresh[ind])) {
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    } else if ((skip_intra_in_interframe >= 4) &&
               (inter_cost < 0 || intra_cost < 0)) {
      // TPL costs unavailable (negative sentinel): at the most aggressive
      // levels, skip intra anyway.
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    }
  }
  // Use ML model to prune intra search.
  if (inter_cost >= 0 && intra_cost >= 0) {
    const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
                                     ? &av1_intrap_nn_config
                                     : &av1_intrap_hd_nn_config;
    float nn_features[6];
    float scores[2] = { 0.0f };

    nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
    nn_features[1] = (float)mi_size_wide_log2[bsize];
    nn_features[2] = (float)mi_size_high_log2[bsize];
    nn_features[3] = (float)intra_cost;
    nn_features[4] = (float)inter_cost;
    // Ratio of max AC quantizer to current AC quantizer (integer division).
    const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
    const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
    nn_features[5] = (float)(ac_q_max / ac_q);

    av1_nn_predict(nn_features, nn_config, 1, scores);

    // For two parameters, the max prob returned from av1_nn_softmax equals
    // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
    // calling of av1_nn_softmax.
    const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
    assert(skip_intra_in_interframe <= 5);
    if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
      search_state->intra_search_state.skip_intra_modes = 1;
    }
  }
}
5704
skip_interp_filter_search(const AV1_COMP * cpi,int is_single_pred)5705 static inline bool skip_interp_filter_search(const AV1_COMP *cpi,
5706 int is_single_pred) {
5707 const MODE encoding_mode = cpi->oxcf.mode;
5708 if (encoding_mode == REALTIME) {
5709 return (cpi->common.current_frame.reference_mode == SINGLE_REFERENCE &&
5710 (cpi->sf.interp_sf.skip_interp_filter_search ||
5711 cpi->sf.winner_mode_sf.winner_mode_ifs));
5712 } else if (encoding_mode == GOOD) {
5713 // Skip interpolation filter search for single prediction modes.
5714 return (cpi->sf.interp_sf.skip_interp_filter_search && is_single_pred);
5715 }
5716 return false;
5717 }
5718
// Returns 1 when the block is classified as having low temporal variance,
// 0 otherwise. Only active with variance-based partitioning and the related
// realtime speed features; used to prune inter modes in real time encoding.
static inline int get_block_temp_var(const AV1_COMP *cpi, const MACROBLOCK *x,
                                     BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;

  // All three conditions must hold for the classification to apply.
  const int feature_active =
      sf->part_sf.partition_search_type == VAR_BASED_PARTITION &&
      sf->rt_sf.short_circuit_low_temp_var &&
      sf->rt_sf.prune_inter_modes_using_temp_var;
  if (!feature_active) return 0;

  const int mi_row = x->e_mbd.mi_row;
  const int mi_col = x->e_mbd.mi_col;

  // 64x64 superblocks use the small-SB variant of the classifier.
  return (cm->seq_params->sb_size == BLOCK_64X64)
             ? av1_get_force_skip_low_temp_var_small_sb(
                   &x->part_search_info.variance_low[0], mi_row, mi_col, bsize)
             : av1_get_force_skip_low_temp_var(
                   &x->part_search_info.variance_low[0], mi_row, mi_col,
                   bsize);
}
5743
5744 // TODO([email protected]): See the todo for av1_rd_pick_intra_mode_sb.
av1_rd_pick_inter_mode(struct AV1_COMP * cpi,struct TileDataEnc * tile_data,struct macroblock * x,struct RD_STATS * rd_cost,BLOCK_SIZE bsize,PICK_MODE_CONTEXT * ctx,int64_t best_rd_so_far)5745 void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
5746 struct macroblock *x, struct RD_STATS *rd_cost,
5747 BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5748 int64_t best_rd_so_far) {
5749 AV1_COMMON *const cm = &cpi->common;
5750 const FeatureFlags *const features = &cm->features;
5751 const int num_planes = av1_num_planes(cm);
5752 const SPEED_FEATURES *const sf = &cpi->sf;
5753 MACROBLOCKD *const xd = &x->e_mbd;
5754 MB_MODE_INFO *const mbmi = xd->mi[0];
5755 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5756 int i;
5757 const ModeCosts *mode_costs = &x->mode_costs;
5758 const int *comp_inter_cost =
5759 mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
5760
5761 InterModeSearchState search_state;
5762 init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
5763 INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
5764 INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
5765 INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
5766 };
5767 HandleInterModeArgs args = { { NULL },
5768 { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
5769 { NULL },
5770 { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
5771 MAX_SB_SIZE >> 1 },
5772 NULL,
5773 NULL,
5774 NULL,
5775 search_state.modelled_rd,
5776 INT_MAX,
5777 INT_MAX,
5778 search_state.simple_rd,
5779 0,
5780 false,
5781 interintra_modes,
5782 { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
5783 { { 0, 0 } },
5784 { 0 },
5785 0,
5786 0,
5787 -1,
5788 -1,
5789 -1,
5790 { 0 },
5791 { 0 },
5792 UINT_MAX };
5793 // Currently, is_low_temp_var is used in real time encoding.
5794 const int is_low_temp_var = get_block_temp_var(cpi, x, bsize);
5795
5796 for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
5797 // Indicates the appropriate number of simple translation winner modes for
5798 // exhaustive motion mode evaluation
5799 const int max_winner_motion_mode_cand =
5800 num_winner_motion_modes[sf->winner_mode_sf.motion_mode_for_winner_cand];
5801 assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
5802 motion_mode_candidate motion_mode_cand;
5803 motion_mode_best_st_candidate best_motion_mode_cands;
5804 // Initializing the number of motion mode candidates to zero.
5805 best_motion_mode_cands.num_motion_mode_cand = 0;
5806 for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
5807 best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
5808
5809 for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
5810
5811 av1_invalid_rd_stats(rd_cost);
5812
5813 for (i = 0; i < REF_FRAMES; ++i) {
5814 x->warp_sample_info[i].num = -1;
5815 }
5816
5817 // Ref frames that are selected by square partition blocks.
5818 int picked_ref_frames_mask = 0;
5819 if (sf->inter_sf.prune_ref_frame_for_rect_partitions &&
5820 mbmi->partition != PARTITION_NONE) {
5821 // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
5822 // partition blocks. prune_ref_frame_for_rect_partitions >=2
5823 // implies prune for vert, horiz and extended partition blocks.
5824 if ((mbmi->partition != PARTITION_VERT &&
5825 mbmi->partition != PARTITION_HORZ) ||
5826 sf->inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
5827 picked_ref_frames_mask =
5828 fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
5829 }
5830 }
5831
5832 #if CONFIG_COLLECT_COMPONENT_TIMING
5833 start_timing(cpi, set_params_rd_pick_inter_mode_time);
5834 #endif
5835 // Skip ref frames that never selected by square blocks.
5836 const int skip_ref_frame_mask =
5837 picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
5838 mode_skip_mask_t mode_skip_mask;
5839 unsigned int ref_costs_single[REF_FRAMES];
5840 unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
5841 struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
5842 // init params, set frame modes, speed features
5843 set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
5844 skip_ref_frame_mask, ref_costs_single,
5845 ref_costs_comp, yv12_mb);
5846 #if CONFIG_COLLECT_COMPONENT_TIMING
5847 end_timing(cpi, set_params_rd_pick_inter_mode_time);
5848 #endif
5849
5850 int64_t best_est_rd = INT64_MAX;
5851 const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
5852 // If do_tx_search is 0, only estimated RD should be computed.
5853 // If do_tx_search is 1, all modes have TX search performed.
5854 const int do_tx_search =
5855 !((sf->inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
5856 (sf->inter_sf.inter_mode_rd_model_estimation == 2 &&
5857 num_pels_log2_lookup[bsize] > 8));
5858 InterModesInfo *inter_modes_info = x->inter_modes_info;
5859 inter_modes_info->num = 0;
5860
5861 // Temporary buffers used by handle_inter_mode().
5862 uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
5863
5864 // The best RD found for the reference frame, among single reference modes.
5865 // Note that the 0-th element will contain a cut-off that is later used
5866 // to determine if we should skip a compound mode.
5867 int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
5868 INT64_MAX, INT64_MAX, INT64_MAX,
5869 INT64_MAX, INT64_MAX };
5870
5871 // Prepared stats used later to check if we could skip intra mode eval.
5872 int64_t inter_cost = -1;
5873 int64_t intra_cost = -1;
5874 // Need to tweak the threshold for hdres speed 0 & 1.
5875 const int mi_row = xd->mi_row;
5876 const int mi_col = xd->mi_col;
5877
5878 // Obtain the relevant tpl stats for pruning inter modes
5879 PruneInfoFromTpl inter_cost_info_from_tpl;
5880 #if !CONFIG_REALTIME_ONLY
5881 if (sf->inter_sf.prune_inter_modes_based_on_tpl) {
5882 // x->tpl_keep_ref_frame[id] = 1 => no pruning in
5883 // prune_ref_by_selective_ref_frame()
5884 // x->tpl_keep_ref_frame[id] = 0 => ref frame can be pruned in
5885 // prune_ref_by_selective_ref_frame()
5886 // Populating valid_refs[idx] = 1 ensures that
5887 // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
5888 // pruned ref frame.
5889 int valid_refs[INTER_REFS_PER_FRAME];
5890 for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
5891 const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
5892 valid_refs[frame - 1] =
5893 x->tpl_keep_ref_frame[frame] ||
5894 !prune_ref_by_selective_ref_frame(
5895 cpi, x, refs, cm->cur_frame->ref_display_order_hint);
5896 }
5897 av1_zero(inter_cost_info_from_tpl);
5898 get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
5899 &inter_cost_info_from_tpl);
5900 }
5901
5902 const int do_pruning =
5903 (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
5904 if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
5905 cpi->oxcf.algo_cfg.enable_tpl_model)
5906 calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
5907 &intra_cost);
5908 #endif // !CONFIG_REALTIME_ONLY
5909
5910 // Initialize best mode stats for winner mode processing.
5911 const int max_winner_mode_count =
5912 winner_mode_count_allowed[sf->winner_mode_sf.multi_winner_mode_type];
5913 zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
5914 x->winner_mode_count = 0;
5915 store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5916 NULL, bsize, best_rd_so_far,
5917 sf->winner_mode_sf.multi_winner_mode_type, 0);
5918
5919 int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
5920 if (sf->inter_sf.prune_inter_modes_if_skippable) {
5921 // Higher multiplication factor values for lower quantizers.
5922 mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
5923 }
5924
5925 // Initialize arguments for mode loop speed features
5926 InterModeSFArgs sf_args = { &args.skip_motion_mode,
5927 &mode_skip_mask,
5928 &search_state,
5929 skip_ref_frame_mask,
5930 0,
5931 mode_thresh_mul_fact,
5932 0,
5933 0 };
5934 int64_t best_inter_yrd = INT64_MAX;
5935
5936 // This is the main loop of this function. It loops over all possible inter
5937 // modes and calls handle_inter_mode() to compute the RD for each.
5938 // Here midx is just an iterator index that should not be used by itself
5939 // except to keep track of the number of modes searched. It should be used
5940 // with av1_default_mode_order to get the enum that defines the mode, which
5941 // can be used with av1_mode_defs to get the prediction mode and the ref
5942 // frames.
5943 // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
5944 // good speedup for real time case. If we decide to use compound mode in real
5945 // time, maybe we can modify av1_default_mode_order table.
5946 THR_MODES mode_start = THR_INTER_MODE_START;
5947 THR_MODES mode_end = THR_INTER_MODE_END;
5948 const CurrentFrame *const current_frame = &cm->current_frame;
5949 if (current_frame->reference_mode == SINGLE_REFERENCE) {
5950 mode_start = SINGLE_REF_MODE_START;
5951 mode_end = SINGLE_REF_MODE_END;
5952 }
5953
5954 for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
5955 // Get the actual prediction mode we are trying in this iteration
5956 const THR_MODES mode_enum = av1_default_mode_order[midx];
5957 const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
5958 const PREDICTION_MODE this_mode = mode_def->mode;
5959 const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
5960
5961 const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
5962 const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
5963 const int is_single_pred =
5964 ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
5965 const int comp_pred = second_ref_frame > INTRA_FRAME;
5966
5967 init_mbmi(mbmi, this_mode, ref_frames, cm);
5968
5969 txfm_info->skip_txfm = 0;
5970 sf_args.num_single_modes_processed += is_single_pred;
5971 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
5972 #if CONFIG_COLLECT_COMPONENT_TIMING
5973 start_timing(cpi, skip_inter_mode_time);
5974 #endif
5975 // Apply speed features to decide if this inter mode can be skipped
5976 const int is_skip_inter_mode = skip_inter_mode(
5977 cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var);
5978 #if CONFIG_COLLECT_COMPONENT_TIMING
5979 end_timing(cpi, skip_inter_mode_time);
5980 #endif
5981 if (is_skip_inter_mode) continue;
5982
5983 // Select prediction reference frames.
5984 for (i = 0; i < num_planes; i++) {
5985 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
5986 if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
5987 }
5988
5989 mbmi->angle_delta[PLANE_TYPE_Y] = 0;
5990 mbmi->angle_delta[PLANE_TYPE_UV] = 0;
5991 mbmi->filter_intra_mode_info.use_filter_intra = 0;
5992 mbmi->ref_mv_idx = 0;
5993
5994 const int64_t ref_best_rd = search_state.best_rd;
5995 RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
5996 av1_init_rd_stats(&rd_stats);
5997
5998 const int ref_frame_cost = comp_pred
5999 ? ref_costs_comp[ref_frame][second_ref_frame]
6000 : ref_costs_single[ref_frame];
6001 const int compmode_cost =
6002 is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
6003 const int real_compmode_cost =
6004 cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
6005 ? compmode_cost
6006 : 0;
6007 // Point to variables that are maintained between loop iterations
6008 args.single_newmv = search_state.single_newmv;
6009 args.single_newmv_rate = search_state.single_newmv_rate;
6010 args.single_newmv_valid = search_state.single_newmv_valid;
6011 args.single_comp_cost = real_compmode_cost;
6012 args.ref_frame_cost = ref_frame_cost;
6013 args.best_pred_sse = search_state.best_pred_sse;
6014 args.skip_ifs = skip_interp_filter_search(cpi, is_single_pred);
6015
6016 int64_t skip_rd[2] = { search_state.best_skip_rd[0],
6017 search_state.best_skip_rd[1] };
6018 int64_t this_yrd = INT64_MAX;
6019 #if CONFIG_COLLECT_COMPONENT_TIMING
6020 start_timing(cpi, handle_inter_mode_time);
6021 #endif
6022 int64_t this_rd = handle_inter_mode(
6023 cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
6024 ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
6025 inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
6026 &this_yrd);
6027 #if CONFIG_COLLECT_COMPONENT_TIMING
6028 end_timing(cpi, handle_inter_mode_time);
6029 #endif
6030 if (current_frame->reference_mode != SINGLE_REFERENCE) {
6031 if (!args.skip_ifs &&
6032 sf->inter_sf.prune_comp_search_by_single_result > 0 &&
6033 is_inter_singleref_mode(this_mode)) {
6034 collect_single_states(x, &search_state, mbmi);
6035 }
6036
6037 if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
6038 is_inter_singleref_mode(this_mode))
6039 update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
6040 }
6041
6042 if (this_rd == INT64_MAX) continue;
6043
6044 if (mbmi->skip_txfm) {
6045 rd_stats_y.rate = 0;
6046 rd_stats_uv.rate = 0;
6047 }
6048
6049 if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
6050 this_rd < ref_frame_rd[ref_frame]) {
6051 ref_frame_rd[ref_frame] = this_rd;
6052 }
6053
6054 // Did this mode help, i.e., is it the new best mode
6055 if (this_rd < search_state.best_rd) {
6056 assert(IMPLIES(comp_pred,
6057 cm->current_frame.reference_mode != SINGLE_REFERENCE));
6058 search_state.best_pred_sse = x->pred_sse[ref_frame];
6059 best_inter_yrd = this_yrd;
6060 update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
6061 &rd_stats_uv, mode_enum, x, do_tx_search);
6062 if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
6063 // skip_rd[0] is the best total rd for a skip mode so far.
6064 // skip_rd[1] is the best total rd for a skip mode so far in luma.
6065 // When do_tx_search = 1, both skip_rd[0] and skip_rd[1] are updated.
6066 // When do_tx_search = 0, skip_rd[1] is updated.
6067 search_state.best_skip_rd[1] = skip_rd[1];
6068 }
6069 if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6070 // Add this mode to motion mode candidate list for motion mode search
6071 // if using motion_mode_for_winner_cand speed feature
6072 handle_winner_cand(mbmi, &best_motion_mode_cands,
6073 max_winner_motion_mode_cand, this_rd,
6074 &motion_mode_cand, args.skip_motion_mode);
6075 }
6076
6077 /* keep record of best compound/single-only prediction */
6078 record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
6079 x->rdmult, &search_state, compmode_cost);
6080 }
6081
6082 #if CONFIG_COLLECT_COMPONENT_TIMING
6083 start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6084 #endif
6085 if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6086 // For the single ref winner candidates, evaluate other motion modes (non
6087 // simple translation).
6088 evaluate_motion_mode_for_winner_candidates(
6089 cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
6090 &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
6091 &search_state, &best_inter_yrd);
6092 }
6093 #if CONFIG_COLLECT_COMPONENT_TIMING
6094 end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6095 #endif
6096
6097 #if CONFIG_COLLECT_COMPONENT_TIMING
6098 start_timing(cpi, do_tx_search_time);
6099 #endif
6100 if (do_tx_search != 1) {
6101 // A full tx search has not yet been done, do tx search for
6102 // top mode candidates
6103 tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
6104 yv12_mb, mi_row, mi_col, &search_state,
6105 rd_cost, ctx, &best_inter_yrd);
6106 }
6107 #if CONFIG_COLLECT_COMPONENT_TIMING
6108 end_timing(cpi, do_tx_search_time);
6109 #endif
6110
6111 #if CONFIG_COLLECT_COMPONENT_TIMING
6112 start_timing(cpi, handle_intra_mode_time);
6113 #endif
6114 // Gate intra mode evaluation if best of inter is skip except when source
6115 // variance is extremely low and also based on max intra bsize.
6116 skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost,
6117 intra_cost);
6118
6119 const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
6120 search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
6121 &sf_args, intra_ref_frame_cost,
6122 best_inter_yrd);
6123 #if CONFIG_COLLECT_COMPONENT_TIMING
6124 end_timing(cpi, handle_intra_mode_time);
6125 #endif
6126
6127 #if CONFIG_COLLECT_COMPONENT_TIMING
6128 start_timing(cpi, refine_winner_mode_tx_time);
6129 #endif
6130 int winner_mode_count =
6131 sf->winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
6132 // In effect only when fast tx search speed features are enabled.
6133 refine_winner_mode_tx(
6134 cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
6135 &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
6136 search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
6137 #if CONFIG_COLLECT_COMPONENT_TIMING
6138 end_timing(cpi, refine_winner_mode_tx_time);
6139 #endif
6140
6141 // Initialize default mode evaluation params
6142 set_mode_eval_params(cpi, x, DEFAULT_EVAL);
6143
6144 // Only try palette mode when the best mode so far is an intra mode.
6145 const int try_palette =
6146 cpi->oxcf.tool_cfg.enable_palette &&
6147 av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
6148 !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
6149 RD_STATS this_rd_cost;
6150 int this_skippable = 0;
6151 if (try_palette) {
6152 #if CONFIG_COLLECT_COMPONENT_TIMING
6153 start_timing(cpi, av1_search_palette_mode_time);
6154 #endif
6155 this_skippable = av1_search_palette_mode(
6156 &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
6157 ctx, &this_rd_cost, search_state.best_rd);
6158 #if CONFIG_COLLECT_COMPONENT_TIMING
6159 end_timing(cpi, av1_search_palette_mode_time);
6160 #endif
6161 if (this_rd_cost.rdcost < search_state.best_rd) {
6162 search_state.best_mode_index = THR_DC;
6163 mbmi->mv[0].as_int = 0;
6164 rd_cost->rate = this_rd_cost.rate;
6165 rd_cost->dist = this_rd_cost.dist;
6166 rd_cost->rdcost = this_rd_cost.rdcost;
6167 search_state.best_rd = rd_cost->rdcost;
6168 search_state.best_mbmode = *mbmi;
6169 search_state.best_skip2 = 0;
6170 search_state.best_mode_skippable = this_skippable;
6171 memcpy(ctx->blk_skip, txfm_info->blk_skip,
6172 sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
6173 av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
6174 }
6175 }
6176
6177 search_state.best_mbmode.skip_mode = 0;
6178 if (cm->current_frame.skip_mode_info.skip_mode_flag &&
6179 is_comp_ref_allowed(bsize)) {
6180 const struct segmentation *const seg = &cm->seg;
6181 unsigned char segment_id = mbmi->segment_id;
6182 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
6183 rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
6184 }
6185 }
6186
6187 // Make sure that the ref_mv_idx is only nonzero when we're
6188 // using a mode which can support ref_mv_idx
6189 if (search_state.best_mbmode.ref_mv_idx != 0 &&
6190 !(search_state.best_mbmode.mode == NEWMV ||
6191 search_state.best_mbmode.mode == NEW_NEWMV ||
6192 have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
6193 search_state.best_mbmode.ref_mv_idx = 0;
6194 }
6195
6196 if (search_state.best_mode_index == THR_INVALID ||
6197 search_state.best_rd >= best_rd_so_far) {
6198 rd_cost->rate = INT_MAX;
6199 rd_cost->rdcost = INT64_MAX;
6200 return;
6201 }
6202
6203 const InterpFilter interp_filter = features->interp_filter;
6204 assert((interp_filter == SWITCHABLE) ||
6205 (interp_filter ==
6206 search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
6207 !is_inter_block(&search_state.best_mbmode));
6208 assert((interp_filter == SWITCHABLE) ||
6209 (interp_filter ==
6210 search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
6211 !is_inter_block(&search_state.best_mbmode));
6212
6213 if (!cpi->rc.is_src_frame_alt_ref && sf->inter_sf.adaptive_rd_thresh) {
6214 av1_update_rd_thresh_fact(
6215 cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
6216 search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
6217 }
6218
6219 // macroblock modes
6220 *mbmi = search_state.best_mbmode;
6221 txfm_info->skip_txfm |= search_state.best_skip2;
6222
6223 // Note: this section is needed since the mode may have been forced to
6224 // GLOBALMV by the all-zero mode handling of ref-mv.
6225 if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
6226 // Correct the interp filters for GLOBALMV
6227 if (is_nontrans_global_motion(xd, xd->mi[0])) {
6228 int_interpfilters filters =
6229 av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
6230 assert(mbmi->interp_filters.as_int == filters.as_int);
6231 (void)filters;
6232 }
6233 }
6234
6235 txfm_info->skip_txfm |= search_state.best_mode_skippable;
6236
6237 assert(search_state.best_mode_index != THR_INVALID);
6238
6239 #if CONFIG_INTERNAL_STATS
6240 store_coding_context(x, ctx, search_state.best_mode_index,
6241 search_state.best_mode_skippable);
6242 #else
6243 store_coding_context(x, ctx, search_state.best_mode_skippable);
6244 #endif // CONFIG_INTERNAL_STATS
6245
6246 if (mbmi->palette_mode_info.palette_size[1] > 0) {
6247 assert(try_palette);
6248 av1_restore_uv_color_map(cpi, x);
6249 }
6250 }
6251
/*!\brief RD mode selection for a block in a segment with SEG_LVL_SKIP active.
 *
 * For skip segments only GLOBALMV with a single reference and no residual is
 * legal, so no mode-search loop is required: configure mbmi directly, choose
 * the cheapest-to-signal interpolation filter, and price the mode/reference
 * signaling.
 *
 * \param[in]  cpi            Top-level encoder structure.
 * \param[in]  tile_data      Unused; kept for signature parity with
 *                            av1_rd_pick_inter_mode_sb().
 * \param[in]  x              Macroblock-level encoder context.
 * \param[in]  mi_row         Row position of the block in mi units.
 * \param[in]  mi_col         Column position of the block in mi units.
 * \param[out] rd_cost        Rate/distortion/RD cost of the chosen mode
 *                            (rate = INT_MAX on early exit).
 * \param[in]  bsize          Current block size.
 * \param[out] ctx            Coding context the result is stored into.
 * \param[in]  best_rd_so_far Best RD cost seen so far; used for early exit.
 */
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  const ModeCosts *mode_costs = &x->mode_costs;
  const int *comp_inter_cost =
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  // The residual is skipped for this segment, so distortion is zero.
  const int64_t distortion2 = 0;
  (void)tile_data;

  av1_collect_neighbors_ref_counts(xd);

  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  for (int i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (int i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  // Force the only legal configuration for a skip segment: single-reference
  // GLOBALMV with no palette, filter-intra, or non-simple motion modes.
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  // Use the segment's pinned reference frame when present, else LAST_FRAME.
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  // The motion vector comes straight from the global motion model of the
  // selected reference.
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           features->allow_high_precision_mv, bsize, mi_col,
                           mi_row, features->cur_frame_force_integer_mv)
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->txfm_search_info.skip_txfm = 1;

  mbmi->ref_mv_idx = 0;

  av1_count_overlappable_neighbors(cm, xd);
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
    // Select the samples according to motion vector difference
    if (mbmi->num_proj_ref > 1) {
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
                                             mbmi->num_proj_ref, bsize);
    }
  }

  const InterpFilter interp_filter = features->interp_filter;
  set_default_interp_filters(mbmi, interp_filter);

  if (interp_filter != SWITCHABLE) {
    best_filter = interp_filter;
  } else {
    best_filter = EIGHTTAP_REGULAR;
    // With switchable filters, pick the filter that is cheapest to signal in
    // this context; the prediction itself is unaffected since no residual is
    // coded.
    if (av1_is_interp_needed(xd)) {
      int best_rs = INT_MAX;
      for (int i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        const int rs = av1_get_switchable_rate(
            x, xd, interp_filter, cm->seq_params->enable_dual_filter);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mbmi->interp_filters.as_filters.y_filter;
        }
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
                                   cm->seq_params->enable_dual_filter);

  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
    rate2 += comp_inter_cost[comp_pred];

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  // NOTE(review): LAST_FRAME's cost is used even when the segment pins a
  // different reference frame; presumably an accepted approximation --
  // confirm before changing.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((interp_filter == SWITCHABLE) ||
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));

  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
                              THR_GLOBALMV, THR_INTER_MODE_START,
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
  }

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, THR_GLOBALMV, 0);
#else
  store_coding_context(x, ctx, 0);
#endif  // CONFIG_INTERNAL_STATS
}
6383
/*!\cond */
// Context threaded through foreach_overlappable_nb_{above,left} into the
// calc_target_weighted_pred_{above,left} visitor callbacks.  Field order
// matters: callers initialize it with a positional brace initializer.
struct calc_target_weighted_pred_ctxt {
  const OBMCBuffer *obmc_buffer;  // Destination wsrc/mask accumulators.
  const uint8_t *tmp;             // Neighbor prediction buffer (above or left).
  int tmp_stride;                 // Stride of 'tmp' in pixels.
  int overlap;                    // Extent of the OBMC blending band in pixels.
};
/*!\endcond */
6392
// Visitor for foreach_overlappable_nb_above(): seeds the wsrc accumulator
// with the above neighbor's prediction weighted by the complement of the
// vertical OBMC mask, and records the mask itself for the covered columns.
static inline void calc_target_weighted_pred_above(
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
  (void)nb_mi;
  (void)num_planes;
  (void)rel_mi_row;
  (void)dir;

  struct calc_target_weighted_pred_ctxt *const ctxt =
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;

  const int bw = xd->width << MI_SIZE_LOG2;
  const int ncols = op_mi_size * MI_SIZE;
  const int col_off = rel_mi_col * MI_SIZE;
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);

  int32_t *wsrc = ctxt->obmc_buffer->wsrc + col_off;
  int32_t *mask = ctxt->obmc_buffer->mask + col_off;
  const uint8_t *pred = ctxt->tmp + col_off;

  if (is_cur_buf_hbd(xd)) {
    const uint16_t *pred16 = CONVERT_TO_SHORTPTR(pred);
    for (int y = 0; y < ctxt->overlap; ++y) {
      const uint8_t mv = mask1d[y];                      // weight of this block
      const uint8_t cv = AOM_BLEND_A64_MAX_ALPHA - mv;   // weight of neighbor
      for (int c = 0; c < ncols; ++c) {
        wsrc[c] = cv * pred16[c];
        mask[c] = mv;
      }
      wsrc += bw;
      mask += bw;
      pred16 += ctxt->tmp_stride;
    }
  } else {
    for (int y = 0; y < ctxt->overlap; ++y) {
      const uint8_t mv = mask1d[y];                      // weight of this block
      const uint8_t cv = AOM_BLEND_A64_MAX_ALPHA - mv;   // weight of neighbor
      for (int c = 0; c < ncols; ++c) {
        wsrc[c] = cv * pred[c];
        mask[c] = mv;
      }
      wsrc += bw;
      mask += bw;
      pred += ctxt->tmp_stride;
    }
  }
}
6440
// Visitor for foreach_overlappable_nb_left(): folds the left neighbor's
// prediction into the wsrc/mask accumulators using the horizontal OBMC
// mask.  On entry wsrc/mask already carry the above-neighbor contribution
// scaled up by AOM_BLEND_A64_MAX_ALPHA.
static inline void calc_target_weighted_pred_left(
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
  (void)nb_mi;
  (void)num_planes;
  (void)rel_mi_col;
  (void)dir;

  struct calc_target_weighted_pred_ctxt *const ctxt =
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;

  const int bw = xd->width << MI_SIZE_LOG2;
  const int nrows = op_mi_size * MI_SIZE;
  const int row_off = rel_mi_row * MI_SIZE;
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);

  int32_t *wsrc = ctxt->obmc_buffer->wsrc + row_off * bw;
  int32_t *mask = ctxt->obmc_buffer->mask + row_off * bw;
  const uint8_t *pred = ctxt->tmp + row_off * ctxt->tmp_stride;

  if (is_cur_buf_hbd(xd)) {
    const uint16_t *pred16 = CONVERT_TO_SHORTPTR(pred);
    for (int r = 0; r < nrows; ++r) {
      for (int c = 0; c < ctxt->overlap; ++c) {
        const uint8_t mh = mask1d[c];                    // weight kept by wsrc
        const uint8_t ch = AOM_BLEND_A64_MAX_ALPHA - mh; // weight of neighbor
        wsrc[c] = (wsrc[c] >> AOM_BLEND_A64_ROUND_BITS) * mh +
                  (pred16[c] << AOM_BLEND_A64_ROUND_BITS) * ch;
        mask[c] = (mask[c] >> AOM_BLEND_A64_ROUND_BITS) * mh;
      }
      wsrc += bw;
      mask += bw;
      pred16 += ctxt->tmp_stride;
    }
  } else {
    for (int r = 0; r < nrows; ++r) {
      for (int c = 0; c < ctxt->overlap; ++c) {
        const uint8_t mh = mask1d[c];                    // weight kept by wsrc
        const uint8_t ch = AOM_BLEND_A64_MAX_ALPHA - mh; // weight of neighbor
        wsrc[c] = (wsrc[c] >> AOM_BLEND_A64_ROUND_BITS) * mh +
                  (pred[c] << AOM_BLEND_A64_ROUND_BITS) * ch;
        mask[c] = (mask[c] >> AOM_BLEND_A64_ROUND_BITS) * mh;
      }
      wsrc += bw;
      mask += bw;
      pred += ctxt->tmp_stride;
    }
  }
}
6490
6491 // This function has a structure similar to av1_build_obmc_inter_prediction
6492 //
6493 // The OBMC predictor is computed as:
6494 //
6495 // PObmc(x,y) =
6496 // AOM_BLEND_A64(Mh(x),
6497 // AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
6498 // PLeft(x, y))
6499 //
6500 // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
6501 // rounding, this can be written as:
6502 //
6503 // AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
6504 // Mh(x) * Mv(y) * P(x,y) +
6505 // Mh(x) * Cv(y) * Pabove(x,y) +
6506 // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6507 //
6508 // Where :
6509 //
6510 // Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
//  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
6512 //
6513 // This function computes 'wsrc' and 'mask' as:
6514 //
// wsrc(x, y) =
//   AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
//   Mh(x) * Cv(y) * Pabove(x,y) -
//   AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6519 //
6520 // mask(x, y) = Mh(x) * Mv(y)
6521 //
6522 // These can then be used to efficiently approximate the error for any
6523 // predictor P in the context of the provided neighbouring predictors by
6524 // computing:
6525 //
6526 // error(x, y) =
6527 // wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
6528 //
calc_target_weighted_pred(const AV1_COMMON * cm,const MACROBLOCK * x,const MACROBLOCKD * xd,const uint8_t * above,int above_stride,const uint8_t * left,int left_stride)6529 static inline void calc_target_weighted_pred(
6530 const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
6531 const uint8_t *above, int above_stride, const uint8_t *left,
6532 int left_stride) {
6533 const BLOCK_SIZE bsize = xd->mi[0]->bsize;
6534 const int bw = xd->width << MI_SIZE_LOG2;
6535 const int bh = xd->height << MI_SIZE_LOG2;
6536 const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
6537 int32_t *mask_buf = obmc_buffer->mask;
6538 int32_t *wsrc_buf = obmc_buffer->wsrc;
6539
6540 const int is_hbd = is_cur_buf_hbd(xd);
6541 const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
6542
6543 // plane 0 should not be sub-sampled
6544 assert(xd->plane[0].subsampling_x == 0);
6545 assert(xd->plane[0].subsampling_y == 0);
6546
6547 av1_zero_array(wsrc_buf, bw * bh);
6548 for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
6549
6550 // handle above row
6551 if (xd->up_available) {
6552 const int overlap =
6553 AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
6554 struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
6555 above_stride, overlap };
6556 foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
6557 max_neighbor_obmc[mi_size_wide_log2[bsize]],
6558 calc_target_weighted_pred_above, &ctxt);
6559 }
6560
6561 for (int i = 0; i < bw * bh; ++i) {
6562 wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6563 mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6564 }
6565
6566 // handle left column
6567 if (xd->left_available) {
6568 const int overlap =
6569 AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
6570 struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
6571 left_stride, overlap };
6572 foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
6573 max_neighbor_obmc[mi_size_high_log2[bsize]],
6574 calc_target_weighted_pred_left, &ctxt);
6575 }
6576
6577 if (!is_hbd) {
6578 const uint8_t *src = x->plane[0].src.buf;
6579
6580 for (int row = 0; row < bh; ++row) {
6581 for (int col = 0; col < bw; ++col) {
6582 wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6583 }
6584 wsrc_buf += bw;
6585 src += x->plane[0].src.stride;
6586 }
6587 } else {
6588 const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
6589
6590 for (int row = 0; row < bh; ++row) {
6591 for (int col = 0; col < bw; ++col) {
6592 wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6593 }
6594 wsrc_buf += bw;
6595 src += x->plane[0].src.stride;
6596 }
6597 }
6598 }
6599