xref: /aosp_15_r20/external/libaom/av1/encoder/rdopt.c (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <math.h>
14 #include <stdbool.h>
15 
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/av1_rtcd.h"
19 
20 #include "aom_dsp/aom_dsp_common.h"
21 #include "aom_dsp/blend.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25 
26 #include "av1/common/av1_common_int.h"
27 #include "av1/common/cfl.h"
28 #include "av1/common/blockd.h"
29 #include "av1/common/common.h"
30 #include "av1/common/common_data.h"
31 #include "av1/common/entropy.h"
32 #include "av1/common/entropymode.h"
33 #include "av1/common/idct.h"
34 #include "av1/common/mvref_common.h"
35 #include "av1/common/obmc.h"
36 #include "av1/common/pred_common.h"
37 #include "av1/common/quant_common.h"
38 #include "av1/common/reconinter.h"
39 #include "av1/common/reconintra.h"
40 #include "av1/common/scan.h"
41 #include "av1/common/seg_common.h"
42 #include "av1/common/txb_common.h"
43 #include "av1/common/warped_motion.h"
44 
45 #include "av1/encoder/aq_variance.h"
46 #include "av1/encoder/av1_quantize.h"
47 #include "av1/encoder/cost.h"
48 #include "av1/encoder/compound_type.h"
49 #include "av1/encoder/encodemb.h"
50 #include "av1/encoder/encodemv.h"
51 #include "av1/encoder/encoder.h"
52 #include "av1/encoder/encodetxb.h"
53 #include "av1/encoder/hybrid_fwd_txfm.h"
54 #include "av1/encoder/interp_search.h"
55 #include "av1/encoder/intra_mode_search.h"
56 #include "av1/encoder/intra_mode_search_utils.h"
57 #include "av1/encoder/mcomp.h"
58 #include "av1/encoder/ml.h"
59 #include "av1/encoder/mode_prune_model_weights.h"
60 #include "av1/encoder/model_rd.h"
61 #include "av1/encoder/motion_search_facade.h"
62 #include "av1/encoder/palette.h"
63 #include "av1/encoder/pustats.h"
64 #include "av1/encoder/random.h"
65 #include "av1/encoder/ratectrl.h"
66 #include "av1/encoder/rd.h"
67 #include "av1/encoder/rdopt.h"
68 #include "av1/encoder/reconinter_enc.h"
69 #include "av1/encoder/tokenize.h"
70 #include "av1/encoder/tpl_model.h"
71 #include "av1/encoder/tx_search.h"
72 #include "av1/encoder/var_based_part.h"
73 
74 #define LAST_NEW_MV_INDEX 6
75 
76 // Mode_threshold multiplication factor table for prune_inter_modes_if_skippable
77 // The values are kept in Q12 format and the equation used to derive them is
78 // (2.5 - ((float)x->qindex / MAXQ) * 1.5)
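// For example, at qindex = 0 the factor is 2.5 in Q12 (2.5 * 4096 = 10240,
// the first table entry), and at qindex = MAXQ it is 1.0 * 4096 = 4096 (the
// last entry).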
79 #define MODE_THRESH_QBITS 12
80 static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
81   10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
82   9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
83   9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
84   9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
85   9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
86   8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
87   8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
88   8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
89   8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
90   7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
91   7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
92   7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
93   7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
94   6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
95   6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
96   6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
97   5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
98   5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
99   5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
100   5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
101   4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
102   4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
103   4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
104   4144,  4120,  4096
105 };
106 
107 static const THR_MODES av1_default_mode_order[MAX_MODES] = {
108   THR_NEARESTMV,
109   THR_NEARESTL2,
110   THR_NEARESTL3,
111   THR_NEARESTB,
112   THR_NEARESTA2,
113   THR_NEARESTA,
114   THR_NEARESTG,
115 
116   THR_NEWMV,
117   THR_NEWL2,
118   THR_NEWL3,
119   THR_NEWB,
120   THR_NEWA2,
121   THR_NEWA,
122   THR_NEWG,
123 
124   THR_NEARMV,
125   THR_NEARL2,
126   THR_NEARL3,
127   THR_NEARB,
128   THR_NEARA2,
129   THR_NEARA,
130   THR_NEARG,
131 
132   THR_GLOBALMV,
133   THR_GLOBALL2,
134   THR_GLOBALL3,
135   THR_GLOBALB,
136   THR_GLOBALA2,
137   THR_GLOBALA,
138   THR_GLOBALG,
139 
140   THR_COMP_NEAREST_NEARESTLA,
141   THR_COMP_NEAREST_NEARESTL2A,
142   THR_COMP_NEAREST_NEARESTL3A,
143   THR_COMP_NEAREST_NEARESTGA,
144   THR_COMP_NEAREST_NEARESTLB,
145   THR_COMP_NEAREST_NEARESTL2B,
146   THR_COMP_NEAREST_NEARESTL3B,
147   THR_COMP_NEAREST_NEARESTGB,
148   THR_COMP_NEAREST_NEARESTLA2,
149   THR_COMP_NEAREST_NEARESTL2A2,
150   THR_COMP_NEAREST_NEARESTL3A2,
151   THR_COMP_NEAREST_NEARESTGA2,
152   THR_COMP_NEAREST_NEARESTLL2,
153   THR_COMP_NEAREST_NEARESTLL3,
154   THR_COMP_NEAREST_NEARESTLG,
155   THR_COMP_NEAREST_NEARESTBA,
156 
157   THR_COMP_NEAR_NEARLB,
158   THR_COMP_NEW_NEWLB,
159   THR_COMP_NEW_NEARESTLB,
160   THR_COMP_NEAREST_NEWLB,
161   THR_COMP_NEW_NEARLB,
162   THR_COMP_NEAR_NEWLB,
163   THR_COMP_GLOBAL_GLOBALLB,
164 
165   THR_COMP_NEAR_NEARLA,
166   THR_COMP_NEW_NEWLA,
167   THR_COMP_NEW_NEARESTLA,
168   THR_COMP_NEAREST_NEWLA,
169   THR_COMP_NEW_NEARLA,
170   THR_COMP_NEAR_NEWLA,
171   THR_COMP_GLOBAL_GLOBALLA,
172 
173   THR_COMP_NEAR_NEARL2A,
174   THR_COMP_NEW_NEWL2A,
175   THR_COMP_NEW_NEARESTL2A,
176   THR_COMP_NEAREST_NEWL2A,
177   THR_COMP_NEW_NEARL2A,
178   THR_COMP_NEAR_NEWL2A,
179   THR_COMP_GLOBAL_GLOBALL2A,
180 
181   THR_COMP_NEAR_NEARL3A,
182   THR_COMP_NEW_NEWL3A,
183   THR_COMP_NEW_NEARESTL3A,
184   THR_COMP_NEAREST_NEWL3A,
185   THR_COMP_NEW_NEARL3A,
186   THR_COMP_NEAR_NEWL3A,
187   THR_COMP_GLOBAL_GLOBALL3A,
188 
189   THR_COMP_NEAR_NEARGA,
190   THR_COMP_NEW_NEWGA,
191   THR_COMP_NEW_NEARESTGA,
192   THR_COMP_NEAREST_NEWGA,
193   THR_COMP_NEW_NEARGA,
194   THR_COMP_NEAR_NEWGA,
195   THR_COMP_GLOBAL_GLOBALGA,
196 
197   THR_COMP_NEAR_NEARL2B,
198   THR_COMP_NEW_NEWL2B,
199   THR_COMP_NEW_NEARESTL2B,
200   THR_COMP_NEAREST_NEWL2B,
201   THR_COMP_NEW_NEARL2B,
202   THR_COMP_NEAR_NEWL2B,
203   THR_COMP_GLOBAL_GLOBALL2B,
204 
205   THR_COMP_NEAR_NEARL3B,
206   THR_COMP_NEW_NEWL3B,
207   THR_COMP_NEW_NEARESTL3B,
208   THR_COMP_NEAREST_NEWL3B,
209   THR_COMP_NEW_NEARL3B,
210   THR_COMP_NEAR_NEWL3B,
211   THR_COMP_GLOBAL_GLOBALL3B,
212 
213   THR_COMP_NEAR_NEARGB,
214   THR_COMP_NEW_NEWGB,
215   THR_COMP_NEW_NEARESTGB,
216   THR_COMP_NEAREST_NEWGB,
217   THR_COMP_NEW_NEARGB,
218   THR_COMP_NEAR_NEWGB,
219   THR_COMP_GLOBAL_GLOBALGB,
220 
221   THR_COMP_NEAR_NEARLA2,
222   THR_COMP_NEW_NEWLA2,
223   THR_COMP_NEW_NEARESTLA2,
224   THR_COMP_NEAREST_NEWLA2,
225   THR_COMP_NEW_NEARLA2,
226   THR_COMP_NEAR_NEWLA2,
227   THR_COMP_GLOBAL_GLOBALLA2,
228 
229   THR_COMP_NEAR_NEARL2A2,
230   THR_COMP_NEW_NEWL2A2,
231   THR_COMP_NEW_NEARESTL2A2,
232   THR_COMP_NEAREST_NEWL2A2,
233   THR_COMP_NEW_NEARL2A2,
234   THR_COMP_NEAR_NEWL2A2,
235   THR_COMP_GLOBAL_GLOBALL2A2,
236 
237   THR_COMP_NEAR_NEARL3A2,
238   THR_COMP_NEW_NEWL3A2,
239   THR_COMP_NEW_NEARESTL3A2,
240   THR_COMP_NEAREST_NEWL3A2,
241   THR_COMP_NEW_NEARL3A2,
242   THR_COMP_NEAR_NEWL3A2,
243   THR_COMP_GLOBAL_GLOBALL3A2,
244 
245   THR_COMP_NEAR_NEARGA2,
246   THR_COMP_NEW_NEWGA2,
247   THR_COMP_NEW_NEARESTGA2,
248   THR_COMP_NEAREST_NEWGA2,
249   THR_COMP_NEW_NEARGA2,
250   THR_COMP_NEAR_NEWGA2,
251   THR_COMP_GLOBAL_GLOBALGA2,
252 
253   THR_COMP_NEAR_NEARLL2,
254   THR_COMP_NEW_NEWLL2,
255   THR_COMP_NEW_NEARESTLL2,
256   THR_COMP_NEAREST_NEWLL2,
257   THR_COMP_NEW_NEARLL2,
258   THR_COMP_NEAR_NEWLL2,
259   THR_COMP_GLOBAL_GLOBALLL2,
260 
261   THR_COMP_NEAR_NEARLL3,
262   THR_COMP_NEW_NEWLL3,
263   THR_COMP_NEW_NEARESTLL3,
264   THR_COMP_NEAREST_NEWLL3,
265   THR_COMP_NEW_NEARLL3,
266   THR_COMP_NEAR_NEWLL3,
267   THR_COMP_GLOBAL_GLOBALLL3,
268 
269   THR_COMP_NEAR_NEARLG,
270   THR_COMP_NEW_NEWLG,
271   THR_COMP_NEW_NEARESTLG,
272   THR_COMP_NEAREST_NEWLG,
273   THR_COMP_NEW_NEARLG,
274   THR_COMP_NEAR_NEWLG,
275   THR_COMP_GLOBAL_GLOBALLG,
276 
277   THR_COMP_NEAR_NEARBA,
278   THR_COMP_NEW_NEWBA,
279   THR_COMP_NEW_NEARESTBA,
280   THR_COMP_NEAREST_NEWBA,
281   THR_COMP_NEW_NEARBA,
282   THR_COMP_NEAR_NEWBA,
283   THR_COMP_GLOBAL_GLOBALBA,
284 
285   THR_DC,
286   THR_PAETH,
287   THR_SMOOTH,
288   THR_SMOOTH_V,
289   THR_SMOOTH_H,
290   THR_H_PRED,
291   THR_V_PRED,
292   THR_D135_PRED,
293   THR_D203_PRED,
294   THR_D157_PRED,
295   THR_D67_PRED,
296   THR_D113_PRED,
297   THR_D45_PRED,
298 };
299 
300 /*!\cond */
301 typedef struct SingleInterModeState {
302   int64_t rd;
303   MV_REFERENCE_FRAME ref_frame;
304   int valid;
305 } SingleInterModeState;
306 
307 typedef struct InterModeSearchState {
308   int64_t best_rd;
309   int64_t best_skip_rd[2];
310   MB_MODE_INFO best_mbmode;
311   int best_rate_y;
312   int best_rate_uv;
313   int best_mode_skippable;
314   int best_skip2;
315   THR_MODES best_mode_index;
316   int num_available_refs;
317   int64_t dist_refs[REF_FRAMES];
318   int dist_order_refs[REF_FRAMES];
319   int64_t mode_threshold[MAX_MODES];
320   int64_t best_intra_rd;
321   unsigned int best_pred_sse;
322 
323   /*!
324    * \brief Keep track of best intra rd for use in compound mode.
325    */
326   int64_t best_pred_rd[REFERENCE_MODES];
327   // Save a set of single_newmv for each checked ref_mv.
328   int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
329   int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
330   int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
331   int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
332   // The rd of simple translation in single inter modes
333   int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
334   int64_t best_single_rd[REF_FRAMES];
335   PREDICTION_MODE best_single_mode[REF_FRAMES];
336 
337   // Single search results by [directions][modes][reference frames]
338   SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
339   int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
340   SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
341                                             [FWD_REFS];
342   int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
343   MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
344   IntraModeSearchState intra_search_state;
345   RD_STATS best_y_rdcost;
346 } InterModeSearchState;
347 /*!\endcond */
348 
349 void av1_inter_mode_data_init(TileDataEnc *tile_data) {
350   for (int i = 0; i < BLOCK_SIZES_ALL; ++i) {
351     InterModeRdModel *md = &tile_data->inter_mode_rd_models[i];
352     md->ready = 0;
353     md->num = 0;
354     md->dist_sum = 0;
355     md->ld_sum = 0;
356     md->sse_sum = 0;
357     md->sse_sse_sum = 0;
358     md->sse_ld_sum = 0;
359   }
360 }
361 
362 static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
363                              int64_t sse, int *est_residue_cost,
364                              int64_t *est_dist) {
365   const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
366   if (md->ready) {
367     if (sse < md->dist_mean) {
368       *est_residue_cost = 0;
369       *est_dist = sse;
370     } else {
371       *est_dist = (int64_t)round(md->dist_mean);
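      // ld was collected as (sse - dist) / residue_cost (see
      // inter_mode_data_push() below), so with dist approximated by dist_mean
      // and ld by the fitted line a * sse + b, the residue cost estimate is
      // (sse - dist_mean) / est_ld.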
372       const double est_ld = md->a * sse + md->b;
373       // Clamp estimated rate cost by INT_MAX / 2.
374       // TODO([email protected]): find better solution than clamping.
375       if (fabs(est_ld) < 1e-2) {
376         *est_residue_cost = INT_MAX / 2;
377       } else {
378         double est_residue_cost_dbl = ((sse - md->dist_mean) / est_ld);
379         if (est_residue_cost_dbl < 0) {
380           *est_residue_cost = 0;
381         } else {
382           *est_residue_cost =
383               (int)AOMMIN((int64_t)round(est_residue_cost_dbl), INT_MAX / 2);
384         }
385       }
386       if (*est_residue_cost <= 0) {
387         *est_residue_cost = 0;
388         *est_dist = sse;
389       }
390     }
391     return 1;
392   }
393   return 0;
394 }
395 
396 void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
397   for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
398     const int block_idx = inter_mode_data_block_idx(bsize);
399     InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
400     if (block_idx == -1) continue;
401     if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64)) {
402       continue;
403     } else {
404       if (md->ready == 0) {
405         md->dist_mean = md->dist_sum / md->num;
406         md->ld_mean = md->ld_sum / md->num;
407         md->sse_mean = md->sse_sum / md->num;
408         md->sse_sse_mean = md->sse_sse_sum / md->num;
409         md->sse_ld_mean = md->sse_ld_sum / md->num;
410       } else {
411         const double factor = 3;
412         md->dist_mean =
413             (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
414         md->ld_mean =
415             (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
416         md->sse_mean =
417             (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
418         md->sse_sse_mean =
419             (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
420             (factor + 1);
421         md->sse_ld_mean =
422             (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
423             (factor + 1);
424       }
425 
426       const double my = md->ld_mean;
427       const double mx = md->sse_mean;
428       const double dx = sqrt(md->sse_sse_mean);
429       const double dxy = md->sse_ld_mean;
430 
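      // Ordinary least-squares fit of ld against sse:
      //   slope a = Cov(sse, ld) / Var(sse)
      //   intercept b = mean(ld) - a * mean(sse)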
431       md->a = (dxy - mx * my) / (dx * dx - mx * mx);
432       md->b = my - md->a * mx;
433       md->ready = 1;
434 
435       md->num = 0;
436       md->dist_sum = 0;
437       md->ld_sum = 0;
438       md->sse_sum = 0;
439       md->sse_sse_sum = 0;
440       md->sse_ld_sum = 0;
441     }
442     (void)rdmult;
443   }
444 }
445 
446 static inline void inter_mode_data_push(TileDataEnc *tile_data,
447                                         BLOCK_SIZE bsize, int64_t sse,
448                                         int64_t dist, int residue_cost) {
449   if (residue_cost == 0 || sse == dist) return;
450   const int block_idx = inter_mode_data_block_idx(bsize);
451   if (block_idx == -1) return;
452   InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
453   if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
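    // ld is the distortion reduction achieved per bit of residue cost; the
    // running sums below feed the linear fit in av1_inter_mode_data_fit().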
454     const double ld = (sse - dist) * 1. / residue_cost;
455     ++rd_model->num;
456     rd_model->dist_sum += dist;
457     rd_model->ld_sum += ld;
458     rd_model->sse_sum += sse;
459     rd_model->sse_sse_sum += (double)sse * (double)sse;
460     rd_model->sse_ld_sum += sse * ld;
461   }
462 }
463 
464 static inline void inter_modes_info_push(InterModesInfo *inter_modes_info,
465                                          int mode_rate, int64_t sse, int64_t rd,
466                                          RD_STATS *rd_cost, RD_STATS *rd_cost_y,
467                                          RD_STATS *rd_cost_uv,
468                                          const MB_MODE_INFO *mbmi) {
469   const int num = inter_modes_info->num;
470   assert(num < MAX_INTER_MODES);
471   inter_modes_info->mbmi_arr[num] = *mbmi;
472   inter_modes_info->mode_rate_arr[num] = mode_rate;
473   inter_modes_info->sse_arr[num] = sse;
474   inter_modes_info->est_rd_arr[num] = rd;
475   inter_modes_info->rd_cost_arr[num] = *rd_cost;
476   inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
477   inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
478   ++inter_modes_info->num;
479 }
480 
481 static int compare_rd_idx_pair(const void *a, const void *b) {
482   if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
483     // To avoid inconsistency in qsort() ordering when two elements are equal,
484     // use idx as a tie breaker. Refer to aomedia:2928.
485     if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
486       return 0;
487     else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
488       return 1;
489     else
490       return -1;
491   } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
492     return 1;
493   } else {
494     return -1;
495   }
496 }
497 
498 static inline void inter_modes_info_sort(const InterModesInfo *inter_modes_info,
499                                          RdIdxPair *rd_idx_pair_arr) {
500   if (inter_modes_info->num == 0) {
501     return;
502   }
503   for (int i = 0; i < inter_modes_info->num; ++i) {
504     rd_idx_pair_arr[i].idx = i;
505     rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
506   }
507   qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
508         compare_rd_idx_pair);
509 }
510 
511 // Similar to get_horver_correlation, but also takes into account the first
512 // row/column when computing horizontal/vertical correlation.
513 void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
514                                        int width, int height, float *hcorr,
515                                        float *vcorr) {
516   // The following notation is used:
517   // x - current pixel
518   // y - left neighbor pixel
519   // z - top neighbor pixel
520   int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
521   int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
522   int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
523 
524   // First, process horizontal correlation on just the first row
525   x_sum += diff[0];
526   x2_sum += diff[0] * diff[0];
527   x_firstrow += diff[0];
528   x2_firstrow += diff[0] * diff[0];
529   for (int j = 1; j < width; ++j) {
530     const int16_t x = diff[j];
531     const int16_t y = diff[j - 1];
532     x_sum += x;
533     x_firstrow += x;
534     x2_sum += x * x;
535     x2_firstrow += x * x;
536     xy_sum += x * y;
537   }
538 
539   // Process vertical correlation in the first column
540   x_firstcol += diff[0];
541   x2_firstcol += diff[0] * diff[0];
542   for (int i = 1; i < height; ++i) {
543     const int16_t x = diff[i * stride];
544     const int16_t z = diff[(i - 1) * stride];
545     x_sum += x;
546     x_firstcol += x;
547     x2_sum += x * x;
548     x2_firstcol += x * x;
549     xz_sum += x * z;
550   }
551 
552   // Process horizontal and vertical correlation over the rest of the unit
553   for (int i = 1; i < height; ++i) {
554     for (int j = 1; j < width; ++j) {
555       const int16_t x = diff[i * stride + j];
556       const int16_t y = diff[i * stride + j - 1];
557       const int16_t z = diff[(i - 1) * stride + j];
558       x_sum += x;
559       x2_sum += x * x;
560       xy_sum += x * y;
561       xz_sum += x * z;
562     }
563   }
564 
565   for (int j = 0; j < width; ++j) {
566     x_finalrow += diff[(height - 1) * stride + j];
567     x2_finalrow +=
568         diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
569   }
570   for (int i = 0; i < height; ++i) {
571     x_finalcol += diff[i * stride + width - 1];
572     x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
573   }
574 
575   int64_t xhor_sum = x_sum - x_finalcol;
576   int64_t xver_sum = x_sum - x_finalrow;
577   int64_t y_sum = x_sum - x_firstcol;
578   int64_t z_sum = x_sum - x_firstrow;
579   int64_t x2hor_sum = x2_sum - x2_finalcol;
580   int64_t x2ver_sum = x2_sum - x2_finalrow;
581   int64_t y2_sum = x2_sum - x2_firstcol;
582   int64_t z2_sum = x2_sum - x2_firstrow;
583 
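  // hcorr and vcorr are normalized (Pearson-style) correlations of each pixel
  // with its left and top neighbour respectively; degenerate variances fall
  // back to 1.0 and negative correlations are clamped to 0 below.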
584   const float num_hor = (float)(height * (width - 1));
585   const float num_ver = (float)((height - 1) * width);
586 
587   const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
588   const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
589 
590   const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
591   const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
592 
593   const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
594   const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
595 
596   if (xhor_var_n > 0 && y_var_n > 0) {
597     *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
598     *hcorr = *hcorr < 0 ? 0 : *hcorr;
599   } else {
600     *hcorr = 1.0;
601   }
602   if (xver_var_n > 0 && z_var_n > 0) {
603     *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
604     *vcorr = *vcorr < 0 ? 0 : *vcorr;
605   } else {
606     *vcorr = 1.0;
607   }
608 }
609 
610 static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
611                        int64_t *sse_y) {
612   const AV1_COMMON *cm = &cpi->common;
613   const int num_planes = av1_num_planes(cm);
614   const MACROBLOCKD *xd = &x->e_mbd;
615   const MB_MODE_INFO *mbmi = xd->mi[0];
616   int64_t total_sse = 0;
617   for (int plane = 0; plane < num_planes; ++plane) {
618     if (plane && !xd->is_chroma_ref) break;
619     const struct macroblock_plane *const p = &x->plane[plane];
620     const struct macroblockd_plane *const pd = &xd->plane[plane];
621     const BLOCK_SIZE bs =
622         get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
623     unsigned int sse;
624 
625     cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
626                             pd->dst.stride, &sse);
627     total_sse += sse;
628     if (!plane && sse_y) *sse_y = sse;
629   }
630   total_sse <<= 4;
631   return total_sse;
632 }
633 
634 int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
635                           intptr_t block_size, int64_t *ssz) {
636   int i;
637   int64_t error = 0, sqcoeff = 0;
638 
639   for (i = 0; i < block_size; i++) {
640     const int diff = coeff[i] - dqcoeff[i];
641     error += diff * diff;
642     sqcoeff += coeff[i] * coeff[i];
643   }
644 
645   *ssz = sqcoeff;
646   return error;
647 }
648 
649 int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
650                              intptr_t block_size) {
651   int64_t error = 0;
652 
653   for (int i = 0; i < block_size; i++) {
654     const int diff = coeff[i] - dqcoeff[i];
655     error += diff * diff;
656   }
657 
658   return error;
659 }
660 
661 #if CONFIG_AV1_HIGHBITDEPTH
662 int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
663                                  const tran_low_t *dqcoeff, intptr_t block_size,
664                                  int64_t *ssz, int bd) {
665   int i;
666   int64_t error = 0, sqcoeff = 0;
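  // The squared errors are accumulated at bit depth bd and then rescaled by
  // 2 * (bd - 8) bits (with rounding) so the result is on the same scale as
  // the 8-bit av1_block_error_c().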
667   int shift = 2 * (bd - 8);
668   int rounding = shift > 0 ? 1 << (shift - 1) : 0;
669 
670   for (i = 0; i < block_size; i++) {
671     const int64_t diff = coeff[i] - dqcoeff[i];
672     error += diff * diff;
673     sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
674   }
675   assert(error >= 0 && sqcoeff >= 0);
676   error = (error + rounding) >> shift;
677   sqcoeff = (sqcoeff + rounding) >> shift;
678 
679   *ssz = sqcoeff;
680   return error;
681 }
682 #endif
683 
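// Prune a finer-grained directional mode unless the best intra mode found so
// far is one of its two adjacent prediction directions (e.g. D113_PRED is
// only kept when V_PRED or D135_PRED was best).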
684 static int conditional_skipintra(PREDICTION_MODE mode,
685                                  PREDICTION_MODE best_intra_mode) {
686   if (mode == D113_PRED && best_intra_mode != V_PRED &&
687       best_intra_mode != D135_PRED)
688     return 1;
689   if (mode == D67_PRED && best_intra_mode != V_PRED &&
690       best_intra_mode != D45_PRED)
691     return 1;
692   if (mode == D203_PRED && best_intra_mode != H_PRED &&
693       best_intra_mode != D45_PRED)
694     return 1;
695   if (mode == D157_PRED && best_intra_mode != H_PRED &&
696       best_intra_mode != D135_PRED)
697     return 1;
698   return 0;
699 }
700 
701 static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
702                        int16_t mode_context) {
703   if (is_inter_compound_mode(mode)) {
704     return mode_costs
705         ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
706   }
707 
708   int mode_cost = 0;
709   int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
710 
711   assert(is_inter_mode(mode));
712 
713   if (mode == NEWMV) {
714     mode_cost = mode_costs->newmv_mode_cost[mode_ctx][0];
715     return mode_cost;
716   } else {
717     mode_cost = mode_costs->newmv_mode_cost[mode_ctx][1];
718     mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
719 
720     if (mode == GLOBALMV) {
721       mode_cost += mode_costs->zeromv_mode_cost[mode_ctx][0];
722       return mode_cost;
723     } else {
724       mode_cost += mode_costs->zeromv_mode_cost[mode_ctx][1];
725       mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
726       mode_cost += mode_costs->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
727       return mode_cost;
728     }
729   }
730 }
731 
732 static inline PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
733                                               int ref_idx) {
734   return ref_idx ? compound_ref1_mode(this_mode)
735                  : compound_ref0_mode(this_mode);
736 }
737 
738 static inline void estimate_ref_frame_costs(
739     const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
740     int segment_id, unsigned int *ref_costs_single,
741     unsigned int (*ref_costs_comp)[REF_FRAMES]) {
742   int seg_ref_active =
743       segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
744   if (seg_ref_active) {
745     memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
746     int ref_frame;
747     for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
748       memset(ref_costs_comp[ref_frame], 0,
749              REF_FRAMES * sizeof((*ref_costs_comp)[0]));
750   } else {
751     int intra_inter_ctx = av1_get_intra_inter_context(xd);
752     ref_costs_single[INTRA_FRAME] =
753         mode_costs->intra_inter_cost[intra_inter_ctx][0];
754     unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];
755 
756     for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
757       ref_costs_single[i] = base_cost;
758 
759     const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
760     const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
761     const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
762     const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
763     const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
764     const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);
765 
766     // Determine cost of a single ref frame, where frame types are represented
767     // by a tree:
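    //                        single ref
    //                       /          \
    //        fwd (LAST..GOLDEN)        bwd (BWDREF..ALTREF)
    //          /           \              /          \
    //    LAST/LAST2   LAST3/GOLDEN   BWDREF/ALTREF2   ALTREF
    //     /     \       /     \        /       \
    //   LAST   LAST2  LAST3  GOLDEN  BWDREF  ALTREF2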
768     // Level 0: add cost whether this ref is a forward or backward ref
769     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
770     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
771     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
772     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
773     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
774     ref_costs_single[ALTREF2_FRAME] +=
775         mode_costs->single_ref_cost[ctx_p1][0][1];
776     ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
777 
778     // Level 1: if this ref is forward ref,
779     // add cost whether it is last/last2 or last3/golden
780     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
781     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
782     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
783     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
784 
785     // Level 1: if this ref is backward ref
786     // then add cost whether this ref is altref or backward ref
787     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
788     ref_costs_single[ALTREF2_FRAME] +=
789         mode_costs->single_ref_cost[ctx_p2][1][0];
790     ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];
791 
792     // Level 2: further add cost whether this ref is last or last2
793     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
794     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];
795 
796     // Level 2: last3 or golden
797     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
798     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];
799 
800     // Level 2: bwdref or altref2
801     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
802     ref_costs_single[ALTREF2_FRAME] +=
803         mode_costs->single_ref_cost[ctx_p6][5][1];
804 
805     if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
806       // Similar to single ref, determine cost of compound ref frames.
807       // cost_compound_refs = cost_first_ref + cost_second_ref
808       const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
809       const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
810       const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
811       const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
812       const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);
813 
814       const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
815       unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };
816 
817       ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
818           ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
819               base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
820       ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
821       ref_bicomp_costs[ALTREF_FRAME] = 0;
822 
823       // cost of first ref frame
824       ref_bicomp_costs[LAST_FRAME] +=
825           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
826       ref_bicomp_costs[LAST2_FRAME] +=
827           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
828       ref_bicomp_costs[LAST3_FRAME] +=
829           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
830       ref_bicomp_costs[GOLDEN_FRAME] +=
831           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
832 
833       ref_bicomp_costs[LAST_FRAME] +=
834           mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
835       ref_bicomp_costs[LAST2_FRAME] +=
836           mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];
837 
838       ref_bicomp_costs[LAST3_FRAME] +=
839           mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
840       ref_bicomp_costs[GOLDEN_FRAME] +=
841           mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];
842 
843       // cost of second ref frame
844       ref_bicomp_costs[BWDREF_FRAME] +=
845           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
846       ref_bicomp_costs[ALTREF2_FRAME] +=
847           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
848       ref_bicomp_costs[ALTREF_FRAME] +=
849           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
850 
851       ref_bicomp_costs[BWDREF_FRAME] +=
852           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
853       ref_bicomp_costs[ALTREF2_FRAME] +=
854           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
855 
856       // cost: if one ref frame is forward ref, the other ref is backward ref
857       int ref0, ref1;
858       for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
859         for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
860           ref_costs_comp[ref0][ref1] =
861               ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
862         }
863       }
864 
865       // cost: if both ref frames are the same side.
866       const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
867       const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
868       const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
869       ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
870           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
871           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
872           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
873       ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
874           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
875           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
876           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
877           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
878       ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
879           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
880           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
881           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
882           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
883       ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
884           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
885           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
886     } else {
887       int ref0, ref1;
888       for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
889         for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
890           ref_costs_comp[ref0][ref1] = 512;
891       }
892       ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
893       ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
894       ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
895       ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
896     }
897   }
898 }
899 
900 static inline void store_coding_context(
901 #if CONFIG_INTERNAL_STATS
902     MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
903 #else
904     MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
905 #endif  // CONFIG_INTERNAL_STATS
906     int skippable) {
907   MACROBLOCKD *const xd = &x->e_mbd;
908 
909   // Take a snapshot of the coding context so it can be
910   // restored if we decide to encode this way
911   ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
912   ctx->skippable = skippable;
913 #if CONFIG_INTERNAL_STATS
914   ctx->best_mode_index = mode_index;
915 #endif  // CONFIG_INTERNAL_STATS
916   ctx->mic = *xd->mi[0];
917   av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
918                                       av1_ref_frame_type(xd->mi[0]->ref_frame));
919 }
920 
921 static inline void setup_buffer_ref_mvs_inter(
922     const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
923     BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
924   const AV1_COMMON *cm = &cpi->common;
925   const int num_planes = av1_num_planes(cm);
926   const YV12_BUFFER_CONFIG *scaled_ref_frame =
927       av1_get_scaled_ref_frame(cpi, ref_frame);
928   MACROBLOCKD *const xd = &x->e_mbd;
929   MB_MODE_INFO *const mbmi = xd->mi[0];
930   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
931   const struct scale_factors *const sf =
932       get_ref_scale_factors_const(cm, ref_frame);
933   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
934   assert(yv12 != NULL);
935 
936   if (scaled_ref_frame) {
937     // Set up the pred block based on the scaled reference, because
938     // av1_mv_pred() doesn't support scaling.
939     av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
940                          num_planes);
941   } else {
942     av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
943   }
944 
945   // Gets an initial list of candidate vectors from neighbours and orders them
946   av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
947                    xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
948                    mbmi_ext->mode_context);
949   // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
950   // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
951   av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
952   // Further refinement that is encode-side only: test the top few candidates
953   // in full and choose the best as the center point for subsequent searches.
954   // The current implementation doesn't support scaling.
955   av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
956               ref_frame, block_size);
957 
958   // Go back to unscaled reference.
959   if (scaled_ref_frame) {
960     // We had temporarily set up the pred block based on the scaled reference
961     // above. Go back to the unscaled reference now, for subsequent use.
962     av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
963   }
964 }
965 
966 #define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
967 #define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
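// The margins are in 1/8-pel units (hence the << 3): the distance from the
// block to the extended frame border, minus the interpolation filter extend.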
968 
969 // TODO(jingning): this mv clamping function should be block size dependent.
970 static inline void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
971   const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
972                                      xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
973                                      xd->mb_to_top_edge - LEFT_TOP_MARGIN,
974                                      xd->mb_to_bottom_edge +
975                                          RIGHT_BOTTOM_MARGIN };
976   clamp_mv(mv, &mv_limits);
977 }
978 
979 /* If the current mode shares the same mv as an already searched mode but has
980  * a higher mode cost, skip this mode. */
981 static int skip_repeated_mv(const AV1_COMMON *const cm,
982                             const MACROBLOCK *const x,
983                             PREDICTION_MODE this_mode,
984                             const MV_REFERENCE_FRAME ref_frames[2],
985                             InterModeSearchState *search_state) {
986   const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
987   const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
988   const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
989   const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
990   PREDICTION_MODE compare_mode = MB_MODE_COUNT;
991   if (!is_comp_pred) {
992     if (this_mode == NEARMV) {
993       if (ref_mv_count == 0) {
994         // NEARMV has the same motion vector as NEARESTMV
995         compare_mode = NEARESTMV;
996       }
997       if (ref_mv_count == 1 &&
998           cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
999         // NEARMV has the same motion vector as GLOBALMV
1000         compare_mode = GLOBALMV;
1001       }
1002     }
1003     if (this_mode == GLOBALMV) {
1004       if (ref_mv_count == 0 &&
1005           cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1006         // GLOBALMV has the same motion vector as NEARESTMV
1007         compare_mode = NEARESTMV;
1008       }
1009       if (ref_mv_count == 1) {
1010         // GLOBALMV has the same motion vector as NEARMV
1011         compare_mode = NEARMV;
1012       }
1013     }
1014 
1015     if (compare_mode != MB_MODE_COUNT) {
1016       // Use modelled_rd to check whether compare mode was searched
1017       if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
1018           INT64_MAX) {
1019         const int16_t mode_ctx =
1020             av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
1021         const int compare_cost =
1022             cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
1023         const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);
1024 
1025         // Only skip if the mode cost is larger than compare mode cost
1026         if (this_cost > compare_cost) {
1027           search_state->modelled_rd[this_mode][0][ref_frames[0]] =
1028               search_state->modelled_rd[compare_mode][0][ref_frames[0]];
1029           return 1;
1030         }
1031       }
1032     }
1033   }
1034   return 0;
1035 }
1036 
1037 static inline int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1038                                      const AV1_COMMON *cm,
1039                                      const MACROBLOCK *x) {
1040   const MACROBLOCKD *const xd = &x->e_mbd;
1041   *out_mv = in_mv;
1042   lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1043                      cm->features.cur_frame_force_integer_mv);
1044   clamp_mv2(&out_mv->as_mv, xd);
1045   return av1_is_fullmv_in_range(&x->mv_limits,
1046                                 get_fullmv_from_mv(&out_mv->as_mv));
1047 }
1048 
1049 // To use single newmv directly for compound modes, the mv needs to be clamped
1050 // to the valid mv range. Without this, the encoder can generate out-of-range
1051 // mvs, which has been observed in 8k encoding.
1052 static inline void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1053                                      int ref_idx) {
1054   const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1055   SubpelMvLimits mv_limits;
1056 
1057   av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1058   clamp_mv(&mv->as_mv, &mv_limits);
1059 }
1060 
1061 static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
1062                             const BLOCK_SIZE bsize, int_mv *cur_mv,
1063                             int *const rate_mv, HandleInterModeArgs *const args,
1064                             inter_mode_info *mode_info) {
1065   MACROBLOCKD *const xd = &x->e_mbd;
1066   MB_MODE_INFO *const mbmi = xd->mi[0];
1067   const int is_comp_pred = has_second_ref(mbmi);
1068   const PREDICTION_MODE this_mode = mbmi->mode;
1069   const int refs[2] = { mbmi->ref_frame[0],
1070                         mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
1071   const int ref_mv_idx = mbmi->ref_mv_idx;
1072 
1073   if (is_comp_pred) {
1074     const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
1075     const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
1076     if (this_mode == NEW_NEWMV) {
1077       if (valid_mv0) {
1078         cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
1079         clamp_mv_in_range(x, &cur_mv[0], 0);
1080       }
1081       if (valid_mv1) {
1082         cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
1083         clamp_mv_in_range(x, &cur_mv[1], 1);
1084       }
1085       *rate_mv = 0;
1086       for (int i = 0; i < 2; ++i) {
1087         const int_mv ref_mv = av1_get_ref_mv(x, i);
1088         *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
1089                                     x->mv_costs->nmv_joint_cost,
1090                                     x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1091       }
1092     } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
1093       if (valid_mv1) {
1094         cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
1095         clamp_mv_in_range(x, &cur_mv[1], 1);
1096       }
1097       const int_mv ref_mv = av1_get_ref_mv(x, 1);
1098       *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
1099                                  x->mv_costs->nmv_joint_cost,
1100                                  x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1101     } else {
1102       assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
1103       if (valid_mv0) {
1104         cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
1105         clamp_mv_in_range(x, &cur_mv[0], 0);
1106       }
1107       const int_mv ref_mv = av1_get_ref_mv(x, 0);
1108       *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
1109                                  x->mv_costs->nmv_joint_cost,
1110                                  x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1111     }
1112   } else {
1113     // Single ref case.
1114     const int ref_idx = 0;
1115     int search_range = INT_MAX;
1116 
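    // Speed feature: if the current ref MV is close to an already searched
    // ref MV candidate, bound the full-pel search range by that distance plus
    // the displacement found for the earlier candidate.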
1117     if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
1118       const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
1119       int min_mv_diff = INT_MAX;
1120       int best_match = -1;
1121       MV prev_ref_mv[2] = { { 0 } };
1122       for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
1123         prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
1124                                                      idx, &x->mbmi_ext)
1125                                .as_mv;
1126         const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
1127                                        abs(ref_mv.col - prev_ref_mv[idx].col));
1128 
1129         if (min_mv_diff > ref_mv_diff) {
1130           min_mv_diff = ref_mv_diff;
1131           best_match = idx;
1132         }
1133       }
1134 
1135       if (min_mv_diff < (16 << 3)) {
1136         if (args->single_newmv_valid[best_match][refs[0]]) {
1137           search_range = min_mv_diff;
1138           search_range +=
1139               AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
1140                          prev_ref_mv[best_match].row),
1141                      abs(args->single_newmv[best_match][refs[0]].as_mv.col -
1142                          prev_ref_mv[best_match].col));
1143           // Get full pixel search range.
1144           search_range = (search_range + 4) >> 3;
1145         }
1146       }
1147     }
1148 
1149     int_mv best_mv;
1150     av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
1151                              mode_info, &best_mv, args);
1152     if (best_mv.as_int == INVALID_MV) return INT64_MAX;
1153 
1154     args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
1155     args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
1156     args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
1157     cur_mv[0].as_int = best_mv.as_int;
1158 
1159     // Return after single_newmv is set.
1160     if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
1161   }
1162 
1163   return 0;
1164 }
1165 
1166 static inline void update_mode_start_end_index(
1167     const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
1168     int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
1169     int interintra_allowed, int eval_motion_mode) {
1170   *mode_index_start = (int)SIMPLE_TRANSLATION;
1171   *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1172   if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1173     if (!eval_motion_mode) {
1174       *mode_index_end = (int)SIMPLE_TRANSLATION;
1175     } else {
1176       // Set the start index appropriately to process motion modes other than
1177       // simple translation
1178       *mode_index_start = 1;
1179     }
1180   }
1181   if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
1182     *mode_index_end = SIMPLE_TRANSLATION;
1183 }
1184 
1185 /*!\brief AV1 motion mode search
1186  *
1187  * \ingroup inter_mode_search
1188  * Function to search over and determine the motion mode. It will update
1189  * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1190  * WARPED_CAUSAL and determine any necessary side information for the selected
1191  * motion mode. It will also perform the full transform search, unless the
1192  * input parameter do_tx_search indicates to do an estimation of the RD rather
1193  * than an RD corresponding to a full transform search. It will return the
1194  * RD for the final motion_mode.
1195  * Do the RD search for a given inter mode and compute all information relevant
1196  * to the input mode. It will compute the best MV,
1197  * compound parameters (if the mode is a compound mode) and interpolation filter
1198  * parameters.
1199  *
1200  * \param[in]     cpi               Top-level encoder structure.
1201  * \param[in]     tile_data         Pointer to struct holding adaptive
1202  *                                  data/contexts/models for the tile during
1203  *                                  encoding.
1204  * \param[in]     x                 Pointer to struct holding all the data for
1205  *                                  the current macroblock.
1206  * \param[in]     bsize             Current block size.
1207  * \param[in,out] rd_stats          Struct to keep track of the overall RD
1208  *                                  information.
1209  * \param[in,out] rd_stats_y        Struct to keep track of the RD information
1210  *                                  for only the Y plane.
1211  * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
1212  *                                  for only the UV planes.
1213  * \param[in]     args              HandleInterModeArgs struct holding
1214  *                                  miscellaneous arguments for inter mode
1215  *                                  search. See the documentation for this
1216  *                                  struct for a description of each member.
1217  * \param[in]     ref_best_rd       Best RD found so far for this block.
1218  *                                  It is used for early termination of this
1219  *                                  search if the RD exceeds this value.
1220  * \param[in,out] ref_skip_rd       A length 2 array, where skip_rd[0] is the
1221  *                                  best total RD for a skip mode so far, and
1222  *                                  skip_rd[1] is the best RD for a skip mode so
1223  *                                  far in luma. This is used as a speed feature
1224  *                                  to skip the transform search if the computed
1225  *                                  skip RD for the current mode is not better
1226  *                                  than the best skip_rd so far.
1227  * \param[in,out] rate_mv           The rate associated with the motion vectors.
1228  *                                  This will be modified if a motion search is
1229  *                                  done in the motion mode search.
1230  * \param[in,out] orig_dst          A prediction buffer to hold a computed
1231  *                                  prediction. This will eventually hold the
1232  *                                  final prediction, and the tmp_dst info will
1233  *                                  be copied here.
1234  * \param[in,out] best_est_rd       Estimated RD for motion mode search if
1235  *                                  do_tx_search (see below) is 0.
1236  * \param[in]     do_tx_search      Parameter to indicate whether or not to do
1237  *                                  a full transform search. This will compute
1238  *                                  an estimated RD for the modes without the
1239  *                                  transform search and later perform the full
1240  *                                  transform search on the best candidates.
1241  * \param[in]     inter_modes_info  InterModesInfo struct to hold inter mode
1242  *                                  information to perform a full transform
1243  *                                  search only on winning candidates searched
1244  *                                  with an estimate for transform coding RD.
1245  * \param[in]     eval_motion_mode  Boolean whether or not to evaluate
1246  *                                  motion modes other than SIMPLE_TRANSLATION.
1247  * \param[out]    yrd               Stores the rdcost corresponding to encoding
1248  *                                  the luma plane.
1249  * \return Returns INT64_MAX if the determined motion mode is invalid and the
1250  * current motion mode being tested should be skipped. It returns 0 if the
1251  * motion mode search is a success.
1252  */
1253 static int64_t motion_mode_rd(
1254     const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
1255     BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
1256     RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
1257     int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
1258     int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
1259     int eval_motion_mode, int64_t *yrd) {
1260   const AV1_COMMON *const cm = &cpi->common;
1261   const FeatureFlags *const features = &cm->features;
1262   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
1263   const int num_planes = av1_num_planes(cm);
1264   MACROBLOCKD *xd = &x->e_mbd;
1265   MB_MODE_INFO *mbmi = xd->mi[0];
1266   const int is_comp_pred = has_second_ref(mbmi);
1267   const PREDICTION_MODE this_mode = mbmi->mode;
1268   const int rate2_nocoeff = rd_stats->rate;
1269   int best_xskip_txfm = 0;
1270   RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
1271   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
1272   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
1273   const int rate_mv0 = *rate_mv;
1274   const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
1275                                  is_interintra_allowed(mbmi) &&
1276                                  mbmi->compound_idx;
1277   WARP_SAMPLE_INFO *const warp_sample_info =
1278       &x->warp_sample_info[mbmi->ref_frame[0]];
1279   int *pts0 = warp_sample_info->pts;
1280   int *pts_inref0 = warp_sample_info->pts_inref;
1281 
1282   assert(mbmi->ref_frame[1] != INTRA_FRAME);
1283   const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
1284   av1_invalid_rd_stats(&best_rd_stats);
1285   mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
1286   MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
1287   *yrd = INT64_MAX;
1288   if (features->switchable_motion_mode) {
1289     // Determine which motion modes to search if more than SIMPLE_TRANSLATION
1290     // is allowed.
1291     last_motion_mode_allowed = motion_mode_allowed(
1292         xd->global_motion, xd, mbmi, features->allow_warped_motion);
1293   }
1294 
1295   if (last_motion_mode_allowed == WARPED_CAUSAL) {
1296     // Collect projection samples used in least squares approximation of
1297     // the warped motion parameters if WARPED_CAUSAL is going to be searched.
1298     if (warp_sample_info->num < 0) {
1299       warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
1300     }
1301     mbmi->num_proj_ref = warp_sample_info->num;
1302   }
1303   const int total_samples = mbmi->num_proj_ref;
1304   if (total_samples == 0) {
1305     // Do not search WARPED_CAUSAL if there are no samples to use to determine
1306     // warped parameters.
1307     last_motion_mode_allowed = OBMC_CAUSAL;
1308   }
1309 
1310   const MB_MODE_INFO base_mbmi = *mbmi;
1311   MB_MODE_INFO best_mbmi;
1312   const int interp_filter = features->interp_filter;
1313   const int switchable_rate =
1314       av1_is_interp_needed(xd)
1315           ? av1_get_switchable_rate(x, xd, interp_filter,
1316                                     cm->seq_params->enable_dual_filter)
1317           : 0;
1318   int64_t best_rd = INT64_MAX;
1319   int best_rate_mv = rate_mv0;
1320   const int mi_row = xd->mi_row;
1321   const int mi_col = xd->mi_col;
1322   int mode_index_start, mode_index_end;
1323   const int txfm_rd_gate_level =
1324       get_txfm_rd_gate_level(cm->seq_params->enable_masked_compound,
1325                              cpi->sf.inter_sf.txfm_rd_gate_level, bsize,
1326                              TX_SEARCH_MOTION_MODE, eval_motion_mode);
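  // A non-zero txfm_rd_gate_level enables the skip-RD gating used in the
  // do_tx_search path below: a cheap SSE-based RD estimate is compared against
  // the best skip RD seen so far before committing to the full transform
  // search.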
1327 
1328   // Modify the start and end index according to speed features. For example,
1329   // if SIMPLE_TRANSLATION has already been searched according to
1330   // the motion_mode_for_winner_cand speed feature, update the mode_index_start
1331   // to avoid searching it again.
1332   update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
1333                               last_motion_mode_allowed, interintra_allowed,
1334                               eval_motion_mode);
1335   // Main function loop. This loops over all of the possible motion modes and
1336   // computes RD to determine the best one. This process includes computing
1337   // any necessary side information for the motion mode and performing the
1338   // transform search.
1339   for (int mode_index = mode_index_start; mode_index <= mode_index_end;
1340        mode_index++) {
1341     if (args->skip_motion_mode && mode_index) continue;
1342     int tmp_rate2 = rate2_nocoeff;
1343     const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
1344     int tmp_rate_mv = rate_mv0;
1345 
1346     *mbmi = base_mbmi;
1347     if (is_interintra_mode) {
1348       // Only use SIMPLE_TRANSLATION for interintra
1349       mbmi->motion_mode = SIMPLE_TRANSLATION;
1350     } else {
1351       mbmi->motion_mode = (MOTION_MODE)mode_index;
1352       assert(mbmi->ref_frame[1] != INTRA_FRAME);
1353     }
1354 
1355     // Do not search OBMC if the probability of selecting it is below a
1356     // predetermined threshold for this update_type and block size.
1357     const FRAME_UPDATE_TYPE update_type =
1358         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1359     int use_actual_frame_probs = 1;
1360     int prune_obmc;
1361 #if CONFIG_FPMT_TEST
1362     use_actual_frame_probs =
1363         (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
1364     if (!use_actual_frame_probs) {
1365       prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
1366                    cpi->sf.inter_sf.prune_obmc_prob_thresh;
1367     }
1368 #endif
1369     if (use_actual_frame_probs) {
1370       prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
1371                    cpi->sf.inter_sf.prune_obmc_prob_thresh;
1372     }
1373     if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
1374         mbmi->motion_mode == OBMC_CAUSAL)
1375       continue;
1376 
1377     if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
1378       // SIMPLE_TRANSLATION mode: no need to recalculate.
1379       // The prediction is calculated before motion_mode_rd() is called in
1380       // handle_inter_mode()
1381     } else if (mbmi->motion_mode == OBMC_CAUSAL) {
1382       const uint32_t cur_mv = mbmi->mv[0].as_int;
1383       // OBMC_CAUSAL not allowed for compound prediction
1384       assert(!is_comp_pred);
1385       if (have_newmv_in_inter_mode(this_mode)) {
1386         av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
1387                                  &mbmi->mv[0], NULL);
1388         tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1389       }
1390       if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
1391         // Build the predictor according to the current motion vector if it has
1392         // not already been built
1393         av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1394                                       0, av1_num_planes(cm) - 1);
1395       }
1396       // Build the inter predictor by blending the predictor corresponding to
1397       // this MV with the neighboring blocks' predictions using the OBMC model
1398       av1_build_obmc_inter_prediction(
1399           cm, xd, args->above_pred_buf, args->above_pred_stride,
1400           args->left_pred_buf, args->left_pred_stride);
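      // The neighbour predictions come from args->above_pred_buf and
      // args->left_pred_buf, which were filled earlier from the above/left
      // blocks' own motion; OBMC blends them with the current prediction using
      // position-dependent weights that favour each neighbour near its shared
      // edge.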
1401 #if !CONFIG_REALTIME_ONLY
1402     } else if (mbmi->motion_mode == WARPED_CAUSAL) {
1403       int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
1404       mbmi->motion_mode = WARPED_CAUSAL;
1405       mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
1406       mbmi->interp_filters =
1407           av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1408 
1409       memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
1410       memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
1411       // Select the samples according to motion vector difference
1412       if (mbmi->num_proj_ref > 1) {
1413         mbmi->num_proj_ref = av1_selectSamples(
1414             &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
1415       }
1416 
1417       // Compute the warped motion parameters with a least squares fit
1418       //  using the collected samples
1419       if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
1420                                mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
1421                                &mbmi->wm_params, mi_row, mi_col)) {
1422         assert(!is_comp_pred);
1423         if (have_newmv_in_inter_mode(this_mode)) {
1424           // Refine MV for NEWMV mode
1425           const int_mv mv0 = mbmi->mv[0];
1426           const WarpedMotionParams wm_params0 = mbmi->wm_params;
1427           const int num_proj_ref0 = mbmi->num_proj_ref;
1428 
1429           const int_mv ref_mv = av1_get_ref_mv(x, 0);
1430           SUBPEL_MOTION_SEARCH_PARAMS ms_params;
1431           av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
1432                                             &ref_mv.as_mv, NULL);
1433 
1434           // Refine MV in a small range.
1435           av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
1436                                total_samples, cpi->sf.mv_sf.warp_search_method,
1437                                cpi->sf.mv_sf.warp_search_iters);
1438 
1439           if (mv0.as_int != mbmi->mv[0].as_int) {
1440             // Keep the refined MV and WM parameters.
1441             tmp_rate_mv = av1_mv_bit_cost(
1442                 &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
1443                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1444             tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1445           } else {
1446             // Restore the old MV and WM parameters.
1447             mbmi->mv[0] = mv0;
1448             mbmi->wm_params = wm_params0;
1449             mbmi->num_proj_ref = num_proj_ref0;
1450           }
1451         }
1452 
1453         // Build the warped predictor
1454         av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
1455                                       av1_num_planes(cm) - 1);
1456       } else {
1457         continue;
1458       }
1459 #endif  // !CONFIG_REALTIME_ONLY
1460     } else if (is_interintra_mode) {
1461       const int ret =
1462           av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
1463                                       &tmp_rate_mv, &tmp_rate2, orig_dst);
1464       if (ret < 0) continue;
1465     }
1466 
1467     // If we are searching newmv and the mv is the same as refmv, skip the
1468     // current mode
1469     if (!av1_check_newmv_joint_nonzero(cm, x)) continue;
1470 
1471     // Update rd_stats for the current motion mode
1472     txfm_info->skip_txfm = 0;
1473     rd_stats->dist = 0;
1474     rd_stats->sse = 0;
1475     rd_stats->skip_txfm = 1;
1476     rd_stats->rate = tmp_rate2;
1477     const ModeCosts *mode_costs = &x->mode_costs;
1478     if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
1479     if (interintra_allowed) {
1480       rd_stats->rate +=
1481           mode_costs->interintra_cost[size_group_lookup[bsize]]
1482                                      [mbmi->ref_frame[1] == INTRA_FRAME];
1483     }
1484     if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
1485         (mbmi->ref_frame[1] != INTRA_FRAME)) {
1486       if (last_motion_mode_allowed == WARPED_CAUSAL) {
1487         rd_stats->rate +=
1488             mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
1489       } else {
1490         rd_stats->rate +=
1491             mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
1492       }
1493     }
1494 
1495     int64_t this_yrd = INT64_MAX;
1496 
1497     if (!do_tx_search) {
1498       // Avoid doing a transform search here to speed up the overall mode
1499       // search. It will be done later in the mode search if the current
1500       // motion mode seems promising.
1501       int64_t curr_sse = -1;
1502       int64_t sse_y = -1;
1503       int est_residue_cost = 0;
1504       int64_t est_dist = 0;
1505       int64_t est_rd = 0;
1506       if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1507         curr_sse = get_sse(cpi, x, &sse_y);
1508         const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
1509                                                  &est_residue_cost, &est_dist);
1510         (void)has_est_rd;
1511         assert(has_est_rd);
1512       } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
1513                  cpi->sf.rt_sf.use_nonrd_pick_mode) {
1514         model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
1515             cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
1516             NULL, &curr_sse, NULL, NULL, NULL);
1517         sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
1518       }
1519       est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
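      // Early exit: the 0.80 factor below prunes modes whose estimated RD is
      // more than 25% worse than the best estimate so far
      // (est_rd * 0.8 > *best_est_rd is equivalent to est_rd > 1.25 * best).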
1520       if (est_rd * 0.80 > *best_est_rd) {
1521         mbmi->ref_frame[1] = ref_frame_1;
1522         continue;
1523       }
1524       const int mode_rate = rd_stats->rate;
1525       rd_stats->rate += est_residue_cost;
1526       rd_stats->dist = est_dist;
1527       rd_stats->rdcost = est_rd;
1528       if (rd_stats->rdcost < *best_est_rd) {
1529         *best_est_rd = rd_stats->rdcost;
1530         assert(sse_y >= 0);
1531         ref_skip_rd[1] = txfm_rd_gate_level
1532                              ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
1533                              : INT64_MAX;
1534       }
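      // ref_skip_rd[1] tracks the best luma-only skip RD. The (sse_y << 4)
      // term scales the luma SSE to the distortion precision used with
      // RDCOST() elsewhere in this file (compare the "sse <<= 4" scaling in
      // skip_mode_rd() below).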
1535       if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
1536         if (!is_comp_pred) {
1537           assert(curr_sse >= 0);
1538           inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1539                                 rd_stats->rdcost, rd_stats, rd_stats_y,
1540                                 rd_stats_uv, mbmi);
1541         }
1542       } else {
1543         assert(curr_sse >= 0);
1544         inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1545                               rd_stats->rdcost, rd_stats, rd_stats_y,
1546                               rd_stats_uv, mbmi);
1547       }
1548       mbmi->skip_txfm = 0;
1549     } else {
1550       // Perform full transform search
1551       int64_t skip_rd = INT64_MAX;
1552       int64_t skip_rdy = INT64_MAX;
1553       if (txfm_rd_gate_level) {
1554         // Check if the mode is good enough based on skip RD
1555         int64_t sse_y = INT64_MAX;
1556         int64_t curr_sse = get_sse(cpi, x, &sse_y);
1557         skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
1558         skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
1559         int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
1560                                         txfm_rd_gate_level, 0);
1561         if (!eval_txfm) continue;
1562       }
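      // The gate above estimates the RD of coding this mode with all
      // coefficients skipped (rate plus raw SSE as distortion) and compares it
      // against the best skip RD recorded in ref_skip_rd[0]; clearly inferior
      // modes have already been dropped via the "continue" above, avoiding the
      // expensive av1_txfm_search() call below.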
1563 
1564       // Do transform search
1565       const int mode_rate = rd_stats->rate;
1566       if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
1567                            rd_stats->rate, ref_best_rd)) {
1568         if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
1569           return INT64_MAX;
1570         }
1571         continue;
1572       }
1573       const int skip_ctx = av1_get_skip_txfm_context(xd);
1574       const int y_rate =
1575           rd_stats->skip_txfm
1576               ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
1577               : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
1578       this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);
1579 
1580       const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1581       if (curr_rd < ref_best_rd) {
1582         ref_best_rd = curr_rd;
1583         ref_skip_rd[0] = skip_rd;
1584         ref_skip_rd[1] = skip_rdy;
1585       }
1586       if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1587         inter_mode_data_push(
1588             tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
1589             rd_stats_y->rate + rd_stats_uv->rate +
1590                 mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
1591       }
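      // These (sse, dist, rate) samples are used to fit the per-block-size RD
      // model that get_est_rate_dist() queries in the !do_tx_search path above
      // when inter_mode_rd_model_estimation == 1.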
1592     }
1593 
1594     if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
1595       if (is_nontrans_global_motion(xd, xd->mi[0])) {
1596         mbmi->interp_filters =
1597             av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1598       }
1599     }
1600 
1601     const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1602     if (mode_index == 0) {
1603       args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
1604     }
1605     if (mode_index == 0 || tmp_rd < best_rd) {
1606       // Update best_rd data if this is the best motion mode so far
1607       best_mbmi = *mbmi;
1608       best_rd = tmp_rd;
1609       best_rd_stats = *rd_stats;
1610       best_rd_stats_y = *rd_stats_y;
1611       best_rate_mv = tmp_rate_mv;
1612       *yrd = this_yrd;
1613       if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
1614       memcpy(best_blk_skip, txfm_info->blk_skip,
1615              sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
1616       av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
1617       best_xskip_txfm = mbmi->skip_txfm;
1618     }
1619   }
1620   // Update RD and mbmi stats for selected motion mode
1621   mbmi->ref_frame[1] = ref_frame_1;
1622   *rate_mv = best_rate_mv;
1623   if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
1624     av1_invalid_rd_stats(rd_stats);
1625     restore_dst_buf(xd, *orig_dst, num_planes);
1626     return INT64_MAX;
1627   }
1628   *mbmi = best_mbmi;
1629   *rd_stats = best_rd_stats;
1630   *rd_stats_y = best_rd_stats_y;
1631   if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
1632   memcpy(txfm_info->blk_skip, best_blk_skip,
1633          sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
1634   av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
1635   txfm_info->skip_txfm = best_xskip_txfm;
1636 
1637   restore_dst_buf(xd, *orig_dst, num_planes);
1638   return 0;
1639 }
1640 
1641 static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
1642                             MACROBLOCK *const x, BLOCK_SIZE bsize,
1643                             const BUFFER_SET *const orig_dst, int64_t best_rd) {
1644   assert(bsize < BLOCK_SIZES_ALL);
1645   const AV1_COMMON *cm = &cpi->common;
1646   const int num_planes = av1_num_planes(cm);
1647   MACROBLOCKD *const xd = &x->e_mbd;
1648   const int mi_row = xd->mi_row;
1649   const int mi_col = xd->mi_col;
1650   int64_t total_sse = 0;
1651   int64_t this_rd = INT64_MAX;
1652   const int skip_mode_ctx = av1_get_skip_mode_context(xd);
1653   rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];
1654 
1655   for (int plane = 0; plane < num_planes; ++plane) {
1656     // Call av1_enc_build_inter_predictor() for one plane at a time.
1657     av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1658                                   plane, plane);
1659     const struct macroblockd_plane *const pd = &xd->plane[plane];
1660     const BLOCK_SIZE plane_bsize =
1661         get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
1662 
1663     av1_subtract_plane(x, plane_bsize, plane);
1664 
1665     int64_t sse =
1666         av1_pixel_diff_dist(x, plane, 0, 0, plane_bsize, plane_bsize, NULL);
1667     if (is_cur_buf_hbd(xd)) sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
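    // For high bit-depth buffers the squared error above is normalised back to
    // an 8-bit scale: each extra bit of depth doubles pixel differences and
    // quadruples squared error, hence the shift by 2 * (bd - 8).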
1668     sse <<= 4;
1669     total_sse += sse;
1670     // When current rd cost is more than the best rd, skip evaluation of
1671     // remaining planes.
1672     this_rd = RDCOST(x->rdmult, rd_stats->rate, total_sse);
1673     if (this_rd > best_rd) break;
1674   }
1675 
1676   rd_stats->dist = rd_stats->sse = total_sse;
1677   rd_stats->rdcost = this_rd;
1678 
1679   restore_dst_buf(xd, *orig_dst, num_planes);
1680   return 0;
1681 }
1682 
1683 // Check NEARESTMV, NEARMV and GLOBALMV ref mvs for duplicates and skip the
1684 // relevant mode.
1685 // Note(rachelbarker): This speed feature currently does not interact correctly
1686 // with global motion. The issue is that, when global motion is used, GLOBALMV
1687 // produces a different prediction to NEARESTMV/NEARMV even if the motion
1688 // vectors are the same. Thus GLOBALMV should not be pruned in this case.
1689 static inline int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
1690                                       int ref_idx,
1691                                       const MV_REFERENCE_FRAME *ref_frame,
1692                                       PREDICTION_MODE single_mode) {
1693   const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1694   const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1695   assert(single_mode != NEWMV);
1696   if (single_mode == NEARESTMV) {
1697     return 0;
1698   } else if (single_mode == NEARMV) {
1699     // when ref_mv_count == 0, NEARESTMV and NEARMV are the same as GLOBALMV
1700     // when ref_mv_count == 1, NEARMV is the same as GLOBALMV
1701     if (ref_mv_count < 2) return 1;
1702   } else if (single_mode == GLOBALMV) {
1703     // when ref_mv_count == 0, GLOBALMV is the same as NEARESTMV
1704     if (ref_mv_count == 0) return 1;
1705     // when ref_mv_count == 1, NEARMV is the same as GLOBALMV
1706     else if (ref_mv_count == 1)
1707       return 0;
1708 
1709     int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
1710     // Check whether GLOBALMV matches any mv in ref_mv_stack
1711     for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
1712       int_mv this_mv;
1713 
1714       if (ref_idx == 0)
1715         this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
1716       else
1717         this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
1718 
1719       if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
1720         return 1;
1721     }
1722   }
1723   return 0;
1724 }
1725 
1726 static inline int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
1727                               int ref_idx, int ref_mv_idx,
1728                               int skip_repeated_ref_mv,
1729                               const MV_REFERENCE_FRAME *ref_frame,
1730                               const MB_MODE_INFO_EXT *mbmi_ext) {
1731   const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1732   assert(is_inter_singleref_mode(single_mode));
1733   if (single_mode == NEWMV) {
1734     this_mv->as_int = INVALID_MV;
1735   } else if (single_mode == GLOBALMV) {
1736     if (skip_repeated_ref_mv &&
1737         check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1738       return 0;
1739     *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1740   } else {
1741     assert(single_mode == NEARMV || single_mode == NEARESTMV);
1742     const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1743     const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
1744     if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
1745       assert(ref_mv_offset >= 0);
1746       if (ref_idx == 0) {
1747         *this_mv =
1748             mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
1749       } else {
1750         *this_mv =
1751             mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
1752       }
1753     } else {
1754       if (skip_repeated_ref_mv &&
1755           check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1756         return 0;
1757       *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1758     }
1759   }
1760   return 1;
1761 }
1762 
1763 // Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
1764 // population
1765 static inline int skip_nearest_near_mv_using_refmv_weight(
1766     const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
1767     const int8_t ref_frame_type, PREDICTION_MODE best_mode) {
1768   if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
1769   // Do not skip the mode if the current block has not yet obtained a valid
1770   // inter mode.
1771   if (!is_inter_mode(best_mode)) return 0;
1772 
1773   const MACROBLOCKD *xd = &x->e_mbd;
1774   // Do not skip the mode if either the top or the left neighboring block is
1775   // unavailable.
1776   if (!xd->left_available || !xd->up_available) return 0;
1777   const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1778   const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
1779   const int ref_mv_count =
1780       AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
1781 
1782   if (ref_mv_count == 0) return 0;
1783   // If ref mv list has at least one nearest candidate do not prune NEARESTMV
1784   if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;
1785 
1786   // Count number of ref mvs populated from nearest candidates
1787   int nearest_refmv_count = 0;
1788   for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
1789     if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
1790   }
1791 
1792   // nearest_refmv_count indicates how closely the block's motion matches that
1793   // of its spatial neighbors. A smaller nearest_refmv_count relative to
1794   // ref_mv_count means less correlation with the spatial neighbors, and hence
1795   // a lower chance of NEARESTMV or NEARMV becoming the best mode, since these
1796   // modes work well for blocks that share similar motion characteristics with
1797   // their neighbors. Thus, NEARMV is pruned when nearest_refmv_count is
1798   // relatively small compared to ref_mv_count, and NEARESTMV is pruned if none
1799   // of the ref mvs are populated from nearest candidates.
1800   const int prune_thresh = 1 + (ref_mv_count >= 2);
1801   if (nearest_refmv_count < prune_thresh) return 1;
1802   return 0;
1803 }
1804 
1805 // This function updates the non-new mv for the current prediction mode
1806 static inline int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
1807                                const AV1_COMMON *cm, const MACROBLOCK *x,
1808                                int skip_repeated_ref_mv) {
1809   const MACROBLOCKD *xd = &x->e_mbd;
1810   const MB_MODE_INFO *mbmi = xd->mi[0];
1811   const int is_comp_pred = has_second_ref(mbmi);
1812 
1813   int ret = 1;
1814   for (int i = 0; i < is_comp_pred + 1; ++i) {
1815     int_mv this_mv;
1816     this_mv.as_int = INVALID_MV;
1817     ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
1818                       skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
1819     if (!ret) return 0;
1820     const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
1821     if (single_mode == NEWMV) {
1822       const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1823       cur_mv[i] =
1824           (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
1825                          .this_mv
1826                    : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
1827                          .comp_mv;
1828     } else {
1829       ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
1830     }
1831   }
1832   return ret;
1833 }
1834 
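// Returns the cost of signalling the dynamic reference list (DRL) index
// mbmi->ref_mv_idx for NEWMV/NEW_NEWMV and NEAR-type modes: one binary
// decision is costed per candidate position (with a context derived from the
// ref MV weights) until the selected index is reached.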
1835 static inline int get_drl_cost(const MB_MODE_INFO *mbmi,
1836                                const MB_MODE_INFO_EXT *mbmi_ext,
1837                                const int (*const drl_mode_cost0)[2],
1838                                int8_t ref_frame_type) {
1839   int cost = 0;
1840   if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
1841     for (int idx = 0; idx < 2; ++idx) {
1842       if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1843         uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1844         cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
1845         if (mbmi->ref_mv_idx == idx) return cost;
1846       }
1847     }
1848     return cost;
1849   }
1850 
1851   if (have_nearmv_in_inter_mode(mbmi->mode)) {
1852     for (int idx = 1; idx < 3; ++idx) {
1853       if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1854         uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1855         cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
1856         if (mbmi->ref_mv_idx == (idx - 1)) return cost;
1857       }
1858     }
1859     return cost;
1860   }
1861   return cost;
1862 }
1863 
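// Returns 0 if any NEWMV component of this (compound) mode uses a reference
// for which the earlier single-reference NEWMV search was marked invalid in
// args->single_newmv_valid; returns 1 otherwise.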
1864 static inline int is_single_newmv_valid(const HandleInterModeArgs *const args,
1865                                         const MB_MODE_INFO *const mbmi,
1866                                         PREDICTION_MODE this_mode) {
1867   for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
1868     const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1869     const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
1870     if (single_mode == NEWMV &&
1871         args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
1872       return 0;
1873     }
1874   }
1875   return 1;
1876 }
1877 
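// Returns the number of ref_mv_idx candidates to evaluate for this prediction
// mode: 1 when no DRL index is signalled, otherwise up to MAX_REF_MV_SEARCH
// depending on the size of the ref MV list.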
1878 static int get_drl_refmv_count(const MACROBLOCK *const x,
1879                                const MV_REFERENCE_FRAME *ref_frame,
1880                                PREDICTION_MODE mode) {
1881   const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1882   const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1883   const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
1884   const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1885   const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);
1886   const int has_drl =
1887       (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1);
1888   const int ref_set =
1889       has_drl ? AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv) : 1;
1890 
1891   return ref_set;
1892 }
1893 
1894 // Checks if a particular ref_mv_idx should be pruned.
1895 static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
1896                                          const int qindex,
1897                                          const int ref_mv_idx) {
1898   if (reduce_inter_modes >= 3) return 1;
1899   // Q-index logic based pruning is enabled only for
1900   // reduce_inter_modes = 2.
1901   assert(reduce_inter_modes == 2);
1902   // When reduce_inter_modes=2, pruning happens as below based on q index.
1903   // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
1904   // For q index range between 86 and 170: prune if ref_mv_idx == 2.
1905   // For q index range between 171 and 255: no pruning.
1906   const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
1907   return (ref_mv_idx >= min_prune_ref_mv_idx);
1908 }
1909 
1910 // Whether this reference motion vector can be skipped, based on initial
1911 // heuristics.
1912 static bool ref_mv_idx_early_breakout(
1913     const SPEED_FEATURES *const sf,
1914     const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
1915     const HandleInterModeArgs *const args, int64_t ref_best_rd,
1916     int ref_mv_idx) {
1917   MACROBLOCKD *xd = &x->e_mbd;
1918   MB_MODE_INFO *mbmi = xd->mi[0];
1919   const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1920   const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1921   const int is_comp_pred = has_second_ref(mbmi);
1922   if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
1923     if (mbmi->ref_frame[0] == LAST2_FRAME ||
1924         mbmi->ref_frame[0] == LAST3_FRAME ||
1925         mbmi->ref_frame[1] == LAST2_FRAME ||
1926         mbmi->ref_frame[1] == LAST3_FRAME) {
1927       const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
1928       if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
1929           REF_CAT_LEVEL) {
1930         return true;
1931       }
1932     }
1933     // TODO(any): Experiment with reduce_inter_modes for compound prediction
1934     if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
1935         have_newmv_in_inter_mode(mbmi->mode)) {
1936       if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
1937           mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
1938         const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
1939         const int do_prune = prune_ref_mv_idx_using_qindex(
1940             sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
1941         if (do_prune &&
1942             (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
1943              REF_CAT_LEVEL)) {
1944           return true;
1945         }
1946       }
1947     }
1948   }
1949 
1950   mbmi->ref_mv_idx = ref_mv_idx;
1951   if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
1952     return true;
1953   }
1954   size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
1955   const int drl_cost = get_drl_cost(
1956       mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
1957   est_rd_rate += drl_cost;
1958   if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
1959       mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
1960     return true;
1961   }
1962   return false;
1963 }
1964 
1965 // Compute the estimated RD cost for the motion vector with simple translation.
1966 static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
1967                                           RD_STATS *rd_stats,
1968                                           HandleInterModeArgs *args,
1969                                           int ref_mv_idx, int64_t ref_best_rd,
1970                                           BLOCK_SIZE bsize) {
1971   MACROBLOCKD *xd = &x->e_mbd;
1972   MB_MODE_INFO *mbmi = xd->mi[0];
1973   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1974   const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1975   const AV1_COMMON *cm = &cpi->common;
1976   const int is_comp_pred = has_second_ref(mbmi);
1977   const ModeCosts *mode_costs = &x->mode_costs;
1978 
1979   struct macroblockd_plane *p = xd->plane;
1980   const BUFFER_SET orig_dst = {
1981     { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
1982     { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
1983   };
1984   av1_init_rd_stats(rd_stats);
1985 
1986   mbmi->interinter_comp.type = COMPOUND_AVERAGE;
1987   mbmi->comp_group_idx = 0;
1988   mbmi->compound_idx = 1;
1989   if (mbmi->ref_frame[1] == INTRA_FRAME) {
1990     mbmi->ref_frame[1] = NONE_FRAME;
1991   }
1992   int16_t mode_ctx =
1993       av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
1994 
1995   mbmi->num_proj_ref = 0;
1996   mbmi->motion_mode = SIMPLE_TRANSLATION;
1997   mbmi->ref_mv_idx = ref_mv_idx;
1998 
1999   rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
2000   const int drl_cost =
2001       get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
2002   rd_stats->rate += drl_cost;
2003 
2004   int_mv cur_mv[2];
2005   if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
2006     return INT64_MAX;
2007   }
2008   assert(have_nearmv_in_inter_mode(mbmi->mode));
2009   for (int i = 0; i < is_comp_pred + 1; ++i) {
2010     mbmi->mv[i].as_int = cur_mv[i].as_int;
2011   }
2012   const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
2013   rd_stats->rate += ref_mv_cost;
2014 
2015   if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
2016     return INT64_MAX;
2017   }
2018 
2019   mbmi->motion_mode = SIMPLE_TRANSLATION;
2020   mbmi->num_proj_ref = 0;
2021   if (is_comp_pred) {
2022     // Only compound_average
2023     mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2024     mbmi->comp_group_idx = 0;
2025     mbmi->compound_idx = 1;
2026   }
2027   set_default_interp_filters(mbmi, cm->features.interp_filter);
2028 
2029   const int mi_row = xd->mi_row;
2030   const int mi_col = xd->mi_col;
2031   av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
2032                                 AOM_PLANE_Y, AOM_PLANE_Y);
2033   int est_rate;
2034   int64_t est_dist;
2035   model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
2036                                   NULL, NULL, NULL, NULL, NULL);
2037   return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
2038 }
2039 
2040 // Represents a set of integers, from 0 to sizeof(int) * 8, as bits in
2041 // an integer. 0 for the i-th bit means that integer is excluded, 1 means
2042 // it is included.
2043 static inline void mask_set_bit(int *mask, int index) { *mask |= (1 << index); }
2044 
2045 static inline bool mask_check_bit(int mask, int index) {
2046   return (mask >> index) & 0x1;
2047 }
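// For example, a mask with indices {0, 2} set equals 0b101 (5);
// mask_check_bit(5, 1) is false while mask_check_bit(5, 2) is true.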
2048 
2049 // Before performing the full MV search in handle_inter_mode, do a simple
2050 // translation search and see if we can eliminate any motion vectors.
2051 // Returns an integer where, if the i-th bit is set, it means that the i-th
2052 // motion vector should be searched. This is only set for NEARMV.
2053 static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
2054                                 RD_STATS *rd_stats,
2055                                 HandleInterModeArgs *const args,
2056                                 int64_t ref_best_rd, BLOCK_SIZE bsize,
2057                                 const int ref_set) {
2058   // If the ref mv count is 1, do not prune the only candidate; it is better
2059   // to evaluate it than to prune it.
2060   if (ref_set == 1) return 1;
2061   AV1_COMMON *const cm = &cpi->common;
2062   const MACROBLOCKD *const xd = &x->e_mbd;
2063   const MB_MODE_INFO *const mbmi = xd->mi[0];
2064   const PREDICTION_MODE this_mode = mbmi->mode;
2065 
2066   // Only search indices if they have some chance of being good.
2067   int good_indices = 0;
2068   for (int i = 0; i < ref_set; ++i) {
2069     if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
2070                                   ref_best_rd, i)) {
2071       continue;
2072     }
2073     mask_set_bit(&good_indices, i);
2074   }
2075 
2076   // Only prune in NEARMV mode, if the speed feature is set, and the block size
2077   // is large enough. If these conditions are not met, return all good indices
2078   // found so far.
2079   if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
2080     return good_indices;
2081   if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
2082   if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
2083   // Do not prune when there is internal resizing. TODO(elliottk) fix this
2084   // so b/2384 can be resolved.
2085   if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
2086       (mbmi->ref_frame[1] > 0 &&
2087        av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
2088     return good_indices;
2089   }
2090 
2091   // Calculate the RD cost for the motion vectors using simple translation.
2092   int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
2093   for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
2094     // If this index is bad, ignore it.
2095     if (!mask_check_bit(good_indices, ref_mv_idx)) {
2096       continue;
2097     }
2098     idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
2099         cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize);
2100   }
2101   // Find the index with the best RD cost.
2102   int best_idx = 0;
2103   for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
2104     if (idx_rdcost[i] < idx_rdcost[best_idx]) {
2105       best_idx = i;
2106     }
2107   }
2108   // Only include indices that are good and within a % of the best.
2109   const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
2110   // If the simple translation cost is not within this multiple of the
2111   // best RD, skip it. Note that the cutoff is derived experimentally.
2112   const double ref_dth = 5;
2113   int result = 0;
2114   for (int i = 0; i < ref_set; ++i) {
2115     if (mask_check_bit(good_indices, i) &&
2116         (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
2117         (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
2118       mask_set_bit(&result, i);
2119     }
2120   }
2121   return result;
2122 }
2123 
2124 /*!\brief Motion mode information for inter mode search speedup.
2125  *
2126  * Used in a speed feature to search motion modes other than
2127  * SIMPLE_TRANSLATION only on winning candidates.
2128  */
2129 typedef struct motion_mode_candidate {
2130   /*!
2131    * Mode info for the motion mode candidate.
2132    */
2133   MB_MODE_INFO mbmi;
2134   /*!
2135    * Rate describing the cost of the motion vectors for this candidate.
2136    */
2137   int rate_mv;
2138   /*!
2139    * Rate before motion mode search and transform coding is applied.
2140    */
2141   int rate2_nocoeff;
2142   /*!
2143    * An integer value 0 or 1 which indicates whether or not to skip the motion
2144    * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
2145    * candidate.
2146    */
2147   int skip_motion_mode;
2148   /*!
2149    * Total RD cost for this candidate.
2150    */
2151   int64_t rd_cost;
2152 } motion_mode_candidate;
2153 
2154 /*!\cond */
2155 typedef struct motion_mode_best_st_candidate {
2156   motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
2157   int num_motion_mode_cand;
2158 } motion_mode_best_st_candidate;
2159 
2160 // Checks if any of the current block's reference frames matches the
2161 // neighbouring (top/left) block's reference frames.
2162 static inline int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
2163                                                MB_MODE_INFO *nb_mbmi) {
2164   MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
2165                                           nb_mbmi->ref_frame[1] };
2166   MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
2167                                            cur_mbmi->ref_frame[1] };
2168   const int is_cur_comp_pred = has_second_ref(cur_mbmi);
2169   int match_found = 0;
2170 
2171   for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
2172     if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
2173         (cur_ref_frames[i] == nb_ref_frames[1]))
2174       match_found = 1;
2175   }
2176   return match_found;
2177 }
2178 
2179 static inline int find_ref_match_in_above_nbs(const int total_mi_cols,
2180                                               MACROBLOCKD *xd) {
2181   if (!xd->up_available) return 1;
2182   const int mi_col = xd->mi_col;
2183   MB_MODE_INFO **cur_mbmi = xd->mi;
2184   // prev_row_mi points into the mi array, starting at the beginning of the
2185   // previous row.
2186   MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
2187   const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
2188   uint8_t mi_step;
2189   for (int above_mi_col = mi_col; above_mi_col < end_col;
2190        above_mi_col += mi_step) {
2191     MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
2192     mi_step = mi_size_wide[above_mi[0]->bsize];
2193     int match_found = 0;
2194     if (is_inter_block(*above_mi))
2195       match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
2196     if (match_found) return 1;
2197   }
2198   return 0;
2199 }
2200 
2201 static inline int find_ref_match_in_left_nbs(const int total_mi_rows,
2202                                              MACROBLOCKD *xd) {
2203   if (!xd->left_available) return 1;
2204   const int mi_row = xd->mi_row;
2205   MB_MODE_INFO **cur_mbmi = xd->mi;
2206   // prev_col_mi points into the mi array, starting at the top of the
2207   // previous column
2208   MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
2209   const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
2210   uint8_t mi_step;
2211   for (int left_mi_row = mi_row; left_mi_row < end_row;
2212        left_mi_row += mi_step) {
2213     MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
2214     mi_step = mi_size_high[left_mi[0]->bsize];
2215     int match_found = 0;
2216     if (is_inter_block(*left_mi))
2217       match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
2218     if (match_found) return 1;
2219   }
2220   return 0;
2221 }
2222 /*!\endcond */
2223 
2224 /*! \brief Struct used to hold TPL data to
2225  * narrow down parts of the inter mode search.
2226  */
2227 typedef struct {
2228   /*!
2229    * The best inter cost out of all of the reference frames.
2230    */
2231   int64_t best_inter_cost;
2232   /*!
2233    * The inter cost for each reference frame.
2234    */
2235   int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
2236 } PruneInfoFromTpl;
2237 
2238 #if !CONFIG_REALTIME_ONLY
2239 // TODO(Remya): Check if get_tpl_stats_b() can be reused
2240 static inline void get_block_level_tpl_stats(
2241     AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
2242     PruneInfoFromTpl *inter_cost_info_from_tpl) {
2243   AV1_COMMON *const cm = &cpi->common;
2244 
2245   assert(IMPLIES(cpi->ppi->gf_group.size > 0,
2246                  cpi->gf_frame_index < cpi->ppi->gf_group.size));
2247   const int tpl_idx = cpi->gf_frame_index;
2248   TplParams *const tpl_data = &cpi->ppi->tpl_data;
2249   if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
2250   const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
2251   const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
2252   const int mi_wide = mi_size_wide[bsize];
2253   const int mi_high = mi_size_high[bsize];
2254   const int tpl_stride = tpl_frame->stride;
2255   const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
2256   const int mi_col_sr =
2257       coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
2258   const int mi_col_end_sr =
2259       coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
2260   const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
2261 
2262   const int row_step = step;
2263   const int col_step_sr =
2264       coded_to_superres_mi(step, cm->superres_scale_denominator);
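  // Only column indices are converted to superres-upscaled mi units (superres
  // scales horizontally only); the TPL statistics grid is indexed at the
  // superres-upscaled resolution, while row indices need no conversion.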
2265   for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
2266        row += row_step) {
2267     for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
2268          col += col_step_sr) {
2269       const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
2270           row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];
2271 
2272       // Sums up the inter cost of corresponding ref frames
2273       for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2274         inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
2275             this_stats->pred_error[ref_idx];
2276       }
2277     }
2278   }
2279 
2280   // Computes the best inter cost (minimum inter_cost)
2281   int64_t best_inter_cost = INT64_MAX;
2282   for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2283     const int64_t cur_inter_cost =
2284         inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
2285     // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
2286     // calculating the minimum inter_cost
2287     if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
2288         valid_refs[ref_idx])
2289       best_inter_cost = cur_inter_cost;
2290   }
2291   inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
2292 }
2293 #endif
2294 
2295 static inline int prune_modes_based_on_tpl_stats(
2296     PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
2297     const PREDICTION_MODE this_mode, int prune_mode_level) {
2298   const int have_newmv = have_newmv_in_inter_mode(this_mode);
2299   if ((prune_mode_level < 2) && have_newmv) return 0;
2300 
2301   const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
2302   if (best_inter_cost == INT64_MAX) return 0;
2303 
2304   const int prune_level = prune_mode_level - 1;
2305   int64_t cur_inter_cost;
2306 
2307   const int is_globalmv =
2308       (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
2309   const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
2310 
2311   // Thresholds used for pruning:
2312   // A lower value indicates more aggressive pruning and a higher value more
2313   // conservative pruning; the value is chosen based on ref_mv_idx and the
2314   // speed feature. 'prune_index' 0, 1 and 2 correspond to ref_mv indices 0, 1
2315   // and 2, while prune_index 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV.
2316   static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
2317     { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
2318   };
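  // The factors are applied as (factor * best_inter_cost) >> 2, so a factor of
  // 6 prunes when cur_inter_cost exceeds 1.5 * best_inter_cost, 5 corresponds
  // to 1.25x and 4 to 1.0x.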
2319 
2320   const int is_comp_pred = (refs[1] > INTRA_FRAME);
2321   if (!is_comp_pred) {
2322     cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2323   } else {
2324     const int64_t inter_cost_ref0 =
2325         inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2326     const int64_t inter_cost_ref1 =
2327         inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
2328     // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
2329     // more aggressive pruning
2330     cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
2331   }
2332 
2333   // Prune the mode if cur_inter_cost is greater than threshold times
2334   // best_inter_cost
2335   if (cur_inter_cost >
2336       ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
2337         best_inter_cost) >>
2338        2))
2339     return 1;
2340   return 0;
2341 }
2342 
2343 /*!\brief High level function to select parameters for compound mode.
2344  *
2345  * \ingroup inter_mode_search
2346  * The main search functionality is done in the call to av1_compound_type_rd().
2347  *
2348  * \param[in]     cpi               Top-level encoder structure.
2349  * \param[in]     x                 Pointer to struct holding all the data for
2350  *                                  the current macroblock.
2351  * \param[in]     args              HandleInterModeArgs struct holding
2352  *                                  miscellaneous arguments for inter mode
2353  *                                  search. See the documentation for this
2354  *                                  struct for a description of each member.
2355  * \param[in]     ref_best_rd       Best RD found so far for this block.
2356  *                                  It is used for early termination of this
2357  *                                  search if the RD exceeds this value.
2358  * \param[in,out] cur_mv            Current motion vector.
2359  * \param[in]     bsize             Current block size.
2360  * \param[in,out] compmode_interinter_cost  RD of the selected interinter
2361  *                                  compound mode.
2362  * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2363  *                                  allocated buffers for the compound
2364  *                                  predictors and masks in the compound type
2365  *                                  search.
2366  * \param[in,out] orig_dst          A prediction buffer to hold a computed
2367  *                                  prediction. This will eventually hold the
2368  *                                  final prediction, and the tmp_dst info will
2369  *                                  be copied here.
2370  * \param[in]     tmp_dst           A temporary prediction buffer to hold a
2371  *                                  computed prediction.
2372  * \param[in,out] rate_mv           The rate associated with the motion vectors.
2373  *                                  This will be modified if a motion search is
2374  *                                  done in the motion mode search.
2375  * \param[in,out] rd_stats          Struct to keep track of the overall RD
2376  *                                  information.
2377  * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is the
2378  *                                  best total RD for a skip mode so far, and
2379  *                                  skip_rd[1] is the best RD for a skip mode so
2380  *                                  far in luma. This is used as a speed feature
2381  *                                  to skip the transform search if the computed
2382  *                                  skip RD for the current mode is not better
2383  *                                  than the best skip_rd so far.
2384  * \param[in,out] skip_build_pred   Indicates whether or not the inter
2385  *                                  predictor still needs to be built. If this
2386  *                                  is 1, the inter predictor has already been
2387  *                                  built, so the computation can be skipped.
2388  * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2389  * a viable candidate.
2390  */
2391 static int process_compound_inter_mode(
2392     AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
2393     int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
2394     int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
2395     const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
2396     RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
2397   MACROBLOCKD *xd = &x->e_mbd;
2398   MB_MODE_INFO *mbmi = xd->mi[0];
2399   const AV1_COMMON *cm = &cpi->common;
2400   const int masked_compound_used = is_any_masked_compound_used(bsize) &&
2401                                    cm->seq_params->enable_masked_compound;
2402   int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
2403                          (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);
2404 
2405   const int num_planes = av1_num_planes(cm);
2406   const int mi_row = xd->mi_row;
2407   const int mi_col = xd->mi_col;
2408   int is_luma_interp_done = 0;
2409   set_default_interp_filters(mbmi, cm->features.interp_filter);
2410 
2411   int64_t best_rd_compound;
2412   int64_t rd_thresh;
2413   const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
2414   const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
2415   rd_thresh = get_rd_thresh_from_best_rd(ref_best_rd, (1 << comp_type_rd_shift),
2416                                          comp_type_rd_scale);
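  // Note on the thresholding (assuming get_rd_thresh_from_best_rd() derives
  // its result from ref_best_rd with the same shift/scale factors): the
  // early-exit check below prunes the compound search when
  // (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale exceeds
  // ref_best_rd, i.e. roughly when best_rd_compound is larger than
  // ref_best_rd * 2^COMP_TYPE_RD_THRESH_SHIFT / COMP_TYPE_RD_THRESH_SCALE,
  // up to integer rounding.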
2417   // Select compound type and any parameters related to that type
2418   // (for example, the mask parameters if it is a masked mode) and compute
2419   // the RD
2420   *compmode_interinter_cost = av1_compound_type_rd(
2421       cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
2422       orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
2423       ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
2424   if (ref_best_rd < INT64_MAX &&
2425       (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
2426           ref_best_rd) {
2427     restore_dst_buf(xd, *orig_dst, num_planes);
2428     return 1;
2429   }
2430 
2431   // Build only uv predictor for COMPOUND_AVERAGE.
2432   // Note there is no need to call av1_enc_build_inter_predictor
2433   // for luma if COMPOUND_AVERAGE is selected because it is the first
2434   // candidate in av1_compound_type_rd, which means it used the dst_buf
2435   // rather than the tmp_buf.
2436   if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
2437     if (num_planes > 1) {
2438       av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
2439                                     AOM_PLANE_U, num_planes - 1);
2440     }
2441     *skip_build_pred = 1;
2442   }
2443   return 0;
2444 }
2445 
2446 // Speed feature to prune out MVs that are similar to previous MVs if they
2447 // don't achieve the best RD advantage.
2448 static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2449                                    int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2450                                    MB_MODE_INFO *mbmi, int pruning_factor) {
2451   int i;
2452   const int is_comp_pred = has_second_ref(mbmi);
2453   const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
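  // For example (assuming mbmi->mv is stored in 1/8-pel units): with a single
  // reference and pruning_factor == 1, thr == 1 << 2 == 4, so an MV whose
  // summed |row| + |col| distance from a previously saved MV is at most 4
  // eighth-pels is treated as a near-duplicate; compound prediction doubles
  // the threshold.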
2454 
2455   // Skip the evaluation if an MV match is found.
2456   if (ref_mv_idx > 0) {
2457     for (int idx = 0; idx < ref_mv_idx; ++idx) {
2458       if (save_mv[idx][0].as_int == INVALID_MV) continue;
2459 
2460       int mv_diff = 0;
2461       for (i = 0; i < 1 + is_comp_pred; ++i) {
2462         mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2463                    abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2464       }
2465 
2466       // If this mode is not the best one, and current MV is similar to
2467       // previous stored MV, terminate this ref_mv_idx evaluation.
2468       if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2469     }
2470   }
2471 
2472   if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2473     for (i = 0; i < is_comp_pred + 1; ++i)
2474       save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2475   }
2476 
2477   return 0;
2478 }
2479 
2480 /*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2481  *
2482  * \ingroup inter_mode_search
2483  *
2484  * Compares the sse of zero mv and the best sse found in single new_mv. If the
2485  * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
2486  * Else returns 0.
2487  *
2488  * Note that the sse here comes from single_motion_search, so it is
2489  * computed with the interpolation filter used during motion search, not the
2490  * actual interpolation filter used in encoding.
2491  *
2492  * \param[in]     fn_ptr            A table of function pointers to compute SSE.
2493  * \param[in]     x                 Pointer to struct holding all the data for
2494  *                                  the current macroblock.
2495  * \param[in]     bsize             The current block_size.
2496  * \param[in]     args              The args to handle_inter_mode, used to track
2497  *                                  the best SSE.
2498  * \param[in]     prune_zero_mv_with_sse  The value of the speed feature
2499  *                                         prune_zero_mv_with_sse.
2500  * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2501  */
2502 static inline int prune_zero_mv_with_sse(const aom_variance_fn_ptr_t *fn_ptr,
2503                                          const MACROBLOCK *x, BLOCK_SIZE bsize,
2504                                          const HandleInterModeArgs *args,
2505                                          int prune_zero_mv_with_sse) {
2506   const MACROBLOCKD *xd = &x->e_mbd;
2507   const MB_MODE_INFO *mbmi = xd->mi[0];
2508 
2509   const int is_comp_pred = has_second_ref(mbmi);
2510   const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
2511 
2512   for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2513     if (xd->global_motion[refs[idx]].wmtype != IDENTITY) {
2514       // Pruning logic only works for IDENTITY type models
2515       // Note: In theory we could apply similar logic for TRANSLATION
2516       // type models, but we do not code these due to a spec bug
2517       // (see comments in gm_get_motion_vector() in av1/common/mv.h)
2518       assert(xd->global_motion[refs[idx]].wmtype != TRANSLATION);
2519       return 0;
2520     }
2521 
2522     // Don't prune if we have invalid data
2523     assert(mbmi->mv[idx].as_int == 0);
2524     if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
2525       return 0;
2526     }
2527   }
2528 
2529   // Sum up the sse of ZEROMV and best NEWMV
2530   unsigned int this_sse_sum = 0;
2531   unsigned int best_sse_sum = 0;
2532   for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2533     const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
2534     const struct macroblockd_plane *pd = xd->plane;
2535     const struct buf_2d *src_buf = &p->src;
2536     const struct buf_2d *ref_buf = &pd->pre[idx];
2537     const uint8_t *src = src_buf->buf;
2538     const uint8_t *ref = ref_buf->buf;
2539     const int src_stride = src_buf->stride;
2540     const int ref_stride = ref_buf->stride;
2541 
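    // fn_ptr[bsize].vf() returns the variance of the two blocks and reports
    // the raw SSE through its last argument; only the SSE is accumulated here.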
2542     unsigned int this_sse;
2543     fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
2544     this_sse_sum += this_sse;
2545 
2546     const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
2547     best_sse_sum += best_sse;
2548   }
2549 
2550   const double mul = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25;
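  // With the more aggressive setting (prune_zero_mv_with_sse > 1) the zero-MV
  // mode is pruned as soon as its SSE exceeds the best NEWMV SSE; otherwise a
  // 25% margin is allowed before pruning.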
2551   if ((double)this_sse_sum > (mul * (double)best_sse_sum)) {
2552     return 1;
2553   }
2554 
2555   return 0;
2556 }
2557 
2558 /*!\brief Searches for interpolation filter in realtime mode during winner eval
2559  *
2560  * \ingroup inter_mode_search
2561  *
2562  * Does a simple interpolation filter search during winner mode evaluation. This
2563  * is currently only used by realtime mode as \ref
2564  * av1_interpolation_filter_search is not called during realtime encoding.
2565  *
2566  * This function only searches over two possible filters. EIGHTTAP_REGULAR is
2567  * always searched. For lowres clips (<= 240p), MULTITAP_SHARP is also
2568  * searched. For higher res clips (> 240p), EIGHTTAP_SMOOTH is also searched.
2569  *
2570  * \param[in]     cpi               Pointer to the compressor. Used for feature
2571  *                                  flags.
2572  * \param[in,out] x                 Pointer to macroblock. This is primarily
2573  *                                  used to access the buffers.
2574  * \param[in]     mi_row            The current row in mi unit (4X4 pixels).
2575  * \param[in]     mi_col            The current col in mi unit (4X4 pixels).
2576  * \param[in]     bsize             The current block_size.
2577  * \return Returns true if a predictor is built in xd->dst, false otherwise.
2578  */
2579 static inline bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
2580                                       int mi_row, int mi_col,
2581                                       BLOCK_SIZE bsize) {
2582   static const InterpFilters filters_ref_set[3] = {
2583     { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
2584     { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
2585     { MULTITAP_SHARP, MULTITAP_SHARP }
2586   };
2587 
2588   const AV1_COMMON *const cm = &cpi->common;
2589   MACROBLOCKD *const xd = &x->e_mbd;
2590   MB_MODE_INFO *const mi = xd->mi[0];
2591   int64_t best_cost = INT64_MAX;
2592   int best_filter_index = -1;
2593   // dst_bufs[0] stores the new predictor, and dst_bufs[1] stores the best.
2594   const int num_planes = av1_num_planes(cm);
2595   const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
2596   assert(is_inter_mode(mi->mode));
2597   assert(mi->motion_mode == SIMPLE_TRANSLATION);
2598   assert(!is_inter_compound_mode(mi->mode));
2599 
2600   if (!av1_is_interp_needed(xd)) {
2601     return false;
2602   }
2603 
2604   struct macroblockd_plane *pd = xd->plane;
2605   const BUFFER_SET orig_dst = {
2606     { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
2607     { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
2608   };
2609   uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
2610   const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
2611                                  tmp_buf + 2 * MAX_SB_SQUARE },
2612                                { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
2613   const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };
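  // Sketch of the buffer handling below: each candidate filter writes its
  // prediction into dst_bufs[0]; when a candidate becomes the new best,
  // swap_dst_buf() exchanges the two pointers so that dst_bufs[1] always
  // tracks the best predictor found so far.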
2614 
2615   for (int i = 0; i < 3; ++i) {
2616     if (is_240p_or_lesser) {
2617       if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
2618         continue;
2619       }
2620     } else {
2621       if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
2622         continue;
2623       }
2624     }
2625     int64_t cost;
2626     RD_STATS tmp_rd = { 0 };
2627 
2628     mi->interp_filters.as_filters = filters_ref_set[i];
2629     av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
2630 
2631     model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
2632                        ? MODELRD_LEGACY
2633                        : MODELRD_TYPE_INTERP_FILTER](
2634         cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
2635         &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);
2636 
2637     tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
2638                                            cm->seq_params->enable_dual_filter);
2639     cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
2640     if (cost < best_cost) {
2641       best_filter_index = i;
2642       best_cost = cost;
2643       swap_dst_buf(xd, dst_bufs, num_planes);
2644     }
2645   }
2646   assert(best_filter_index >= 0);
2647 
2648   mi->interp_filters.as_filters = filters_ref_set[best_filter_index];
2649 
2650   const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];
2651 
2652   if (is_best_pred_in_orig) {
2653     swap_dst_buf(xd, dst_bufs, num_planes);
2654   } else {
2655     // Note that xd->plane's dst buffers are kept in sync with dst_bufs[0].
2656     // So if is_best_pred_in_orig is false, the current buffer (dst_bufs[0])
2657     // is the original one and the best predictor resides in tmp_dst.
2658     assert(&orig_dst == dst_bufs[0]);
2659     assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
2660     const int width = block_size_wide[bsize];
2661     const int height = block_size_high[bsize];
2662 #if CONFIG_AV1_HIGHBITDEPTH
2663     const bool is_hbd = is_cur_buf_hbd(xd);
2664     if (is_hbd) {
2665       aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
2666                                tmp_dst.stride[AOM_PLANE_Y],
2667                                CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
2668                                orig_dst.stride[AOM_PLANE_Y], width, height);
2669     } else {
2670       aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
2671                         orig_dst.plane[AOM_PLANE_Y],
2672                         orig_dst.stride[AOM_PLANE_Y], width, height);
2673     }
2674 #else
2675     aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
2676                       orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
2677                       width, height);
2678 #endif
2679   }
2680 
2681   // Build the UV predictors (the Y predictor was already built above).
2682   if (num_planes > 1) {
2683     av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
2684                                   AOM_PLANE_U, AOM_PLANE_V);
2685   }
2686 
2687   return true;
2688 }
2689 
2690 /*!\brief AV1 inter mode RD computation
2691  *
2692  * \ingroup inter_mode_search
2693  * Do the RD search for a given inter mode and compute all information relevant
2694  * to the input mode. It will compute the best MV,
2695  * compound parameters (if the mode is a compound mode) and interpolation filter
2696  * parameters.
2697  *
2698  * \param[in]     cpi               Top-level encoder structure.
2699  * \param[in]     tile_data         Pointer to struct holding adaptive
2700  *                                  data/contexts/models for the tile during
2701  *                                  encoding.
2702  * \param[in]     x                 Pointer to structure holding all the data
2703  *                                  for the current macroblock.
2704  * \param[in]     bsize             Current block size.
2705  * \param[in,out] rd_stats          Struct to keep track of the overall RD
2706  *                                  information.
2707  * \param[in,out] rd_stats_y        Struct to keep track of the RD information
2708  *                                  for only the Y plane.
2709  * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
2710  *                                  for only the UV planes.
2711  * \param[in]     args              HandleInterModeArgs struct holding
2712  *                                  miscellaneous arguments for inter mode
2713  *                                  search. See the documentation for this
2714  *                                  struct for a description of each member.
2715  * \param[in]     ref_best_rd       Best RD found so far for this block.
2716  *                                  It is used for early termination of this
2717  *                                  search if the RD exceeds this value.
2718  * \param[in]     tmp_buf           Temporary buffer used to hold predictors
2719  *                                  built in this search.
2720  * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2721  *                                  allocated buffers for the compound
2722  *                                  predictors and masks in the compound type
2723  *                                  search.
2724  * \param[in,out] best_est_rd       Estimated RD for motion mode search if
2725  *                                  do_tx_search (see below) is 0.
2726  * \param[in]     do_tx_search      Parameter to indicate whether or not to do
2727  *                                  a full transform search. This will compute
2728  *                                  an estimated RD for the modes without the
2729  *                                  transform search and later perform the full
2730  *                                  transform search on the best candidates.
2731  * \param[in,out] inter_modes_info  InterModesInfo struct to hold inter mode
2732  *                                  information to perform a full transform
2733  *                                  search only on winning candidates searched
2734  *                                  with an estimate for transform coding RD.
2735  * \param[in,out] motion_mode_cand  A motion_mode_candidate struct to store
2736  *                                  motion mode information used in a speed
2737  *                                  feature to search motion modes other than
2738  *                                  SIMPLE_TRANSLATION only on winning
2739  *                                  candidates.
2740  * \param[in,out] skip_rd           A length 2 array, where skip_rd[0] is the
2741  *                                  best total RD for a skip mode so far, and
2742  *                                  skip_rd[1] is the best RD for a skip mode so
2743  *                                  far in luma. This is used as a speed feature
2744  *                                  to skip the transform search if the computed
2745  *                                  skip RD for the current mode is not better
2746  *                                  than the best skip_rd so far.
2747  * \param[in]     inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
2748  *                                         narrow down the search based on data
2749  *                                         collected in the TPL model.
2750  * \param[out]    yrd               Stores the rdcost corresponding to encoding
2751  *                                  the luma plane.
2752  *
2753  * \return The RD cost for the mode being searched.
2754  */
2755 static int64_t handle_inter_mode(
2756     AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
2757     BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
2758     RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
2759     uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
2760     int64_t *best_est_rd, const int do_tx_search,
2761     InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
2762     int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
2763     int64_t *yrd) {
2764   const AV1_COMMON *cm = &cpi->common;
2765   const int num_planes = av1_num_planes(cm);
2766   MACROBLOCKD *xd = &x->e_mbd;
2767   MB_MODE_INFO *mbmi = xd->mi[0];
2768   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2769   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
2770   const int is_comp_pred = has_second_ref(mbmi);
2771   const PREDICTION_MODE this_mode = mbmi->mode;
2772 
2773 #if CONFIG_REALTIME_ONLY
2774   const int prune_modes_based_on_tpl = 0;
2775 #else   // CONFIG_REALTIME_ONLY
2776   const TplParams *const tpl_data = &cpi->ppi->tpl_data;
2777   const int prune_modes_based_on_tpl =
2778       cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
2779       av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
2780 #endif  // CONFIG_REALTIME_ONLY
2781   int i;
2782   // Reference frames for this mode
2783   const int refs[2] = { mbmi->ref_frame[0],
2784                         (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2785   int rate_mv = 0;
2786   int64_t rd = INT64_MAX;
2787   // Do first prediction into the destination buffer. Do the next
2788   // prediction into a temporary buffer. Then keep track of which one
2789   // of these currently holds the best predictor, and use the other
2790   // one for future predictions. In the end, copy from tmp_buf to
2791   // dst if necessary.
2792   struct macroblockd_plane *pd = xd->plane;
2793   const BUFFER_SET orig_dst = {
2794     { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
2795     { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
2796   };
2797   const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
2798                                  tmp_buf + 2 * MAX_SB_SQUARE },
2799                                { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
2800 
2801   int64_t ret_val = INT64_MAX;
2802   const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2803   RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
2804   int64_t best_rd = INT64_MAX;
2805   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
2806   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
2807   int64_t best_yrd = INT64_MAX;
2808   MB_MODE_INFO best_mbmi = *mbmi;
2809   int best_xskip_txfm = 0;
2810   int64_t newmv_ret_val = INT64_MAX;
2811   inter_mode_info mode_info[MAX_REF_MV_SEARCH];
2812 
2813   // Do not prune the mode based on inter cost from tpl if the current ref frame
2814   // is the winner ref in neighbouring blocks.
2815   int ref_match_found_in_above_nb = 0;
2816   int ref_match_found_in_left_nb = 0;
2817   if (prune_modes_based_on_tpl) {
2818     ref_match_found_in_above_nb =
2819         find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
2820     ref_match_found_in_left_nb =
2821         find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
2822   }
2823 
2824   // First, perform a simple translation search for each of the indices. If
2825   // an index performs well, it will be fully searched in the main loop
2826   // of this function.
2827   const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
2828   // Save MV results from first 2 ref_mv_idx.
2829   int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
2830   int best_ref_mv_idx = -1;
2831   const int idx_mask =
2832       ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set);
2833   const int16_t mode_ctx =
2834       av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
2835   const ModeCosts *mode_costs = &x->mode_costs;
2836   const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
2837   const int base_rate =
2838       args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;
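  // The signalling rate charged to each candidate below starts from base_rate
  // (reference frame cost + single/compound signalling + inter mode cost) and
  // then adds the per-candidate DRL index cost, plus rate_mv for NEWMV modes.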
2839 
2840   for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
2841     save_mv[i][0].as_int = INVALID_MV;
2842     save_mv[i][1].as_int = INVALID_MV;
2843   }
2844   args->start_mv_cnt = 0;
2845 
2846   // Main loop of this function. This will iterate over all of the ref mvs
2847   // in the dynamic reference list and do the following:
2848   //    1.) Get the current MV. Create newmv MV if necessary
2849   //    2.) Search compound type and parameters if applicable
2850   //    3.) Do interpolation filter search
2851   //    4.) Build the inter predictor
2852   //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
2853   //        WARPED_CAUSAL)
2854   //    6.) Update stats if best so far
2855   for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
2856     mbmi->ref_mv_idx = ref_mv_idx;
2857 
2858     mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
2859     mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
2860     const int drl_cost = get_drl_cost(
2861         mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
2862     mode_info[ref_mv_idx].drl_cost = drl_cost;
2863     mode_info[ref_mv_idx].skip = 0;
2864 
2865     if (!mask_check_bit(idx_mask, ref_mv_idx)) {
2866       // MV did not perform well in simple translation search. Skip it.
2867       continue;
2868     }
2869     if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
2870         !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
2871       // Skip mode if TPL model indicates it will not be beneficial.
2872       if (prune_modes_based_on_tpl_stats(
2873               inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
2874               cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
2875         continue;
2876     }
2877     av1_init_rd_stats(rd_stats);
2878 
2879     // Initialize compound mode data
2880     mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2881     mbmi->comp_group_idx = 0;
2882     mbmi->compound_idx = 1;
2883     if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
2884 
2885     mbmi->num_proj_ref = 0;
2886     mbmi->motion_mode = SIMPLE_TRANSLATION;
2887 
2888     // Compute cost for signalling this DRL index
2889     rd_stats->rate = base_rate;
2890     rd_stats->rate += drl_cost;
2891 
2892     int rs = 0;
2893     int compmode_interinter_cost = 0;
2894 
2895     int_mv cur_mv[2];
2896 
2897     // TODO(Cherma): Extend this speed feature to support compound mode
2898     int skip_repeated_ref_mv =
2899         is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
2900     // Generate the current mv according to the prediction mode
2901     if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
2902       continue;
2903     }
2904 
2905     // The above call to build_cur_mv does not handle NEWMV modes. Build
2906     // the mv here if we have NEWMV for any predictors.
2907     if (have_newmv_in_inter_mode(this_mode)) {
2908 #if CONFIG_COLLECT_COMPONENT_TIMING
2909       start_timing(cpi, handle_newmv_time);
2910 #endif
2911       newmv_ret_val =
2912           handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
2913 #if CONFIG_COLLECT_COMPONENT_TIMING
2914       end_timing(cpi, handle_newmv_time);
2915 #endif
2916 
2917       if (newmv_ret_val != 0) continue;
2918 
2919       if (is_inter_singleref_mode(this_mode) &&
2920           cur_mv[0].as_int != INVALID_MV) {
2921         const MV_REFERENCE_FRAME ref = refs[0];
2922         const unsigned int this_sse = x->pred_sse[ref];
2923         if (this_sse < args->best_single_sse_in_refs[ref]) {
2924           args->best_single_sse_in_refs[ref] = this_sse;
2925         }
2926 
2927         if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
2928           const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1;
2929           const int pix_idx = num_pels_log2_lookup[bsize] - 4;
2930           const double scale_factor[3][11] = {
2931             { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
2932             { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
2933             { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
2934           };
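          // As derived from the lookups above: pix_idx indexes block sizes
          // from 16 pixels (4x4 -> 0) up to 16384 pixels (128x128 -> 10), and
          // th_idx picks a row per speed-feature level, so larger blocks and
          // higher feature levels skip NEWMV more readily (scale factor closer
          // to 1).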
2935           assert(pix_idx >= 0);
2936           assert(th_idx <= 2);
2937           if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse)
2938             continue;
2939         }
2940       }
2941 
2942       rd_stats->rate += rate_mv;
2943     }
2944     // Copy the motion vector for this mode into mbmi struct
2945     for (i = 0; i < is_comp_pred + 1; ++i) {
2946       mbmi->mv[i].as_int = cur_mv[i].as_int;
2947     }
2948 
2949     if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
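    // RDCOST with zero distortion is a lower bound on this mode's final RD
    // cost, so if the signalling rate alone already exceeds ref_best_rd the
    // mode cannot win; NEARESTMV/NEAREST_NEARESTMV are exempted, presumably to
    // always keep the baseline candidates.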
2950         mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
2951       continue;
2952     }
2953 
2954     // Skip the rest of the search if prune_ref_mv_idx_search speed feature
2955     // is enabled, and the current MV is similar to a previous one.
2956     if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
2957         prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
2958                                 cpi->sf.inter_sf.prune_ref_mv_idx_search))
2959       continue;
2960 
2961     if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
2962         (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
2963       if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args,
2964                                  cpi->sf.gm_sf.prune_zero_mv_with_sse)) {
2965         continue;
2966       }
2967     }
2968 
2969     int skip_build_pred = 0;
2970     const int mi_row = xd->mi_row;
2971     const int mi_col = xd->mi_col;
2972 
2973     // Handle a compound predictor, continue if it is determined this
2974     // cannot be the best compound mode
2975     if (is_comp_pred) {
2976 #if CONFIG_COLLECT_COMPONENT_TIMING
2977       start_timing(cpi, compound_type_rd_time);
2978 #endif
2979       const int not_best_mode = process_compound_inter_mode(
2980           cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
2981           rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
2982           &skip_build_pred);
2983 #if CONFIG_COLLECT_COMPONENT_TIMING
2984       end_timing(cpi, compound_type_rd_time);
2985 #endif
2986       if (not_best_mode) continue;
2987     }
2988 
2989     if (!args->skip_ifs) {
2990 #if CONFIG_COLLECT_COMPONENT_TIMING
2991       start_timing(cpi, interpolation_filter_search_time);
2992 #endif
2993       // Determine the interpolation filter for this mode
2994       ret_val = av1_interpolation_filter_search(
2995           x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
2996           &skip_build_pred, args, ref_best_rd);
2997 #if CONFIG_COLLECT_COMPONENT_TIMING
2998       end_timing(cpi, interpolation_filter_search_time);
2999 #endif
3000       if (args->modelled_rd != NULL && !is_comp_pred) {
3001         args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
3002       }
3003       if (ret_val != 0) {
3004         restore_dst_buf(xd, orig_dst, num_planes);
3005         continue;
3006       } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
3007                  ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
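        // (rd >> 3) * 3 > ref_best_rd is an integer approximation of
        // rd > ref_best_rd * 8 / 3, i.e. the modelled RD after the filter
        // search is already well past the best RD seen so far.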
3008         restore_dst_buf(xd, orig_dst, num_planes);
3009         continue;
3010       }
3011 
3012       // Compute modelled RD if enabled
3013       if (args->modelled_rd != NULL) {
3014         if (is_comp_pred) {
3015           const int mode0 = compound_ref0_mode(this_mode);
3016           const int mode1 = compound_ref1_mode(this_mode);
3017           const int64_t mrd =
3018               AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
3019                      args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
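          // mrd is the smaller of the modelled RDs of the two single-reference
          // modes that make up this compound mode; the check below (roughly
          // rd > mrd * 4 / 3) drops the compound mode when it models much
          // worse than its better single-reference component.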
3020           if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
3021             restore_dst_buf(xd, orig_dst, num_planes);
3022             continue;
3023           }
3024         }
3025       }
3026     }
3027 
3028     rd_stats->rate += compmode_interinter_cost;
3029     if (skip_build_pred != 1) {
3030       // Build this inter predictor if it has not been previously built
3031       av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
3032                                     av1_num_planes(cm) - 1);
3033     }
3034 
3035 #if CONFIG_COLLECT_COMPONENT_TIMING
3036     start_timing(cpi, motion_mode_rd_time);
3037 #endif
3038     int rate2_nocoeff = rd_stats->rate;
3039     // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
3040     // OBMC_CAUSAL or WARPED_CAUSAL
3041     int64_t this_yrd;
3042     ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
3043                              rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
3044                              &orig_dst, best_est_rd, do_tx_search,
3045                              inter_modes_info, 0, &this_yrd);
3046 #if CONFIG_COLLECT_COMPONENT_TIMING
3047     end_timing(cpi, motion_mode_rd_time);
3048 #endif
3049     assert(
3050         IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));
3051 
3052     if (ret_val != INT64_MAX) {
3053       int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
3054       const THR_MODES mode_enum = get_prediction_mode_idx(
3055           mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3056       // Collect mode stats for multiwinner mode processing
3057       store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
3058                               rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
3059                               cpi->sf.winner_mode_sf.multi_winner_mode_type,
3060                               do_tx_search);
3061       if (tmp_rd < best_rd) {
3062         best_yrd = this_yrd;
3063         // Update the best rd stats if we found the best mode so far
3064         best_rd_stats = *rd_stats;
3065         best_rd_stats_y = *rd_stats_y;
3066         best_rd_stats_uv = *rd_stats_uv;
3067         best_rd = tmp_rd;
3068         best_mbmi = *mbmi;
3069         best_xskip_txfm = txfm_info->skip_txfm;
3070         memcpy(best_blk_skip, txfm_info->blk_skip,
3071                sizeof(best_blk_skip[0]) * xd->height * xd->width);
3072         av1_copy_array(best_tx_type_map, xd->tx_type_map,
3073                        xd->height * xd->width);
3074         motion_mode_cand->rate_mv = rate_mv;
3075         motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
3076       }
3077 
3078       if (tmp_rd < ref_best_rd) {
3079         ref_best_rd = tmp_rd;
3080         best_ref_mv_idx = ref_mv_idx;
3081       }
3082     }
3083     restore_dst_buf(xd, orig_dst, num_planes);
3084   }
3085 
3086   if (best_rd == INT64_MAX) return INT64_MAX;
3087 
3088   // re-instate status of the best choice
3089   *rd_stats = best_rd_stats;
3090   *rd_stats_y = best_rd_stats_y;
3091   *rd_stats_uv = best_rd_stats_uv;
3092   *yrd = best_yrd;
3093   *mbmi = best_mbmi;
3094   txfm_info->skip_txfm = best_xskip_txfm;
3095   assert(IMPLIES(mbmi->comp_group_idx == 1,
3096                  mbmi->interinter_comp.type != COMPOUND_AVERAGE));
3097   memcpy(txfm_info->blk_skip, best_blk_skip,
3098          sizeof(best_blk_skip[0]) * xd->height * xd->width);
3099   av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
3100 
3101   rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
3102 
3103   return rd_stats->rdcost;
3104 }
3105 
3106 /*!\brief Search for the best intrabc predictor
3107  *
3108  * \ingroup intra_mode_search
3109  * \callergraph
3110  * This function performs a motion search to find the best intrabc predictor.
3111  *
3112  * \returns Returns the best overall rdcost (including the non-intrabc modes
3113  * search before this function).
3114  */
3115 static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
3116                                        PICK_MODE_CONTEXT *ctx,
3117                                        RD_STATS *rd_stats, BLOCK_SIZE bsize,
3118                                        int64_t best_rd) {
3119   const AV1_COMMON *const cm = &cpi->common;
3120   if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
3121       !cpi->sf.mv_sf.use_intrabc || cpi->sf.rt_sf.use_nonrd_pick_mode)
3122     return INT64_MAX;
3123   const int num_planes = av1_num_planes(cm);
3124 
3125   MACROBLOCKD *const xd = &x->e_mbd;
3126   const TileInfo *tile = &xd->tile;
3127   MB_MODE_INFO *mbmi = xd->mi[0];
3128   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3129 
3130   const int mi_row = xd->mi_row;
3131   const int mi_col = xd->mi_col;
3132   const int w = block_size_wide[bsize];
3133   const int h = block_size_high[bsize];
3134   const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
3135   const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
3136 
3137   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3138   const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
3139   av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
3140                    xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3141                    mbmi_ext->mode_context);
3142   // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3143   // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3144   av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
3145   int_mv nearestmv, nearmv;
3146   av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
3147                                    0);
3148 
3149   if (nearestmv.as_int == INVALID_MV) {
3150     nearestmv.as_int = 0;
3151   }
3152   if (nearmv.as_int == INVALID_MV) {
3153     nearmv.as_int = 0;
3154   }
3155 
3156   int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
3157   if (dv_ref.as_int == 0) {
3158     av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
3159   }
3160   // Ref DV should not have sub-pel.
3161   assert((dv_ref.as_mv.col & 7) == 0);
3162   assert((dv_ref.as_mv.row & 7) == 0);
3163   mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
3164 
3165   struct buf_2d yv12_mb[MAX_MB_PLANE];
3166   av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
3167   for (int i = 0; i < num_planes; ++i) {
3168     xd->plane[i].pre[0] = yv12_mb[i];
3169   }
3170 
3171   enum IntrabcMotionDirection {
3172     IBC_MOTION_ABOVE,
3173     IBC_MOTION_LEFT,
3174     IBC_MOTION_DIRECTIONS
3175   };
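  // Sketch of the two search regions set up below (derived from the limits):
  // IBC_MOTION_ABOVE searches the full tile width over the rows strictly
  // above the current superblock row, while IBC_MOTION_LEFT searches the
  // columns strictly to the left of the current superblock column down to the
  // bottom of the current superblock row (clamped to the tile).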
3176 
3177   MB_MODE_INFO best_mbmi = *mbmi;
3178   RD_STATS best_rdstats = *rd_stats;
3179   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
3180   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
3181   av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3182 
3183   FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
3184   const SEARCH_METHODS search_method =
3185       av1_get_default_mv_search_method(x, &cpi->sf.mv_sf, bsize);
3186   const search_site_config *lookahead_search_sites =
3187       cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
3188   const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
3189   av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
3190                                      &dv_ref.as_mv, start_mv,
3191                                      lookahead_search_sites, search_method,
3192                                      /*fine_search_interval=*/0);
3193   const IntraBCMVCosts *const dv_costs = x->dv_costs;
3194   av1_set_ms_to_intra_mode(&fullms_params, dv_costs);
3195 
3196   for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
3197        dir < IBC_MOTION_DIRECTIONS; ++dir) {
3198     switch (dir) {
3199       case IBC_MOTION_ABOVE:
3200         fullms_params.mv_limits.col_min =
3201             (tile->mi_col_start - mi_col) * MI_SIZE;
3202         fullms_params.mv_limits.col_max =
3203             (tile->mi_col_end - mi_col) * MI_SIZE - w;
3204         fullms_params.mv_limits.row_min =
3205             (tile->mi_row_start - mi_row) * MI_SIZE;
3206         fullms_params.mv_limits.row_max =
3207             (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
3208         break;
3209       case IBC_MOTION_LEFT:
3210         fullms_params.mv_limits.col_min =
3211             (tile->mi_col_start - mi_col) * MI_SIZE;
3212         fullms_params.mv_limits.col_max =
3213             (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
3214         // TODO([email protected]): Minimize the overlap between above and
3215         // left areas.
3216         fullms_params.mv_limits.row_min =
3217             (tile->mi_row_start - mi_row) * MI_SIZE;
3218         int bottom_coded_mi_edge =
3219             AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
3220         fullms_params.mv_limits.row_max =
3221             (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
3222         break;
3223       default: assert(0);
3224     }
3225     assert(fullms_params.mv_limits.col_min >= fullms_params.mv_limits.col_min);
3226     assert(fullms_params.mv_limits.col_max <= fullms_params.mv_limits.col_max);
3227     assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min);
3228     assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max);
3229 
3230     av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);
3231 
3232     if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
3233         fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
3234       continue;
3235     }
3236 
3237     const int step_param = cpi->mv_search_params.mv_step_param;
3238     IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
3239     int_mv best_mv, best_hash_mv;
3240     FULLPEL_MV_STATS best_mv_stats;
3241 
3242     int bestsme =
3243         av1_full_pixel_search(start_mv, &fullms_params, step_param, NULL,
3244                               &best_mv.as_fullmv, &best_mv_stats, NULL);
3245     const int hashsme = av1_intrabc_hash_search(
3246         cpi, xd, &fullms_params, intrabc_hash_info, &best_hash_mv.as_fullmv);
3247     if (hashsme < bestsme) {
3248       best_mv = best_hash_mv;
3249       bestsme = hashsme;
3250     }
3251 
3252     if (bestsme == INT_MAX) continue;
3253     const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
3254     if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
3255                                 get_fullmv_from_mv(&dv)))
3256       continue;
3257     if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
3258                          cm->seq_params->mib_size_log2))
3259       continue;
3260 
3261     // DV should not have sub-pel.
3262     assert((dv.col & 7) == 0);
3263     assert((dv.row & 7) == 0);
3264     memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
3265     mbmi->filter_intra_mode_info.use_filter_intra = 0;
3266     mbmi->use_intrabc = 1;
3267     mbmi->mode = DC_PRED;
3268     mbmi->uv_mode = UV_DC_PRED;
3269     mbmi->motion_mode = SIMPLE_TRANSLATION;
3270     mbmi->mv[0].as_mv = dv;
3271     mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
3272     mbmi->skip_txfm = 0;
3273     av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3274                                   av1_num_planes(cm) - 1);
3275 
3276     // TODO([email protected]): The full motion field defining discount
3277     // in MV_COST_WEIGHT is too large. Explore other values.
3278     const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
3279                                         dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
3280     const int rate_mode = x->mode_costs.intrabc_cost[1];
3281     RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
3282     if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
3283                          &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
3284       continue;
3285     rd_stats_yuv.rdcost =
3286         RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
3287     if (rd_stats_yuv.rdcost < best_rd) {
3288       best_rd = rd_stats_yuv.rdcost;
3289       best_mbmi = *mbmi;
3290       best_rdstats = rd_stats_yuv;
3291       memcpy(best_blk_skip, txfm_info->blk_skip,
3292              sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3293       av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
3294     }
3295   }
3296   *mbmi = best_mbmi;
3297   *rd_stats = best_rdstats;
3298   memcpy(txfm_info->blk_skip, best_blk_skip,
3299          sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3300   av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
3301 #if CONFIG_RD_DEBUG
3302   mbmi->rd_stats = *rd_stats;
3303 #endif
3304   return best_rd;
3305 }
3306 
3307 // TODO([email protected]): We are using struct $struct_name instead of their
3308 // typedef here because Doxygen doesn't know about the typedefs yet. So using
3309 // the typedef will prevent doxygen from finding this function and generating
3310 // the callgraph. Once documents for AV1_COMP and MACROBLOCK are added to
3311 // doxygen, we can revert back to using the typedefs.
3312 void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
3313                                struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
3314                                PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3315   const AV1_COMMON *const cm = &cpi->common;
3316   MACROBLOCKD *const xd = &x->e_mbd;
3317   MB_MODE_INFO *const mbmi = xd->mi[0];
3318   const int num_planes = av1_num_planes(cm);
3319   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3320   int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3321   uint8_t y_skip_txfm = 0, uv_skip_txfm = 0;
3322   int64_t dist_y = 0, dist_uv = 0;
3323 
3324   ctx->rd_stats.skip_txfm = 0;
3325   mbmi->ref_frame[0] = INTRA_FRAME;
3326   mbmi->ref_frame[1] = NONE_FRAME;
3327   mbmi->use_intrabc = 0;
3328   mbmi->mv[0].as_int = 0;
3329   mbmi->skip_mode = 0;
3330 
3331   const int64_t intra_yrd =
3332       av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
3333                                  &y_skip_txfm, bsize, best_rd, ctx);
3334 
3335   // Initialize default mode evaluation params
3336   set_mode_eval_params(cpi, x, DEFAULT_EVAL);
3337 
3338   if (intra_yrd < best_rd) {
3339     // Search intra modes for uv planes if needed
3340     if (num_planes > 1) {
3341       // Set up the tx variables for reproducing the y predictions in case we
3342       // need it for chroma-from-luma.
3343       if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
3344         memcpy(txfm_info->blk_skip, ctx->blk_skip,
3345                sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
3346         av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
3347       }
3348       const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
3349       av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3350                                   &dist_uv, &uv_skip_txfm, bsize,
3351                                   max_uv_tx_size);
3352     }
3353 
3354     // Intra block is always coded as non-skip
3355     rd_cost->rate =
3356         rate_y + rate_uv +
3357         x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
3358     rd_cost->dist = dist_y + dist_uv;
3359     rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
3360     rd_cost->skip_txfm = 0;
3361   } else {
3362     rd_cost->rate = INT_MAX;
3363   }
3364 
3365   if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
3366     best_rd = rd_cost->rdcost;
3367   if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
3368     ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
3369     memcpy(ctx->blk_skip, txfm_info->blk_skip,
3370            sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
3371     assert(rd_cost->rate != INT_MAX);
3372   }
3373   if (rd_cost->rate == INT_MAX) return;
3374 
3375   ctx->mic = *xd->mi[0];
3376   av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
3377                                       av1_ref_frame_type(xd->mi[0]->ref_frame));
3378   av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3379 }
3380 
3381 static inline void calc_target_weighted_pred(
3382     const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
3383     const uint8_t *above, int above_stride, const uint8_t *left,
3384     int left_stride);
3385 
3386 static inline void rd_pick_skip_mode(
3387     RD_STATS *rd_cost, InterModeSearchState *search_state,
3388     const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3389     struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3390   const AV1_COMMON *const cm = &cpi->common;
3391   const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3392   const int num_planes = av1_num_planes(cm);
3393   MACROBLOCKD *const xd = &x->e_mbd;
3394   MB_MODE_INFO *const mbmi = xd->mi[0];
3395 
3396   x->compound_idx = 1;  // COMPOUND_AVERAGE
3397   RD_STATS skip_mode_rd_stats;
3398   av1_invalid_rd_stats(&skip_mode_rd_stats);
3399 
3400   if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3401       skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3402     return;
3403   }
3404 
3405   const MV_REFERENCE_FRAME ref_frame =
3406       LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3407   const MV_REFERENCE_FRAME second_ref_frame =
3408       LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3409   const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3410   const THR_MODES mode_index =
3411       get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3412 
3413   if (mode_index == THR_INVALID) {
3414     return;
3415   }
3416 
3417   if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3418        cpi->sf.inter_sf.disable_onesided_comp) &&
3419       cpi->all_one_sided_refs) {
3420     return;
3421   }
3422 
3423   mbmi->mode = this_mode;
3424   mbmi->uv_mode = UV_DC_PRED;
3425   mbmi->ref_frame[0] = ref_frame;
3426   mbmi->ref_frame[1] = second_ref_frame;
3427   const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3428   if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3429     MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3430     if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3431         mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3432       return;
3433     }
3434     av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3435                      xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3436                      mbmi_ext->mode_context);
3437     // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3438     // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3439     av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3440   }
3441 
3442   assert(this_mode == NEAREST_NEARESTMV);
3443   if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3444     return;
3445   }
3446 
3447   mbmi->filter_intra_mode_info.use_filter_intra = 0;
3448   mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3449   mbmi->comp_group_idx = 0;
3450   mbmi->compound_idx = x->compound_idx;
3451   mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3452   mbmi->motion_mode = SIMPLE_TRANSLATION;
3453   mbmi->ref_mv_idx = 0;
3454   mbmi->skip_mode = mbmi->skip_txfm = 1;
3455   mbmi->palette_mode_info.palette_size[0] = 0;
3456   mbmi->palette_mode_info.palette_size[1] = 0;
3457 
3458   set_default_interp_filters(mbmi, cm->features.interp_filter);
3459 
3460   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3461   for (int i = 0; i < num_planes; i++) {
3462     xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3463     xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3464   }
3465 
3466   BUFFER_SET orig_dst;
3467   for (int i = 0; i < num_planes; i++) {
3468     orig_dst.plane[i] = xd->plane[i].dst.buf;
3469     orig_dst.stride[i] = xd->plane[i].dst.stride;
3470   }
3471 
3472   // Compare the use of skip_mode with the best intra/inter mode obtained.
3473   const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3474   int64_t best_intra_inter_mode_cost = INT64_MAX;
3475   if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3476     const ModeCosts *mode_costs = &x->mode_costs;
3477     best_intra_inter_mode_cost = RDCOST(
3478         x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3479         rd_cost->dist);
3480     // Account for non-skip mode rate in total rd stats
3481     rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3482     av1_rd_cost_update(x->rdmult, rd_cost);
3483   }
3484 
3485   // Obtain the rdcost for skip_mode.
3486   skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst,
3487                best_intra_inter_mode_cost);
3488 
3489   if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3490       (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3491     assert(mode_index != THR_INVALID);
3492     search_state->best_mbmode.skip_mode = 1;
3493     search_state->best_mbmode = *mbmi;
3494     memset(search_state->best_mbmode.inter_tx_size,
3495            search_state->best_mbmode.tx_size,
3496            sizeof(search_state->best_mbmode.inter_tx_size));
3497     set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3498                   search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3499                   xd);
3500     search_state->best_mode_index = mode_index;
3501 
3502     // Update rd_cost
3503     rd_cost->rate = skip_mode_rd_stats.rate;
3504     rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3505     rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3506 
3507     search_state->best_rd = rd_cost->rdcost;
3508     search_state->best_skip2 = 1;
3509     search_state->best_mode_skippable = 1;
3510 
3511     x->txfm_search_info.skip_txfm = 1;
3512   }
3513 }
3514 
3515 // Get winner mode stats of given mode index
3516 static inline MB_MODE_INFO *get_winner_mode_stats(
3517     MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3518     int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3519     RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3520     THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3521     int mode_idx) {
3522   MB_MODE_INFO *winner_mbmi;
3523   if (multi_winner_mode_type) {
3524     assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3525     WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3526     winner_mbmi = &winner_mode_stat->mbmi;
3527 
3528     *winner_rd_cost = &winner_mode_stat->rd_cost;
3529     *winner_rate_y = winner_mode_stat->rate_y;
3530     *winner_rate_uv = winner_mode_stat->rate_uv;
3531     *winner_mode_index = winner_mode_stat->mode_index;
3532   } else {
3533     winner_mbmi = best_mbmode;
3534     *winner_rd_cost = best_rd_cost;
3535     *winner_rate_y = best_rate_y;
3536     *winner_rate_uv = best_rate_uv;
3537     *winner_mode_index = *best_mode_index;
3538   }
3539   return winner_mbmi;
3540 }
3541 
3542 // speed feature: fast intra/inter transform type search
3543 // Used for speed >= 2
3544 // When this speed feature is on, in rd mode search, only DCT is used.
3545 // After the mode is determined, this function is called, to select
3546 // transform types and get accurate rdcost.
3547 static inline void refine_winner_mode_tx(
3548     const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
3549     PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
3550     MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
3551     int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
3552   const AV1_COMMON *const cm = &cpi->common;
3553   MACROBLOCKD *const xd = &x->e_mbd;
3554   MB_MODE_INFO *const mbmi = xd->mi[0];
3555   TxfmSearchParams *txfm_params = &x->txfm_search_params;
3556   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3557   int64_t best_rd;
3558   const int num_planes = av1_num_planes(cm);
3559 
3560   if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
3561                                          rd_cost->skip_txfm))
3562     return;
3563 
3564   // Set params for winner mode evaluation
3565   set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
3566 
3567   // No best mode identified so far
3568   if (*best_mode_index == THR_INVALID) return;
3569 
3570   best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
3571   for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
3572     RD_STATS *winner_rd_stats = NULL;
3573     int winner_rate_y = 0, winner_rate_uv = 0;
3574     THR_MODES winner_mode_index = 0;
3575 
3576     // TODO(any): Combine best mode and multi-winner mode processing paths
3577     // Get winner mode stats for current mode index
3578     MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
3579         x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
3580         &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
3581         cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);
3582 
3583     if (xd->lossless[winner_mbmi->segment_id] == 0 &&
3584         winner_mode_index != THR_INVALID &&
3585         is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
3586                                           rd_cost->skip_txfm)) {
3587       RD_STATS rd_stats = *winner_rd_stats;
3588       int skip_blk = 0;
3589       RD_STATS rd_stats_y, rd_stats_uv;
3590       const int skip_ctx = av1_get_skip_txfm_context(xd);
3591 
3592       *mbmi = *winner_mbmi;
3593 
3594       set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3595 
3596       // Select prediction reference frames.
3597       for (int i = 0; i < num_planes; i++) {
3598         xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3599         if (has_second_ref(mbmi))
3600           xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3601       }
3602 
3603       if (is_inter_mode(mbmi->mode)) {
3604         const int mi_row = xd->mi_row;
3605         const int mi_col = xd->mi_col;
3606         bool is_predictor_built = false;
3607         const PREDICTION_MODE prediction_mode = mbmi->mode;
3608         // Do interpolation filter search for realtime mode if applicable.
3609         if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
3610             cpi->oxcf.mode == REALTIME &&
3611             cm->current_frame.reference_mode == SINGLE_REFERENCE &&
3612             is_inter_mode(prediction_mode) &&
3613             mbmi->motion_mode == SIMPLE_TRANSLATION &&
3614             !is_inter_compound_mode(prediction_mode)) {
3615           is_predictor_built =
3616               fast_interp_search(cpi, x, mi_row, mi_col, bsize);
3617         }
3618         if (!is_predictor_built) {
3619           av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3620                                         av1_num_planes(cm) - 1);
3621         }
3622         if (mbmi->motion_mode == OBMC_CAUSAL)
3623           av1_build_obmc_inter_predictors_sb(cm, xd);
3624 
3625         av1_subtract_plane(x, bsize, 0);
3626         if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
3627             !xd->lossless[mbmi->segment_id]) {
3628           av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3629                                               INT64_MAX);
3630           assert(rd_stats_y.rate != INT_MAX);
3631         } else {
3632           av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3633                                             INT64_MAX);
3634           memset(mbmi->inter_tx_size, mbmi->tx_size,
3635                  sizeof(mbmi->inter_tx_size));
3636           for (int i = 0; i < xd->height * xd->width; ++i)
3637             set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
3638         }
3639       } else {
3640         av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3641                                           INT64_MAX);
3642       }
3643 
3644       if (num_planes > 1) {
3645         av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
3646       } else {
3647         av1_init_rd_stats(&rd_stats_uv);
3648       }
3649 
3650       const ModeCosts *mode_costs = &x->mode_costs;
3651       if (is_inter_mode(mbmi->mode) &&
3652           RDCOST(x->rdmult,
3653                  mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
3654                      rd_stats_uv.rate,
3655                  (rd_stats_y.dist + rd_stats_uv.dist)) >
3656               RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
3657                      (rd_stats_y.sse + rd_stats_uv.sse))) {
3658         skip_blk = 1;
3659         rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
3660         rd_stats_uv.rate = 0;
3661         rd_stats_y.dist = rd_stats_y.sse;
3662         rd_stats_uv.dist = rd_stats_uv.sse;
3663       } else {
3664         skip_blk = 0;
3665         rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
3666       }
3667       int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
3668                       winner_rate_y - winner_rate_uv;
3669       int64_t this_rd =
3670           RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
3671       if (best_rd > this_rd) {
3672         *best_mbmode = *mbmi;
3673         *best_mode_index = winner_mode_index;
3674         av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
3675         av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3676         rd_cost->rate = this_rate;
3677         rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
3678         rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
3679         rd_cost->rdcost = this_rd;
3680         best_rd = this_rd;
3681         *best_skip2 = skip_blk;
3682       }
3683     }
3684   }
3685 }
3686 
3687 /*!\cond */
3688 typedef struct {
3689   // Mask for each reference frame, specifying which prediction modes to NOT try
3690   // during search.
3691   uint32_t pred_modes[REF_FRAMES];
3692   // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
3693   // reference frames (i, j).
3694   // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
3695   // (NONE_FRAME).
3696   bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
3697 } mode_skip_mask_t;
3698 /*!\endcond */
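// Illustrative use of the mask (the helpers below follow this convention):
//   mask.pred_modes[LAST_FRAME] |= (1 << NEARMV);       // never try NEARMV with LAST_FRAME
//   mask.ref_combo[LAST_FRAME][NONE_FRAME + 1] = true;  // skip single-ref LAST_FRAME
// The '+ 1' offset exists because the second reference may be NONE_FRAME (-1).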
3699 
3700 // Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
3701 static inline void disable_reference(
3702     MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3703   for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3704     ref_combo[ref][ref2 + 1] = true;
3705   }
3706 }
3707 
3708 // Update 'ref_combo' mask to disable all inter references except ALTREF.
3709 static inline void disable_inter_references_except_altref(
3710     bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3711   disable_reference(LAST_FRAME, ref_combo);
3712   disable_reference(LAST2_FRAME, ref_combo);
3713   disable_reference(LAST3_FRAME, ref_combo);
3714   disable_reference(GOLDEN_FRAME, ref_combo);
3715   disable_reference(BWDREF_FRAME, ref_combo);
3716   disable_reference(ALTREF2_FRAME, ref_combo);
3717 }
3718 
3719 static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
3720   { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
3721   { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
3722   { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
3723   { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
3724   { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
3725   { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
3726   { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
3727   { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
3728 };
3729 
3730 typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
3731 
3732 static inline void default_skip_mask(mode_skip_mask_t *mask, REF_SET ref_set) {
3733   if (ref_set == REF_SET_FULL) {
3734     // Everything available by default.
3735     memset(mask, 0, sizeof(*mask));
3736   } else {
3737     // All modes available by default.
3738     memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
3739     // All references disabled first.
3740     for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
3741       for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3742         mask->ref_combo[ref1][ref2 + 1] = true;
3743       }
3744     }
3745     const MV_REFERENCE_FRAME(*ref_set_combos)[2];
3746     int num_ref_combos;
3747 
3748     // Then enable reduced set of references explicitly.
3749     switch (ref_set) {
3750       case REF_SET_REDUCED:
3751         ref_set_combos = reduced_ref_combos;
3752         num_ref_combos =
3753             (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
3754         break;
3755       case REF_SET_REALTIME:
3756         ref_set_combos = real_time_ref_combos;
3757         num_ref_combos =
3758             (int)sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]);
3759         break;
3760       default: assert(0); num_ref_combos = 0;
3761     }
3762 
3763     for (int i = 0; i < num_ref_combos; ++i) {
3764       const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
3765       mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
3766     }
3767   }
3768 }
3769 
3770 static inline void init_mode_skip_mask(mode_skip_mask_t *mask,
3771                                        const AV1_COMP *cpi, MACROBLOCK *x,
3772                                        BLOCK_SIZE bsize) {
3773   const AV1_COMMON *const cm = &cpi->common;
3774   const struct segmentation *const seg = &cm->seg;
3775   MACROBLOCKD *const xd = &x->e_mbd;
3776   MB_MODE_INFO *const mbmi = xd->mi[0];
3777   unsigned char segment_id = mbmi->segment_id;
3778   const SPEED_FEATURES *const sf = &cpi->sf;
3779   const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
3780   REF_SET ref_set = REF_SET_FULL;
3781 
3782   if (sf->rt_sf.use_real_time_ref_set)
3783     ref_set = REF_SET_REALTIME;
3784   else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
3785     ref_set = REF_SET_REDUCED;
3786 
3787   default_skip_mask(mask, ref_set);
3788 
3789   int min_pred_mv_sad = INT_MAX;
3790   MV_REFERENCE_FRAME ref_frame;
3791   if (ref_set == REF_SET_REALTIME) {
3792     // For real-time encoding, we only look at a subset of ref frames. So the
3793     // threshold for pruning should be computed from this subset as well.
3794     const int num_rt_refs =
3795         sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
3796     for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
3797       const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
3798       if (ref != INTRA_FRAME) {
3799         min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
3800       }
3801     }
3802   } else {
3803     for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
3804       min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
3805   }
3806 
3807   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3808     if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
3809       // Skip checking missing reference in both single and compound reference
3810       // modes.
3811       disable_reference(ref_frame, mask->ref_combo);
3812     } else {
3813       // Skip fixed mv modes for poor references
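      // A reference is treated as "poor" here when its predicted-MV SAD is
      // more than 4x the minimum SAD across the candidate references
      // (the >> 2 below divides by 4).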
3814       if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
3815         mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3816       }
3817     }
3818     if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3819         get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3820       // Reference not used for the segment.
3821       disable_reference(ref_frame, mask->ref_combo);
3822     }
3823   }
3824   // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
3825   // is disabled for this segment. This is to prevent the possibility that we
3826   // end up unable to pick any mode.
3827   if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3828     // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
3829     // unless ARNR filtering is enabled in which case we want
3830     // an unfiltered alternative. We allow near/nearest as well
3831     // because they may result in zero-zero MVs but be cheaper.
3832     if (cpi->rc.is_src_frame_alt_ref &&
3833         (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
3834       disable_inter_references_except_altref(mask->ref_combo);
3835 
3836       mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
3837       const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
3838       int_mv near_mv, nearest_mv, global_mv;
3839       get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
3840                   &x->mbmi_ext);
3841       get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
3842       get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
3843 
3844       if (near_mv.as_int != global_mv.as_int)
3845         mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
3846       if (nearest_mv.as_int != global_mv.as_int)
3847         mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
3848     }
3849   }
3850 
3851   if (cpi->rc.is_src_frame_alt_ref) {
3852     if (inter_sf->alt_ref_search_fp &&
3853         (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
3854       mask->pred_modes[ALTREF_FRAME] = 0;
3855       disable_inter_references_except_altref(mask->ref_combo);
3856       disable_reference(INTRA_FRAME, mask->ref_combo);
3857     }
3858   }
3859 
3860   if (inter_sf->alt_ref_search_fp) {
3861     if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
3862       int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
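      // i.e. roughly 1.125 * best_pred_mv_sad[0].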
3863       // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
3864       // those are past frames
3865       MV_REFERENCE_FRAME start_frame =
3866           inter_sf->alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
3867       for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
3868         if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
3869             0) {
3870           // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
3871           // to the relative dist of LAST_FRAME.
3872           if (inter_sf->alt_ref_search_fp == 1 &&
3873               (abs(cpi->ref_frame_dist_info
3874                        .ref_relative_dist[ref_frame - LAST_FRAME]) >
3875                1.5 * abs(cpi->ref_frame_dist_info
3876                              .ref_relative_dist[LAST_FRAME - LAST_FRAME]))) {
3877             continue;
3878           }
3879           if (x->pred_mv_sad[ref_frame] > sad_thresh)
3880             mask->pred_modes[ref_frame] |= INTER_ALL;
3881         }
3882       }
3883     }
3884   }
3885 
3886   if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
3887     if (x->best_pred_mv_sad[0] < INT_MAX) {
3888       int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
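      // i.e. roughly 1.5 * best_pred_mv_sad[0].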
3889       const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };
3890 
3891       // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
3892       for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
3893         ref_frame = prune_ref_list[ref_idx];
3894         if (x->pred_mv_sad[ref_frame] > sad_thresh)
3895           mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3896       }
3897     }
3898   }
3899 
3900   if (bsize > sf->part_sf.max_intra_bsize) {
3901     disable_reference(INTRA_FRAME, mask->ref_combo);
3902   }
3903 
3904   if (!cpi->oxcf.tool_cfg.enable_global_motion) {
3905     for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3906       mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
3907       mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
3908     }
3909   }
3910 
3911   mask->pred_modes[INTRA_FRAME] |=
3912       ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
3913 
3914   // Prune reference frames which are not the closest to the current
3915   // frame and with large pred_mv_sad.
3916   if (inter_sf->prune_single_ref) {
3917     assert(inter_sf->prune_single_ref > 0 && inter_sf->prune_single_ref < 3);
3918     const double prune_threshes[2] = { 1.20, 1.05 };
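    // A non-closest reference is pruned from single-ref inter modes when its
    // predicted-MV SAD exceeds 1.20x (prune_single_ref == 1) or 1.05x (== 2)
    // of the best SAD seen in the same temporal direction.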
3919 
3920     for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3921       const RefFrameDistanceInfo *const ref_frame_dist_info =
3922           &cpi->ref_frame_dist_info;
3923       const int is_closest_ref =
3924           (ref_frame == ref_frame_dist_info->nearest_past_ref) ||
3925           (ref_frame == ref_frame_dist_info->nearest_future_ref);
3926 
3927       if (!is_closest_ref) {
3928         const int dir =
3929             (ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] < 0)
3930                 ? 0
3931                 : 1;
3932         if (x->best_pred_mv_sad[dir] < INT_MAX &&
3933             x->pred_mv_sad[ref_frame] >
3934                 prune_threshes[inter_sf->prune_single_ref - 1] *
3935                     x->best_pred_mv_sad[dir])
3936           mask->pred_modes[ref_frame] |= INTER_SINGLE_ALL;
3937       }
3938     }
3939   }
3940 }
3941 
3942 static inline void init_neighbor_pred_buf(const OBMCBuffer *const obmc_buffer,
3943                                           HandleInterModeArgs *const args,
3944                                           int is_hbd) {
3945   if (is_hbd) {
3946     const int len = sizeof(uint16_t);
3947     args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
3948     args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
3949                                                  (MAX_SB_SQUARE >> 1) * len);
3950     args->above_pred_buf[2] =
3951         CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
3952     args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
3953     args->left_pred_buf[1] =
3954         CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
3955     args->left_pred_buf[2] =
3956         CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
3957   } else {
3958     args->above_pred_buf[0] = obmc_buffer->above_pred;
3959     args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
3960     args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
3961     args->left_pred_buf[0] = obmc_buffer->left_pred;
3962     args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
3963     args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
3964   }
3965 }
3966 
3967 static inline int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
3968                                   MV_REFERENCE_FRAME ref_frame) {
3969   const AV1_COMMON *const cm = &cpi->common;
3970   MV_REFERENCE_FRAME rf[2];
3971   av1_set_ref_frame(rf, ref_frame);
3972 
3973   if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
3974 
3975   if (prune_ref_by_selective_ref_frame(cpi, x, rf,
3976                                        cm->cur_frame->ref_display_order_hint)) {
3977     return 1;
3978   }
3979 
3980   return 0;
3981 }
3982 
3983 static inline int is_ref_frame_used_by_compound_ref(int ref_frame,
3984                                                     int skip_ref_frame_mask) {
3985   for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
3986     if (!(skip_ref_frame_mask & (1 << r))) {
3987       const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
3988       if (rf[0] == ref_frame || rf[1] == ref_frame) {
3989         return 1;
3990       }
3991     }
3992   }
3993   return 0;
3994 }
3995 
3996 static inline int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
3997                                              const MB_MODE_INFO *mi_cache) {
3998   if (!mi_cache) {
3999     return 0;
4000   }
4001 
4002   if (ref_frame < REF_FRAMES) {
4003     return (ref_frame == mi_cache->ref_frame[0] ||
4004             ref_frame == mi_cache->ref_frame[1]);
4005   }
4006 
4007   // if we are here, then the current mode is compound.
4008   MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
4009   return ref_frame == cached_ref_type;
4010 }
4011 
4012 // Please add/modify parameter setting in this function, making it consistent
4013 // and easy to read and maintain.
4014 static inline void set_params_rd_pick_inter_mode(
4015     const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
4016     BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
4017     unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
4018     struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
4019   const AV1_COMMON *const cm = &cpi->common;
4020   MACROBLOCKD *const xd = &x->e_mbd;
4021   MB_MODE_INFO *const mbmi = xd->mi[0];
4022   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
4023   unsigned char segment_id = mbmi->segment_id;
4024 
4025   init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
4026   av1_collect_neighbors_ref_counts(xd);
4027   estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
4028                            ref_costs_comp);
4029 
4030   const int mi_row = xd->mi_row;
4031   const int mi_col = xd->mi_col;
4032   x->best_pred_mv_sad[0] = INT_MAX;
4033   x->best_pred_mv_sad[1] = INT_MAX;
4034 
4035   for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
4036        ++ref_frame) {
4037     x->pred_mv_sad[ref_frame] = INT_MAX;
4038     mbmi_ext->mode_context[ref_frame] = 0;
4039     mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
4040     if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
4041       // Skip the ref frame if the mask says skip and the ref is not used by
4042       // compound ref.
4043       if (skip_ref_frame_mask & (1 << ref_frame) &&
4044           !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
4045           !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4046         continue;
4047       }
4048       assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
4049       setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
4050     }
4051     if (cpi->sf.inter_sf.alt_ref_search_fp ||
4052         cpi->sf.inter_sf.prune_single_ref ||
4053         cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
4054       // Store the best pred_mv_sad across all past frames
4055       if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
4056           0)
4057         x->best_pred_mv_sad[0] =
4058             AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
4059       else
4060         // Store the best pred_mv_sad across all future frames
4061         x->best_pred_mv_sad[1] =
4062             AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
4063     }
4064   }
4065 
4066   if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
4067     // No second reference on RT ref set, so no need to initialize
4068     for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
4069          ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
4070       mbmi_ext->mode_context[ref_frame] = 0;
4071       mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
4072       const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
4073       if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
4074             (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
4075         continue;
4076       }
4077 
4078       if (skip_ref_frame_mask & (1 << ref_frame) &&
4079           !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4080         continue;
4081       }
4082       // Ref mv list population is not required when compound references are
4083       // pruned.
4084       if (prune_ref_frame(cpi, x, ref_frame)) continue;
4085 
4086       av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
4087                        xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
4088                        mbmi_ext->mode_context);
4089       // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
4090       // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
4091       av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
4092     }
4093   }
4094 
4095   av1_count_overlappable_neighbors(cm, xd);
4096   const FRAME_UPDATE_TYPE update_type =
4097       get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
4098   int use_actual_frame_probs = 1;
4099   int prune_obmc;
4100 #if CONFIG_FPMT_TEST
4101   use_actual_frame_probs =
4102       (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
4103   if (!use_actual_frame_probs) {
4104     prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
4105                  cpi->sf.inter_sf.prune_obmc_prob_thresh;
4106   }
4107 #endif
4108   if (use_actual_frame_probs) {
4109     prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
4110                  cpi->sf.inter_sf.prune_obmc_prob_thresh;
4111   }
4112   if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
4113     if (check_num_overlappable_neighbors(mbmi) &&
4114         is_motion_variation_allowed_bsize(bsize)) {
4115       int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4116       int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4117                                        MAX_SB_SIZE >> 1 };
4118       int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4119                                         MAX_SB_SIZE >> 1 };
4120       int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4121       av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
4122                                           dst_width1, dst_height1,
4123                                           args->above_pred_stride);
4124       av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
4125                                          dst_width2, dst_height2,
4126                                          args->left_pred_stride);
4127       const int num_planes = av1_num_planes(cm);
4128       av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
4129                            mi_col, 0, num_planes);
4130       calc_target_weighted_pred(
4131           cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
4132           args->left_pred_buf[0], args->left_pred_stride[0]);
4133     }
4134   }
4135 
4136   init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);
4137 
4138   // Set params for mode evaluation
4139   set_mode_eval_params(cpi, x, MODE_EVAL);
4140 
4141   x->comp_rd_stats_idx = 0;
4142 
4143   for (int idx = 0; idx < REF_FRAMES; idx++) {
4144     args->best_single_sse_in_refs[idx] = INT32_MAX;
4145   }
4146 }
4147 
4148 static inline void init_single_inter_mode_search_state(
4149     InterModeSearchState *search_state) {
4150   for (int dir = 0; dir < 2; ++dir) {
4151     for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4152       for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
4153         SingleInterModeState *state;
4154 
4155         state = &search_state->single_state[dir][mode][ref_frame];
4156         state->ref_frame = NONE_FRAME;
4157         state->rd = INT64_MAX;
4158 
4159         state = &search_state->single_state_modelled[dir][mode][ref_frame];
4160         state->ref_frame = NONE_FRAME;
4161         state->rd = INT64_MAX;
4162 
4163         search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
4164       }
4165     }
4166   }
4167 
4168   for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4169     search_state->best_single_rd[ref_frame] = INT64_MAX;
4170     search_state->best_single_mode[ref_frame] = PRED_MODE_INVALID;
4171   }
4172   av1_zero(search_state->single_state_cnt);
4173   av1_zero(search_state->single_state_modelled_cnt);
4174 }
4175 
4176 static inline void init_inter_mode_search_state(
4177     InterModeSearchState *search_state, const AV1_COMP *cpi,
4178     const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
4179   init_intra_mode_search_state(&search_state->intra_search_state);
4180   av1_invalid_rd_stats(&search_state->best_y_rdcost);
4181 
4182   search_state->best_rd = best_rd_so_far;
4183   search_state->best_skip_rd[0] = INT64_MAX;
4184   search_state->best_skip_rd[1] = INT64_MAX;
4185 
4186   av1_zero(search_state->best_mbmode);
4187 
4188   search_state->best_rate_y = INT_MAX;
4189 
4190   search_state->best_rate_uv = INT_MAX;
4191 
4192   search_state->best_mode_skippable = 0;
4193 
4194   search_state->best_skip2 = 0;
4195 
4196   search_state->best_mode_index = THR_INVALID;
4197 
4198   const MACROBLOCKD *const xd = &x->e_mbd;
4199   const MB_MODE_INFO *const mbmi = xd->mi[0];
4200   const unsigned char segment_id = mbmi->segment_id;
4201 
4202   search_state->num_available_refs = 0;
4203   memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
4204   memset(search_state->dist_order_refs, -1,
4205          sizeof(search_state->dist_order_refs));
4206 
4207   for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
4208     search_state->mode_threshold[i] = 0;
4209   const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
4210   for (int i = LAST_NEW_MV_INDEX + 1; i < SINGLE_REF_MODE_END; ++i)
4211     search_state->mode_threshold[i] =
4212         ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4213         RD_THRESH_FAC_FRAC_BITS;
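  // mode_threshold[i] is the per-segment rd threshold scaled by the adaptive
  // frequency factor thresh_freq_fact, which is kept in fixed point with
  // RD_THRESH_FAC_FRAC_BITS fractional bits (hence the final shift).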
4214 
4215   search_state->best_intra_rd = INT64_MAX;
4216 
4217   search_state->best_pred_sse = UINT_MAX;
4218 
4219   av1_zero(search_state->single_newmv);
4220   av1_zero(search_state->single_newmv_rate);
4221   av1_zero(search_state->single_newmv_valid);
4222   for (int i = SINGLE_INTER_MODE_START; i < SINGLE_INTER_MODE_END; ++i) {
4223     for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4224       for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4225         search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4226         search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4227       }
4228     }
4229   }
4230 
4231   for (int i = 0; i < REFERENCE_MODES; ++i) {
4232     search_state->best_pred_rd[i] = INT64_MAX;
4233   }
4234 
4235   if (cpi->common.current_frame.reference_mode != SINGLE_REFERENCE) {
4236     for (int i = SINGLE_REF_MODE_END; i < THR_INTER_MODE_END; ++i)
4237       search_state->mode_threshold[i] =
4238           ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4239           RD_THRESH_FAC_FRAC_BITS;
4240 
4241     for (int i = COMP_INTER_MODE_START; i < COMP_INTER_MODE_END; ++i) {
4242       for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4243         for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4244           search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4245           search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4246         }
4247       }
4248     }
4249 
4250     init_single_inter_mode_search_state(search_state);
4251   }
4252 }
4253 
4254 static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
4255                            const MV_REFERENCE_FRAME *ref_frame,
4256                            const PREDICTION_MODE this_mode) {
4257   if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
4258     return true;
4259   }
4260 
4261   return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
4262 }
4263 
4264 static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
4265                                       BLOCK_SIZE bsize,
4266                                       PREDICTION_MODE curr_mode,
4267                                       const MV_REFERENCE_FRAME *ref_frames) {
4268   const int comp_pred = ref_frames[1] > INTRA_FRAME;
4269   if (comp_pred) {
4270     if (!is_comp_ref_allowed(bsize)) return 1;
4271     if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]])) {
4272       return 1;
4273     }
4274 
4275     const AV1_COMMON *const cm = &cpi->common;
4276     if (frame_is_intra_only(cm)) return 1;
4277 
4278     const CurrentFrame *const current_frame = &cm->current_frame;
4279     if (current_frame->reference_mode == SINGLE_REFERENCE) return 1;
4280 
4281     const struct segmentation *const seg = &cm->seg;
4282     const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
4283     // Do not allow compound prediction if the segment level reference frame
4284     // feature is in use as in this case there can only be one reference.
4285     if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
4286   }
4287 
4288   if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
4289     // Mode must be compatible
4290     if (!is_interintra_allowed_bsize(bsize)) return 1;
4291     if (!is_interintra_allowed_mode(curr_mode)) return 1;
4292   }
4293 
4294   return 0;
4295 }
4296 
4297 static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
4298                                         BLOCK_SIZE bsize, int mib_size) {
4299   const int sb_size_mask = mib_size - 1;
4300   const MACROBLOCKD *const xd = &x->e_mbd;
4301   const int mi_row = xd->mi_row;
4302   const int mi_col = xd->mi_col;
4303   const int mi_row_in_sb = mi_row & sb_size_mask;
4304   const int mi_col_in_sb = mi_col & sb_size_mask;
4305   const int mi_w = mi_size_wide[bsize];
4306   const int mi_h = mi_size_high[bsize];
4307   int picked_ref_frames_mask = 0;
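  // The per-superblock mask appears to be stored as a 32x32 grid of mi units,
  // which is why the row index below is scaled by a stride of 32.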
4308   for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_h; ++i) {
4309     for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_w; ++j) {
4310       picked_ref_frames_mask |= x->picked_ref_frames_mask[i * 32 + j];
4311     }
4312   }
4313   return picked_ref_frames_mask;
4314 }
4315 
4316 // Check if reference frame pair of the current block matches with the given
4317 // block.
4318 static inline int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4319                                        const MV_REFERENCE_FRAME *ref_frames) {
4320   return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4321           (ref_frames[1] == mbmi->ref_frame[1]));
4322 }
4323 
4324 // Case 1: return 0, means don't skip this mode
4325 // Case 2: return 1, means skip this mode completely
4326 // Case 3: return 2, means skip compound only, but still try single motion modes
4327 static int inter_mode_search_order_independent_skip(
4328     const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
4329     InterModeSearchState *search_state, int skip_ref_frame_mask,
4330     PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
4331   if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
4332     return 1;
4333   }
4334 
4335   const int ref_type = av1_ref_frame_type(ref_frame);
4336   if (!cpi->sf.rt_sf.use_real_time_ref_set)
4337     if (prune_ref_frame(cpi, x, ref_type)) return 1;
4338 
4339   // This is only used in motion vector unit test.
4340   if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
4341       ref_frame[0] == INTRA_FRAME)
4342     return 1;
4343 
4344   const AV1_COMMON *const cm = &cpi->common;
4345   if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
4346     return 1;
4347   }
4348 
4349   // Reuse the prediction mode in cache
4350   if (x->use_mb_mode_cache) {
4351     const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
4352     const PREDICTION_MODE cached_mode = cached_mi->mode;
4353     const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
4354     const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;
4355 
4356     // If the cached mode is intra, then we just need to match the mode.
4357     if (is_mode_intra(cached_mode) && mode != cached_mode) {
4358       return 1;
4359     }
4360 
4361     // If the cached mode is single inter mode, then we match the mode and
4362     // reference frame.
4363     if (cached_mode_is_single) {
4364       if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
4365         return 1;
4366       }
4367     } else {
4368       // If the cached mode is compound, then we need to consider several cases.
4369       const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
4370       if (mode_is_single) {
4371         // If the mode is single, we know the modes can't match. But we might
4372         // still want to search it if compound mode depends on the current mode.
4373         int skip_motion_mode_only = 0;
4374         if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
4375           skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
4376         } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
4377           skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
4378         } else if (cached_mode == NEW_NEWMV) {
4379           skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
4380                                    ref_frame[0] == cached_frame[1]);
4381         }
4382 
4383         return 1 + skip_motion_mode_only;
4384       } else {
4385         // If both modes are compound, then everything must match.
4386         if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
4387             ref_frame[1] != cached_frame[1]) {
4388           return 1;
4389         }
4390       }
4391     }
4392   }
4393 
4394   const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
4395   // If no valid mode has been found so far in PARTITION_NONE when finding a
4396   // valid partition is required, do not skip mode.
4397   if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
4398       x->must_find_valid_partition)
4399     return 0;
4400 
4401   const SPEED_FEATURES *const sf = &cpi->sf;
4402   // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
4403   // frames
4404   if (sf->inter_sf.prune_nearmv_using_neighbors &&
4405       (mode == NEAR_NEARMV || mode == NEARMV)) {
4406     const MACROBLOCKD *const xd = &x->e_mbd;
4407     if (search_state->best_rd != INT64_MAX && xd->left_available &&
4408         xd->up_available) {
4409       const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
4410                                                     { 1, 1, 0 },
4411                                                     { 2, 1, 0 } };
4412       const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;
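      // Maps qindex into one of three equal bands (0 = low, 1 = mid, 2 = high),
      // selecting a column of the 'thresholds' table above.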
4413 
4414       assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
4415              qindex_sub_range < 3);
4416       const int num_ref_frame_pair_match_thresh =
4417           thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
4418                     [qindex_sub_range];
4419 
4420       assert(num_ref_frame_pair_match_thresh <= 2 &&
4421              num_ref_frame_pair_match_thresh >= 0);
4422       int num_ref_frame_pair_match = 0;
4423 
4424       num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
4425       num_ref_frame_pair_match +=
4426           match_ref_frame_pair(xd->above_mbmi, ref_frame);
4427 
4428       // Pruning based on ref frame pair match with neighbors.
4429       if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
4430     }
4431   }
4432 
4433   int skip_motion_mode = 0;
4434   if (mbmi->partition != PARTITION_NONE) {
4435     int skip_ref = skip_ref_frame_mask & (1 << ref_type);
4436     if (ref_type <= ALTREF_FRAME && skip_ref) {
4437       // Since the compound ref modes depend on the motion estimation results of
4438       // the two single ref modes (the best mv of each single ref mode is used as
4439       // the start point), if the current single ref mode is marked skip, we need
4440       // to check whether it will be used in compound ref modes.
4441       if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
4442         // A non-skipped compound ref mode that contains the current single ref
4443         // was found, so this single ref can't be skipped completely. Just skip
4444         // its motion mode search and still try its simple translation mode.
4446         skip_motion_mode = 1;
4447         skip_ref = 0;
4448       }
4449     }
4450     // If we are reusing the prediction from cache, and the current frame is
4451     // required by the cache, then we cannot prune it.
4452     if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
4453       skip_ref = 0;
4454       // If the cache only needs the current reference type for compound
4455       // prediction, then we can skip motion mode search.
4456       skip_motion_mode = (ref_type <= ALTREF_FRAME &&
4457                           x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
4458     }
4459     if (skip_ref) return 1;
4460   }
4461 
4462   if (ref_frame[0] == INTRA_FRAME) {
4463     if (mode != DC_PRED) {
4464       // Disable intra modes other than DC_PRED for blocks with low variance
4465       // Threshold for intra skipping based on source variance
4466       // TODO(debargha): Specialize the threshold for super block sizes
4467       const unsigned int skip_intra_var_thresh = 64;
4468       if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
4469           x->source_variance < skip_intra_var_thresh)
4470         return 1;
4471     }
4472   }
4473 
4474   if (skip_motion_mode) return 2;
4475 
4476   return 0;
4477 }
4478 
4479 static inline void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4480                              const MV_REFERENCE_FRAME *ref_frames,
4481                              const AV1_COMMON *cm) {
4482   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4483   mbmi->ref_mv_idx = 0;
4484   mbmi->mode = curr_mode;
4485   mbmi->uv_mode = UV_DC_PRED;
4486   mbmi->ref_frame[0] = ref_frames[0];
4487   mbmi->ref_frame[1] = ref_frames[1];
4488   pmi->palette_size[0] = 0;
4489   pmi->palette_size[1] = 0;
4490   mbmi->filter_intra_mode_info.use_filter_intra = 0;
4491   mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4492   mbmi->motion_mode = SIMPLE_TRANSLATION;
4493   mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4494   set_default_interp_filters(mbmi, cm->features.interp_filter);
4495 }
4496 
4497 static inline void collect_single_states(MACROBLOCK *x,
4498                                          InterModeSearchState *search_state,
4499                                          const MB_MODE_INFO *const mbmi) {
4500   int i, j;
4501   const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
4502   const PREDICTION_MODE this_mode = mbmi->mode;
4503   const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
4504   const int mode_offset = INTER_OFFSET(this_mode);
4505   const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
4506 
4507   // Simple rd
4508   int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
4509   for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4510     const int64_t rd =
4511         search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
4512     if (rd < simple_rd) simple_rd = rd;
4513   }
4514 
4515   // Insertion sort of single_state
4516   const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
4517   SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
4518   i = search_state->single_state_cnt[dir][mode_offset];
4519   for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
4520     state_s[j] = state_s[j - 1];
4521   state_s[j] = this_state_s;
4522   search_state->single_state_cnt[dir][mode_offset]++;
4523 
4524   // Modelled rd
4525   int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
4526   for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4527     const int64_t rd =
4528         search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
4529     if (rd < modelled_rd) modelled_rd = rd;
4530   }
4531 
4532   // Insertion sort of single_state_modelled
4533   const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
4534   SingleInterModeState *state_m =
4535       search_state->single_state_modelled[dir][mode_offset];
4536   i = search_state->single_state_modelled_cnt[dir][mode_offset];
4537   for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
4538     state_m[j] = state_m[j - 1];
4539   state_m[j] = this_state_m;
4540   search_state->single_state_modelled_cnt[dir][mode_offset]++;
4541 }
4542 
4543 static inline void analyze_single_states(const AV1_COMP *cpi,
4544                                          InterModeSearchState *search_state) {
4545   const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
4546   assert(prune_level >= 1);
4547   int i, j, dir, mode;
4548 
4549   for (dir = 0; dir < 2; ++dir) {
4550     int64_t best_rd;
4551     SingleInterModeState(*state)[FWD_REFS];
4552     const int prune_factor = prune_level >= 2 ? 6 : 5;
4553 
4554     // Use the best rd of GLOBALMV or NEWMV to prune the unlikely reference
4555     // frames for all the modes (NEARESTMV and NEARMV may not have the same
4556     // motion vectors). Always keep the best entry of each mode because it
4557     // might form the best possible combination with another mode.
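    // The pruning test below, (rd >> 3) * prune_factor > best_rd, is roughly
    // rd > best_rd * 8 / prune_factor, i.e. a candidate reference is marked
    // invalid once its single-mode rd exceeds ~1.6x (prune_factor 5) or ~1.33x
    // (prune_factor 6) of the best NEWMV/GLOBALMV rd in that direction.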
4558     state = search_state->single_state[dir];
4559     best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
4560                      state[INTER_OFFSET(GLOBALMV)][0].rd);
4561     for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4562       for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
4563         if (state[mode][i].rd != INT64_MAX &&
4564             (state[mode][i].rd >> 3) * prune_factor > best_rd) {
4565           state[mode][i].valid = 0;
4566         }
4567       }
4568     }
4569 
4570     state = search_state->single_state_modelled[dir];
4571     best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
4572                      state[INTER_OFFSET(GLOBALMV)][0].rd);
4573     for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4574       for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
4575         if (state[mode][i].rd != INT64_MAX &&
4576             (state[mode][i].rd >> 3) * prune_factor > best_rd) {
4577           state[mode][i].valid = 0;
4578         }
4579       }
4580     }
4581   }
4582 
4583   // Ordering by simple rd first, then by modelled rd
4584   for (dir = 0; dir < 2; ++dir) {
4585     for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4586       const int state_cnt_s = search_state->single_state_cnt[dir][mode];
4587       const int state_cnt_m =
4588           search_state->single_state_modelled_cnt[dir][mode];
4589       SingleInterModeState *state_s = search_state->single_state[dir][mode];
4590       SingleInterModeState *state_m =
4591           search_state->single_state_modelled[dir][mode];
4592       int count = 0;
4593       const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
4594       for (i = 0; i < state_cnt_s; ++i) {
4595         if (state_s[i].rd == INT64_MAX) break;
4596         if (state_s[i].valid) {
4597           search_state->single_rd_order[dir][mode][count++] =
4598               state_s[i].ref_frame;
4599         }
4600       }
4601       if (count >= max_candidates) continue;
4602 
4603       for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
4604         if (state_m[i].rd == INT64_MAX) break;
4605         if (!state_m[i].valid) continue;
4606         const int ref_frame = state_m[i].ref_frame;
4607         int match = 0;
4608         // Check if this ref_frame is already in the ordered list
4609         for (j = 0; j < count; ++j) {
4610           if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
4611             match = 1;
4612             break;
4613           }
4614         }
4615         if (match) continue;
4616         // Check if this ref_frame is removed in simple rd
4617         int valid = 1;
4618         for (j = 0; j < state_cnt_s; ++j) {
4619           if (ref_frame == state_s[j].ref_frame) {
4620             valid = state_s[j].valid;
4621             break;
4622           }
4623         }
4624         if (valid) {
4625           search_state->single_rd_order[dir][mode][count++] = ref_frame;
4626         }
4627       }
4628     }
4629   }
4630 }
4631 
4632 static int compound_skip_get_candidates(
4633     const AV1_COMP *cpi, const InterModeSearchState *search_state,
4634     const int dir, const PREDICTION_MODE mode) {
4635   const int mode_offset = INTER_OFFSET(mode);
4636   const SingleInterModeState *state =
4637       search_state->single_state[dir][mode_offset];
4638   const SingleInterModeState *state_modelled =
4639       search_state->single_state_modelled[dir][mode_offset];
4640 
4641   int max_candidates = 0;
4642   for (int i = 0; i < FWD_REFS; ++i) {
4643     if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4644     max_candidates++;
4645   }
4646 
4647   int candidates = max_candidates;
4648   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4649     candidates = AOMMIN(2, max_candidates);
4650   }
4651   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4652     if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4653         state[0].ref_frame == state_modelled[0].ref_frame)
4654       candidates = 1;
4655     if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4656   }
4657 
4658   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4659     // Limit the number of candidates to 1 in each direction for compound
4660     // prediction
4661     candidates = AOMMIN(1, candidates);
4662   }
4663   return candidates;
4664 }
4665 
4666 static int compound_skip_by_single_states(
4667     const AV1_COMP *cpi, const InterModeSearchState *search_state,
4668     const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
4669     const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
4670   const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
4671   const int mode[2] = { compound_ref0_mode(this_mode),
4672                         compound_ref1_mode(this_mode) };
4673   const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
4674   const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
4675                             refs[1] <= GOLDEN_FRAME ? 0 : 1 };
4676   int ref_searched[2] = { 0, 0 };
4677   int ref_mv_match[2] = { 1, 1 };
4678   int i, j;
4679 
4680   for (i = 0; i < 2; ++i) {
4681     const SingleInterModeState *state =
4682         search_state->single_state[mode_dir[i]][mode_offset[i]];
4683     const int state_cnt =
4684         search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
4685     for (j = 0; j < state_cnt; ++j) {
4686       if (state[j].ref_frame == refs[i]) {
4687         ref_searched[i] = 1;
4688         break;
4689       }
4690     }
4691   }
4692 
4693   const int ref_set = get_drl_refmv_count(x, refs, this_mode);
4694   for (i = 0; i < 2; ++i) {
4695     if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
4696       continue;
4697     }
4698     const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
4699     for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
4700       int_mv single_mv;
4701       int_mv comp_mv;
4702       get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
4703                   &x->mbmi_ext);
4704       get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
4705       if (single_mv.as_int != comp_mv.as_int) {
4706         ref_mv_match[i] = 0;
4707         break;
4708       }
4709     }
4710   }
4711 
4712   for (i = 0; i < 2; ++i) {
4713     if (!ref_searched[i] || !ref_mv_match[i]) continue;
4714     const int candidates =
4715         compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
4716     const MV_REFERENCE_FRAME *ref_order =
4717         search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
4718     int match = 0;
4719     for (j = 0; j < candidates; ++j) {
4720       if (refs[i] == ref_order[j]) {
4721         match = 1;
4722         break;
4723       }
4724     }
4725     if (!match) return 1;
4726   }
4727 
4728   return 0;
4729 }
4730 
4731 // Check if ref frames of current block matches with given block.
4732 static inline void match_ref_frame(const MB_MODE_INFO *const mbmi,
4733                                    const MV_REFERENCE_FRAME *ref_frames,
4734                                    int *const is_ref_match) {
4735   if (is_inter_block(mbmi)) {
4736     is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
4737     is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
4738     if (has_second_ref(mbmi)) {
4739       is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
4740       is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
4741     }
4742   }
4743 }
4744 
4745 // Prune compound mode using ref frames of neighbor blocks.
4746 static inline int compound_skip_using_neighbor_refs(
4747     MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
4748     const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
4749   // Exclude non-extended compound modes from pruning
4750   if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4751       this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4752     return 0;
4753 
4754   if (prune_ext_comp_using_neighbors >= 3) return 1;
4755 
4756   int is_ref_match[2] = { 0 };  // 0 - match for forward refs
4757                                 // 1 - match for backward refs
4758   // Check if ref frames of this block matches with left neighbor.
4759   if (xd->left_available)
4760     match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
4761 
4762   // Check if ref frames of this block matches with above neighbor.
4763   if (xd->up_available)
4764     match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
4765 
4766   // Combine ref frame match with neighbors in forward and backward refs.
4767   const int track_ref_match = is_ref_match[0] + is_ref_match[1];
4768 
4769   // Pruning based on ref frame match with neighbors.
4770   if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
4771   return 1;
4772 }
4773 
4774 // Update best single mode for the given reference frame based on simple rd.
4775 static inline void update_best_single_mode(InterModeSearchState *search_state,
4776                                            const PREDICTION_MODE this_mode,
4777                                            const MV_REFERENCE_FRAME ref_frame,
4778                                            int64_t this_rd) {
4779   if (this_rd < search_state->best_single_rd[ref_frame]) {
4780     search_state->best_single_rd[ref_frame] = this_rd;
4781     search_state->best_single_mode[ref_frame] = this_mode;
4782   }
4783 }
4784 
4785 // Prune compound mode using best single mode for the same reference.
4786 static inline int skip_compound_using_best_single_mode_ref(
4787     const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
4788     const PREDICTION_MODE *best_single_mode,
4789     int prune_comp_using_best_single_mode_ref) {
4790   // Exclude non-extended compound modes from pruning
4791   if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4792       this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4793     return 0;
4794 
4795   assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
4796   const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
4797   // Get ref frame direction corresponding to NEWMV
4798   // 0 - NEWMV corresponding to forward direction
4799   // 1 - NEWMV corresponding to backward direction
4800   const int newmv_dir = comp_mode_ref0 != NEWMV;
4801 
4802   // Avoid pruning the compound mode when the ref frame corresponding to
4803   // NEWMV has NEWMV as its single-mode winner.
4804   // Example: For an extended-compound mode,
4805   // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
4806   // - Ref frame corresponding to NEWMV is ALTREF_FRAME
4807   // - Avoid pruning this mode, if best single mode corresponding to ref frame
4808   //   ALTREF_FRAME is NEWMV
4809   const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
4810   if (single_mode == NEWMV) return 0;
4811 
4812   // Avoid pruning the compound mode when best single mode is not available
4813   if (prune_comp_using_best_single_mode_ref == 1)
4814     if (single_mode == MB_MODE_COUNT) return 0;
4815   return 1;
4816 }
4817 
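     // qsort() comparator: orders int64_t values in ascending order.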
4818 static int compare_int64(const void *a, const void *b) {
4819   int64_t a64 = *((int64_t *)a);
4820   int64_t b64 = *((int64_t *)b);
4821   if (a64 < b64) {
4822     return -1;
4823   } else if (a64 == b64) {
4824     return 0;
4825   } else {
4826     return 1;
4827   }
4828 }
4829 
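     // Records a new best mode: copies its RD stats and MB_MODE_INFO into
     // search_state, and mirrors the per-block skip and tx-type decisions into
     // ctx for later reuse.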
4830 static inline void update_search_state(
4831     InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
4832     PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
4833     const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
4834     THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
4835   const MACROBLOCKD *xd = &x->e_mbd;
4836   const MB_MODE_INFO *mbmi = xd->mi[0];
4837   const int skip_ctx = av1_get_skip_txfm_context(xd);
4838   const int skip_txfm =
4839       mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
4840   const TxfmSearchInfo *txfm_info = &x->txfm_search_info;
4841 
4842   search_state->best_rd = new_best_rd_stats->rdcost;
4843   search_state->best_mode_index = new_best_mode;
4844   *best_rd_stats_dst = *new_best_rd_stats;
4845   search_state->best_mbmode = *mbmi;
4846   search_state->best_skip2 = skip_txfm;
4847   search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
4848   // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
4849   // rate_uv because the av1_txfm_search process is replaced by RD estimation.
4850   // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
4851   // These two values will be updated when av1_txfm_search is called.
4852   if (txfm_search_done) {
4853     search_state->best_rate_y =
4854         new_best_rd_stats_y->rate +
4855         x->mode_costs.skip_txfm_cost[skip_ctx]
4856                                     [new_best_rd_stats->skip_txfm || skip_txfm];
4857     search_state->best_rate_uv = new_best_rd_stats_uv->rate;
4858   }
4859   search_state->best_y_rdcost = *new_best_rd_stats_y;
4860   memcpy(ctx->blk_skip, txfm_info->blk_skip,
4861          sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
4862   av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
4863 }
4864 
4865 // Find the best RD over all reference frames (among single-reference modes)
4866 // and store +10% of it in the 0-th element of ref_frame_rd.
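     // For example, if the best single-reference RD over all ref frames is
     // 1000, ref_frame_rd[0] becomes 1100; in_single_ref_cutoff() then treats
     // any reference with RD <= 1100 as good enough for compound search.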
4867 static inline void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
4868   assert(ref_frame_rd[0] == INT64_MAX);
4869   int64_t ref_copy[REF_FRAMES - 1];
4870   memcpy(ref_copy, ref_frame_rd + 1,
4871          sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
4872   qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
4873 
4874   int64_t cutoff = ref_copy[0];
4875   // The cut-off is within 10% of the best.
4876   if (cutoff != INT64_MAX) {
4877     assert(cutoff < INT64_MAX / 200);
4878     cutoff = (110 * cutoff) / 100;
4879   }
4880   ref_frame_rd[0] = cutoff;
4881 }
4882 
4883 // Check if either frame is within the cutoff.
4884 static inline bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
4885                                         MV_REFERENCE_FRAME frame1,
4886                                         MV_REFERENCE_FRAME frame2) {
4887   assert(frame2 > 0);
4888   return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
4889          ref_frame_rd[frame2] <= ref_frame_rd[0];
4890 }
4891 
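     // Re-evaluates the stored simple-translation winner candidates with the
     // full motion mode search (motion_mode_rd()) and updates the overall
     // search state whenever a candidate improves on the current best RD.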
4892 static inline void evaluate_motion_mode_for_winner_candidates(
4893     const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
4894     HandleInterModeArgs *const args, TileDataEnc *const tile_data,
4895     PICK_MODE_CONTEXT *const ctx,
4896     struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
4897     const motion_mode_best_st_candidate *const best_motion_mode_cands,
4898     int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
4899     InterModeSearchState *const search_state, int64_t *yrd) {
4900   const AV1_COMMON *const cm = &cpi->common;
4901   const int num_planes = av1_num_planes(cm);
4902   MACROBLOCKD *const xd = &x->e_mbd;
4903   MB_MODE_INFO *const mbmi = xd->mi[0];
4904   InterModesInfo *const inter_modes_info = x->inter_modes_info;
4905   const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;
4906 
4907   for (int cand = 0; cand < num_best_cand; cand++) {
4908     RD_STATS rd_stats;
4909     RD_STATS rd_stats_y;
4910     RD_STATS rd_stats_uv;
4911     av1_init_rd_stats(&rd_stats);
4912     av1_init_rd_stats(&rd_stats_y);
4913     av1_init_rd_stats(&rd_stats_uv);
4914     int rate_mv;
4915 
4916     rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
4917     args->skip_motion_mode =
4918         best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
4919     *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
4920     rd_stats.rate =
4921         best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;
4922 
4923     // Skip the candidate if it uses compound prediction.
4924     if (!is_inter_singleref_mode(mbmi->mode)) continue;
4925 
4926     x->txfm_search_info.skip_txfm = 0;
4927     struct macroblockd_plane *pd = xd->plane;
4928     const BUFFER_SET orig_dst = {
4929       { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
4930       { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
4931     };
4932 
4933     set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
4934     // Initialize motion mode to simple translation
4935     // Calculation of switchable rate depends on it.
4936     mbmi->motion_mode = 0;
4937     const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
4938     for (int i = 0; i < num_planes; i++) {
4939       xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
4940       if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
4941     }
4942 
4943     int64_t skip_rd[2] = { search_state->best_skip_rd[0],
4944                            search_state->best_skip_rd[1] };
4945     int64_t this_yrd = INT64_MAX;
4946     int64_t ret_value = motion_mode_rd(
4947         cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
4948         search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
4949         do_tx_search, inter_modes_info, 1, &this_yrd);
4950 
4951     if (ret_value != INT64_MAX) {
4952       rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
4953       const THR_MODES mode_enum = get_prediction_mode_idx(
4954           mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
4955       // Collect mode stats for multiwinner mode processing
4956       store_winner_mode_stats(
4957           &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
4958           mode_enum, NULL, bsize, rd_stats.rdcost,
4959           cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
4960       if (rd_stats.rdcost < search_state->best_rd) {
4961         *yrd = this_yrd;
4962         update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
4963                             &rd_stats_uv, mode_enum, x, do_tx_search);
4964         if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
4965       }
4966     }
4967   }
4968 }
4969 
4970 /*!\cond */
4971 // Arguments for speed feature pruning of inter mode search
4972 typedef struct {
4973   int *skip_motion_mode;
4974   mode_skip_mask_t *mode_skip_mask;
4975   InterModeSearchState *search_state;
4976   int skip_ref_frame_mask;
4977   int reach_first_comp_mode;
4978   int mode_thresh_mul_fact;
4979   int num_single_modes_processed;
4980   int prune_cpd_using_sr_stats_ready;
4981 } InterModeSFArgs;
4982 /*!\endcond */
4983 
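     // Returns 1 if the inter mode selected by 'midx' should be skipped for
     // the current block based on the enabled speed features, 0 otherwise.
     // May also set *args->skip_motion_mode as a side effect.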
4984 static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
4985                            int64_t *ref_frame_rd, int midx,
4986                            InterModeSFArgs *args, int is_low_temp_var) {
4987   const SPEED_FEATURES *const sf = &cpi->sf;
4988   MACROBLOCKD *const xd = &x->e_mbd;
4989   // Get the actual prediction mode we are trying in this iteration
4990   const THR_MODES mode_enum = av1_default_mode_order[midx];
4991   const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
4992   const PREDICTION_MODE this_mode = mode_def->mode;
4993   const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
4994   const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
4995   const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
4996   const int comp_pred = second_ref_frame > INTRA_FRAME;
4997 
4998   if (ref_frame == INTRA_FRAME) return 1;
4999 
5000   const FRAME_UPDATE_TYPE update_type =
5001       get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
5002   if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
5003       comp_pred) {
5004     return 1;
5005   }
5006 
5007   // This is for real time encoding.
5008   if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
5009       this_mode != NEARESTMV)
5010     return 1;
5011 
5012   // Check if this mode should be skipped because it is incompatible with the
5013   // current frame
5014   if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
5015     return 1;
5016   const int ret = inter_mode_search_order_independent_skip(
5017       cpi, x, args->mode_skip_mask, args->search_state,
5018       args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
5019   if (ret == 1) return 1;
5020   *(args->skip_motion_mode) = (ret == 2);
5021 
5022   // We've reached the first compound prediction mode; get stats from the
5023   // single reference predictors to help with pruning.
5024   // Disable this pruning logic if interpolation filter search was skipped for
5025   // single prediction modes as it can result in aggressive pruning of compound
5026   // prediction modes due to the absence of modelled_rd populated by
5027   // av1_interpolation_filter_search().
5028   // TODO(Remya): Check the impact of the sf
5029   // 'prune_comp_search_by_single_result' if compound prediction modes are
5030   // enabled in future for REALTIME encode.
5031   if (!sf->interp_sf.skip_interp_filter_search &&
5032       sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
5033       args->reach_first_comp_mode == 0) {
5034     analyze_single_states(cpi, args->search_state);
5035     args->reach_first_comp_mode = 1;
5036   }
5037 
5038   // Prune aggressively when best mode is skippable.
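       // mul_fact is a Q12 scale factor: the threshold below is effectively
       // mode_threshold * mul_fact / (1 << MODE_THRESH_QBITS).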
5039   int mul_fact = args->search_state->best_mode_skippable
5040                      ? args->mode_thresh_mul_fact
5041                      : (1 << MODE_THRESH_QBITS);
5042   int64_t mode_threshold =
5043       (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
5044       MODE_THRESH_QBITS;
5045 
5046   if (args->search_state->best_rd < mode_threshold) return 1;
5047 
5048   // Skip this compound mode based on the RD results from the single prediction
5049   // modes
5050   if (!sf->interp_sf.skip_interp_filter_search &&
5051       sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
5052     if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
5053                                        ref_frame, second_ref_frame, x))
5054       return 1;
5055   }
5056 
5057   if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
5058     // After we are done with the single-reference modes, compute an RD
5059     // cut-off from the best single-reference RD (see find_top_ref()). Only
5060     // search compound modes with a reference frame at least as good as it.
5061     if (!args->prune_cpd_using_sr_stats_ready &&
5062         args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
5063       find_top_ref(ref_frame_rd);
5064       args->prune_cpd_using_sr_stats_ready = 1;
5065     }
5066     if (args->prune_cpd_using_sr_stats_ready &&
5067         !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
5068       return 1;
5069   }
5070 
5071   // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
5072   if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
5073       (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
5074     return 1;
5075   }
5076 
5077   if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
5078     if (compound_skip_using_neighbor_refs(
5079             xd, this_mode, ref_frames,
5080             sf->inter_sf.prune_ext_comp_using_neighbors))
5081       return 1;
5082   }
5083 
5084   if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
5085     if (skip_compound_using_best_single_mode_ref(
5086             this_mode, ref_frames, args->search_state->best_single_mode,
5087             sf->inter_sf.prune_comp_using_best_single_mode_ref))
5088       return 1;
5089   }
5090 
5091   if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
5092     const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
5093     if (skip_nearest_near_mv_using_refmv_weight(
5094             x, this_mode, ref_frame_type,
5095             args->search_state->best_mbmode.mode)) {
5096       // Ensure the mode is pruned only when the current block has obtained a
5097       // valid inter mode.
5098       assert(is_inter_mode(args->search_state->best_mbmode.mode));
5099       return 1;
5100     }
5101   }
5102 
5103   if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
5104       ref_frame == GOLDEN_FRAME && !comp_pred) {
5105     const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
5106     if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
5107         args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
5108       if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
5109         return 1;
5110     }
5111   }
5112 
5113   return 0;
5114 }
5115 
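     // Tracks the best RD under each reference-mode signalling assumption
     // (SINGLE_REFERENCE, COMPOUND_REFERENCE, REFERENCE_MODE_SELECT). When the
     // frame uses REFERENCE_MODE_SELECT, rd_stats->rate already includes
     // compmode_cost, so it is subtracted to form the single-prediction rate;
     // otherwise compmode_cost is added to form the hybrid rate.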
5116 static void record_best_compound(REFERENCE_MODE reference_mode,
5117                                  RD_STATS *rd_stats, int comp_pred, int rdmult,
5118                                  InterModeSearchState *search_state,
5119                                  int compmode_cost) {
5120   int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
5121 
5122   if (reference_mode == REFERENCE_MODE_SELECT) {
5123     single_rate = rd_stats->rate - compmode_cost;
5124     hybrid_rate = rd_stats->rate;
5125   } else {
5126     single_rate = rd_stats->rate;
5127     hybrid_rate = rd_stats->rate + compmode_cost;
5128   }
5129 
5130   single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
5131   hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);
5132 
5133   if (!comp_pred) {
5134     if (single_rd < search_state->best_pred_rd[SINGLE_REFERENCE])
5135       search_state->best_pred_rd[SINGLE_REFERENCE] = single_rd;
5136   } else {
5137     if (single_rd < search_state->best_pred_rd[COMPOUND_REFERENCE])
5138       search_state->best_pred_rd[COMPOUND_REFERENCE] = single_rd;
5139   }
5140   if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
5141     search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
5142 }
5143 
5144 // Does a transform search over a list of the best inter mode candidates.
5145 // This is called if the original mode search computed an RD estimate
5146 // for the transform search rather than doing a full search.
5147 static void tx_search_best_inter_candidates(
5148     AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
5149     int64_t best_rd_so_far, BLOCK_SIZE bsize,
5150     struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
5151     InterModeSearchState *search_state, RD_STATS *rd_cost,
5152     PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
5153   AV1_COMMON *const cm = &cpi->common;
5154   MACROBLOCKD *const xd = &x->e_mbd;
5155   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5156   const ModeCosts *mode_costs = &x->mode_costs;
5157   const int num_planes = av1_num_planes(cm);
5158   const int skip_ctx = av1_get_skip_txfm_context(xd);
5159   MB_MODE_INFO *const mbmi = xd->mi[0];
5160   InterModesInfo *inter_modes_info = x->inter_modes_info;
5161   inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
5162   search_state->best_rd = best_rd_so_far;
5163   search_state->best_mode_index = THR_INVALID;
5164   // Initialize best mode stats for winner mode processing
5165   x->winner_mode_count = 0;
5166   store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5167                           NULL, bsize, best_rd_so_far,
5168                           cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
5169   inter_modes_info->num =
5170       inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
5171           ? inter_modes_info->num
5172           : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
5173   const int64_t top_est_rd =
5174       inter_modes_info->num > 0
5175           ? inter_modes_info
5176                 ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
5177           : INT64_MAX;
5178   *yrd = INT64_MAX;
5179   int64_t best_rd_in_this_partition = INT64_MAX;
5180   int num_inter_mode_cands = inter_modes_info->num;
5181   int newmv_mode_evaled = 0;
5182   int max_allowed_cands = INT_MAX;
5183   if (cpi->sf.inter_sf.limit_inter_mode_cands) {
5184     // Bound on the number of inter mode candidates: once more than
5185     // max_allowed_cands candidates have gone through the transform search
5186     // and a NEWMV mode has been evaluated, the candidate loop terminates.
5187     const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
5188     assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
5189     max_allowed_cands =
5190         num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
5191   }
5192 
5193   int num_mode_thresh = INT_MAX;
5194   if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
5195     // Bound the number of transform searches per prediction mode once the
5196     // total number of searched candidates exceeds this threshold.
5197     const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
5198     assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
5199     num_mode_thresh =
5200         num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
5201   }
5202 
5203   int num_tx_cands = 0;
5204   int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
5205   // Iterate over best inter mode candidates and perform tx search
5206   for (int j = 0; j < num_inter_mode_cands; ++j) {
5207     const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
5208     *mbmi = inter_modes_info->mbmi_arr[data_idx];
5209     const PREDICTION_MODE prediction_mode = mbmi->mode;
5210     int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
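         // Candidates are visited in increasing order of estimated RD (see
         // inter_modes_info_sort() above), so stop once an estimate exceeds
         // the best by more than 25% (curr_est_rd * 0.80 > top_est_rd).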
5211     if (curr_est_rd * 0.80 > top_est_rd) break;
5212 
5213     if (num_tx_cands > num_mode_thresh) {
5214       if ((prediction_mode != NEARESTMV &&
5215            num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
5216           (prediction_mode == NEARESTMV &&
5217            num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
5218         continue;
5219     }
5220 
5221     txfm_info->skip_txfm = 0;
5222     set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5223 
5224     // Select prediction reference frames.
5225     const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
5226     for (int i = 0; i < num_planes; i++) {
5227       xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
5228       if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
5229     }
5230 
5231     bool is_predictor_built = false;
5232 
5233     // Initialize RD stats
5234     RD_STATS rd_stats;
5235     RD_STATS rd_stats_y;
5236     RD_STATS rd_stats_uv;
5237     const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
5238     int64_t skip_rd = INT64_MAX;
5239     const int txfm_rd_gate_level = get_txfm_rd_gate_level(
5240         cm->seq_params->enable_masked_compound,
5241         cpi->sf.inter_sf.txfm_rd_gate_level, bsize, TX_SEARCH_DEFAULT,
5242         /*eval_motion_mode=*/0);
5243     if (txfm_rd_gate_level) {
5244       // Check if the mode is good enough based on skip RD
5245       int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
5246       skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
5247       int eval_txfm = check_txfm_eval(x, bsize, search_state->best_skip_rd[0],
5248                                       skip_rd, txfm_rd_gate_level, 0);
5249       if (!eval_txfm) continue;
5250     }
5251 
5252     // Build the prediction for this mode
5253     if (!is_predictor_built) {
5254       av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
5255                                     av1_num_planes(cm) - 1);
5256     }
5257     if (mbmi->motion_mode == OBMC_CAUSAL) {
5258       av1_build_obmc_inter_predictors_sb(cm, xd);
5259     }
5260 
5261     num_tx_cands++;
5262     if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
5263     num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
5264     int64_t this_yrd = INT64_MAX;
5265     // Do the transform search
5266     if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
5267                          mode_rate, search_state->best_rd)) {
5268       continue;
5269     } else {
5270       const int y_rate =
5271           rd_stats.skip_txfm
5272               ? mode_costs->skip_txfm_cost[skip_ctx][1]
5273               : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
5274       this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);
5275 
5276       if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
5277         inter_mode_data_push(
5278             tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
5279             rd_stats_y.rate + rd_stats_uv.rate +
5280                 mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
5281       }
5282     }
5283     rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
5284     if (rd_stats.rdcost < best_rd_in_this_partition) {
5285       best_rd_in_this_partition = rd_stats.rdcost;
5286       *yrd = this_yrd;
5287     }
5288 
5289     const THR_MODES mode_enum = get_prediction_mode_idx(
5290         prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5291 
5292     // Collect mode stats for multiwinner mode processing
5293     const int txfm_search_done = 1;
5294     store_winner_mode_stats(
5295         &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
5296         NULL, bsize, rd_stats.rdcost,
5297         cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
5298 
5299     if (rd_stats.rdcost < search_state->best_rd) {
5300       update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
5301                           &rd_stats_uv, mode_enum, x, txfm_search_done);
5302       search_state->best_skip_rd[0] = skip_rd;
5303       // Limit the total number of modes to be evaluated if the first
5304       // candidate is the best so far and it is transform-skip or compound.
5305       if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
5306         if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
5307           // Evaluate more candidates at high quantizers where occurrence of
5308           // transform skip is high.
5309           const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
5310           const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
5311           num_inter_mode_cands =
5312               AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
5313         } else if (!j && has_second_ref(&search_state->best_mbmode)) {
5314           const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
5315           // Evaluate more candidates at low quantizers where occurrence of
5316           // single reference mode is high.
5317           const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
5318                                                 { 10, 7, 5, 3 } };
5319           const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
5320           num_inter_mode_cands = AOMMIN(
5321               max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
5322         }
5323       }
5324     }
5325     // Break once the number of candidates evaluated exceeds
5326     // max_allowed_cands and a NEWMV mode has already been evaluated.
5327     if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
5328   }
5329 }
5330 
5331 // Indicates the number of winner simple-translation modes to be used.
5332 static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
5333 
5334 // Adds a motion mode to the candidate list for motion_mode_for_winner_cand
5335 // speed feature. This list consists of modes that have only searched
5336 // SIMPLE_TRANSLATION. The final list will be used to search other motion
5337 // modes after the initial RD search.
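     // The list is kept sorted in increasing order of rd_cost and capped at
     // max_winner_motion_mode_cand entries; worse candidates are shifted down
     // (and possibly dropped) to make room for a better one.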
5338 static void handle_winner_cand(
5339     MB_MODE_INFO *const mbmi,
5340     motion_mode_best_st_candidate *best_motion_mode_cands,
5341     int max_winner_motion_mode_cand, int64_t this_rd,
5342     motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
5343   // Number of current motion mode candidates in list
5344   const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
5345   int valid_motion_mode_cand_loc = num_motion_mode_cand;
5346 
5347   // Find the best location to insert the new motion mode candidate.
5348   for (int j = 0; j < num_motion_mode_cand; j++) {
5349     if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
5350       valid_motion_mode_cand_loc = j;
5351       break;
5352     }
5353   }
5354 
5355   // Insert motion mode if location is found
5356   if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
5357     if (num_motion_mode_cand > 0 &&
5358         valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
5359       memmove(
5360           &best_motion_mode_cands
5361                ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
5362           &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
5363           (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
5364            valid_motion_mode_cand_loc) *
5365               sizeof(best_motion_mode_cands->motion_mode_cand[0]));
5366     motion_mode_cand->mbmi = *mbmi;
5367     motion_mode_cand->rd_cost = this_rd;
5368     motion_mode_cand->skip_motion_mode = skip_motion_mode;
5369     best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
5370         *motion_mode_cand;
5371     best_motion_mode_cands->num_motion_mode_cand =
5372         AOMMIN(max_winner_motion_mode_cand,
5373                best_motion_mode_cands->num_motion_mode_cand + 1);
5374   }
5375 }
5376 
5377 /*!\brief Search intra modes in interframes
5378  *
5379  * \ingroup intra_mode_search
5380  *
5381  * This function searches for the best intra mode when the current frame is an
5382  * interframe. This function however does *not* handle luma palette mode.
5383  * Palette mode is currently handled by \ref av1_search_palette_mode.
5384  *
5385  * This function will first iterate through the luma mode candidates to find the
5386  * best luma intra mode. Once the best luma mode is found, it will then search
5387  * for the best chroma mode. Because palette mode is currently not handled
5388  * here, a cache of the uv mode is stored in
5389  * InterModeSearchState::intra_search_state so it can be reused later by \ref
5390  * av1_search_palette_mode.
5391  *
5392  * \param[in,out] search_state      Struct keep track of the prediction mode
5393  *                                  search state in interframe.
5394  *
5395  * \param[in]     cpi               Top-level encoder structure.
5396  * \param[in,out] x                 Pointer to struct holding all the data for
5397  *                                  the current prediction block.
5398  * \param[out]    rd_cost           Stores the best rd_cost among all the
5399  *                                  prediction modes searched.
5400  * \param[in]     bsize             Current block size.
5401  * \param[in,out] ctx               Structure holding the number of 4x4 blks
5402  *                                  used to copy the tx_type and txfm_skip
5403  *                                  arrays, for the Y plane only.
5404  * \param[in]     sf_args           Stores the list of intra mode candidates
5405  *                                  to be searched.
5406  * \param[in]     intra_ref_frame_cost  The entropy cost for signaling that the
5407  *                                      current ref frame is an intra frame.
5408  * \param[in]     yrd_threshold     The rdcost threshold for luma intra mode to
5409  *                                  terminate chroma intra mode search.
5410  *
5411  * \remark If a new best mode is found, search_state and rd_costs are updated
5412  * correspondingly. While x is also modified, it is only used as a temporary
5413  * buffer, and the final decisions are stored in search_state.
5414  */
5415 static inline void search_intra_modes_in_interframe(
5416     InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
5417     RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5418     const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
5419     int64_t yrd_threshold) {
5420   const AV1_COMMON *const cm = &cpi->common;
5421   const SPEED_FEATURES *const sf = &cpi->sf;
5422   const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
5423   MACROBLOCKD *const xd = &x->e_mbd;
5424   MB_MODE_INFO *const mbmi = xd->mi[0];
5425   IntraModeSearchState *intra_search_state = &search_state->intra_search_state;
5426 
5427   int is_best_y_mode_intra = 0;
5428   RD_STATS best_intra_rd_stats_y;
5429   int64_t best_rd_y = INT64_MAX;
5430   int best_mode_cost_y = -1;
5431   MB_MODE_INFO best_mbmi = *xd->mi[0];
5432   THR_MODES best_mode_enum = THR_INVALID;
5433   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
5434   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
5435   const int num_4x4 = bsize_to_num_blk(bsize);
5436 
5437   // Performs luma search
5438   int64_t best_model_rd = INT64_MAX;
5439   int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
5440   for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
5441     top_intra_model_rd[i] = INT64_MAX;
5442   }
5443   for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
5444     if (sf->intra_sf.skip_intra_in_interframe &&
5445         search_state->intra_search_state.skip_intra_modes)
5446       break;
5447     set_y_mode_and_delta_angle(
5448         mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
5449     assert(mbmi->mode < INTRA_MODE_END);
5450 
5451     // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
5452     if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
5453       continue;
5454 
5455     const THR_MODES mode_enum =
5456         get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
5457     if ((!intra_mode_cfg->enable_smooth_intra ||
5458          cpi->sf.intra_sf.disable_smooth_intra) &&
5459         (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
5460          mbmi->mode == SMOOTH_V_PRED))
5461       continue;
5462     if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
5463       continue;
5464     if (av1_is_directional_mode(mbmi->mode) &&
5465         !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
5466         mbmi->angle_delta[PLANE_TYPE_Y] != 0)
5467       continue;
5468     const PREDICTION_MODE this_mode = mbmi->mode;
5469 
5470     assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
5471     assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
5472     init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
5473     x->txfm_search_info.skip_txfm = 0;
5474 
5475     if (this_mode != DC_PRED) {
5476       // Only search the oblique modes if the best so far is
5477       // one of the neighboring directional modes
5478       if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
5479           (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
5480         if (search_state->best_mode_index != THR_INVALID &&
5481             search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
5482           continue;
5483       }
5484       if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
5485         if (conditional_skipintra(
5486                 this_mode, search_state->intra_search_state.best_intra_mode))
5487           continue;
5488       }
5489     }
5490 
5491     RD_STATS intra_rd_stats_y;
5492     int mode_cost_y;
5493     int64_t intra_rd_y = INT64_MAX;
5494     const int is_luma_result_valid = av1_handle_intra_y_mode(
5495         intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
5496         &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
5497         &best_model_rd, top_intra_model_rd);
5498     if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
5499       is_best_y_mode_intra = 1;
5500       if (intra_rd_y < best_rd_y) {
5501         best_intra_rd_stats_y = intra_rd_stats_y;
5502         best_mode_cost_y = mode_cost_y;
5503         best_rd_y = intra_rd_y;
5504         best_mbmi = *mbmi;
5505         best_mode_enum = mode_enum;
5506         memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
5507                sizeof(best_blk_skip[0]) * num_4x4);
5508         av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
5509       }
5510     }
5511   }
5512 
5513   if (!is_best_y_mode_intra) {
5514     return;
5515   }
5516 
5517   assert(best_rd_y < INT64_MAX);
5518 
5519   // Restores the best luma mode
5520   *mbmi = best_mbmi;
5521   memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
5522          sizeof(best_blk_skip[0]) * num_4x4);
5523   av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);
5524 
5525   // Performs chroma search
5526   RD_STATS intra_rd_stats, intra_rd_stats_uv;
5527   av1_init_rd_stats(&intra_rd_stats);
5528   av1_init_rd_stats(&intra_rd_stats_uv);
5529   const int num_planes = av1_num_planes(cm);
5530   if (num_planes > 1) {
5531     const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
5532         intra_search_state, cpi, x, bsize, &intra_rd_stats,
5533         &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);
5534 
5535     if (!intra_uv_mode_valid) {
5536       return;
5537     }
5538   }
5539 
5540   // Merge the luma and chroma rd stats
5541   assert(best_mode_cost_y >= 0);
5542   intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
5543   if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
5544     // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
5545     // in the tokenonly rate, but for intra blocks, tx_size is always coded
5546     // (prediction granularity), so we account for it in the full rate,
5547     // not the tokenonly rate.
5548     best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
5549   }
5550 
5551   const ModeCosts *mode_costs = &x->mode_costs;
5552   const PREDICTION_MODE mode = mbmi->mode;
5553   if (num_planes > 1 && xd->is_chroma_ref) {
5554     const int uv_mode_cost =
5555         mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
5556     intra_rd_stats.rate +=
5557         intra_rd_stats_uv.rate +
5558         intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
5559   }
5560 
5561   // Intra block is always coded as non-skip
5562   intra_rd_stats.skip_txfm = 0;
5563   intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
5564   // Add in the cost of the no skip flag.
5565   const int skip_ctx = av1_get_skip_txfm_context(xd);
5566   intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
5567   // Calculate the final RD estimate for this mode.
5568   const int64_t this_rd =
5569       RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
5570   // Keep record of best intra rd
5571   if (this_rd < search_state->best_intra_rd) {
5572     search_state->best_intra_rd = this_rd;
5573     intra_search_state->best_intra_mode = mode;
5574   }
5575 
5576   for (int i = 0; i < REFERENCE_MODES; ++i) {
5577     search_state->best_pred_rd[i] =
5578         AOMMIN(search_state->best_pred_rd[i], this_rd);
5579   }
5580 
5581   intra_rd_stats.rdcost = this_rd;
5582 
5583   // Collect mode stats for multiwinner mode processing
5584   const int txfm_search_done = 1;
5585   store_winner_mode_stats(
5586       &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
5587       &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
5588       cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
5589   if (intra_rd_stats.rdcost < search_state->best_rd) {
5590     update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
5591                         &best_intra_rd_stats_y, &intra_rd_stats_uv,
5592                         best_mode_enum, x, txfm_search_done);
5593   }
5594 }
5595 
5596 #if !CONFIG_REALTIME_ONLY
5597 // Prepare inter_cost and intra_cost from TPL stats, which are used as ML
5598 // features in intra mode pruning.
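     // The costs are accumulated over the TPL units covering the current block
     // and then averaged; they are left unchanged when full SB-level TPL data
     // is unavailable or the block is smaller than one TPL unit.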
5599 static inline void calculate_cost_from_tpl_data(const AV1_COMP *cpi,
5600                                                 MACROBLOCK *x, BLOCK_SIZE bsize,
5601                                                 int mi_row, int mi_col,
5602                                                 int64_t *inter_cost,
5603                                                 int64_t *intra_cost) {
5604   const AV1_COMMON *const cm = &cpi->common;
5605   // Only consider full SB.
5606   const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
5607   const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
5608   const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5609                   (block_size_high[sb_size] / tpl_bsize_1d);
5610   SuperBlockEnc *sb_enc = &x->sb_enc;
5611   if (sb_enc->tpl_data_count == len) {
5612     const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5613     const int tpl_stride = sb_enc->tpl_stride;
5614     const int tplw = mi_size_wide[tpl_bsize];
5615     const int tplh = mi_size_high[tpl_bsize];
5616     const int nw = mi_size_wide[bsize] / tplw;
5617     const int nh = mi_size_high[bsize] / tplh;
5618     if (nw >= 1 && nh >= 1) {
5619       const int of_h = mi_row % mi_size_high[sb_size];
5620       const int of_w = mi_col % mi_size_wide[sb_size];
5621       const int start = of_h / tplh * tpl_stride + of_w / tplw;
5622 
5623       for (int k = 0; k < nh; k++) {
5624         for (int l = 0; l < nw; l++) {
5625           *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5626           *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5627         }
5628       }
5629       *inter_cost /= nw * nh;
5630       *intra_cost /= nw * nh;
5631     }
5632   }
5633 }
5634 #endif  // !CONFIG_REALTIME_ONLY
5635 
5636 // When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
5637 // intra mode search.
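     // The pruning proceeds in stages: an MV-range based check, a check based
     // on the best inter mode being transform skip, and finally a small neural
     // net fed with the block size, the TPL inter/intra costs and a
     // quantizer-derived feature.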
5638 static inline void skip_intra_modes_in_interframe(
5639     AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
5640     InterModeSearchState *search_state, const SPEED_FEATURES *const sf,
5641     int64_t inter_cost, int64_t intra_cost) {
5642   MACROBLOCKD *const xd = &x->e_mbd;
5643   const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME;
5644   if (sf->rt_sf.prune_intra_mode_based_on_mv_range &&
5645       bsize > sf->part_sf.max_intra_bsize && !comp_pred) {
5646     const MV best_mv = search_state->best_mbmode.mv[0].as_mv;
5647     const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range;
5648     if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh &&
5649         x->source_variance > 128) {
5650       search_state->intra_search_state.skip_intra_modes = 1;
5651       return;
5652     }
5653   }
5654 
5655   const unsigned int src_var_thresh_intra_skip = 1;
5656   const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
5657   if (!(skip_intra_in_interframe &&
5658         (x->source_variance > src_var_thresh_intra_skip)))
5659     return;
5660 
5661   // Prune intra search based on the best inter mode being transform skip.
5662   if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
5663     const int qindex_thresh[2] = { 200, MAXQ };
5664     const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
5665     if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
5666         (x->qindex <= qindex_thresh[ind])) {
5667       search_state->intra_search_state.skip_intra_modes = 1;
5668       return;
5669     } else if ((skip_intra_in_interframe >= 4) &&
5670                (inter_cost < 0 || intra_cost < 0)) {
5671       search_state->intra_search_state.skip_intra_modes = 1;
5672       return;
5673     }
5674   }
5675   // Use ML model to prune intra search.
5676   if (inter_cost >= 0 && intra_cost >= 0) {
5677     const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
5678                                      ? &av1_intrap_nn_config
5679                                      : &av1_intrap_hd_nn_config;
5680     float nn_features[6];
5681     float scores[2] = { 0.0f };
5682 
5683     nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
5684     nn_features[1] = (float)mi_size_wide_log2[bsize];
5685     nn_features[2] = (float)mi_size_high_log2[bsize];
5686     nn_features[3] = (float)intra_cost;
5687     nn_features[4] = (float)inter_cost;
5688     const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
5689     const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
5690     nn_features[5] = (float)(ac_q_max / ac_q);
5691 
5692     av1_nn_predict(nn_features, nn_config, 1, scores);
5693 
5694     // For two parameters, the max prob returned from av1_nn_softmax equals
5695     // 1.0 / (1.0 + e^(-|diff_score|)). Use the scores directly here to
5696     // avoid calling av1_nn_softmax.
5697     const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
5698     assert(skip_intra_in_interframe <= 5);
5699     if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
5700       search_state->intra_search_state.skip_intra_modes = 1;
5701     }
5702   }
5703 }
5704 
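     // Returns true if interpolation filter search can be skipped for the
     // current prediction: in REALTIME mode this depends on the frame-level
     // reference mode and the related speed features, while in GOOD mode it
     // applies only to single-reference predictions.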
5705 static inline bool skip_interp_filter_search(const AV1_COMP *cpi,
5706                                              int is_single_pred) {
5707   const MODE encoding_mode = cpi->oxcf.mode;
5708   if (encoding_mode == REALTIME) {
5709     return (cpi->common.current_frame.reference_mode == SINGLE_REFERENCE &&
5710             (cpi->sf.interp_sf.skip_interp_filter_search ||
5711              cpi->sf.winner_mode_sf.winner_mode_ifs));
5712   } else if (encoding_mode == GOOD) {
5713     // Skip interpolation filter search for single prediction modes.
5714     return (cpi->sf.interp_sf.skip_interp_filter_search && is_single_pred);
5715   }
5716   return false;
5717 }
5718 
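     // Returns 1 if the block is classified as having low temporal variance
     // (used to prune inter modes in real-time encoding), 0 otherwise. Only
     // active with variance-based partitioning and the corresponding rt speed
     // features.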
5719 static inline int get_block_temp_var(const AV1_COMP *cpi, const MACROBLOCK *x,
5720                                      BLOCK_SIZE bsize) {
5721   const AV1_COMMON *const cm = &cpi->common;
5722   const SPEED_FEATURES *const sf = &cpi->sf;
5723 
5724   if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION ||
5725       !sf->rt_sf.short_circuit_low_temp_var ||
5726       !sf->rt_sf.prune_inter_modes_using_temp_var) {
5727     return 0;
5728   }
5729 
5730   const int mi_row = x->e_mbd.mi_row;
5731   const int mi_col = x->e_mbd.mi_col;
5732   int is_low_temp_var = 0;
5733 
5734   if (cm->seq_params->sb_size == BLOCK_64X64)
5735     is_low_temp_var = av1_get_force_skip_low_temp_var_small_sb(
5736         &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
5737   else
5738     is_low_temp_var = av1_get_force_skip_low_temp_var(
5739         &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
5740 
5741   return is_low_temp_var;
5742 }
5743 
5744 // TODO([email protected]): See the todo for av1_rd_pick_intra_mode_sb.
5745 void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
5746                             struct macroblock *x, struct RD_STATS *rd_cost,
5747                             BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5748                             int64_t best_rd_so_far) {
5749   AV1_COMMON *const cm = &cpi->common;
5750   const FeatureFlags *const features = &cm->features;
5751   const int num_planes = av1_num_planes(cm);
5752   const SPEED_FEATURES *const sf = &cpi->sf;
5753   MACROBLOCKD *const xd = &x->e_mbd;
5754   MB_MODE_INFO *const mbmi = xd->mi[0];
5755   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5756   int i;
5757   const ModeCosts *mode_costs = &x->mode_costs;
5758   const int *comp_inter_cost =
5759       mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
5760 
5761   InterModeSearchState search_state;
5762   init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
5763   INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
5764     INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
5765     INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
5766   };
5767   HandleInterModeArgs args = { { NULL },
5768                                { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
5769                                { NULL },
5770                                { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
5771                                  MAX_SB_SIZE >> 1 },
5772                                NULL,
5773                                NULL,
5774                                NULL,
5775                                search_state.modelled_rd,
5776                                INT_MAX,
5777                                INT_MAX,
5778                                search_state.simple_rd,
5779                                0,
5780                                false,
5781                                interintra_modes,
5782                                { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
5783                                { { 0, 0 } },
5784                                { 0 },
5785                                0,
5786                                0,
5787                                -1,
5788                                -1,
5789                                -1,
5790                                { 0 },
5791                                { 0 },
5792                                UINT_MAX };
5793   // Currently, is_low_temp_var is used in real time encoding.
5794   const int is_low_temp_var = get_block_temp_var(cpi, x, bsize);
5795 
5796   for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
5797   // Indicates the appropriate number of simple translation winner modes for
5798   // exhaustive motion mode evaluation
5799   const int max_winner_motion_mode_cand =
5800       num_winner_motion_modes[sf->winner_mode_sf.motion_mode_for_winner_cand];
5801   assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
5802   motion_mode_candidate motion_mode_cand;
5803   motion_mode_best_st_candidate best_motion_mode_cands;
5804   // Initializing the number of motion mode candidates to zero.
5805   best_motion_mode_cands.num_motion_mode_cand = 0;
5806   for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
5807     best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
5808 
5809   for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
5810 
5811   av1_invalid_rd_stats(rd_cost);
5812 
5813   for (i = 0; i < REF_FRAMES; ++i) {
5814     x->warp_sample_info[i].num = -1;
5815   }
5816 
5817   // Ref frames that are selected by square partition blocks.
5818   int picked_ref_frames_mask = 0;
5819   if (sf->inter_sf.prune_ref_frame_for_rect_partitions &&
5820       mbmi->partition != PARTITION_NONE) {
5821     // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
5822     // partition blocks. prune_ref_frame_for_rect_partitions >=2
5823     // implies prune for vert, horiz and extended partition blocks.
5824     if ((mbmi->partition != PARTITION_VERT &&
5825          mbmi->partition != PARTITION_HORZ) ||
5826         sf->inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
5827       picked_ref_frames_mask =
5828           fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
5829     }
5830   }
5831 
5832 #if CONFIG_COLLECT_COMPONENT_TIMING
5833   start_timing(cpi, set_params_rd_pick_inter_mode_time);
5834 #endif
5835   // Skip ref frames that were never selected by square blocks.
5836   const int skip_ref_frame_mask =
5837       picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
5838   mode_skip_mask_t mode_skip_mask;
5839   unsigned int ref_costs_single[REF_FRAMES];
5840   unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
5841   struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
5842   // init params, set frame modes, speed features
5843   set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
5844                                 skip_ref_frame_mask, ref_costs_single,
5845                                 ref_costs_comp, yv12_mb);
5846 #if CONFIG_COLLECT_COMPONENT_TIMING
5847   end_timing(cpi, set_params_rd_pick_inter_mode_time);
5848 #endif
5849 
5850   int64_t best_est_rd = INT64_MAX;
5851   const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
5852   // If do_tx_search is 0, only estimated RD should be computed.
5853   // If do_tx_search is 1, all modes have TX search performed.
5854   const int do_tx_search =
5855       !((sf->inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
5856         (sf->inter_sf.inter_mode_rd_model_estimation == 2 &&
5857          num_pels_log2_lookup[bsize] > 8));
5858   InterModesInfo *inter_modes_info = x->inter_modes_info;
5859   inter_modes_info->num = 0;
5860 
5861   // Temporary buffers used by handle_inter_mode().
5862   uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
5863 
5864   // The best RD found for each reference frame, among single-reference modes.
5865   // Note that the 0-th element will contain a cut-off that is later used
5866   // to determine if we should skip a compound mode.
5867   int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
5868                                        INT64_MAX, INT64_MAX, INT64_MAX,
5869                                        INT64_MAX, INT64_MAX };
5870 
5871   // Prepared stats used later to check if we could skip intra mode eval.
5872   int64_t inter_cost = -1;
5873   int64_t intra_cost = -1;
5874   // Need to tweak the threshold for hdres speed 0 & 1.
5875   const int mi_row = xd->mi_row;
5876   const int mi_col = xd->mi_col;
5877 
5878   // Obtain the relevant tpl stats for pruning inter modes
5879   PruneInfoFromTpl inter_cost_info_from_tpl;
5880 #if !CONFIG_REALTIME_ONLY
5881   if (sf->inter_sf.prune_inter_modes_based_on_tpl) {
5882     // x->tpl_keep_ref_frame[id] = 1 => no pruning in
5883     // prune_ref_by_selective_ref_frame()
5884     // x->tpl_keep_ref_frame[id] = 0  => ref frame can be pruned in
5885     // prune_ref_by_selective_ref_frame()
5886     // Populating valid_refs[idx] = 1 ensures that
5887     // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
5888     // pruned ref frame.
5889     int valid_refs[INTER_REFS_PER_FRAME];
5890     for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
5891       const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
5892       valid_refs[frame - 1] =
5893           x->tpl_keep_ref_frame[frame] ||
5894           !prune_ref_by_selective_ref_frame(
5895               cpi, x, refs, cm->cur_frame->ref_display_order_hint);
5896     }
5897     av1_zero(inter_cost_info_from_tpl);
5898     get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
5899                               &inter_cost_info_from_tpl);
5900   }
5901 
5902   const int do_pruning =
5903       (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
5904   if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
5905       cpi->oxcf.algo_cfg.enable_tpl_model)
5906     calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
5907                                  &intra_cost);
5908 #endif  // !CONFIG_REALTIME_ONLY
5909 
5910   // Initialize best mode stats for winner mode processing.
5911   const int max_winner_mode_count =
5912       winner_mode_count_allowed[sf->winner_mode_sf.multi_winner_mode_type];
5913   zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
5914   x->winner_mode_count = 0;
5915   store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5916                           NULL, bsize, best_rd_so_far,
5917                           sf->winner_mode_sf.multi_winner_mode_type, 0);
5918 
5919   int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
5920   if (sf->inter_sf.prune_inter_modes_if_skippable) {
5921     // Higher multiplication factor values for lower quantizers.
5922     mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
5923   }
5924 
5925   // Initialize arguments for mode loop speed features
5926   InterModeSFArgs sf_args = { &args.skip_motion_mode,
5927                               &mode_skip_mask,
5928                               &search_state,
5929                               skip_ref_frame_mask,
5930                               0,
5931                               mode_thresh_mul_fact,
5932                               0,
5933                               0 };
5934   int64_t best_inter_yrd = INT64_MAX;
5935 
5936   // This is the main loop of this function. It loops over all possible inter
5937   // modes and calls handle_inter_mode() to compute the RD for each.
5938   // Here midx is just an iterator index that should not be used by itself
5939   // except to keep track of the number of modes searched. It should be used
5940   // with av1_default_mode_order to get the enum that defines the mode, which
5941   // can be used with av1_mode_defs to get the prediction mode and the ref
5942   // frames.
5943   // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
5944   // good speedup for real time case. If we decide to use compound mode in real
5945   // time, maybe we can modify av1_default_mode_order table.
5946   THR_MODES mode_start = THR_INTER_MODE_START;
5947   THR_MODES mode_end = THR_INTER_MODE_END;
5948   const CurrentFrame *const current_frame = &cm->current_frame;
5949   if (current_frame->reference_mode == SINGLE_REFERENCE) {
5950     mode_start = SINGLE_REF_MODE_START;
5951     mode_end = SINGLE_REF_MODE_END;
5952   }
5953 
5954   for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
5955     // Get the actual prediction mode we are trying in this iteration
5956     const THR_MODES mode_enum = av1_default_mode_order[midx];
5957     const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
5958     const PREDICTION_MODE this_mode = mode_def->mode;
5959     const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
5960 
5961     const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
5962     const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
5963     const int is_single_pred =
5964         ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
5965     const int comp_pred = second_ref_frame > INTRA_FRAME;
5966 
5967     init_mbmi(mbmi, this_mode, ref_frames, cm);
5968 
5969     txfm_info->skip_txfm = 0;
5970     sf_args.num_single_modes_processed += is_single_pred;
5971     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
5972 #if CONFIG_COLLECT_COMPONENT_TIMING
5973     start_timing(cpi, skip_inter_mode_time);
5974 #endif
5975     // Apply speed features to decide if this inter mode can be skipped
5976     const int is_skip_inter_mode = skip_inter_mode(
5977         cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var);
5978 #if CONFIG_COLLECT_COMPONENT_TIMING
5979     end_timing(cpi, skip_inter_mode_time);
5980 #endif
5981     if (is_skip_inter_mode) continue;
5982 
5983     // Select prediction reference frames.
5984     for (i = 0; i < num_planes; i++) {
5985       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
5986       if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
5987     }
5988 
5989     mbmi->angle_delta[PLANE_TYPE_Y] = 0;
5990     mbmi->angle_delta[PLANE_TYPE_UV] = 0;
5991     mbmi->filter_intra_mode_info.use_filter_intra = 0;
5992     mbmi->ref_mv_idx = 0;
5993 
5994     const int64_t ref_best_rd = search_state.best_rd;
5995     RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
5996     av1_init_rd_stats(&rd_stats);
5997 
5998     const int ref_frame_cost = comp_pred
5999                                    ? ref_costs_comp[ref_frame][second_ref_frame]
6000                                    : ref_costs_single[ref_frame];
6001     const int compmode_cost =
6002         is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
6003     const int real_compmode_cost =
6004         cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
6005             ? compmode_cost
6006             : 0;
6007     // Point to variables that are maintained between loop iterations
6008     args.single_newmv = search_state.single_newmv;
6009     args.single_newmv_rate = search_state.single_newmv_rate;
6010     args.single_newmv_valid = search_state.single_newmv_valid;
6011     args.single_comp_cost = real_compmode_cost;
6012     args.ref_frame_cost = ref_frame_cost;
6013     args.best_pred_sse = search_state.best_pred_sse;
6014     args.skip_ifs = skip_interp_filter_search(cpi, is_single_pred);
6015 
6016     int64_t skip_rd[2] = { search_state.best_skip_rd[0],
6017                            search_state.best_skip_rd[1] };
6018     int64_t this_yrd = INT64_MAX;
6019 #if CONFIG_COLLECT_COMPONENT_TIMING
6020     start_timing(cpi, handle_inter_mode_time);
6021 #endif
6022     int64_t this_rd = handle_inter_mode(
6023         cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
6024         ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
6025         inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
6026         &this_yrd);
6027 #if CONFIG_COLLECT_COMPONENT_TIMING
6028     end_timing(cpi, handle_inter_mode_time);
6029 #endif
6030     if (current_frame->reference_mode != SINGLE_REFERENCE) {
6031       if (!args.skip_ifs &&
6032           sf->inter_sf.prune_comp_search_by_single_result > 0 &&
6033           is_inter_singleref_mode(this_mode)) {
6034         collect_single_states(x, &search_state, mbmi);
6035       }
6036 
6037       if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
6038           is_inter_singleref_mode(this_mode))
6039         update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
6040     }
6041 
6042     if (this_rd == INT64_MAX) continue;
6043 
6044     if (mbmi->skip_txfm) {
6045       rd_stats_y.rate = 0;
6046       rd_stats_uv.rate = 0;
6047     }
6048 
6049     if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
6050         this_rd < ref_frame_rd[ref_frame]) {
6051       ref_frame_rd[ref_frame] = this_rd;
6052     }
6053 
6054     // Did this mode help, i.e., is it the new best mode?
6055     if (this_rd < search_state.best_rd) {
6056       assert(IMPLIES(comp_pred,
6057                      cm->current_frame.reference_mode != SINGLE_REFERENCE));
6058       search_state.best_pred_sse = x->pred_sse[ref_frame];
6059       best_inter_yrd = this_yrd;
6060       update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
6061                           &rd_stats_uv, mode_enum, x, do_tx_search);
6062       if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
6063       // skip_rd[0] is the best total rd for a skip mode so far.
6064       // skip_rd[1] is the best total rd for a skip mode so far in luma.
6065       // When do_tx_search = 1, both skip_rd[0] and skip_rd[1] are updated.
6066       // When do_tx_search = 0, skip_rd[1] is updated.
6067       search_state.best_skip_rd[1] = skip_rd[1];
6068     }
6069     if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6070       // Add this mode to motion mode candidate list for motion mode search
6071       // if using motion_mode_for_winner_cand speed feature
6072       handle_winner_cand(mbmi, &best_motion_mode_cands,
6073                          max_winner_motion_mode_cand, this_rd,
6074                          &motion_mode_cand, args.skip_motion_mode);
6075     }
6076 
6077     /* keep record of best compound/single-only prediction */
6078     record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
6079                          x->rdmult, &search_state, compmode_cost);
6080   }
6081 
6082 #if CONFIG_COLLECT_COMPONENT_TIMING
6083   start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6084 #endif
6085   if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6086     // For the single-ref winner candidates, evaluate the other motion modes
6087     // (i.e. those other than simple translation).
6088     evaluate_motion_mode_for_winner_candidates(
6089         cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
6090         &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
6091         &search_state, &best_inter_yrd);
6092   }
6093 #if CONFIG_COLLECT_COMPONENT_TIMING
6094   end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6095 #endif
6096 
6097 #if CONFIG_COLLECT_COMPONENT_TIMING
6098   start_timing(cpi, do_tx_search_time);
6099 #endif
6100   if (do_tx_search != 1) {
6101     // A full tx search has not yet been done; run the tx search for the
6102     // top mode candidates.
6103     tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
6104                                     yv12_mb, mi_row, mi_col, &search_state,
6105                                     rd_cost, ctx, &best_inter_yrd);
6106   }
6107 #if CONFIG_COLLECT_COMPONENT_TIMING
6108   end_timing(cpi, do_tx_search_time);
6109 #endif
6110 
6111 #if CONFIG_COLLECT_COMPONENT_TIMING
6112   start_timing(cpi, handle_intra_mode_time);
6113 #endif
6114   // Gate intra mode evaluation when the best inter mode is a skip block,
6115   // except when source variance is extremely low; also gate on max intra bsize.
6116   skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost,
6117                                  intra_cost);
6118 
6119   const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
6120   search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
6121                                    &sf_args, intra_ref_frame_cost,
6122                                    best_inter_yrd);
6123 #if CONFIG_COLLECT_COMPONENT_TIMING
6124   end_timing(cpi, handle_intra_mode_time);
6125 #endif
6126 
6127 #if CONFIG_COLLECT_COMPONENT_TIMING
6128   start_timing(cpi, refine_winner_mode_tx_time);
6129 #endif
6130   int winner_mode_count =
6131       sf->winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
6132   // In effect only when fast tx search speed features are enabled.
6133   refine_winner_mode_tx(
6134       cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
6135       &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
6136       search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
6137 #if CONFIG_COLLECT_COMPONENT_TIMING
6138   end_timing(cpi, refine_winner_mode_tx_time);
6139 #endif
6140 
6141   // Initialize default mode evaluation params
6142   set_mode_eval_params(cpi, x, DEFAULT_EVAL);
6143 
6144   // Only try palette mode when the best mode so far is an intra mode.
6145   const int try_palette =
6146       cpi->oxcf.tool_cfg.enable_palette &&
6147       av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
6148       !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
6149   RD_STATS this_rd_cost;
6150   int this_skippable = 0;
6151   if (try_palette) {
6152 #if CONFIG_COLLECT_COMPONENT_TIMING
6153     start_timing(cpi, av1_search_palette_mode_time);
6154 #endif
6155     this_skippable = av1_search_palette_mode(
6156         &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
6157         ctx, &this_rd_cost, search_state.best_rd);
6158 #if CONFIG_COLLECT_COMPONENT_TIMING
6159     end_timing(cpi, av1_search_palette_mode_time);
6160 #endif
6161     if (this_rd_cost.rdcost < search_state.best_rd) {
6162       search_state.best_mode_index = THR_DC;
6163       mbmi->mv[0].as_int = 0;
6164       rd_cost->rate = this_rd_cost.rate;
6165       rd_cost->dist = this_rd_cost.dist;
6166       rd_cost->rdcost = this_rd_cost.rdcost;
6167       search_state.best_rd = rd_cost->rdcost;
6168       search_state.best_mbmode = *mbmi;
6169       search_state.best_skip2 = 0;
6170       search_state.best_mode_skippable = this_skippable;
6171       memcpy(ctx->blk_skip, txfm_info->blk_skip,
6172              sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
6173       av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
6174     }
6175   }
6176 
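  // Reset skip_mode and, when the frame-level skip-mode syntax is enabled and
  // compound references are allowed for this block size, check whether coding
  // the block as skip_mode (derived reference frames, no residual) beats the
  // current best mode.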
6177   search_state.best_mbmode.skip_mode = 0;
6178   if (cm->current_frame.skip_mode_info.skip_mode_flag &&
6179       is_comp_ref_allowed(bsize)) {
6180     const struct segmentation *const seg = &cm->seg;
6181     unsigned char segment_id = mbmi->segment_id;
6182     if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
6183       rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
6184     }
6185   }
6186 
6187   // Make sure that the ref_mv_idx is only nonzero when we're
6188   // using a mode which can support ref_mv_idx
6189   if (search_state.best_mbmode.ref_mv_idx != 0 &&
6190       !(search_state.best_mbmode.mode == NEWMV ||
6191         search_state.best_mbmode.mode == NEW_NEWMV ||
6192         have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
6193     search_state.best_mbmode.ref_mv_idx = 0;
6194   }
6195 
6196   if (search_state.best_mode_index == THR_INVALID ||
6197       search_state.best_rd >= best_rd_so_far) {
6198     rd_cost->rate = INT_MAX;
6199     rd_cost->rdcost = INT64_MAX;
6200     return;
6201   }
6202 
6203   const InterpFilter interp_filter = features->interp_filter;
6204   assert((interp_filter == SWITCHABLE) ||
6205          (interp_filter ==
6206           search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
6207          !is_inter_block(&search_state.best_mbmode));
6208   assert((interp_filter == SWITCHABLE) ||
6209          (interp_filter ==
6210           search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
6211          !is_inter_block(&search_state.best_mbmode));
6212 
6213   if (!cpi->rc.is_src_frame_alt_ref && sf->inter_sf.adaptive_rd_thresh) {
6214     av1_update_rd_thresh_fact(
6215         cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
6216         search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
6217   }
6218 
6219   // macroblock modes
6220   *mbmi = search_state.best_mbmode;
6221   txfm_info->skip_txfm |= search_state.best_skip2;
6222 
6223   // Note: this section is needed since the mode may have been forced to
6224   // GLOBALMV by the all-zero mode handling of ref-mv.
6225   if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
6226     // Correct the interp filters for GLOBALMV
6227     if (is_nontrans_global_motion(xd, xd->mi[0])) {
6228       int_interpfilters filters =
6229           av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
6230       assert(mbmi->interp_filters.as_int == filters.as_int);
6231       (void)filters;
6232     }
6233   }
6234 
6235   txfm_info->skip_txfm |= search_state.best_mode_skippable;
6236 
6237   assert(search_state.best_mode_index != THR_INVALID);
6238 
6239 #if CONFIG_INTERNAL_STATS
6240   store_coding_context(x, ctx, search_state.best_mode_index,
6241                        search_state.best_mode_skippable);
6242 #else
6243   store_coding_context(x, ctx, search_state.best_mode_skippable);
6244 #endif  // CONFIG_INTERNAL_STATS
6245 
6246   if (mbmi->palette_mode_info.palette_size[1] > 0) {
6247     assert(try_palette);
6248     av1_restore_uv_color_map(cpi, x);
6249   }
6250 }
6251 
6252 void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
6253                                         TileDataEnc *tile_data, MACROBLOCK *x,
6254                                         int mi_row, int mi_col,
6255                                         RD_STATS *rd_cost, BLOCK_SIZE bsize,
6256                                         PICK_MODE_CONTEXT *ctx,
6257                                         int64_t best_rd_so_far) {
6258   const AV1_COMMON *const cm = &cpi->common;
6259   const FeatureFlags *const features = &cm->features;
6260   MACROBLOCKD *const xd = &x->e_mbd;
6261   MB_MODE_INFO *const mbmi = xd->mi[0];
6262   unsigned char segment_id = mbmi->segment_id;
6263   const int comp_pred = 0;
6264   int i;
6265   unsigned int ref_costs_single[REF_FRAMES];
6266   unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
6267   const ModeCosts *mode_costs = &x->mode_costs;
6268   const int *comp_inter_cost =
6269       mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
6270   InterpFilter best_filter = SWITCHABLE;
6271   int64_t this_rd = INT64_MAX;
6272   int rate2 = 0;
6273   const int64_t distortion2 = 0;
6274   (void)mi_row;
6275   (void)mi_col;
6276   (void)tile_data;
6277 
6278   av1_collect_neighbors_ref_counts(xd);
6279 
6280   estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
6281                            ref_costs_comp);
6282 
6283   for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
6284   for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;
6285 
6286   rd_cost->rate = INT_MAX;
6287 
6288   assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
6289 
6290   mbmi->palette_mode_info.palette_size[0] = 0;
6291   mbmi->palette_mode_info.palette_size[1] = 0;
6292   mbmi->filter_intra_mode_info.use_filter_intra = 0;
6293   mbmi->mode = GLOBALMV;
6294   mbmi->motion_mode = SIMPLE_TRANSLATION;
6295   mbmi->uv_mode = UV_DC_PRED;
6296   if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
6297     mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
6298   else
6299     mbmi->ref_frame[0] = LAST_FRAME;
6300   mbmi->ref_frame[1] = NONE_FRAME;
6301   mbmi->mv[0].as_int =
6302       gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
6303                            features->allow_high_precision_mv, bsize, mi_col,
6304                            mi_row, features->cur_frame_force_integer_mv)
6305           .as_int;
6306   mbmi->tx_size = max_txsize_lookup[bsize];
6307   x->txfm_search_info.skip_txfm = 1;
6308 
6309   mbmi->ref_mv_idx = 0;
6310 
6311   mbmi->motion_mode = SIMPLE_TRANSLATION;
6312   av1_count_overlappable_neighbors(cm, xd);
6313   if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
6314     int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
6315     mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
6316     // Select the samples according to motion vector difference
6317     if (mbmi->num_proj_ref > 1) {
6318       mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
6319                                              mbmi->num_proj_ref, bsize);
6320     }
6321   }
6322 
6323   const InterpFilter interp_filter = features->interp_filter;
6324   set_default_interp_filters(mbmi, interp_filter);
6325 
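  // Choose the interpolation filter: use the frame-level filter when it is
  // fixed; otherwise (SWITCHABLE) pick the filter with the lowest signaling
  // cost, since no distortion is computed for this skip block.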
6326   if (interp_filter != SWITCHABLE) {
6327     best_filter = interp_filter;
6328   } else {
6329     best_filter = EIGHTTAP_REGULAR;
6330     if (av1_is_interp_needed(xd)) {
6331       int rs;
6332       int best_rs = INT_MAX;
6333       for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
6334         mbmi->interp_filters = av1_broadcast_interp_filter(i);
6335         rs = av1_get_switchable_rate(x, xd, interp_filter,
6336                                      cm->seq_params->enable_dual_filter);
6337         if (rs < best_rs) {
6338           best_rs = rs;
6339           best_filter = mbmi->interp_filters.as_filters.y_filter;
6340         }
6341       }
6342     }
6343   }
6344   // Set the appropriate filter
6345   mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
6346   rate2 += av1_get_switchable_rate(x, xd, interp_filter,
6347                                    cm->seq_params->enable_dual_filter);
6348 
6349   if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
6350     rate2 += comp_inter_cost[comp_pred];
6351 
6352   // Estimate the reference frame signaling cost and add it
6353   // to the rolling cost variable.
6354   rate2 += ref_costs_single[LAST_FRAME];
6355   this_rd = RDCOST(x->rdmult, rate2, distortion2);
6356 
6357   rd_cost->rate = rate2;
6358   rd_cost->dist = distortion2;
6359   rd_cost->rdcost = this_rd;
6360 
6361   if (this_rd >= best_rd_so_far) {
6362     rd_cost->rate = INT_MAX;
6363     rd_cost->rdcost = INT64_MAX;
6364     return;
6365   }
6366 
6367   assert((interp_filter == SWITCHABLE) ||
6368          (interp_filter == mbmi->interp_filters.as_filters.y_filter));
6369 
6370   if (cpi->sf.inter_sf.adaptive_rd_thresh) {
6371     av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
6372                               cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
6373                               THR_GLOBALMV, THR_INTER_MODE_START,
6374                               THR_INTER_MODE_END, THR_DC, MAX_MODES);
6375   }
6376 
6377 #if CONFIG_INTERNAL_STATS
6378   store_coding_context(x, ctx, THR_GLOBALMV, 0);
6379 #else
6380   store_coding_context(x, ctx, 0);
6381 #endif  // CONFIG_INTERNAL_STATS
6382 }
6383 
6384 /*!\cond */
6385 struct calc_target_weighted_pred_ctxt {
6386   const OBMCBuffer *obmc_buffer;
6387   const uint8_t *tmp;
6388   int tmp_stride;
6389   int overlap;
6390 };
6391 /*!\endcond */
6392 
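// Accumulates the above neighbour's contribution to the OBMC weighted-source
// and mask buffers: for each row of the overlap region, wsrc is set to
// Cv(y) * PAbove(x,y) and mask to Mv(y) (see the derivation ahead of
// calc_target_weighted_pred below).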
6393 static inline void calc_target_weighted_pred_above(
6394     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6395     int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6396   (void)nb_mi;
6397   (void)num_planes;
6398   (void)rel_mi_row;
6399   (void)dir;
6400 
6401   struct calc_target_weighted_pred_ctxt *ctxt =
6402       (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6403 
6404   const int bw = xd->width << MI_SIZE_LOG2;
6405   const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6406 
6407   int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
6408   int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
6409   const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
6410   const int is_hbd = is_cur_buf_hbd(xd);
6411 
6412   if (!is_hbd) {
6413     for (int row = 0; row < ctxt->overlap; ++row) {
6414       const uint8_t m0 = mask1d[row];
6415       const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6416       for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6417         wsrc[col] = m1 * tmp[col];
6418         mask[col] = m0;
6419       }
6420       wsrc += bw;
6421       mask += bw;
6422       tmp += ctxt->tmp_stride;
6423     }
6424   } else {
6425     const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6426 
6427     for (int row = 0; row < ctxt->overlap; ++row) {
6428       const uint8_t m0 = mask1d[row];
6429       const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6430       for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6431         wsrc[col] = m1 * tmp16[col];
6432         mask[col] = m0;
6433       }
6434       wsrc += bw;
6435       mask += bw;
6436       tmp16 += ctxt->tmp_stride;
6437     }
6438   }
6439 }
6440 
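// Blends the left neighbour's contribution into the OBMC buffers: for each
// column of the overlap region,
//   wsrc <- (wsrc >> AOM_BLEND_A64_ROUND_BITS) * Mh(x) +
//           (PLeft << AOM_BLEND_A64_ROUND_BITS) * Ch(x)
//   mask <- (mask >> AOM_BLEND_A64_ROUND_BITS) * Mh(x)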
6441 static inline void calc_target_weighted_pred_left(
6442     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6443     int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6444   (void)nb_mi;
6445   (void)num_planes;
6446   (void)rel_mi_col;
6447   (void)dir;
6448 
6449   struct calc_target_weighted_pred_ctxt *ctxt =
6450       (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6451 
6452   const int bw = xd->width << MI_SIZE_LOG2;
6453   const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6454 
6455   int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
6456   int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
6457   const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
6458   const int is_hbd = is_cur_buf_hbd(xd);
6459 
6460   if (!is_hbd) {
6461     for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6462       for (int col = 0; col < ctxt->overlap; ++col) {
6463         const uint8_t m0 = mask1d[col];
6464         const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6465         wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6466                     (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6467         mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6468       }
6469       wsrc += bw;
6470       mask += bw;
6471       tmp += ctxt->tmp_stride;
6472     }
6473   } else {
6474     const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6475 
6476     for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6477       for (int col = 0; col < ctxt->overlap; ++col) {
6478         const uint8_t m0 = mask1d[col];
6479         const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6480         wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6481                     (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6482         mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6483       }
6484       wsrc += bw;
6485       mask += bw;
6486       tmp16 += ctxt->tmp_stride;
6487     }
6488   }
6489 }
6490 
6491 // This function has a structure similar to av1_build_obmc_inter_prediction
6492 //
6493 // The OBMC predictor is computed as:
6494 //
6495 //  PObmc(x,y) =
6496 //    AOM_BLEND_A64(Mh(x),
6497 //                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
6498 //                  PLeft(x, y))
6499 //
6500 // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
6501 // rounding, this can be written as:
6502 //
6503 //  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * PObmc(x,y) =
6504 //    Mh(x) * Mv(y) * P(x,y) +
6505 //      Mh(x) * Cv(y) * Pabove(x,y) +
6506 //      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6507 //
6508 // Where :
6509 //
6510 //  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
6511 //  Ch(y) = AOM_BLEND_A64_MAX_ALPHA - Mh(y)
6512 //
6513 // This function computes 'wsrc' and 'mask' as:
6514 //
6515 //  wsrc(x, y) =
6516 //    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
6517 //      Mh(x) * Cv(y) * Pabove(x,y) -
6518 //      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6519 //
6520 //  mask(x, y) = Mh(x) * Mv(y)
6521 //
6522 // These can then be used to efficiently approximate the error for any
6523 // predictor P in the context of the provided neighbouring predictors by
6524 // computing:
6525 //
6526 //  error(x, y) =
6527 //    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
6528 //
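// Illustrative note (assuming AOM_BLEND_A64_MAX_ALPHA == 64, as defined in
// aom_dsp/blend.h): for a pixel with no above or left overlap,
// Mh(x) = Mv(y) = 64, so mask(x, y) = 64 * 64 = 4096 and
// wsrc(x, y) = 4096 * src(x, y); the error expression then reduces to
// 4096 * (src(x, y) - P(x, y)), i.e. the plain prediction error at a fixed
// scale.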
6529 static inline void calc_target_weighted_pred(
6530     const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
6531     const uint8_t *above, int above_stride, const uint8_t *left,
6532     int left_stride) {
6533   const BLOCK_SIZE bsize = xd->mi[0]->bsize;
6534   const int bw = xd->width << MI_SIZE_LOG2;
6535   const int bh = xd->height << MI_SIZE_LOG2;
6536   const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
6537   int32_t *mask_buf = obmc_buffer->mask;
6538   int32_t *wsrc_buf = obmc_buffer->wsrc;
6539 
6540   const int is_hbd = is_cur_buf_hbd(xd);
6541   const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
6542 
6543   // plane 0 should not be sub-sampled
6544   assert(xd->plane[0].subsampling_x == 0);
6545   assert(xd->plane[0].subsampling_y == 0);
6546 
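  // Start from wsrc = 0 and mask = AOM_BLEND_A64_MAX_ALPHA (full weight on the
  // current prediction); the above/left passes below only modify the overlap
  // regions.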
6547   av1_zero_array(wsrc_buf, bw * bh);
6548   for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
6549 
6550   // handle above row
6551   if (xd->up_available) {
6552     const int overlap =
6553         AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
6554     struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
6555                                                    above_stride, overlap };
6556     foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
6557                                   max_neighbor_obmc[mi_size_wide_log2[bsize]],
6558                                   calc_target_weighted_pred_above, &ctxt);
6559   }
6560 
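  // Scale both buffers by AOM_BLEND_A64_MAX_ALPHA so that positions the left
  // pass does not touch behave as if Mh(x) == AOM_BLEND_A64_MAX_ALPHA,
  // keeping wsrc and mask at the scale used in the formulas above.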
6561   for (int i = 0; i < bw * bh; ++i) {
6562     wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6563     mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6564   }
6565 
6566   // handle left column
6567   if (xd->left_available) {
6568     const int overlap =
6569         AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
6570     struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
6571                                                    left_stride, overlap };
6572     foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
6573                                  max_neighbor_obmc[mi_size_high_log2[bsize]],
6574                                  calc_target_weighted_pred_left, &ctxt);
6575   }
6576 
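  // Finally form the weighted source: wsrc = AOM_BLEND_A64_MAX_ALPHA^2 * src
  // minus the above/left contributions accumulated so far, matching the
  // definition of wsrc(x, y) in the comment above.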
6577   if (!is_hbd) {
6578     const uint8_t *src = x->plane[0].src.buf;
6579 
6580     for (int row = 0; row < bh; ++row) {
6581       for (int col = 0; col < bw; ++col) {
6582         wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6583       }
6584       wsrc_buf += bw;
6585       src += x->plane[0].src.stride;
6586     }
6587   } else {
6588     const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
6589 
6590     for (int row = 0; row < bh; ++row) {
6591       for (int col = 0; col < bw; ++col) {
6592         wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6593       }
6594       wsrc_buf += bw;
6595       src += x->plane[0].src.stride;
6596     }
6597   }
6598 }
6599