xref: /aosp_15_r20/external/libvpx/vp9/encoder/vp9_tpl_model.c (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
/*
 *  Copyright (c) 2023 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <math.h>

#include "./vpx_dsp_rtcd.h"
#if CONFIG_NON_GREEDY_MV
#include "vp9/common/vp9_mvref_common.h"
#endif
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_ext_ratectrl.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_tpl_model.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_ext_ratectrl.h"

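// Sets up the GF_PICTURE array for one GOP when an external rate controller
// (VPX_RC_GOP) drives the GOP structure: recon buffers are reserved, the
// reference table is seeded from the current ref_frame_map, and P frames plus
// up to two extension frames are appended. Returns the number of frames added
// beyond the nominal GOP size (extension frames plus an optional overlay).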
static int init_gop_frames_rc(VP9_COMP *cpi, GF_PICTURE *gf_picture,
                              const GF_GROUP *gf_group, int *tpl_group_frames) {
  VP9_COMMON *cm = &cpi->common;
  int frame_idx = 0;
  int i;
  int extend_frame_count = 0;
  int pframe_qindex = cpi->tpl_stats[2].base_qindex;
  int frame_gop_offset = 0;

  int added_overlay = 0;

  RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
  int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];

  memset(recon_frame_index, -1, sizeof(recon_frame_index));

  for (i = 0; i < FRAME_BUFFERS; ++i) {
    if (frame_bufs[i].ref_count == 0) {
      alloc_frame_mvs(cm, i);
      if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
                                   cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                                   cm->use_highbitdepth,
#endif
                                   VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
                                   NULL, NULL, NULL))
        vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                           "Failed to allocate frame buffer");

      recon_frame_index[frame_idx] = i;
      ++frame_idx;

      if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
    }
  }

  for (i = 0; i < REFS_PER_FRAME + 1; ++i) {
    assert(recon_frame_index[i] >= 0);
    cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
  }

  *tpl_group_frames = 0;

  int ref_table[3];

  if (gf_group->index == 1 && gf_group->update_type[1] == ARF_UPDATE) {
    if (gf_group->update_type[0] == KF_UPDATE) {
      // This is the only frame in ref buffer. We need it to be on
      // gf_picture[0].
      for (i = 0; i < 3; ++i) ref_table[i] = -REFS_PER_FRAME;

      gf_picture[0].frame =
          &cm->buffer_pool->frame_bufs[gf_group->update_ref_idx[0]].buf;
      ref_table[gf_group->update_ref_idx[0]] = 0;

      for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -REFS_PER_FRAME;
      gf_picture[0].update_type = gf_group->update_type[0];
    } else {
      for (i = 0; i < REFS_PER_FRAME; i++) {
        if (cm->ref_frame_map[i] != -1) {
          gf_picture[-i].frame =
              &cm->buffer_pool->frame_bufs[cm->ref_frame_map[i]].buf;
          ref_table[i] = -i;
        } else {
          ref_table[i] = -REFS_PER_FRAME;
        }
      }
      for (i = 0; i < 3; ++i) {
        gf_picture[0].ref_frame[i] = ref_table[i];
      }
    }
    ++*tpl_group_frames;

    // Initialize base layer ARF frame
    gf_picture[1].frame = cpi->Source;
    for (i = 0; i < 3; ++i) gf_picture[1].ref_frame[i] = ref_table[i];
    gf_picture[1].update_type = gf_group->update_type[1];
    ref_table[gf_group->update_ref_idx[1]] = 1;

    ++*tpl_group_frames;
  } else {
    assert(gf_group->index == 0);
    if (gf_group->update_type[0] == KF_UPDATE) {
      // This is the only frame in ref buffer. We need it to be on
      // gf_picture[0].
      gf_picture[0].frame = cpi->Source;
      for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -REFS_PER_FRAME;
      gf_picture[0].update_type = gf_group->update_type[0];

      for (i = 0; i < 3; ++i) ref_table[i] = -REFS_PER_FRAME;
      ref_table[gf_group->update_ref_idx[0]] = 0;
    } else {
      // Initialize ref table
      for (i = 0; i < REFS_PER_FRAME; i++) {
        if (cm->ref_frame_map[i] != -1) {
          gf_picture[-i].frame =
              &cm->buffer_pool->frame_bufs[cm->ref_frame_map[i]].buf;
          ref_table[i] = -i;
        } else {
          ref_table[i] = -REFS_PER_FRAME;
        }
      }
      for (i = 0; i < 3; ++i) {
        gf_picture[0].ref_frame[i] = ref_table[i];
      }
      gf_picture[0].update_type = gf_group->update_type[0];
      if (gf_group->update_type[0] != OVERLAY_UPDATE &&
          gf_group->update_ref_idx[0] != -1) {
        ref_table[gf_group->update_ref_idx[0]] = 0;
      }
    }
    ++*tpl_group_frames;
  }

  int has_arf =
      gf_group->gf_group_size > 1 && gf_group->update_type[1] == ARF_UPDATE &&
      gf_group->update_type[gf_group->gf_group_size] == OVERLAY_UPDATE;

  // Initialize P frames
  for (frame_idx = *tpl_group_frames; frame_idx < MAX_ARF_GOP_SIZE;
       ++frame_idx) {
    if (frame_idx >= gf_group->gf_group_size && !has_arf) break;
    struct lookahead_entry *buf;
    frame_gop_offset = gf_group->frame_gop_index[frame_idx];
    buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    if (buf == NULL) break;

    gf_picture[frame_idx].frame = &buf->img;
    for (i = 0; i < 3; ++i) {
      gf_picture[frame_idx].ref_frame[i] = ref_table[i];
    }

    if (gf_group->update_type[frame_idx] != OVERLAY_UPDATE &&
        gf_group->update_ref_idx[frame_idx] != -1) {
      ref_table[gf_group->update_ref_idx[frame_idx]] = frame_idx;
    }

    gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx];

    ++*tpl_group_frames;

    // The length of group of pictures is baseline_gf_interval, plus the
    // beginning golden frame from last GOP, plus the last overlay frame in
    // the same GOP.
    if (frame_idx == gf_group->gf_group_size) {
      added_overlay = 1;

      ++frame_idx;
      ++frame_gop_offset;
      break;
    }

    if (frame_idx == gf_group->gf_group_size - 1 &&
        gf_group->update_type[gf_group->gf_group_size] != OVERLAY_UPDATE) {
      ++frame_idx;
      ++frame_gop_offset;
      break;
    }
  }

  int lst_index = frame_idx - 1;
  // Extend two frames outside the current gf group.
  for (; has_arf && frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2;
       ++frame_idx) {
    struct lookahead_entry *buf =
        vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    if (buf == NULL) break;

    cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;

    gf_picture[frame_idx].frame = &buf->img;
    gf_picture[frame_idx].ref_frame[0] = gf_picture[lst_index].ref_frame[0];
    gf_picture[frame_idx].ref_frame[1] = gf_picture[lst_index].ref_frame[1];
    gf_picture[frame_idx].ref_frame[2] = gf_picture[lst_index].ref_frame[2];

    if (gf_picture[frame_idx].ref_frame[0] >
            gf_picture[frame_idx].ref_frame[1] &&
        gf_picture[frame_idx].ref_frame[0] >
            gf_picture[frame_idx].ref_frame[2]) {
      gf_picture[frame_idx].ref_frame[0] = lst_index;
    } else if (gf_picture[frame_idx].ref_frame[1] >
                   gf_picture[frame_idx].ref_frame[0] &&
               gf_picture[frame_idx].ref_frame[1] >
                   gf_picture[frame_idx].ref_frame[2]) {
      gf_picture[frame_idx].ref_frame[1] = lst_index;
    } else {
      gf_picture[frame_idx].ref_frame[2] = lst_index;
    }

    gf_picture[frame_idx].update_type = LF_UPDATE;
    lst_index = frame_idx;
    ++*tpl_group_frames;
    ++extend_frame_count;
    ++frame_gop_offset;
  }

  return extend_frame_count + added_overlay;
}

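// Sets up the GF_PICTURE array for the default (internal) rate control path.
// Frame 0 is the golden frame carried over from the previous GOP, frame 1 is
// the base layer ARF, and subsequent slots follow gf_group->update_type, with
// a stack tracking nested ARFs. Returns the number of extension frames
// appended past the end of the GOP.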
static int init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
                           const GF_GROUP *gf_group, int *tpl_group_frames) {
  if (cpi->ext_ratectrl.ready &&
      (cpi->ext_ratectrl.funcs.rc_type & VPX_RC_GOP) != 0) {
    return init_gop_frames_rc(cpi, gf_picture, gf_group, tpl_group_frames);
  }

  VP9_COMMON *cm = &cpi->common;
  int frame_idx = 0;
  int i;
  int gld_index = -1;
  int alt_index = -2;
  int lst_index = -1;
  int arf_index_stack[MAX_ARF_LAYERS];
  int arf_stack_size = 0;
  int extend_frame_count = 0;
  int pframe_qindex = cpi->tpl_stats[2].base_qindex;
  int frame_gop_offset = 0;

  RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
  int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];

  memset(recon_frame_index, -1, sizeof(recon_frame_index));
  stack_init(arf_index_stack, MAX_ARF_LAYERS);

  for (i = 0; i < FRAME_BUFFERS; ++i) {
    if (frame_bufs[i].ref_count == 0) {
      alloc_frame_mvs(cm, i);
      if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
                                   cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                                   cm->use_highbitdepth,
#endif
                                   VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
                                   NULL, NULL, NULL))
        vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                           "Failed to allocate frame buffer");

      recon_frame_index[frame_idx] = i;
      ++frame_idx;

      if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
    }
  }

  for (i = 0; i < REFS_PER_FRAME + 1; ++i) {
    assert(recon_frame_index[i] >= 0);
    cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
  }

  *tpl_group_frames = 0;

  // Initialize Golden reference frame.
  gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
  for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -REFS_PER_FRAME;
  gf_picture[0].update_type = gf_group->update_type[0];
  gld_index = 0;
  ++*tpl_group_frames;

  gf_picture[-1].frame = get_ref_frame_buffer(cpi, LAST_FRAME);
  gf_picture[-2].frame = get_ref_frame_buffer(cpi, ALTREF_FRAME);

  // Initialize base layer ARF frame
  gf_picture[1].frame = cpi->Source;
  gf_picture[1].ref_frame[0] = gld_index;
  gf_picture[1].ref_frame[1] = lst_index;
  gf_picture[1].ref_frame[2] = alt_index;
  gf_picture[1].update_type = gf_group->update_type[1];
  alt_index = 1;
  ++*tpl_group_frames;

  // Initialize P frames
  for (frame_idx = 2; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
    struct lookahead_entry *buf;
    frame_gop_offset = gf_group->frame_gop_index[frame_idx];
    buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    if (buf == NULL) break;

    gf_picture[frame_idx].frame = &buf->img;
    gf_picture[frame_idx].ref_frame[0] = gld_index;
    gf_picture[frame_idx].ref_frame[1] = lst_index;
    gf_picture[frame_idx].ref_frame[2] = alt_index;
    gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx];

    switch (gf_group->update_type[frame_idx]) {
      case ARF_UPDATE:
        stack_push(arf_index_stack, alt_index, arf_stack_size);
        ++arf_stack_size;
        alt_index = frame_idx;
        break;
      case LF_UPDATE: lst_index = frame_idx; break;
      case OVERLAY_UPDATE:
        gld_index = frame_idx;
        alt_index = stack_pop(arf_index_stack, arf_stack_size);
        --arf_stack_size;
        break;
      case USE_BUF_FRAME:
        lst_index = alt_index;
        alt_index = stack_pop(arf_index_stack, arf_stack_size);
        --arf_stack_size;
        break;
      default: break;
    }

    ++*tpl_group_frames;

    // The length of group of pictures is baseline_gf_interval, plus the
    // beginning golden frame from last GOP, plus the last overlay frame in
    // the same GOP.
    if (frame_idx == gf_group->gf_group_size) break;
  }

  alt_index = -1;
  ++frame_idx;
  ++frame_gop_offset;

  // Extend two frames outside the current gf group.
  for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
    struct lookahead_entry *buf =
        vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    if (buf == NULL) break;

    cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;

    gf_picture[frame_idx].frame = &buf->img;
    gf_picture[frame_idx].ref_frame[0] = gld_index;
    gf_picture[frame_idx].ref_frame[1] = lst_index;
    gf_picture[frame_idx].ref_frame[2] = alt_index;
    gf_picture[frame_idx].update_type = LF_UPDATE;
    lst_index = frame_idx;
    ++*tpl_group_frames;
    ++extend_frame_count;
    ++frame_gop_offset;
  }

  return extend_frame_count;
}

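// Zeroes the per-block TPL stats of every frame slot before a new GOP is
// analyzed and marks each frame as invalid until it is filled in.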
static void init_tpl_stats(VP9_COMP *cpi) {
  int frame_idx;
  for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
    TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
    memset(tpl_frame->tpl_stats_ptr, 0,
           tpl_frame->height * tpl_frame->width *
               sizeof(*tpl_frame->tpl_stats_ptr));
    tpl_frame->is_valid = 0;
  }
}

static void free_tpl_frame_stats_list(VpxTplGopStats *tpl_gop_stats) {
  int frame_idx;
  for (frame_idx = 0; frame_idx < tpl_gop_stats->size; ++frame_idx) {
    vpx_free(tpl_gop_stats->frame_stats_list[frame_idx].block_stats_list);
  }
  vpx_free(tpl_gop_stats->frame_stats_list);
}

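// (Re)allocates the VpxTplGopStats block-stats lists that are exported to an
// external rate controller: one frame entry per TPL frame and one block entry
// per 8x8 mi unit, sized from each frame's mi_rows x mi_cols.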
static void init_tpl_stats_before_propagation(
    struct vpx_internal_error_info *error_info, VpxTplGopStats *tpl_gop_stats,
    TplDepFrame *tpl_stats, int tpl_gop_frames, int frame_width,
    int frame_height) {
  int frame_idx;
  free_tpl_frame_stats_list(tpl_gop_stats);
  CHECK_MEM_ERROR(
      error_info, tpl_gop_stats->frame_stats_list,
      vpx_calloc(tpl_gop_frames, sizeof(*tpl_gop_stats->frame_stats_list)));
  tpl_gop_stats->size = tpl_gop_frames;
  for (frame_idx = 0; frame_idx < tpl_gop_frames; ++frame_idx) {
    const int mi_rows = tpl_stats[frame_idx].mi_rows;
    const int mi_cols = tpl_stats[frame_idx].mi_cols;
    CHECK_MEM_ERROR(
        error_info, tpl_gop_stats->frame_stats_list[frame_idx].block_stats_list,
        vpx_calloc(
            mi_rows * mi_cols,
            sizeof(
                *tpl_gop_stats->frame_stats_list[frame_idx].block_stats_list)));
    tpl_gop_stats->frame_stats_list[frame_idx].num_blocks = mi_rows * mi_cols;
    tpl_gop_stats->frame_stats_list[frame_idx].frame_width = frame_width;
    tpl_gop_stats->frame_stats_list[frame_idx].frame_height = frame_height;
  }
}

#if CONFIG_NON_GREEDY_MV
static uint32_t full_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
                                         MotionField *motion_field,
                                         int frame_idx, uint8_t *cur_frame_buf,
                                         uint8_t *ref_frame_buf, int stride,
                                         BLOCK_SIZE bsize, int mi_row,
                                         int mi_col, MV *mv) {
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  int step_param;
  uint32_t bestsme = UINT_MAX;
  const MvLimits tmp_mv_limits = x->mv_limits;
  // lambda is used to adjust the importance of motion vector consistency.
  // TODO(angiebird): Figure out lambda's proper value.
  const int lambda = cpi->tpl_stats[frame_idx].lambda;
  int_mv nb_full_mvs[NB_MVS_NUM];
  int nb_full_mv_num;

  MV best_ref_mv1 = { 0, 0 };
  MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */

  best_ref_mv1_full.col = best_ref_mv1.col >> 3;
  best_ref_mv1_full.row = best_ref_mv1.row >> 3;

  // Setup frame pointers
  x->plane[0].src.buf = cur_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = ref_frame_buf;
  xd->plane[0].pre[0].stride = stride;

  step_param = mv_sf->reduce_first_step_size;
  step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);

  vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);

  nb_full_mv_num =
      vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs);
  vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param,
                             lambda, 1, nb_full_mvs, nb_full_mv_num, mv);

  /* restore UMV window */
  x->mv_limits = tmp_mv_limits;

  return bestsme;
}

static uint32_t sub_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
                                        uint8_t *cur_frame_buf,
                                        uint8_t *ref_frame_buf, int stride,
                                        BLOCK_SIZE bsize, MV *mv) {
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  uint32_t bestsme = UINT_MAX;
  uint32_t distortion;
  uint32_t sse;
  int cost_list[5];

  MV best_ref_mv1 = { 0, 0 };

  // Setup frame pointers
  x->plane[0].src.buf = cur_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = ref_frame_buf;
  xd->plane[0].pre[0].stride = stride;

  // TODO(yunqing): may use higher tap interp filter than 2 taps.
  // Ignore mv costing by sending NULL pointer instead of cost array
  bestsme = cpi->find_fractional_mv_step(
      x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
      &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
      cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
      USE_2_TAPS);

  return bestsme;
}

#else  // CONFIG_NON_GREEDY_MV
static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
                                              uint8_t *cur_frame_buf,
                                              uint8_t *ref_frame_buf,
                                              int stride, BLOCK_SIZE bsize,
                                              MV *mv) {
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  const SEARCH_METHODS search_method = NSTEP;
  int step_param;
  int sadpb = x->sadperbit16;
  uint32_t bestsme = UINT_MAX;
  uint32_t distortion;
  uint32_t sse;
  int cost_list[5];
  const MvLimits tmp_mv_limits = x->mv_limits;

  MV best_ref_mv1 = { 0, 0 };
  MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */

  best_ref_mv1_full.col = best_ref_mv1.col >> 3;
  best_ref_mv1_full.row = best_ref_mv1.row >> 3;

  // Setup frame pointers
  x->plane[0].src.buf = cur_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = ref_frame_buf;
  xd->plane[0].pre[0].stride = stride;

  step_param = mv_sf->reduce_first_step_size;
  step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);

  vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);

  vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param,
                        search_method, sadpb, cond_cost_list(cpi, cost_list),
                        &best_ref_mv1, mv, 0, 0);

  /* restore UMV window */
  x->mv_limits = tmp_mv_limits;

  // TODO(yunqing): may use higher tap interp filter than 2 taps.
  // Ignore mv costing by sending NULL pointer instead of cost array
  bestsme = cpi->find_fractional_mv_step(
      x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
      &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
      cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
      USE_2_TAPS);

  return bestsme;
}
#endif

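// A motion-compensated block generally straddles up to four blocks on the
// regular grid of the reference frame. Given the grid-aligned corner of one of
// those four blocks (block = 0..3 in raster order: top-left, top-right,
// bottom-left, bottom-right) and the reference position of the predicted
// block, this returns the area of their overlap in pixels.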
static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
                            int ref_pos_col, int block, BLOCK_SIZE bsize) {
  int width = 0, height = 0;
  int bw = 4 << b_width_log2_lookup[bsize];
  int bh = 4 << b_height_log2_lookup[bsize];

  switch (block) {
    case 0:
      width = grid_pos_col + bw - ref_pos_col;
      height = grid_pos_row + bh - ref_pos_row;
      break;
    case 1:
      width = ref_pos_col + bw - grid_pos_col;
      height = grid_pos_row + bh - ref_pos_row;
      break;
    case 2:
      width = grid_pos_col + bw - ref_pos_col;
      height = ref_pos_row + bh - grid_pos_row;
      break;
    case 3:
      width = ref_pos_col + bw - grid_pos_col;
      height = ref_pos_row + bh - grid_pos_row;
      break;
    default: assert(0);
  }

  return width * height;
}

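// Floor division of a (possibly negative) pixel position by the block size,
// e.g. round_floor(-5, 8) == -1 while round_floor(5, 8) == 0, so negative
// positions map to the grid block above/left of the origin.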
static int round_floor(int ref_pos, int bsize_pix) {
  int round;
  if (ref_pos < 0)
    round = -(1 + (-ref_pos - 1) / bsize_pix);
  else
    round = ref_pos / bsize_pix;

  return round;
}

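// Copies the stats chosen for the block at (mi_row, mi_col) into every 8x8 mi
// unit the block covers, preserving each unit's accumulated mc_flow and
// mc_ref_cost, and refreshing mc_dep_cost = intra_cost + mc_flow.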
static void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col,
                            BLOCK_SIZE bsize, int stride) {
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col];
  int idx, idy;

  for (idy = 0; idy < mi_height; ++idy) {
    for (idx = 0; idx < mi_width; ++idx) {
      TplDepStats *tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col + idx];
      const int64_t mc_flow = tpl_ptr->mc_flow;
      const int64_t mc_ref_cost = tpl_ptr->mc_ref_cost;
      *tpl_ptr = *src_stats;
      tpl_ptr->mc_flow = mc_flow;
      tpl_ptr->mc_ref_cost = mc_ref_cost;
      tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow;
    }
  }
}

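// Mirrors the per-block stats into the VpxTplBlockStats layout consumed by an
// external rate controller, before temporal propagation runs. Positions are
// in pixels (8 * mi units); distortion and rate are scaled by
// TPL_DEP_COST_SCALE_LOG2 to match the internal fixed-point convention.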
static void tpl_store_before_propagation(VpxTplBlockStats *tpl_block_stats,
                                         TplDepStats *tpl_stats, int mi_row,
                                         int mi_col, BLOCK_SIZE bsize,
                                         int src_stride, int64_t recon_error,
                                         int64_t rate_cost, int ref_frame_idx,
                                         int mi_rows, int mi_cols) {
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const TplDepStats *src_stats = &tpl_stats[mi_row * src_stride + mi_col];
  int idx, idy;

  for (idy = 0; idy < mi_height; ++idy) {
    for (idx = 0; idx < mi_width; ++idx) {
      if (mi_row + idy >= mi_rows || mi_col + idx >= mi_cols) continue;
      VpxTplBlockStats *tpl_block_stats_ptr =
          &tpl_block_stats[(mi_row + idy) * mi_cols + mi_col + idx];
      tpl_block_stats_ptr->row = mi_row * 8 + idy * 8;
      tpl_block_stats_ptr->col = mi_col * 8 + idx * 8;
      tpl_block_stats_ptr->inter_cost = src_stats->inter_cost;
      tpl_block_stats_ptr->intra_cost = src_stats->intra_cost;
      // inter/intra_cost here is calculated with SATD which should be close
      // enough to be used as inter/intra_pred_error
      tpl_block_stats_ptr->inter_pred_err = src_stats->inter_cost;
      tpl_block_stats_ptr->intra_pred_err = src_stats->intra_cost;
      tpl_block_stats_ptr->srcrf_dist = recon_error << TPL_DEP_COST_SCALE_LOG2;
      tpl_block_stats_ptr->srcrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
      tpl_block_stats_ptr->mv_r = src_stats->mv.as_mv.row;
      tpl_block_stats_ptr->mv_c = src_stats->mv.as_mv.col;
      tpl_block_stats_ptr->ref_frame_index = ref_frame_idx;
    }
  }
}

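// Back-propagates the dependency cost of one block onto the (up to four) grid
// blocks of its reference frame that its motion-compensated prediction
// overlaps. The propagated amount is the part of mc_dep_cost not explained by
// the inter residual,
//   mc_flow = mc_dep_cost * (1 - inter_cost / intra_cost),
// weighted by overlap_area / pix_num for each destination block.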
static void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
                               int mi_row, int mi_col, const BLOCK_SIZE bsize) {
  if (tpl_stats->ref_frame_index < 0) return;

  TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
  TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr;
  MV mv = tpl_stats->mv.as_mv;
  int mv_row = mv.row >> 3;
  int mv_col = mv.col >> 3;

  int ref_pos_row = mi_row * MI_SIZE + mv_row;
  int ref_pos_col = mi_col * MI_SIZE + mv_col;

  const int bw = 4 << b_width_log2_lookup[bsize];
  const int bh = 4 << b_height_log2_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int pix_num = bw * bh;

  // top-left on grid block location in pixel
  int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
  int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
  int block;

  for (block = 0; block < 4; ++block) {
    int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
    int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);

    if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE &&
        grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
      int overlap_area = get_overlap_area(
          grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
      int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
      int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;

      int64_t mc_flow = tpl_stats->mc_dep_cost -
                        (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
                            tpl_stats->intra_cost;

      int idx, idy;

      for (idy = 0; idy < mi_height; ++idy) {
        for (idx = 0; idx < mi_width; ++idx) {
          TplDepStats *des_stats =
              &ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride +
                         (ref_mi_col + idx)];

          des_stats->mc_flow += (mc_flow * overlap_area) / pix_num;
          des_stats->mc_ref_cost +=
              ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) /
              pix_num;
          assert(overlap_area >= 0);
        }
      }
    }
  }
}

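// Applies tpl_model_update_b to each 8x8 sub-block of the given block so the
// propagation always operates at the 8x8 mi granularity.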
static void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
                             int mi_row, int mi_col, const BLOCK_SIZE bsize) {
  int idx, idy;
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];

  for (idy = 0; idy < mi_height; ++idy) {
    for (idx = 0; idx < mi_width; ++idx) {
      TplDepStats *tpl_ptr =
          &tpl_stats[(mi_row + idy) * tpl_frame->stride + (mi_col + idx)];
      tpl_model_update_b(tpl_frame, tpl_ptr, mi_row + idy, mi_col + idx,
                         BLOCK_8X8);
    }
  }
}

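// Quantizes the transform coefficients with the fast-path 32x32 quantizer and
// measures the resulting reconstruction error and SSE, both clamped to at
// least 1 and rescaled for sub-32x32 transforms.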
static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
                               tran_low_t *qcoeff, tran_low_t *dqcoeff,
                               TX_SIZE tx_size, int64_t *recon_error,
                               int64_t *sse, uint16_t *eob) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size];
  int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
  const int shift = tx_size == TX_32X32 ? 0 : 2;

  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vp9_highbd_quantize_fp_32x32(coeff, pix_num, p, qcoeff, dqcoeff,
                                 pd->dequant, eob, scan_order);
  } else {
    vp9_quantize_fp_32x32(coeff, pix_num, p, qcoeff, dqcoeff, pd->dequant, eob,
                          scan_order);
  }
#else
  vp9_quantize_fp_32x32(coeff, pix_num, p, qcoeff, dqcoeff, pd->dequant, eob,
                        scan_order);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
  *recon_error = VPXMAX(*recon_error, 1);

  *sse = (*sse) >> shift;
  *sse = VPXMAX(*sse, 1);
}

#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
                             TX_SIZE tx_size) {
  // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms.
  switch (tx_size) {
    case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break;
    case TX_16X16: vpx_highbd_hadamard_16x16(src_diff, bw, coeff); break;
    case TX_32X32: vpx_highbd_hadamard_32x32(src_diff, bw, coeff); break;
    default: assert(0);
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

void vp9_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
                      TX_SIZE tx_size) {
  switch (tx_size) {
    case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break;
    case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break;
    case TX_32X32: vpx_hadamard_32x32(src_diff, bw, coeff); break;
    default: assert(0);
  }
}

static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
                          int mi_col) {
  x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
  x->mv_limits.row_max =
      (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND);
  x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
  x->mv_limits.col_max =
      ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND);
}

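// Cheap rate proxy: instead of entropy coding, charge each coefficient up to
// eob roughly floor(log2(|level| + 1)) + 1 bits, plus one extra bit when the
// level is nonzero, then scale into the VP9_PROB_COST_SHIFT fixed-point
// domain used by the RD code.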
static int rate_estimator(const tran_low_t *qcoeff, int eob, TX_SIZE tx_size) {
  const ScanOrder *const scan_order = &vp9_scan_orders[tx_size][DCT_DCT];
  int rate_cost = 1;
  int idx;
  assert((1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]) >= eob);
  for (idx = 0; idx < eob; ++idx) {
    unsigned int abs_level = abs(qcoeff[scan_order->scan[idx]]);
    rate_cost += get_msb(abs_level + 1) + 1 + (abs_level > 0);
  }

  return (rate_cost << VP9_PROB_COST_SHIFT);
}

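// Per-block analysis pass: tries all intra prediction modes and one motion
// vector per available reference frame, keeps the best SATD cost of each, and
// records the winner (costs, mv, reference index) in tpl_stats. The recon
// error and rate estimates are taken from the best inter reference.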
static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                            struct scale_factors *sf, GF_PICTURE *gf_picture,
                            int frame_idx, TplDepFrame *tpl_frame,
                            int16_t *src_diff, tran_low_t *coeff,
                            tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row,
                            int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
                            YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor,
                            int64_t *recon_error, int64_t *rate_cost,
                            int64_t *sse, int *ref_frame_idx) {
  VP9_COMMON *cm = &cpi->common;
  ThreadData *td = &cpi->td;

  const int bw = 4 << b_width_log2_lookup[bsize];
  const int bh = 4 << b_height_log2_lookup[bsize];
  const int pix_num = bw * bh;
  int best_rf_idx = -1;
  int_mv best_mv;
  int64_t best_inter_cost = INT64_MAX;
  int64_t inter_cost;
  int rf_idx;
  const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP];

  int64_t best_intra_cost = INT64_MAX;
  int64_t intra_cost;
  PREDICTION_MODE mode;
  int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
  MODE_INFO mi_above, mi_left;
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  TplDepStats *tpl_stats =
      &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];

  xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
  xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
  xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
  xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8;
  xd->above_mi = (mi_row > 0) ? &mi_above : NULL;
  xd->left_mi = (mi_col > 0) ? &mi_left : NULL;

  // Intra prediction search
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    uint8_t *src, *dst;
    int src_stride, dst_stride;

    src = xd->cur_buf->y_buffer + mb_y_offset;
    src_stride = xd->cur_buf->y_stride;

    dst = &predictor[0];
    dst_stride = bw;

    xd->mi[0]->sb_type = bsize;
    xd->mi[0]->ref_frame[0] = INTRA_FRAME;

    vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src,
                            src_stride, dst, dst_stride, 0, 0, 0);

#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
                                dst_stride, xd->bd);
      vp9_highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      intra_cost = vpx_highbd_satd(coeff, pix_num);
    } else {
      vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
                         dst_stride);
      vp9_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      intra_cost = vpx_satd(coeff, pix_num);
    }
#else
    vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride);
    vp9_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
    intra_cost = vpx_satd(coeff, pix_num);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
  }

  // Motion compensated prediction
  best_mv.as_int = 0;

  set_mv_limits(cm, x, mi_row, mi_col);

  for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
    int_mv mv;
#if CONFIG_NON_GREEDY_MV
    MotionField *motion_field;
#endif
    if (ref_frame[rf_idx] == NULL) continue;

#if CONFIG_NON_GREEDY_MV
    (void)td;
    motion_field = vp9_motion_field_info_get_motion_field(
        &cpi->motion_field_info, frame_idx, rf_idx, bsize);
    mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
#else
    motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset,
                                  ref_frame[rf_idx]->y_buffer + mb_y_offset,
                                  xd->cur_buf->y_stride, bsize, &mv.as_mv);
#endif

#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_highbd_build_inter_predictor(
          CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset),
          ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]), bw,
          &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE,
          mi_row * MI_SIZE, xd->bd);
      vpx_highbd_subtract_block(
          bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
          xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
      vp9_highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      inter_cost = vpx_highbd_satd(coeff, pix_num);
    } else {
      vp9_build_inter_predictor(
          ref_frame[rf_idx]->y_buffer + mb_y_offset,
          ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh,
          0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
      vpx_subtract_block(bh, bw, src_diff, bw,
                         xd->cur_buf->y_buffer + mb_y_offset,
                         xd->cur_buf->y_stride, &predictor[0], bw);
      vp9_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      inter_cost = vpx_satd(coeff, pix_num);
    }
#else
    vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset,
                              ref_frame[rf_idx]->y_stride, &predictor[0], bw,
                              &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3,
                              mi_col * MI_SIZE, mi_row * MI_SIZE);
    vpx_subtract_block(bh, bw, src_diff, bw,
                       xd->cur_buf->y_buffer + mb_y_offset,
                       xd->cur_buf->y_stride, &predictor[0], bw);
    vp9_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
    inter_cost = vpx_satd(coeff, pix_num);
#endif

    if (inter_cost < best_inter_cost) {
      uint16_t eob = 0;
      best_rf_idx = rf_idx;
      best_inter_cost = inter_cost;
      best_mv.as_int = mv.as_int;
      // Since best_inter_cost is initialized as INT64_MAX, recon_error and
      // rate_cost will be calculated with the best reference frame.
      get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error,
                         sse, &eob);
      *rate_cost = rate_estimator(qcoeff, eob, tx_size);
    }
  }
  best_intra_cost = VPXMAX(best_intra_cost, 1);
  best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost);
  tpl_stats->inter_cost = VPXMAX(
      1, (best_inter_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
  tpl_stats->intra_cost = VPXMAX(
      1, (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
  if (best_rf_idx >= 0) {
    tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx];
  }
  tpl_stats->mv.as_int = best_mv.as_int;
  *ref_frame_idx = best_rf_idx;
}

#if CONFIG_NON_GREEDY_MV
static int get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture,
                                  int frame_idx, int rf_idx, int mi_row,
                                  int mi_col, struct buf_2d *src,
                                  struct buf_2d *pre) {
  const int mb_y_offset =
      mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
  YV12_BUFFER_CONFIG *ref_frame = NULL;
  int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
  if (ref_frame_idx != -1) {
    ref_frame = gf_picture[ref_frame_idx].frame;
    src->buf = xd->cur_buf->y_buffer + mb_y_offset;
    src->stride = xd->cur_buf->y_stride;
    pre->buf = ref_frame->y_buffer + mb_y_offset;
    pre->stride = ref_frame->y_stride;
    assert(src->stride == pre->stride);
    return 1;
  } else {
    printf("invalid ref_frame_idx");
    assert(ref_frame_idx != -1);
    return 0;
  }
}

#define kMvPreCheckLines 5
#define kMvPreCheckSize 15

#define MV_REF_POS_NUM 3
POSITION mv_ref_pos[MV_REF_POS_NUM] = {
  { -1, 0 },
  { 0, -1 },
  { -1, -1 },
};

static int_mv *get_select_mv(VP9_COMP *cpi, TplDepFrame *tpl_frame, int mi_row,
                             int mi_col) {
  return &cpi->select_mv_arr[mi_row * tpl_frame->stride + mi_col];
}

static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
                          BLOCK_SIZE bsize, int mi_row, int mi_col) {
  int i;
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  int_mv nearest_mv, near_mv, invalid_mv;
  nearest_mv.as_int = INVALID_MV;
  near_mv.as_int = INVALID_MV;
  invalid_mv.as_int = INVALID_MV;
  for (i = 0; i < MV_REF_POS_NUM; ++i) {
    int nb_row = mi_row + mv_ref_pos[i].row * mi_height;
    int nb_col = mi_col + mv_ref_pos[i].col * mi_width;
    assert(mv_ref_pos[i].row <= 0);
    assert(mv_ref_pos[i].col <= 0);
    if (nb_row >= 0 && nb_col >= 0) {
      if (nearest_mv.as_int == INVALID_MV) {
        nearest_mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
      } else {
        int_mv mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
        if (mv.as_int == nearest_mv.as_int) {
          continue;
        } else {
          near_mv = mv;
          break;
        }
      }
    }
  }
  if (nearest_mv.as_int == INVALID_MV) {
    nearest_mv.as_mv.row = 0;
    nearest_mv.as_mv.col = 0;
  }
  if (near_mv.as_int == INVALID_MV) {
    near_mv.as_mv.row = 0;
    near_mv.as_mv.col = 0;
  }
  if (mv_mode == NEAREST_MV_MODE) {
    return nearest_mv;
  }
  if (mv_mode == NEAR_MV_MODE) {
    return near_mv;
  }
  assert(0);
  return invalid_mv;
}

static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
                                  MotionField *motion_field,
                                  TplDepFrame *tpl_frame, BLOCK_SIZE bsize,
                                  int mi_row, int mi_col) {
  int_mv mv;
  switch (mv_mode) {
    case ZERO_MV_MODE:
      mv.as_mv.row = 0;
      mv.as_mv.col = 0;
      break;
    case NEW_MV_MODE:
      mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
      break;
    case NEAREST_MV_MODE:
      mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
      break;
    case NEAR_MV_MODE:
      mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
      break;
    default:
      mv.as_int = INVALID_MV;
      assert(0);
      break;
  }
  return mv;
}

static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
                          GF_PICTURE *gf_picture, MotionField *motion_field,
                          int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
                          BLOCK_SIZE bsize, int mi_row, int mi_col,
                          int_mv *mv) {
  uint32_t sse;
  struct buf_2d src;
  struct buf_2d pre;
  MV full_mv;
  *mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize,
                            mi_row, mi_col);
  full_mv = get_full_mv(&mv->as_mv);
  if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col,
                             &src, &pre)) {
    // TODO(angiebird): Consider subpixel when computing the sse.
    cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv),
                          pre.stride, &sse);
    return (double)(sse << VP9_DIST_SCALE_LOG2);
  } else {
    assert(0);
    return 0;
  }
}

static int get_mv_mode_cost(int mv_mode) {
  // TODO(angiebird): The probabilities are roughly inferred from
  // default_inter_mode_probs. Check if there is a better way to set the
  // probabilities.
  const int zero_mv_prob = 16;
  const int new_mv_prob = 24 * 1;
  const int ref_mv_prob = 256 - zero_mv_prob - new_mv_prob;
  assert(zero_mv_prob + new_mv_prob + ref_mv_prob == 256);
  switch (mv_mode) {
    case ZERO_MV_MODE: return vp9_prob_cost[zero_mv_prob]; break;
    case NEW_MV_MODE: return vp9_prob_cost[new_mv_prob]; break;
    case NEAREST_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
    case NEAR_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
    default: assert(0); return -1;
  }
}

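// Approximates the bit cost of coding the residual between two motion vectors
// as log2(1 + |dr|) + log2(1 + |dc|), scaled into the probability cost
// domain.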
static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) {
  double mv_diff_cost = log2(1 + abs(new_mv->row - ref_mv->row)) +
                        log2(1 + abs(new_mv->col - ref_mv->col));
  mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT);
  return mv_diff_cost;
}

static double get_mv_cost(int mv_mode, VP9_COMP *cpi, MotionField *motion_field,
                          TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row,
                          int mi_col) {
  double mv_cost = get_mv_mode_cost(mv_mode);
  if (mv_mode == NEW_MV_MODE) {
    MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame,
                                    bsize, mi_row, mi_col)
                    .as_mv;
    MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, motion_field,
                                        tpl_frame, bsize, mi_row, mi_col)
                        .as_mv;
    MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, motion_field, tpl_frame,
                                     bsize, mi_row, mi_col)
                     .as_mv;
    double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv);
    double near_cost = get_mv_diff_cost(&new_mv, &near_mv);
    mv_cost += nearest_cost < near_cost ? nearest_cost : near_cost;
  }
  return mv_cost;
}

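// Rate-distortion style score for one mv mode: the bit cost of the mode (plus
// the mv residual for NEW_MV_MODE) plus a log-scaled distortion term,
// mult * log2(1 + mv_dist), with a fixed weight mult = 180.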
static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x,
                           GF_PICTURE *gf_picture, MotionField *motion_field,
                           int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
                           BLOCK_SIZE bsize, int mi_row, int mi_col,
                           int_mv *mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  double mv_dist =
      get_mv_dist(mv_mode, cpi, xd, gf_picture, motion_field, frame_idx,
                  tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
  double mv_cost =
      get_mv_cost(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col);
  double mult = 180;

  return mv_cost + mult * log2f(1 + mv_dist);
}

static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                 GF_PICTURE *gf_picture,
                                 MotionField *motion_field, int frame_idx,
                                 TplDepFrame *tpl_frame, int rf_idx,
                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
                                 double *rd, int_mv *mv) {
  int best_mv_mode = ZERO_MV_MODE;
  int update = 0;
  int mv_mode;
  *rd = 0;
  for (mv_mode = 0; mv_mode < MAX_MV_MODE; ++mv_mode) {
    double this_rd;
    int_mv this_mv;
    if (mv_mode == NEW_MV_MODE) {
      continue;
    }
    this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, motion_field, frame_idx,
                           tpl_frame, rf_idx, bsize, mi_row, mi_col, &this_mv);
    if (update == 0) {
      *rd = this_rd;
      *mv = this_mv;
      best_mv_mode = mv_mode;
      update = 1;
    } else {
      if (this_rd < *rd) {
        *rd = this_rd;
        *mv = this_mv;
        best_mv_mode = mv_mode;
      }
    }
  }
  return best_mv_mode;
}

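// Decides between NEW_MV_MODE and the best reference mv mode for the block at
// (mi_row, mi_col) by comparing the aggregate rd over a small triangular
// neighborhood of following blocks (kMvPreCheckLines diagonals), since the
// choice here changes the reference mvs available to those blocks. The rd
// difference is recorded in rd_diff_arr.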
static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                            GF_PICTURE *gf_picture, MotionField *motion_field,
                            int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
                            BLOCK_SIZE bsize, int mi_row, int mi_col) {
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  int tmp_mv_mode_arr[kMvPreCheckSize];
  int *mv_mode_arr = tpl_frame->mv_mode_arr[rf_idx];
  double *rd_diff_arr = tpl_frame->rd_diff_arr[rf_idx];
  int_mv *select_mv_arr = cpi->select_mv_arr;
  int_mv tmp_select_mv_arr[kMvPreCheckSize];
  int stride = tpl_frame->stride;
  double new_mv_rd = 0;
  double no_new_mv_rd = 0;
  double this_new_mv_rd = 0;
  double this_no_new_mv_rd = 0;
  int idx;
  int tmp_idx;
  assert(kMvPreCheckSize == (kMvPreCheckLines * (kMvPreCheckLines + 1)) >> 1);

  // no new mv
  // diagonal scan order
  tmp_idx = 0;
  for (idx = 0; idx < kMvPreCheckLines; ++idx) {
    int r;
    for (r = 0; r <= idx; ++r) {
      int c = idx - r;
      int nb_row = mi_row + r * mi_height;
      int nb_col = mi_col + c * mi_width;
      if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
        double this_rd;
        int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
        mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
            cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
            bsize, nb_row, nb_col, &this_rd, mv);
        if (r == 0 && c == 0) {
          this_no_new_mv_rd = this_rd;
        }
        no_new_mv_rd += this_rd;
        tmp_mv_mode_arr[tmp_idx] = mv_mode_arr[nb_row * stride + nb_col];
        tmp_select_mv_arr[tmp_idx] = select_mv_arr[nb_row * stride + nb_col];
        ++tmp_idx;
      }
    }
  }

  // new mv
  mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE;
  this_new_mv_rd = eval_mv_mode(
      NEW_MV_MODE, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
      rf_idx, bsize, mi_row, mi_col, &select_mv_arr[mi_row * stride + mi_col]);
  new_mv_rd = this_new_mv_rd;
  // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE
  // beforehand.
  for (idx = 1; idx < kMvPreCheckLines; ++idx) {
    int r;
    for (r = 0; r <= idx; ++r) {
      int c = idx - r;
      int nb_row = mi_row + r * mi_height;
      int nb_col = mi_col + c * mi_width;
      if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
        double this_rd;
        int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
        mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
            cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
            bsize, nb_row, nb_col, &this_rd, mv);
        new_mv_rd += this_rd;
      }
    }
  }

  // update best_mv_mode
  tmp_idx = 0;
  if (no_new_mv_rd < new_mv_rd) {
    for (idx = 0; idx < kMvPreCheckLines; ++idx) {
      int r;
      for (r = 0; r <= idx; ++r) {
        int c = idx - r;
        int nb_row = mi_row + r * mi_height;
        int nb_col = mi_col + c * mi_width;
        if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
          mv_mode_arr[nb_row * stride + nb_col] = tmp_mv_mode_arr[tmp_idx];
          select_mv_arr[nb_row * stride + nb_col] = tmp_select_mv_arr[tmp_idx];
          ++tmp_idx;
        }
      }
    }
    rd_diff_arr[mi_row * stride + mi_col] = 0;
  } else {
    rd_diff_arr[mi_row * stride + mi_col] =
        (no_new_mv_rd - this_no_new_mv_rd) - (new_mv_rd - this_new_mv_rd);
  }
}

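// Runs predict_mv_mode over the whole frame in anti-diagonal order so that
// each block's above and left neighbors (the sources of its reference mvs,
// per mv_ref_pos) are decided before the block itself.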
predict_mv_mode_arr(VP9_COMP * cpi,MACROBLOCK * x,GF_PICTURE * gf_picture,MotionField * motion_field,int frame_idx,TplDepFrame * tpl_frame,int rf_idx,BLOCK_SIZE bsize)1276 static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x,
1277                                 GF_PICTURE *gf_picture,
1278                                 MotionField *motion_field, int frame_idx,
1279                                 TplDepFrame *tpl_frame, int rf_idx,
1280                                 BLOCK_SIZE bsize) {
1281   const int mi_height = num_8x8_blocks_high_lookup[bsize];
1282   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
1283   const int unit_rows = tpl_frame->mi_rows / mi_height;
1284   const int unit_cols = tpl_frame->mi_cols / mi_width;
1285   const int max_diagonal_lines = unit_rows + unit_cols - 1;
1286   int idx;
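  // Process one anti-diagonal (r + c == idx) per iteration. Blocks on
  // earlier diagonals finish first, so a block's above and left neighbors
  // are always decided before the block itself.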
1287   for (idx = 0; idx < max_diagonal_lines; ++idx) {
1288     int r;
1289     for (r = VPXMAX(idx - unit_cols + 1, 0); r <= VPXMIN(idx, unit_rows - 1);
1290          ++r) {
1291       int c = idx - r;
1292       int mi_row = r * mi_height;
1293       int mi_col = c * mi_width;
1294       assert(c >= 0 && c < unit_cols);
1295       assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows);
1296       assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols);
1297       predict_mv_mode(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
1298                       rf_idx, bsize, mi_row, mi_col);
1299     }
1300   }
1301 }
1302 
1303 static void do_motion_search(VP9_COMP *cpi, ThreadData *td,
1304                              MotionField *motion_field, int frame_idx,
1305                              YV12_BUFFER_CONFIG *ref_frame, BLOCK_SIZE bsize,
1306                              int mi_row, int mi_col) {
1307   VP9_COMMON *cm = &cpi->common;
1308   MACROBLOCK *x = &td->mb;
1309   MACROBLOCKD *xd = &x->e_mbd;
1310   const int mb_y_offset =
1311       mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
1312   assert(ref_frame != NULL);
1313   set_mv_limits(cm, x, mi_row, mi_col);
1314   {
1315     int_mv mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
1316     uint8_t *cur_frame_buf = xd->cur_buf->y_buffer + mb_y_offset;
1317     uint8_t *ref_frame_buf = ref_frame->y_buffer + mb_y_offset;
1318     const int stride = xd->cur_buf->y_stride;
1319     full_pixel_motion_search(cpi, td, motion_field, frame_idx, cur_frame_buf,
1320                              ref_frame_buf, stride, bsize, mi_row, mi_col,
1321                              &mv.as_mv);
1322     sub_pixel_motion_search(cpi, td, cur_frame_buf, ref_frame_buf, stride,
1323                             bsize, &mv.as_mv);
1324     vp9_motion_field_mi_set_mv(motion_field, mi_row, mi_col, mv);
1325   }
1326 }
1327 
1328 static void build_motion_field(
1329     VP9_COMP *cpi, int frame_idx,
1330     YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES], BLOCK_SIZE bsize) {
1331   VP9_COMMON *cm = &cpi->common;
1332   ThreadData *td = &cpi->td;
1333   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
1334   const int mi_height = num_8x8_blocks_high_lookup[bsize];
1335   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
1336   const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
1337   const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
1338   int mi_row, mi_col;
1339   int rf_idx;
1340 
1341   tpl_frame->lambda = (pw * ph) >> 2;
1342   assert(pw * ph == tpl_frame->lambda << 2);
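  // lambda is proportional to the block's pixel count; the assert confirms
  // that the shift did not discard any bits of pw * ph.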
1343 
1344   for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
1345     MotionField *motion_field = vp9_motion_field_info_get_motion_field(
1346         &cpi->motion_field_info, frame_idx, rf_idx, bsize);
1347     if (ref_frame[rf_idx] == NULL) {
1348       continue;
1349     }
1350     vp9_motion_field_reset_mvs(motion_field);
1351     for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
1352       for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
1353         do_motion_search(cpi, td, motion_field, frame_idx, ref_frame[rf_idx],
1354                          bsize, mi_row, mi_col);
1355       }
1356     }
1357   }
1358 }
1359 #endif  // CONFIG_NON_GREEDY_MV
1360 
1361 static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
1362                               int frame_idx, BLOCK_SIZE bsize) {
1363   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
1364   VpxTplFrameStats *tpl_frame_stats_before_propagation =
1365       &cpi->tpl_gop_stats.frame_stats_list[frame_idx];
1366   YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
1367   YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES] = { NULL, NULL, NULL };
1368 
1369   VP9_COMMON *cm = &cpi->common;
1370   struct scale_factors sf;
1371   int rdmult, idx;
1372   ThreadData *td = &cpi->td;
1373   MACROBLOCK *x = &td->mb;
1374   MACROBLOCKD *xd = &x->e_mbd;
1375   int mi_row, mi_col;
1376 
1377 #if CONFIG_VP9_HIGHBITDEPTH
1378   DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
1379   DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
1380   uint8_t *predictor;
1381 #else
1382   DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]);
1383 #endif
1384   DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
1385   DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
1386   DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]);
1387   DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
1388 
1389   const TX_SIZE tx_size = max_txsize_lookup[bsize];
1390   const int mi_height = num_8x8_blocks_high_lookup[bsize];
1391   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
1392 
1393   tpl_frame_stats_before_propagation->frame_width = cm->width;
1394   tpl_frame_stats_before_propagation->frame_height = cm->height;
1395   // Set up the scaling factors (source and destination sizes match, so the mapping is 1:1).
1396 #if CONFIG_VP9_HIGHBITDEPTH
1397   vp9_setup_scale_factors_for_frame(
1398       &sf, this_frame->y_crop_width, this_frame->y_crop_height,
1399       this_frame->y_crop_width, this_frame->y_crop_height,
1400       cpi->common.use_highbitdepth);
1401 
1402   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1403     predictor = CONVERT_TO_BYTEPTR(predictor16);
1404   else
1405     predictor = predictor8;
1406 #else
1407   vp9_setup_scale_factors_for_frame(
1408       &sf, this_frame->y_crop_width, this_frame->y_crop_height,
1409       this_frame->y_crop_width, this_frame->y_crop_height);
1410 #endif  // CONFIG_VP9_HIGHBITDEPTH
1411 
1412   // Prepare reference frame pointers. If a reference frame slot is
1413   // unavailable, its pointer stays NULL.
1414   for (idx = 0; idx < MAX_INTER_REF_FRAMES; ++idx) {
1415     int rf_idx = gf_picture[frame_idx].ref_frame[idx];
1416     if (rf_idx != -REFS_PER_FRAME) ref_frame[idx] = gf_picture[rf_idx].frame;
1417   }
1418 
1419   xd->mi = cm->mi_grid_visible;
1420   xd->mi[0] = cm->mi;
1421   xd->cur_buf = this_frame;
1422 
1423   // Get rd multiplier set up.
1424   rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex);
1425   set_error_per_bit(&cpi->td.mb, rdmult);
1426   vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex);
1427 
1428   tpl_frame->is_valid = 1;
1429 
1430   cm->base_qindex = tpl_frame->base_qindex;
1431   vp9_frame_init_quantizer(cpi);
1432 
1433 #if CONFIG_NON_GREEDY_MV
1434   {
1435     int square_block_idx;
1436     int rf_idx;
1437     for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
1438          ++square_block_idx) {
1439       BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx);
1440       build_motion_field(cpi, frame_idx, ref_frame, square_bsize);
1441     }
1442     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
1443       int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
1444       if (ref_frame_idx != -1) {
1445         MotionField *motion_field = vp9_motion_field_info_get_motion_field(
1446             &cpi->motion_field_info, frame_idx, rf_idx, bsize);
1447         predict_mv_mode_arr(cpi, x, gf_picture, motion_field, frame_idx,
1448                             tpl_frame, rf_idx, bsize);
1449       }
1450     }
1451   }
1452 #endif  // CONFIG_NON_GREEDY_MV
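  // Main dispenser loop: estimate the best prediction mode for each block,
  // record the per-block stats, and propagate the dependency costs onto the
  // reference frames' TPL stats.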
1453 
1454   for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
1455     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
1456       int64_t recon_error = 0;
1457       int64_t rate_cost = 0;
1458       int64_t sse = 0;
1459       // Ref frame index in the ref frame buffer.
1460       int ref_frame_idx = -1;
1461       mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
1462                       src_diff, coeff, qcoeff, dqcoeff, mi_row, mi_col, bsize,
1463                       tx_size, ref_frame, predictor, &recon_error, &rate_cost,
1464                       &sse, &ref_frame_idx);
1465       // Motion flow dependency dispenser.
1466       tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
1467                       tpl_frame->stride);
1468 
1469       tpl_store_before_propagation(
1470           tpl_frame_stats_before_propagation->block_stats_list,
1471           tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize, tpl_frame->stride,
1472           recon_error, rate_cost, ref_frame_idx, tpl_frame->mi_rows,
1473           tpl_frame->mi_cols);
1474 
1475       tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col,
1476                        bsize);
1477     }
1478   }
1479 }
1480 
1481 static void trim_tpl_stats(struct vpx_internal_error_info *error_info,
1482                            VpxTplGopStats *tpl_gop_stats, int extra_frames) {
1483   int i;
1484   VpxTplFrameStats *new_frame_stats;
1485   const int new_size = tpl_gop_stats->size - extra_frames;
1486   if (tpl_gop_stats->size <= extra_frames)
1487     vpx_internal_error(
1488         error_info, VPX_CODEC_ERROR,
1489         "The number of frames in VpxTplGopStats is fewer than expected.");
1490   CHECK_MEM_ERROR(error_info, new_frame_stats,
1491                   vpx_calloc(new_size, sizeof(*new_frame_stats)));
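  // Deep-copy the surviving frames: each VpxTplFrameStats owns its own
  // block_stats_list, so the per-frame lists must be duplicated too.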
1492   for (i = 0; i < new_size; i++) {
1493     VpxTplFrameStats *frame_stats = &tpl_gop_stats->frame_stats_list[i];
1494     const int num_blocks = frame_stats->num_blocks;
1495     new_frame_stats[i].num_blocks = num_blocks;
1496     new_frame_stats[i].frame_width = frame_stats->frame_width;
1497     new_frame_stats[i].frame_height = frame_stats->frame_height;
1499     CHECK_MEM_ERROR(
1500         error_info, new_frame_stats[i].block_stats_list,
1501         vpx_calloc(num_blocks, sizeof(*new_frame_stats[i].block_stats_list)));
1502     memcpy(new_frame_stats[i].block_stats_list, frame_stats->block_stats_list,
1503            num_blocks * sizeof(*new_frame_stats[i].block_stats_list));
1504   }
1505   free_tpl_frame_stats_list(tpl_gop_stats);
1506   tpl_gop_stats->size = new_size;
1507   tpl_gop_stats->frame_stats_list = new_frame_stats;
1508 }
1509 
1510 #if CONFIG_NON_GREEDY_MV
1511 #define DUMP_TPL_STATS 0
1512 #if DUMP_TPL_STATS
1513 static void dump_buf(uint8_t *buf, int stride, int row, int col, int h, int w) {
1514   int i, j;
1515   printf("%d %d\n", h, w);
1516   for (i = 0; i < h; ++i) {
1517     for (j = 0; j < w; ++j) {
1518       printf("%d ", buf[(row + i) * stride + col + j]);
1519     }
1520   }
1521   printf("\n");
1522 }
1523 
1524 static void dump_frame_buf(const YV12_BUFFER_CONFIG *frame_buf) {
1525   dump_buf(frame_buf->y_buffer, frame_buf->y_stride, 0, 0, frame_buf->y_height,
1526            frame_buf->y_width);
1527   dump_buf(frame_buf->u_buffer, frame_buf->uv_stride, 0, 0,
1528            frame_buf->uv_height, frame_buf->uv_width);
1529   dump_buf(frame_buf->v_buffer, frame_buf->uv_stride, 0, 0,
1530            frame_buf->uv_height, frame_buf->uv_width);
1531 }
1532 
1533 static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
1534                            const GF_GROUP *gf_group,
1535                            const GF_PICTURE *gf_picture, BLOCK_SIZE bsize) {
1536   int frame_idx;
1537   const VP9_COMMON *cm = &cpi->common;
1538   int rf_idx;
1539   for (frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) {
1540     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
1541       const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
1542       int mi_row, mi_col;
1543       int ref_frame_idx;
1544       const int mi_height = num_8x8_blocks_high_lookup[bsize];
1545       const int mi_width = num_8x8_blocks_wide_lookup[bsize];
1546       ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
1547       if (ref_frame_idx != -1) {
1548         YV12_BUFFER_CONFIG *ref_frame_buf = gf_picture[ref_frame_idx].frame;
1549         const int gf_frame_offset = gf_group->frame_gop_index[frame_idx];
1550         const int ref_gf_frame_offset =
1551             gf_group->frame_gop_index[ref_frame_idx];
1552         printf("=\n");
1553         printf(
1554             "frame_idx %d mi_rows %d mi_cols %d bsize %d ref_frame_idx %d "
1555             "rf_idx %d gf_frame_offset %d ref_gf_frame_offset %d\n",
1556             frame_idx, cm->mi_rows, cm->mi_cols, mi_width * MI_SIZE,
1557             ref_frame_idx, rf_idx, gf_frame_offset, ref_gf_frame_offset);
1558         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
1559           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
1560             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
1561               int_mv mv = vp9_motion_field_info_get_mv(&cpi->motion_field_info,
1562                                                        frame_idx, rf_idx, bsize,
1563                                                        mi_row, mi_col);
1564               printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row,
1565                      mv.as_mv.col);
1566             }
1567           }
1568         }
1569         for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
1570           for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
1571             if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
1572               const TplDepStats *tpl_ptr =
1573                   &tpl_frame
1574                        ->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
1575               printf("%f ", tpl_ptr->feature_score);
1576             }
1577           }
1578         }
1579         printf("\n");
1580 
1581         for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
1582           for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
1583             const int mv_mode =
1584                 tpl_frame
1585                     ->mv_mode_arr[rf_idx][mi_row * tpl_frame->stride + mi_col];
1586             printf("%d ", mv_mode);
1587           }
1588         }
1589         printf("\n");
1590 
1591         dump_frame_buf(gf_picture[frame_idx].frame);
1592         dump_frame_buf(ref_frame_buf);
1593       }
1594     }
1595   }
1596 }
1597 #endif  // DUMP_TPL_STATS
1598 #endif  // CONFIG_NON_GREEDY_MV
1599 
1600 void vp9_init_tpl_buffer(VP9_COMP *cpi) {
1601   VP9_COMMON *cm = &cpi->common;
1602   int frame;
1603 
1604   const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
1605   const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
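  // mi_cols_aligned_to_sb() simply rounds up to a whole superblock, so it
  // is reused here for the row count as well.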
1606 #if CONFIG_NON_GREEDY_MV
1607   int rf_idx;
1608 
1609   vpx_free(cpi->select_mv_arr);
1610   CHECK_MEM_ERROR(
1611       &cm->error, cpi->select_mv_arr,
1612       vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->select_mv_arr)));
1613 #endif
1614 
1615   // TODO(jingning): Reduce the actual memory use for tpl model build up.
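  // Reuse the previous allocations when they are already large enough for
  // the current frame size; otherwise reallocate below.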
1616   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
1617     if (cpi->tpl_stats[frame].width >= mi_cols &&
1618         cpi->tpl_stats[frame].height >= mi_rows &&
1619         cpi->tpl_stats[frame].tpl_stats_ptr)
1620       continue;
1621 
1622 #if CONFIG_NON_GREEDY_MV
1623     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
1624       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
1625       CHECK_MEM_ERROR(
1626           &cm->error, cpi->tpl_stats[frame].mv_mode_arr[rf_idx],
1627           vpx_calloc(mi_rows * mi_cols * 4,
1628                      sizeof(*cpi->tpl_stats[frame].mv_mode_arr[rf_idx])));
1629       vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
1630       CHECK_MEM_ERROR(
1631           &cm->error, cpi->tpl_stats[frame].rd_diff_arr[rf_idx],
1632           vpx_calloc(mi_rows * mi_cols * 4,
1633                      sizeof(*cpi->tpl_stats[frame].rd_diff_arr[rf_idx])));
1634     }
1635 #endif
1636     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
1637     CHECK_MEM_ERROR(&cm->error, cpi->tpl_stats[frame].tpl_stats_ptr,
1638                     vpx_calloc(mi_rows * mi_cols,
1639                                sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
1640     cpi->tpl_stats[frame].is_valid = 0;
1641     cpi->tpl_stats[frame].width = mi_cols;
1642     cpi->tpl_stats[frame].height = mi_rows;
1643     cpi->tpl_stats[frame].stride = mi_cols;
1644     cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
1645     cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
1646   }
1647 
1648   for (frame = 0; frame < REF_FRAMES; ++frame) {
1649     cpi->enc_frame_buf[frame].mem_valid = 0;
1650     cpi->enc_frame_buf[frame].released = 1;
1651   }
1652 }
1653 
1654 void vp9_free_tpl_buffer(VP9_COMP *cpi) {
1655   int frame;
1656 #if CONFIG_NON_GREEDY_MV
1657   vp9_free_motion_field_info(&cpi->motion_field_info);
1658   vpx_free(cpi->select_mv_arr);
1659 #endif
1660   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
1661 #if CONFIG_NON_GREEDY_MV
1662     int rf_idx;
1663     for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
1664       vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
1665       vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
1666     }
1667 #endif
1668     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
1669     cpi->tpl_stats[frame].is_valid = 0;
1670   }
1671   free_tpl_frame_stats_list(&cpi->tpl_gop_stats);
1672 }
1673 
1674 #if CONFIG_RATE_CTRL
1675 static void accumulate_frame_tpl_stats(VP9_COMP *cpi) {
1676   VP9_COMMON *const cm = &cpi->common;
1677   const GF_GROUP *gf_group = &cpi->twopass.gf_group;
1678   int show_frame_count = 0;
1679   int frame_idx;
1680   // Accumulate TPL stats for each frame in the current group of pictures.
1681   for (frame_idx = 1; frame_idx < gf_group->gf_group_size; ++frame_idx) {
1682     TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
1683     TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
1684     const int tpl_stride = tpl_frame->stride;
1685     int64_t intra_cost_base = 0;
1686     int64_t inter_cost_base = 0;
1687     int64_t mc_dep_cost_base = 0;
1688     int64_t mc_ref_cost_base = 0;
1689     int64_t mc_flow_base = 0;
1690     int row, col;
1691 
1692     if (!tpl_frame->is_valid) continue;
1693 
1694     for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) {
1695       for (col = 0; col < cm->mi_cols; ++col) {
1696         TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
1697         intra_cost_base += this_stats->intra_cost;
1698         inter_cost_base += this_stats->inter_cost;
1699         mc_dep_cost_base += this_stats->mc_dep_cost;
1700         mc_ref_cost_base += this_stats->mc_ref_cost;
1701         mc_flow_base += this_stats->mc_flow;
1702       }
1703     }
1704 
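    // Store the per-frame totals; the index advances only for frames whose
    // TPL stats are valid.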
1705     cpi->tpl_stats_info[show_frame_count].intra_cost = intra_cost_base;
1706     cpi->tpl_stats_info[show_frame_count].inter_cost = inter_cost_base;
1707     cpi->tpl_stats_info[show_frame_count].mc_dep_cost = mc_dep_cost_base;
1708     cpi->tpl_stats_info[show_frame_count].mc_ref_cost = mc_ref_cost_base;
1709     cpi->tpl_stats_info[show_frame_count].mc_flow = mc_flow_base;
1710 
1711     ++show_frame_count;
1712   }
1713 }
1714 #endif  // CONFIG_RATE_CTRL
1715 
1716 void vp9_estimate_tpl_qp_gop(VP9_COMP *cpi) {
1717   int gop_length = cpi->twopass.gf_group.gf_group_size;
1718   int bottom_index, top_index;
1719   int idx;
1720   const int gf_index = cpi->twopass.gf_group.index;
1721   const int is_src_frame_alt_ref = cpi->rc.is_src_frame_alt_ref;
1722   const int refresh_frame_context = cpi->common.refresh_frame_context;
1723 
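  // For each frame in the GOP, pick the frame's base q index either from
  // the external rate controller (when it is configured to make QP
  // decisions) or from the normal two-pass rate control.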
1724   for (idx = gf_index; idx <= gop_length; ++idx) {
1725     TplDepFrame *tpl_frame = &cpi->tpl_stats[idx];
1726     int target_rate = cpi->twopass.gf_group.bit_allocation[idx];
1727     cpi->twopass.gf_group.index = idx;
1728     vp9_rc_set_frame_target(cpi, target_rate);
1729     vp9_configure_buffer_updates(cpi, idx);
1730     if (cpi->ext_ratectrl.ready &&
1731         (cpi->ext_ratectrl.funcs.rc_type & VPX_RC_QP) != 0 &&
1732         cpi->ext_ratectrl.funcs.get_encodeframe_decision != NULL) {
1733       VP9_COMMON *cm = &cpi->common;
1734       vpx_codec_err_t codec_status;
1735       const GF_GROUP *gf_group = &cpi->twopass.gf_group;
1736       vpx_rc_encodeframe_decision_t encode_frame_decision;
1737       if (idx == gop_length) break;
1738       codec_status = vp9_extrc_get_encodeframe_decision(
1739           &cpi->ext_ratectrl, gf_group->index, &encode_frame_decision);
1740       if (codec_status != VPX_CODEC_OK) {
1741         vpx_internal_error(&cm->error, codec_status,
1742                            "vp9_extrc_get_encodeframe_decision() failed");
1743       }
1744       tpl_frame->base_qindex = encode_frame_decision.q_index;
1745     } else {
1746       tpl_frame->base_qindex = vp9_rc_pick_q_and_bounds_two_pass(
1747           cpi, &bottom_index, &top_index, idx);
1748       tpl_frame->base_qindex = VPXMAX(tpl_frame->base_qindex, 1);
1749     }
1750   }
1751   // Restore the GF group index and the frame-update state saved above.
1752   cpi->twopass.gf_group.index = gf_index;
1753   cpi->rc.is_src_frame_alt_ref = is_src_frame_alt_ref;
1754   cpi->common.refresh_frame_context = refresh_frame_context;
1755   vp9_configure_buffer_updates(cpi, gf_index);
1756 }
1757 
1758 void vp9_setup_tpl_stats(VP9_COMP *cpi) {
1759   GF_PICTURE gf_picture_buf[MAX_ARF_GOP_SIZE + REFS_PER_FRAME];
1760   GF_PICTURE *gf_picture = &gf_picture_buf[REFS_PER_FRAME];
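  // gf_picture points REFS_PER_FRAME entries into gf_picture_buf, so the
  // negative reference indices used in this file still land inside the
  // buffer.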
1761   const GF_GROUP *gf_group = &cpi->twopass.gf_group;
1762   int tpl_group_frames = 0;
1763   int frame_idx;
1764   int extended_frame_count;
1765   cpi->tpl_bsize = BLOCK_32X32;
1766 
1767   memset(gf_picture_buf, 0, sizeof(gf_picture_buf));
1768   extended_frame_count =
1769       init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames);
1770 
1771   init_tpl_stats(cpi);
1772 
1773   init_tpl_stats_before_propagation(&cpi->common.error, &cpi->tpl_gop_stats,
1774                                     cpi->tpl_stats, tpl_group_frames,
1775                                     cpi->common.width, cpi->common.height);
1776 
1777   // Backward propagation from frame tpl_group_frames - 1 down to 1, so dependency costs flow toward the earlier reference frames.
1778   for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) {
1779     if (gf_picture[frame_idx].update_type == USE_BUF_FRAME) continue;
1780     mc_flow_dispenser(cpi, gf_picture, frame_idx, cpi->tpl_bsize);
1781   }
1782 
1783   if (cpi->ext_ratectrl.ready &&
1784       cpi->ext_ratectrl.funcs.send_tpl_gop_stats != NULL) {
1785     // Intra search on key frame
1786     if (gf_picture[0].update_type != OVERLAY_UPDATE) {
1787       mc_flow_dispenser(cpi, gf_picture, 0, cpi->tpl_bsize);
1788     }
1789     // The TPL stats carry extra frames from the next GOP. Trim those
1790     // extra frames for Q mode.
1791     trim_tpl_stats(&cpi->common.error, &cpi->tpl_gop_stats,
1792                    extended_frame_count);
1793     const vpx_codec_err_t codec_status =
1794         vp9_extrc_send_tpl_stats(&cpi->ext_ratectrl, &cpi->tpl_gop_stats);
1795     if (codec_status != VPX_CODEC_OK) {
1796       vpx_internal_error(&cpi->common.error, codec_status,
1797                          "vp9_extrc_send_tpl_stats() failed");
1798     }
1799   }
1800 
1801 #if CONFIG_NON_GREEDY_MV
1802   cpi->tpl_ready = 1;
1803 #if DUMP_TPL_STATS
1804   dump_tpl_stats(cpi, tpl_group_frames, gf_group, gf_picture, cpi->tpl_bsize);
1805 #endif  // DUMP_TPL_STATS
1806 #endif  // CONFIG_NON_GREEDY_MV
1807 
1808 #if CONFIG_RATE_CTRL
1809   if (cpi->oxcf.use_simple_encode_api) {
1810     accumulate_frame_tpl_stats(cpi);
1811   }
1812 #endif  // CONFIG_RATE_CTRL
1813 }
1814