/*
 * Copyright (c) 2023 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <math.h>

#include "./vpx_dsp_rtcd.h"
#if CONFIG_NON_GREEDY_MV
#include "vp9/common/vp9_mvref_common.h"
#endif
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_ext_ratectrl.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_tpl_model.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_ext_ratectrl.h"

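// Builds the GF_PICTURE list for one GOP when an external rate controller
// (VPX_RC_GOP) has decided the GOP structure. Frame and reference wiring is
// taken from gf_group; non-positive ref_frame indices point at frames still
// sitting in the reference buffer, and -REFS_PER_FRAME marks an unavailable
// slot. Returns the number of frames appended beyond the nominal GOP size
// (the overlay frame, if added, plus the look-ahead extension frames).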
static int init_gop_frames_rc(VP9_COMP *cpi, GF_PICTURE *gf_picture,
                              const GF_GROUP *gf_group,
                              int *tpl_group_frames) {
  VP9_COMMON *cm = &cpi->common;
  int frame_idx = 0;
  int i;
  int extend_frame_count = 0;
  int pframe_qindex = cpi->tpl_stats[2].base_qindex;
  int frame_gop_offset = 0;

  int added_overlay = 0;

  RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
  int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];

  memset(recon_frame_index, -1, sizeof(recon_frame_index));

  for (i = 0; i < FRAME_BUFFERS; ++i) {
    if (frame_bufs[i].ref_count == 0) {
      alloc_frame_mvs(cm, i);
      if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
                                   cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                                   cm->use_highbitdepth,
#endif
                                   VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
                                   NULL, NULL, NULL))
        vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                           "Failed to allocate frame buffer");

      recon_frame_index[frame_idx] = i;
      ++frame_idx;

      if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
    }
  }

  for (i = 0; i < REFS_PER_FRAME + 1; ++i) {
    assert(recon_frame_index[i] >= 0);
    cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
  }

  *tpl_group_frames = 0;

  int ref_table[3];

  if (gf_group->index == 1 && gf_group->update_type[1] == ARF_UPDATE) {
    if (gf_group->update_type[0] == KF_UPDATE) {
      // This is the only frame in ref buffer. We need it to be on
      // gf_picture[0].
      for (i = 0; i < 3; ++i) ref_table[i] = -REFS_PER_FRAME;

      gf_picture[0].frame =
          &cm->buffer_pool->frame_bufs[gf_group->update_ref_idx[0]].buf;
      ref_table[gf_group->update_ref_idx[0]] = 0;

      for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -REFS_PER_FRAME;
      gf_picture[0].update_type = gf_group->update_type[0];
    } else {
      for (i = 0; i < REFS_PER_FRAME; i++) {
        if (cm->ref_frame_map[i] != -1) {
          gf_picture[-i].frame =
              &cm->buffer_pool->frame_bufs[cm->ref_frame_map[i]].buf;
          ref_table[i] = -i;
        } else {
          ref_table[i] = -REFS_PER_FRAME;
        }
      }
      for (i = 0; i < 3; ++i) {
        gf_picture[0].ref_frame[i] = ref_table[i];
      }
    }
    ++*tpl_group_frames;

    // Initialize base layer ARF frame
    gf_picture[1].frame = cpi->Source;
    for (i = 0; i < 3; ++i) gf_picture[1].ref_frame[i] = ref_table[i];
    gf_picture[1].update_type = gf_group->update_type[1];
    ref_table[gf_group->update_ref_idx[1]] = 1;

    ++*tpl_group_frames;
  } else {
    assert(gf_group->index == 0);
    if (gf_group->update_type[0] == KF_UPDATE) {
      // This is the only frame in ref buffer. We need it to be on
      // gf_picture[0].
      gf_picture[0].frame = cpi->Source;
      for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -REFS_PER_FRAME;
      gf_picture[0].update_type = gf_group->update_type[0];

      for (i = 0; i < 3; ++i) ref_table[i] = -REFS_PER_FRAME;
      ref_table[gf_group->update_ref_idx[0]] = 0;
    } else {
      // Initialize ref table
      for (i = 0; i < REFS_PER_FRAME; i++) {
        if (cm->ref_frame_map[i] != -1) {
          gf_picture[-i].frame =
              &cm->buffer_pool->frame_bufs[cm->ref_frame_map[i]].buf;
          ref_table[i] = -i;
        } else {
          ref_table[i] = -REFS_PER_FRAME;
        }
      }
      for (i = 0; i < 3; ++i) {
        gf_picture[0].ref_frame[i] = ref_table[i];
      }
      gf_picture[0].update_type = gf_group->update_type[0];
      if (gf_group->update_type[0] != OVERLAY_UPDATE &&
          gf_group->update_ref_idx[0] != -1) {
        ref_table[gf_group->update_ref_idx[0]] = 0;
      }
    }
    ++*tpl_group_frames;
  }

  int has_arf =
      gf_group->gf_group_size > 1 && gf_group->update_type[1] == ARF_UPDATE &&
      gf_group->update_type[gf_group->gf_group_size] == OVERLAY_UPDATE;

  // Initialize P frames
  for (frame_idx = *tpl_group_frames; frame_idx < MAX_ARF_GOP_SIZE;
       ++frame_idx) {
    if (frame_idx >= gf_group->gf_group_size && !has_arf) break;
    struct lookahead_entry *buf;
    frame_gop_offset = gf_group->frame_gop_index[frame_idx];
    buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    if (buf == NULL) break;

    gf_picture[frame_idx].frame = &buf->img;
    for (i = 0; i < 3; ++i) {
      gf_picture[frame_idx].ref_frame[i] = ref_table[i];
    }

    if (gf_group->update_type[frame_idx] != OVERLAY_UPDATE &&
        gf_group->update_ref_idx[frame_idx] != -1) {
      ref_table[gf_group->update_ref_idx[frame_idx]] = frame_idx;
    }

    gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx];

    ++*tpl_group_frames;

    // The length of group of pictures is baseline_gf_interval, plus the
    // beginning golden frame from last GOP, plus the last overlay frame in
    // the same GOP.
    if (frame_idx == gf_group->gf_group_size) {
      added_overlay = 1;

      ++frame_idx;
      ++frame_gop_offset;
      break;
    }

    if (frame_idx == gf_group->gf_group_size - 1 &&
        gf_group->update_type[gf_group->gf_group_size] != OVERLAY_UPDATE) {
      ++frame_idx;
      ++frame_gop_offset;
      break;
    }
  }

  int lst_index = frame_idx - 1;
  // Extend two frames outside the current gf group.
  for (; has_arf && frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2;
       ++frame_idx) {
    struct lookahead_entry *buf =
        vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    if (buf == NULL) break;

    cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;

    gf_picture[frame_idx].frame = &buf->img;
    gf_picture[frame_idx].ref_frame[0] = gf_picture[lst_index].ref_frame[0];
    gf_picture[frame_idx].ref_frame[1] = gf_picture[lst_index].ref_frame[1];
    gf_picture[frame_idx].ref_frame[2] = gf_picture[lst_index].ref_frame[2];

    if (gf_picture[frame_idx].ref_frame[0] >
            gf_picture[frame_idx].ref_frame[1] &&
        gf_picture[frame_idx].ref_frame[0] >
            gf_picture[frame_idx].ref_frame[2]) {
      gf_picture[frame_idx].ref_frame[0] = lst_index;
    } else if (gf_picture[frame_idx].ref_frame[1] >
                   gf_picture[frame_idx].ref_frame[0] &&
               gf_picture[frame_idx].ref_frame[1] >
                   gf_picture[frame_idx].ref_frame[2]) {
      gf_picture[frame_idx].ref_frame[1] = lst_index;
    } else {
      gf_picture[frame_idx].ref_frame[2] = lst_index;
    }

    gf_picture[frame_idx].update_type = LF_UPDATE;
    lst_index = frame_idx;
    ++*tpl_group_frames;
    ++extend_frame_count;
    ++frame_gop_offset;
  }

  return extend_frame_count + added_overlay;
}

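// Sets up the GF_PICTURE list for the current GOP using the encoder's own
// GOP decisions, tracking the golden/last/altref slots as the update types
// are walked. Delegates to init_gop_frames_rc() when an external rate
// controller owns the GOP structure. Returns the number of look-ahead frames
// appended past the end of the group.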
static int init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
                           const GF_GROUP *gf_group, int *tpl_group_frames) {
  if (cpi->ext_ratectrl.ready &&
      (cpi->ext_ratectrl.funcs.rc_type & VPX_RC_GOP) != 0) {
    return init_gop_frames_rc(cpi, gf_picture, gf_group, tpl_group_frames);
  }

  VP9_COMMON *cm = &cpi->common;
  int frame_idx = 0;
  int i;
  int gld_index = -1;
  int alt_index = -2;
  int lst_index = -1;
  int arf_index_stack[MAX_ARF_LAYERS];
  int arf_stack_size = 0;
  int extend_frame_count = 0;
  int pframe_qindex = cpi->tpl_stats[2].base_qindex;
  int frame_gop_offset = 0;

  RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
  int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];

  memset(recon_frame_index, -1, sizeof(recon_frame_index));
  stack_init(arf_index_stack, MAX_ARF_LAYERS);

  for (i = 0; i < FRAME_BUFFERS; ++i) {
    if (frame_bufs[i].ref_count == 0) {
      alloc_frame_mvs(cm, i);
      if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
                                   cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                                   cm->use_highbitdepth,
#endif
                                   VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
                                   NULL, NULL, NULL))
        vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                           "Failed to allocate frame buffer");

      recon_frame_index[frame_idx] = i;
      ++frame_idx;

      if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
    }
  }

  for (i = 0; i < REFS_PER_FRAME + 1; ++i) {
    assert(recon_frame_index[i] >= 0);
    cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
  }

  *tpl_group_frames = 0;

  // Initialize Golden reference frame.
  gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
  for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -REFS_PER_FRAME;
  gf_picture[0].update_type = gf_group->update_type[0];
  gld_index = 0;
  ++*tpl_group_frames;

  gf_picture[-1].frame = get_ref_frame_buffer(cpi, LAST_FRAME);
  gf_picture[-2].frame = get_ref_frame_buffer(cpi, ALTREF_FRAME);

  // Initialize base layer ARF frame
  gf_picture[1].frame = cpi->Source;
  gf_picture[1].ref_frame[0] = gld_index;
  gf_picture[1].ref_frame[1] = lst_index;
  gf_picture[1].ref_frame[2] = alt_index;
  gf_picture[1].update_type = gf_group->update_type[1];
  alt_index = 1;
  ++*tpl_group_frames;

  // Initialize P frames
  for (frame_idx = 2; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
    struct lookahead_entry *buf;
    frame_gop_offset = gf_group->frame_gop_index[frame_idx];
    buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    if (buf == NULL) break;

    gf_picture[frame_idx].frame = &buf->img;
    gf_picture[frame_idx].ref_frame[0] = gld_index;
    gf_picture[frame_idx].ref_frame[1] = lst_index;
    gf_picture[frame_idx].ref_frame[2] = alt_index;
    gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx];

    switch (gf_group->update_type[frame_idx]) {
      case ARF_UPDATE:
        stack_push(arf_index_stack, alt_index, arf_stack_size);
        ++arf_stack_size;
        alt_index = frame_idx;
        break;
      case LF_UPDATE: lst_index = frame_idx; break;
      case OVERLAY_UPDATE:
        gld_index = frame_idx;
        alt_index = stack_pop(arf_index_stack, arf_stack_size);
        --arf_stack_size;
        break;
      case USE_BUF_FRAME:
        lst_index = alt_index;
        alt_index = stack_pop(arf_index_stack, arf_stack_size);
        --arf_stack_size;
        break;
      default: break;
    }

    ++*tpl_group_frames;

    // The length of group of pictures is baseline_gf_interval, plus the
    // beginning golden frame from last GOP, plus the last overlay frame in
    // the same GOP.
    if (frame_idx == gf_group->gf_group_size) break;
  }

  alt_index = -1;
  ++frame_idx;
  ++frame_gop_offset;

  // Extend two frames outside the current gf group.
  for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
    struct lookahead_entry *buf =
        vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    if (buf == NULL) break;

    cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;

    gf_picture[frame_idx].frame = &buf->img;
    gf_picture[frame_idx].ref_frame[0] = gld_index;
    gf_picture[frame_idx].ref_frame[1] = lst_index;
    gf_picture[frame_idx].ref_frame[2] = alt_index;
    gf_picture[frame_idx].update_type = LF_UPDATE;
    lst_index = frame_idx;
    ++*tpl_group_frames;
    ++extend_frame_count;
    ++frame_gop_offset;
  }

  return extend_frame_count;
}

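// Zeroes the per-block TPL statistics of every frame slot in the GOP and
// marks each slot invalid until mc_flow_dispenser() fills it in.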
static void init_tpl_stats(VP9_COMP *cpi) {
  int frame_idx;
  for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
    TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
    memset(tpl_frame->tpl_stats_ptr, 0,
           tpl_frame->height * tpl_frame->width *
               sizeof(*tpl_frame->tpl_stats_ptr));
    tpl_frame->is_valid = 0;
  }
}

static void free_tpl_frame_stats_list(VpxTplGopStats *tpl_gop_stats) {
  int frame_idx;
  for (frame_idx = 0; frame_idx < tpl_gop_stats->size; ++frame_idx) {
    vpx_free(tpl_gop_stats->frame_stats_list[frame_idx].block_stats_list);
  }
  vpx_free(tpl_gop_stats->frame_stats_list);
}

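// (Re)allocates the VpxTplGopStats lists that snapshot per-block statistics
// before the backward propagation pass: one VpxTplFrameStats per TPL frame,
// with one block entry per 8x8 MI unit.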
static void init_tpl_stats_before_propagation(
    struct vpx_internal_error_info *error_info, VpxTplGopStats *tpl_gop_stats,
    TplDepFrame *tpl_stats, int tpl_gop_frames, int frame_width,
    int frame_height) {
  int frame_idx;
  free_tpl_frame_stats_list(tpl_gop_stats);
  CHECK_MEM_ERROR(
      error_info, tpl_gop_stats->frame_stats_list,
      vpx_calloc(tpl_gop_frames, sizeof(*tpl_gop_stats->frame_stats_list)));
  tpl_gop_stats->size = tpl_gop_frames;
  for (frame_idx = 0; frame_idx < tpl_gop_frames; ++frame_idx) {
    const int mi_rows = tpl_stats[frame_idx].mi_rows;
    const int mi_cols = tpl_stats[frame_idx].mi_cols;
    CHECK_MEM_ERROR(
        error_info, tpl_gop_stats->frame_stats_list[frame_idx].block_stats_list,
        vpx_calloc(
            mi_rows * mi_cols,
            sizeof(
                *tpl_gop_stats->frame_stats_list[frame_idx].block_stats_list)));
    tpl_gop_stats->frame_stats_list[frame_idx].num_blocks = mi_rows * mi_cols;
    tpl_gop_stats->frame_stats_list[frame_idx].frame_width = frame_width;
    tpl_gop_stats->frame_stats_list[frame_idx].frame_height = frame_height;
  }
}

#if CONFIG_NON_GREEDY_MV
static uint32_t full_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
                                         MotionField *motion_field,
                                         int frame_idx, uint8_t *cur_frame_buf,
                                         uint8_t *ref_frame_buf, int stride,
                                         BLOCK_SIZE bsize, int mi_row,
                                         int mi_col, MV *mv) {
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  int step_param;
  uint32_t bestsme = UINT_MAX;
  const MvLimits tmp_mv_limits = x->mv_limits;
  // lambda is used to adjust the importance of motion vector consistency.
  // TODO(angiebird): Figure out lambda's proper value.
  const int lambda = cpi->tpl_stats[frame_idx].lambda;
  int_mv nb_full_mvs[NB_MVS_NUM];
  int nb_full_mv_num;

  MV best_ref_mv1 = { 0, 0 };
  MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */

  best_ref_mv1_full.col = best_ref_mv1.col >> 3;
  best_ref_mv1_full.row = best_ref_mv1.row >> 3;

  // Setup frame pointers
  x->plane[0].src.buf = cur_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = ref_frame_buf;
  xd->plane[0].pre[0].stride = stride;

  step_param = mv_sf->reduce_first_step_size;
  step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);

  vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);

  nb_full_mv_num =
      vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs);
  vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param,
                             lambda, 1, nb_full_mvs, nb_full_mv_num, mv);

  /* restore UMV window */
  x->mv_limits = tmp_mv_limits;

  return bestsme;
}

static uint32_t sub_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
                                        uint8_t *cur_frame_buf,
                                        uint8_t *ref_frame_buf, int stride,
                                        BLOCK_SIZE bsize, MV *mv) {
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  uint32_t bestsme = UINT_MAX;
  uint32_t distortion;
  uint32_t sse;
  int cost_list[5];

  MV best_ref_mv1 = { 0, 0 };

  // Setup frame pointers
  x->plane[0].src.buf = cur_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = ref_frame_buf;
  xd->plane[0].pre[0].stride = stride;

  // TODO(yunqing): may use higher tap interp filter than 2 taps.
  // Ignore mv costing by sending NULL pointer instead of cost array
  bestsme = cpi->find_fractional_mv_step(
      x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
      &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
      cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
      USE_2_TAPS);

  return bestsme;
}

#else  // CONFIG_NON_GREEDY_MV
static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
                                              uint8_t *cur_frame_buf,
                                              uint8_t *ref_frame_buf,
                                              int stride, BLOCK_SIZE bsize,
                                              MV *mv) {
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  const SEARCH_METHODS search_method = NSTEP;
  int step_param;
  int sadpb = x->sadperbit16;
  uint32_t bestsme = UINT_MAX;
  uint32_t distortion;
  uint32_t sse;
  int cost_list[5];
  const MvLimits tmp_mv_limits = x->mv_limits;

  MV best_ref_mv1 = { 0, 0 };
  MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */

  best_ref_mv1_full.col = best_ref_mv1.col >> 3;
  best_ref_mv1_full.row = best_ref_mv1.row >> 3;

  // Setup frame pointers
  x->plane[0].src.buf = cur_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = ref_frame_buf;
  xd->plane[0].pre[0].stride = stride;

  step_param = mv_sf->reduce_first_step_size;
  step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);

  vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);

  vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param,
                        search_method, sadpb, cond_cost_list(cpi, cost_list),
                        &best_ref_mv1, mv, 0, 0);

  /* restore UMV window */
  x->mv_limits = tmp_mv_limits;

  // TODO(yunqing): may use higher tap interp filter than 2 taps.
  // Ignore mv costing by sending NULL pointer instead of cost array
  bestsme = cpi->find_fractional_mv_step(
      x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
      &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
      cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
      USE_2_TAPS);

  return bestsme;
}
#endif

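// Returns the pixel overlap between the motion-compensated reference block
// and one of the four grid-aligned blocks it may straddle. `block` selects
// the quadrant: 0 = top-left, 1 = top-right, 2 = bottom-left,
// 3 = bottom-right.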
static int get_overlap_area(int grid_pos_row, int grid_pos_col,
                            int ref_pos_row, int ref_pos_col, int block,
                            BLOCK_SIZE bsize) {
  int width = 0, height = 0;
  int bw = 4 << b_width_log2_lookup[bsize];
  int bh = 4 << b_height_log2_lookup[bsize];

  switch (block) {
    case 0:
      width = grid_pos_col + bw - ref_pos_col;
      height = grid_pos_row + bh - ref_pos_row;
      break;
    case 1:
      width = ref_pos_col + bw - grid_pos_col;
      height = grid_pos_row + bh - ref_pos_row;
      break;
    case 2:
      width = grid_pos_col + bw - ref_pos_col;
      height = ref_pos_row + bh - grid_pos_row;
      break;
    case 3:
      width = ref_pos_col + bw - grid_pos_col;
      height = ref_pos_row + bh - grid_pos_row;
      break;
    default: assert(0);
  }

  return width * height;
}

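// Integer division that rounds toward negative infinity, so negative
// reference positions map to the correct (lower) block index.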
static int round_floor(int ref_pos, int bsize_pix) {
  int round;
  if (ref_pos < 0)
    round = -(1 + (-ref_pos - 1) / bsize_pix);
  else
    round = ref_pos / bsize_pix;

  return round;
}

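// Copies the block-level stats of the superblock at (mi_row, mi_col) into
// every 8x8 cell it covers, preserving each cell's accumulated mc_flow and
// mc_ref_cost, and refreshing mc_dep_cost = intra_cost + mc_flow.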
static void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col,
                            BLOCK_SIZE bsize, int stride) {
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col];
  int idx, idy;

  for (idy = 0; idy < mi_height; ++idy) {
    for (idx = 0; idx < mi_width; ++idx) {
      TplDepStats *tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col + idx];
      const int64_t mc_flow = tpl_ptr->mc_flow;
      const int64_t mc_ref_cost = tpl_ptr->mc_ref_cost;
      *tpl_ptr = *src_stats;
      tpl_ptr->mc_flow = mc_flow;
      tpl_ptr->mc_ref_cost = mc_ref_cost;
      tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow;
    }
  }
}

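// Snapshots the block statistics into the external VpxTplBlockStats layout
// (pixel coordinates, SATD costs reused as prediction errors, scaled
// distortion/rate, MV and reference index) before propagation modifies them.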
static void tpl_store_before_propagation(VpxTplBlockStats *tpl_block_stats,
                                         TplDepStats *tpl_stats, int mi_row,
                                         int mi_col, BLOCK_SIZE bsize,
                                         int src_stride, int64_t recon_error,
                                         int64_t rate_cost, int ref_frame_idx,
                                         int mi_rows, int mi_cols) {
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const TplDepStats *src_stats = &tpl_stats[mi_row * src_stride + mi_col];
  int idx, idy;

  for (idy = 0; idy < mi_height; ++idy) {
    for (idx = 0; idx < mi_width; ++idx) {
      if (mi_row + idy >= mi_rows || mi_col + idx >= mi_cols) continue;
      VpxTplBlockStats *tpl_block_stats_ptr =
          &tpl_block_stats[(mi_row + idy) * mi_cols + mi_col + idx];
      tpl_block_stats_ptr->row = mi_row * 8 + idy * 8;
      tpl_block_stats_ptr->col = mi_col * 8 + idx * 8;
      tpl_block_stats_ptr->inter_cost = src_stats->inter_cost;
      tpl_block_stats_ptr->intra_cost = src_stats->intra_cost;
      // inter/intra_cost here is calculated with SATD which should be close
      // enough to be used as inter/intra_pred_error
      tpl_block_stats_ptr->inter_pred_err = src_stats->inter_cost;
      tpl_block_stats_ptr->intra_pred_err = src_stats->intra_cost;
      tpl_block_stats_ptr->srcrf_dist = recon_error << TPL_DEP_COST_SCALE_LOG2;
      tpl_block_stats_ptr->srcrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
      tpl_block_stats_ptr->mv_r = src_stats->mv.as_mv.row;
      tpl_block_stats_ptr->mv_c = src_stats->mv.as_mv.col;
      tpl_block_stats_ptr->ref_frame_index = ref_frame_idx;
    }
  }
}

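// Back-propagates this block's dependency cost into the reference frame named
// by ref_frame_index: the motion-compensated block may straddle up to four
// grid-aligned blocks, and each one receives a share of mc_flow and
// mc_ref_cost proportional to the pixel overlap.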
static void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
                               int mi_row, int mi_col, const BLOCK_SIZE bsize) {
  if (tpl_stats->ref_frame_index < 0) return;

  TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
  TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr;
  MV mv = tpl_stats->mv.as_mv;
  int mv_row = mv.row >> 3;
  int mv_col = mv.col >> 3;

  int ref_pos_row = mi_row * MI_SIZE + mv_row;
  int ref_pos_col = mi_col * MI_SIZE + mv_col;

  const int bw = 4 << b_width_log2_lookup[bsize];
  const int bh = 4 << b_height_log2_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int pix_num = bw * bh;

  // top-left on grid block location in pixel
  int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
  int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
  int block;

  for (block = 0; block < 4; ++block) {
    int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
    int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);

    if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE &&
        grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
      int overlap_area = get_overlap_area(
          grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
      int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
      int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;

      int64_t mc_flow = tpl_stats->mc_dep_cost -
                        (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
                            tpl_stats->intra_cost;

      int idx, idy;

      for (idy = 0; idy < mi_height; ++idy) {
        for (idx = 0; idx < mi_width; ++idx) {
          TplDepStats *des_stats =
              &ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride +
                         (ref_mi_col + idx)];

          des_stats->mc_flow += (mc_flow * overlap_area) / pix_num;
          des_stats->mc_ref_cost +=
              ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) /
              pix_num;
          assert(overlap_area >= 0);
        }
      }
    }
  }
}

static void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
                             int mi_row, int mi_col, const BLOCK_SIZE bsize) {
  int idx, idy;
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];

  for (idy = 0; idy < mi_height; ++idy) {
    for (idx = 0; idx < mi_width; ++idx) {
      TplDepStats *tpl_ptr =
          &tpl_stats[(mi_row + idy) * tpl_frame->stride + (mi_col + idx)];
      tpl_model_update_b(tpl_frame, tpl_ptr, mi_row + idy, mi_col + idx,
                         BLOCK_8X8);
    }
  }
}

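// Forward-quantizes the transform coefficients and measures the resulting
// reconstruction error and SSE (both clamped to at least 1 and scaled per
// transform size), returning the end-of-block position through eob.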
static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
                               tran_low_t *qcoeff, tran_low_t *dqcoeff,
                               TX_SIZE tx_size, int64_t *recon_error,
                               int64_t *sse, uint16_t *eob) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size];
  int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
  const int shift = tx_size == TX_32X32 ? 0 : 2;

  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vp9_highbd_quantize_fp_32x32(coeff, pix_num, p, qcoeff, dqcoeff,
                                 pd->dequant, eob, scan_order);
  } else {
    vp9_quantize_fp_32x32(coeff, pix_num, p, qcoeff, dqcoeff, pd->dequant, eob,
                          scan_order);
  }
#else
  vp9_quantize_fp_32x32(coeff, pix_num, p, qcoeff, dqcoeff, pd->dequant, eob,
                        scan_order);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
  *recon_error = VPXMAX(*recon_error, 1);

  *sse = (*sse) >> shift;
  *sse = VPXMAX(*sse, 1);
}

#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
                             TX_SIZE tx_size) {
  // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms.
  switch (tx_size) {
    case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break;
    case TX_16X16: vpx_highbd_hadamard_16x16(src_diff, bw, coeff); break;
    case TX_32X32: vpx_highbd_hadamard_32x32(src_diff, bw, coeff); break;
    default: assert(0);
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

void vp9_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
                      TX_SIZE tx_size) {
  switch (tx_size) {
    case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break;
    case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break;
    case TX_32X32: vpx_hadamard_32x32(src_diff, bw, coeff); break;
    default: assert(0);
  }
}

static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
                          int mi_col) {
  x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
  x->mv_limits.row_max =
      (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND);
  x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
  x->mv_limits.col_max =
      ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND);
}

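// Crude rate estimate for a quantized block: each coefficient up to eob
// contributes roughly log2(|level| + 1) bits plus sign/EOB overhead. The
// result is scaled by VP9_PROB_COST_SHIFT to match the bit-cost domain used
// elsewhere in the encoder.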
static int rate_estimator(const tran_low_t *qcoeff, int eob, TX_SIZE tx_size) {
  const ScanOrder *const scan_order = &vp9_scan_orders[tx_size][DCT_DCT];
  int rate_cost = 1;
  int idx;
  assert((1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]) >= eob);
  for (idx = 0; idx < eob; ++idx) {
    unsigned int abs_level = abs(qcoeff[scan_order->scan[idx]]);
    rate_cost += get_msb(abs_level + 1) + 1 + (abs_level > 0);
  }

  return (rate_cost << VP9_PROB_COST_SHIFT);
}

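// Estimates intra and inter costs for one block: searches all intra modes
// (DC_PRED..TM_PRED) and every available reference frame, keeping the best
// SATD cost of each kind. The winning MV, reference index and normalized
// costs go into tpl_stats; the quantization error, SSE and estimated rate of
// the best inter reference are returned through the out parameters.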
static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                            struct scale_factors *sf, GF_PICTURE *gf_picture,
                            int frame_idx, TplDepFrame *tpl_frame,
                            int16_t *src_diff, tran_low_t *coeff,
                            tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row,
                            int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
                            YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor,
                            int64_t *recon_error, int64_t *rate_cost,
                            int64_t *sse, int *ref_frame_idx) {
  VP9_COMMON *cm = &cpi->common;
  ThreadData *td = &cpi->td;

  const int bw = 4 << b_width_log2_lookup[bsize];
  const int bh = 4 << b_height_log2_lookup[bsize];
  const int pix_num = bw * bh;
  int best_rf_idx = -1;
  int_mv best_mv;
  int64_t best_inter_cost = INT64_MAX;
  int64_t inter_cost;
  int rf_idx;
  const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP];

  int64_t best_intra_cost = INT64_MAX;
  int64_t intra_cost;
  PREDICTION_MODE mode;
  int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
  MODE_INFO mi_above, mi_left;
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  TplDepStats *tpl_stats =
      &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];

  xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
  xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
  xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
  xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8;
  xd->above_mi = (mi_row > 0) ? &mi_above : NULL;
  xd->left_mi = (mi_col > 0) ? &mi_left : NULL;

  // Intra prediction search
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    uint8_t *src, *dst;
    int src_stride, dst_stride;

    src = xd->cur_buf->y_buffer + mb_y_offset;
    src_stride = xd->cur_buf->y_stride;

    dst = &predictor[0];
    dst_stride = bw;

    xd->mi[0]->sb_type = bsize;
    xd->mi[0]->ref_frame[0] = INTRA_FRAME;

    vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src,
                            src_stride, dst, dst_stride, 0, 0, 0);

#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
                                dst_stride, xd->bd);
      vp9_highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      intra_cost = vpx_highbd_satd(coeff, pix_num);
    } else {
      vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
                         dst_stride);
      vp9_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      intra_cost = vpx_satd(coeff, pix_num);
    }
#else
    vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride);
    vp9_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
    intra_cost = vpx_satd(coeff, pix_num);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
  }

  // Motion compensated prediction
  best_mv.as_int = 0;

  set_mv_limits(cm, x, mi_row, mi_col);

  for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
    int_mv mv;
#if CONFIG_NON_GREEDY_MV
    MotionField *motion_field;
#endif
    if (ref_frame[rf_idx] == NULL) continue;

#if CONFIG_NON_GREEDY_MV
    (void)td;
    motion_field = vp9_motion_field_info_get_motion_field(
        &cpi->motion_field_info, frame_idx, rf_idx, bsize);
    mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
#else
    motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset,
                                  ref_frame[rf_idx]->y_buffer + mb_y_offset,
                                  xd->cur_buf->y_stride, bsize, &mv.as_mv);
#endif

#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_highbd_build_inter_predictor(
          CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset),
          ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]), bw,
          &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE,
          mi_row * MI_SIZE, xd->bd);
      vpx_highbd_subtract_block(
          bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
          xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
      vp9_highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      inter_cost = vpx_highbd_satd(coeff, pix_num);
    } else {
      vp9_build_inter_predictor(
          ref_frame[rf_idx]->y_buffer + mb_y_offset,
          ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh,
          0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
      vpx_subtract_block(bh, bw, src_diff, bw,
                         xd->cur_buf->y_buffer + mb_y_offset,
                         xd->cur_buf->y_stride, &predictor[0], bw);
      vp9_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      inter_cost = vpx_satd(coeff, pix_num);
    }
#else
    vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset,
                              ref_frame[rf_idx]->y_stride, &predictor[0], bw,
                              &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3,
                              mi_col * MI_SIZE, mi_row * MI_SIZE);
    vpx_subtract_block(bh, bw, src_diff, bw,
                       xd->cur_buf->y_buffer + mb_y_offset,
                       xd->cur_buf->y_stride, &predictor[0], bw);
    vp9_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
    inter_cost = vpx_satd(coeff, pix_num);
#endif

    if (inter_cost < best_inter_cost) {
      uint16_t eob = 0;
      best_rf_idx = rf_idx;
      best_inter_cost = inter_cost;
      best_mv.as_int = mv.as_int;
      // Since best_inter_cost is initialized as INT64_MAX, recon_error and
      // rate_cost will be calculated with the best reference frame.
      get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error,
                         sse, &eob);
      *rate_cost = rate_estimator(qcoeff, eob, tx_size);
    }
  }
  best_intra_cost = VPXMAX(best_intra_cost, 1);
  best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost);
  tpl_stats->inter_cost = VPXMAX(
      1, (best_inter_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
  tpl_stats->intra_cost = VPXMAX(
      1, (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
  if (best_rf_idx >= 0) {
    tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx];
  }
  tpl_stats->mv.as_int = best_mv.as_int;
  *ref_frame_idx = best_rf_idx;
}

#if CONFIG_NON_GREEDY_MV
static int get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture,
                                  int frame_idx, int rf_idx, int mi_row,
                                  int mi_col, struct buf_2d *src,
                                  struct buf_2d *pre) {
  const int mb_y_offset =
      mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
  YV12_BUFFER_CONFIG *ref_frame = NULL;
  int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
  if (ref_frame_idx != -1) {
    ref_frame = gf_picture[ref_frame_idx].frame;
    src->buf = xd->cur_buf->y_buffer + mb_y_offset;
    src->stride = xd->cur_buf->y_stride;
    pre->buf = ref_frame->y_buffer + mb_y_offset;
    pre->stride = ref_frame->y_stride;
    assert(src->stride == pre->stride);
    return 1;
  } else {
    printf("invalid ref_frame_idx");
    assert(ref_frame_idx != -1);
    return 0;
  }
}

#define kMvPreCheckLines 5
#define kMvPreCheckSize 15

#define MV_REF_POS_NUM 3
POSITION mv_ref_pos[MV_REF_POS_NUM] = {
  { -1, 0 },
  { 0, -1 },
  { -1, -1 },
};

static int_mv *get_select_mv(VP9_COMP *cpi, TplDepFrame *tpl_frame, int mi_row,
                             int mi_col) {
  return &cpi->select_mv_arr[mi_row * tpl_frame->stride + mi_col];
}

static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
                          BLOCK_SIZE bsize, int mi_row, int mi_col) {
  int i;
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  int_mv nearest_mv, near_mv, invalid_mv;
  nearest_mv.as_int = INVALID_MV;
  near_mv.as_int = INVALID_MV;
  invalid_mv.as_int = INVALID_MV;
  for (i = 0; i < MV_REF_POS_NUM; ++i) {
    int nb_row = mi_row + mv_ref_pos[i].row * mi_height;
    int nb_col = mi_col + mv_ref_pos[i].col * mi_width;
    assert(mv_ref_pos[i].row <= 0);
    assert(mv_ref_pos[i].col <= 0);
    if (nb_row >= 0 && nb_col >= 0) {
      if (nearest_mv.as_int == INVALID_MV) {
        nearest_mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
      } else {
        int_mv mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
        if (mv.as_int == nearest_mv.as_int) {
          continue;
        } else {
          near_mv = mv;
          break;
        }
      }
    }
  }
  if (nearest_mv.as_int == INVALID_MV) {
    nearest_mv.as_mv.row = 0;
    nearest_mv.as_mv.col = 0;
  }
  if (near_mv.as_int == INVALID_MV) {
    near_mv.as_mv.row = 0;
    near_mv.as_mv.col = 0;
  }
  if (mv_mode == NEAREST_MV_MODE) {
    return nearest_mv;
  }
  if (mv_mode == NEAR_MV_MODE) {
    return near_mv;
  }
  assert(0);
  return invalid_mv;
}

static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
                                  MotionField *motion_field,
                                  TplDepFrame *tpl_frame, BLOCK_SIZE bsize,
                                  int mi_row, int mi_col) {
  int_mv mv;
  switch (mv_mode) {
    case ZERO_MV_MODE:
      mv.as_mv.row = 0;
      mv.as_mv.col = 0;
      break;
    case NEW_MV_MODE:
      mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
      break;
    case NEAREST_MV_MODE:
      mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
      break;
    case NEAR_MV_MODE:
      mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
      break;
    default:
      mv.as_int = INVALID_MV;
      assert(0);
      break;
  }
  return mv;
}

static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
                          GF_PICTURE *gf_picture, MotionField *motion_field,
                          int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
                          BLOCK_SIZE bsize, int mi_row, int mi_col,
                          int_mv *mv) {
  uint32_t sse;
  struct buf_2d src;
  struct buf_2d pre;
  MV full_mv;
  *mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize,
                            mi_row, mi_col);
  full_mv = get_full_mv(&mv->as_mv);
  if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col,
                             &src, &pre)) {
    // TODO(angiebird): Consider subpixel when computing the sse.
    cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv),
                          pre.stride, &sse);
    return (double)(sse << VP9_DIST_SCALE_LOG2);
  } else {
    assert(0);
    return 0;
  }
}

static int get_mv_mode_cost(int mv_mode) {
  // TODO(angiebird): The probabilities are roughly inferred from
  // default_inter_mode_probs. Check if there is a better way to set the
  // probabilities.
  const int zero_mv_prob = 16;
  const int new_mv_prob = 24 * 1;
  const int ref_mv_prob = 256 - zero_mv_prob - new_mv_prob;
  assert(zero_mv_prob + new_mv_prob + ref_mv_prob == 256);
  switch (mv_mode) {
    case ZERO_MV_MODE: return vp9_prob_cost[zero_mv_prob]; break;
    case NEW_MV_MODE: return vp9_prob_cost[new_mv_prob]; break;
    case NEAREST_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
    case NEAR_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
    default: assert(0); return -1;
  }
}

static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) {
  double mv_diff_cost = log2(1 + abs(new_mv->row - ref_mv->row)) +
                        log2(1 + abs(new_mv->col - ref_mv->col));
  mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT);
  return mv_diff_cost;
}
static double get_mv_cost(int mv_mode, VP9_COMP *cpi,
                          MotionField *motion_field, TplDepFrame *tpl_frame,
                          BLOCK_SIZE bsize, int mi_row, int mi_col) {
  double mv_cost = get_mv_mode_cost(mv_mode);
  if (mv_mode == NEW_MV_MODE) {
    MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame,
                                    bsize, mi_row, mi_col)
                    .as_mv;
    MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, motion_field,
                                        tpl_frame, bsize, mi_row, mi_col)
                        .as_mv;
    MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, motion_field, tpl_frame,
                                     bsize, mi_row, mi_col)
                     .as_mv;
    double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv);
    double near_cost = get_mv_diff_cost(&new_mv, &near_mv);
    mv_cost += nearest_cost < near_cost ? nearest_cost : near_cost;
  }
  return mv_cost;
}

static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x,
                           GF_PICTURE *gf_picture, MotionField *motion_field,
                           int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
                           BLOCK_SIZE bsize, int mi_row, int mi_col,
                           int_mv *mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  double mv_dist =
      get_mv_dist(mv_mode, cpi, xd, gf_picture, motion_field, frame_idx,
                  tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
  double mv_cost =
      get_mv_cost(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col);
  double mult = 180;

  return mv_cost + mult * log2f(1 + mv_dist);
}

static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                 GF_PICTURE *gf_picture,
                                 MotionField *motion_field, int frame_idx,
                                 TplDepFrame *tpl_frame, int rf_idx,
                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
                                 double *rd, int_mv *mv) {
  int best_mv_mode = ZERO_MV_MODE;
  int update = 0;
  int mv_mode;
  *rd = 0;
  for (mv_mode = 0; mv_mode < MAX_MV_MODE; ++mv_mode) {
    double this_rd;
    int_mv this_mv;
    if (mv_mode == NEW_MV_MODE) {
      continue;
    }
    this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, motion_field, frame_idx,
                           tpl_frame, rf_idx, bsize, mi_row, mi_col, &this_mv);
    if (update == 0) {
      *rd = this_rd;
      *mv = this_mv;
      best_mv_mode = mv_mode;
      update = 1;
    } else {
      if (this_rd < *rd) {
        *rd = this_rd;
        *mv = this_mv;
        best_mv_mode = mv_mode;
      }
    }
  }
  return best_mv_mode;
}

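// Decides, over the kMvPreCheckSize neighborhood scanned in diagonal order,
// whether coding the current block with NEW_MV_MODE lowers the total
// rate-distortion cost versus using only reference MV modes; keeps whichever
// assignment is cheaper and stores the neighborhood RD gain in rd_diff_arr.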
static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                            GF_PICTURE *gf_picture, MotionField *motion_field,
                            int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
                            BLOCK_SIZE bsize, int mi_row, int mi_col) {
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  int tmp_mv_mode_arr[kMvPreCheckSize];
  int *mv_mode_arr = tpl_frame->mv_mode_arr[rf_idx];
  double *rd_diff_arr = tpl_frame->rd_diff_arr[rf_idx];
  int_mv *select_mv_arr = cpi->select_mv_arr;
  int_mv tmp_select_mv_arr[kMvPreCheckSize];
  int stride = tpl_frame->stride;
  double new_mv_rd = 0;
  double no_new_mv_rd = 0;
  double this_new_mv_rd = 0;
  double this_no_new_mv_rd = 0;
  int idx;
  int tmp_idx;
  assert(kMvPreCheckSize == (kMvPreCheckLines * (kMvPreCheckLines + 1)) >> 1);

  // no new mv
  // diagonal scan order
  tmp_idx = 0;
  for (idx = 0; idx < kMvPreCheckLines; ++idx) {
    int r;
    for (r = 0; r <= idx; ++r) {
      int c = idx - r;
      int nb_row = mi_row + r * mi_height;
      int nb_col = mi_col + c * mi_width;
      if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
        double this_rd;
        int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
        mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
            cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
            bsize, nb_row, nb_col, &this_rd, mv);
        if (r == 0 && c == 0) {
          this_no_new_mv_rd = this_rd;
        }
        no_new_mv_rd += this_rd;
        tmp_mv_mode_arr[tmp_idx] = mv_mode_arr[nb_row * stride + nb_col];
        tmp_select_mv_arr[tmp_idx] = select_mv_arr[nb_row * stride + nb_col];
        ++tmp_idx;
      }
    }
  }

  // new mv
  mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE;
  this_new_mv_rd = eval_mv_mode(
      NEW_MV_MODE, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
      rf_idx, bsize, mi_row, mi_col, &select_mv_arr[mi_row * stride + mi_col]);
  new_mv_rd = this_new_mv_rd;
  // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE
  // beforehand.
  for (idx = 1; idx < kMvPreCheckLines; ++idx) {
    int r;
    for (r = 0; r <= idx; ++r) {
      int c = idx - r;
      int nb_row = mi_row + r * mi_height;
      int nb_col = mi_col + c * mi_width;
      if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
        double this_rd;
        int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
        mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
            cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
            bsize, nb_row, nb_col, &this_rd, mv);
        new_mv_rd += this_rd;
      }
    }
  }

  // update best_mv_mode
  tmp_idx = 0;
  if (no_new_mv_rd < new_mv_rd) {
    for (idx = 0; idx < kMvPreCheckLines; ++idx) {
      int r;
      for (r = 0; r <= idx; ++r) {
        int c = idx - r;
        int nb_row = mi_row + r * mi_height;
        int nb_col = mi_col + c * mi_width;
        if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
          mv_mode_arr[nb_row * stride + nb_col] = tmp_mv_mode_arr[tmp_idx];
          select_mv_arr[nb_row * stride + nb_col] = tmp_select_mv_arr[tmp_idx];
          ++tmp_idx;
        }
      }
    }
    rd_diff_arr[mi_row * stride + mi_col] = 0;
  } else {
    rd_diff_arr[mi_row * stride + mi_col] =
        (no_new_mv_rd - this_no_new_mv_rd) - (new_mv_rd - this_new_mv_rd);
  }
}

static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x,
                                GF_PICTURE *gf_picture,
                                MotionField *motion_field, int frame_idx,
                                TplDepFrame *tpl_frame, int rf_idx,
                                BLOCK_SIZE bsize) {
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int unit_rows = tpl_frame->mi_rows / mi_height;
  const int unit_cols = tpl_frame->mi_cols / mi_width;
  const int max_diagonal_lines = unit_rows + unit_cols - 1;
  int idx;
  for (idx = 0; idx < max_diagonal_lines; ++idx) {
    int r;
    for (r = VPXMAX(idx - unit_cols + 1, 0); r <= VPXMIN(idx, unit_rows - 1);
         ++r) {
      int c = idx - r;
      int mi_row = r * mi_height;
      int mi_col = c * mi_width;
      assert(c >= 0 && c < unit_cols);
      assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows);
      assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols);
      predict_mv_mode(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
                      rf_idx, bsize, mi_row, mi_col);
    }
  }
}

static void do_motion_search(VP9_COMP *cpi, ThreadData *td,
                             MotionField *motion_field, int frame_idx,
                             YV12_BUFFER_CONFIG *ref_frame, BLOCK_SIZE bsize,
                             int mi_row, int mi_col) {
  VP9_COMMON *cm = &cpi->common;
  MACROBLOCK *x = &td->mb;
  MACROBLOCKD *xd = &x->e_mbd;
  const int mb_y_offset =
      mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
  assert(ref_frame != NULL);
  set_mv_limits(cm, x, mi_row, mi_col);
  {
    int_mv mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
    uint8_t *cur_frame_buf = xd->cur_buf->y_buffer + mb_y_offset;
    uint8_t *ref_frame_buf = ref_frame->y_buffer + mb_y_offset;
    const int stride = xd->cur_buf->y_stride;
    full_pixel_motion_search(cpi, td, motion_field, frame_idx, cur_frame_buf,
                             ref_frame_buf, stride, bsize, mi_row, mi_col,
                             &mv.as_mv);
    sub_pixel_motion_search(cpi, td, cur_frame_buf, ref_frame_buf, stride,
                            bsize, &mv.as_mv);
    vp9_motion_field_mi_set_mv(motion_field, mi_row, mi_col, mv);
  }
}

static void build_motion_field(
    VP9_COMP *cpi, int frame_idx,
    YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES], BLOCK_SIZE bsize) {
  VP9_COMMON *cm = &cpi->common;
  ThreadData *td = &cpi->td;
  TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
  const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
  int mi_row, mi_col;
  int rf_idx;

  tpl_frame->lambda = (pw * ph) >> 2;
  assert(pw * ph == tpl_frame->lambda << 2);

  for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
    MotionField *motion_field = vp9_motion_field_info_get_motion_field(
        &cpi->motion_field_info, frame_idx, rf_idx, bsize);
    if (ref_frame[rf_idx] == NULL) {
      continue;
    }
    vp9_motion_field_reset_mvs(motion_field);
    for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
      for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
        do_motion_search(cpi, td, motion_field, frame_idx, ref_frame[rf_idx],
                         bsize, mi_row, mi_col);
      }
    }
  }
}
#endif  // CONFIG_NON_GREEDY_MV

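// Runs the TPL model over one frame: for every block, estimates intra/inter
// costs with mode_estimation(), stores the per-block statistics (both the
// propagated tpl_stats and the pre-propagation snapshot), and then
// propagates dependency costs back to the reference frames.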
mc_flow_dispenser(VP9_COMP * cpi,GF_PICTURE * gf_picture,int frame_idx,BLOCK_SIZE bsize)1361 static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
1362 int frame_idx, BLOCK_SIZE bsize) {
1363 TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
1364 VpxTplFrameStats *tpl_frame_stats_before_propagation =
1365 &cpi->tpl_gop_stats.frame_stats_list[frame_idx];
1366 YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
1367 YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES] = { NULL, NULL, NULL };
1368
1369 VP9_COMMON *cm = &cpi->common;
1370 struct scale_factors sf;
1371 int rdmult, idx;
1372 ThreadData *td = &cpi->td;
1373 MACROBLOCK *x = &td->mb;
1374 MACROBLOCKD *xd = &x->e_mbd;
1375 int mi_row, mi_col;
1376
1377 #if CONFIG_VP9_HIGHBITDEPTH
1378 DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
1379 DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
1380 uint8_t *predictor;
1381 #else
1382 DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]);
1383 #endif
1384 DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
1385 DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
1386 DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]);
1387 DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
1388
1389 const TX_SIZE tx_size = max_txsize_lookup[bsize];
1390 const int mi_height = num_8x8_blocks_high_lookup[bsize];
1391 const int mi_width = num_8x8_blocks_wide_lookup[bsize];
1392
1393 tpl_frame_stats_before_propagation->frame_width = cm->width;
1394 tpl_frame_stats_before_propagation->frame_height = cm->height;
1395 // Setup scaling factor
1396 #if CONFIG_VP9_HIGHBITDEPTH
1397 vp9_setup_scale_factors_for_frame(
1398 &sf, this_frame->y_crop_width, this_frame->y_crop_height,
1399 this_frame->y_crop_width, this_frame->y_crop_height,
1400 cpi->common.use_highbitdepth);
1401
1402 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1403 predictor = CONVERT_TO_BYTEPTR(predictor16);
1404 else
1405 predictor = predictor8;
1406 #else
1407 vp9_setup_scale_factors_for_frame(
1408 &sf, this_frame->y_crop_width, this_frame->y_crop_height,
1409 this_frame->y_crop_width, this_frame->y_crop_height);
1410 #endif // CONFIG_VP9_HIGHBITDEPTH
1411
1412 // Prepare reference frame pointers. If any reference frame slot is
1413 // unavailable, the pointer will be set to Null.
1414 for (idx = 0; idx < MAX_INTER_REF_FRAMES; ++idx) {
1415 int rf_idx = gf_picture[frame_idx].ref_frame[idx];
1416 if (rf_idx != -REFS_PER_FRAME) ref_frame[idx] = gf_picture[rf_idx].frame;
1417 }
1418
1419 xd->mi = cm->mi_grid_visible;
1420 xd->mi[0] = cm->mi;
1421 xd->cur_buf = this_frame;
1422
1423 // Get rd multiplier set up.
1424 rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex);
1425 set_error_per_bit(&cpi->td.mb, rdmult);
1426 vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex);
1427
1428 tpl_frame->is_valid = 1;
1429
1430 cm->base_qindex = tpl_frame->base_qindex;
1431 vp9_frame_init_quantizer(cpi);
1432
1433 #if CONFIG_NON_GREEDY_MV
1434 {
1435 int square_block_idx;
1436 int rf_idx;
1437 for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
1438 ++square_block_idx) {
1439 BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx);
1440 build_motion_field(cpi, frame_idx, ref_frame, square_bsize);
1441 }
1442 for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
1443 int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
1444 if (ref_frame_idx != -1) {
1445 MotionField *motion_field = vp9_motion_field_info_get_motion_field(
1446 &cpi->motion_field_info, frame_idx, rf_idx, bsize);
1447 predict_mv_mode_arr(cpi, x, gf_picture, motion_field, frame_idx,
1448 tpl_frame, rf_idx, bsize);
1449 }
1450 }
1451 }
1452 #endif // CONFIG_NON_GREEDY_MV
1453
  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
      int64_t recon_error = 0;
      int64_t rate_cost = 0;
      int64_t sse = 0;
      // Ref frame index in the ref frame buffer.
      int ref_frame_idx = -1;
      mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
                      src_diff, coeff, qcoeff, dqcoeff, mi_row, mi_col, bsize,
                      tx_size, ref_frame, predictor, &recon_error, &rate_cost,
                      &sse, &ref_frame_idx);
      // Motion flow dependency dispenser.
      tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
                      tpl_frame->stride);

      tpl_store_before_propagation(
          tpl_frame_stats_before_propagation->block_stats_list,
          tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize, tpl_frame->stride,
          recon_error, rate_cost, ref_frame_idx, tpl_frame->mi_rows,
          tpl_frame->mi_cols);

      tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row,
                       mi_col, bsize);
    }
  }
}

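// Drops the trailing |extra_frames| entries from |tpl_gop_stats|, deep-copying
// the frames that remain. Note that vpx_internal_error() does not return; it
// longjmps to the encoder's error handler, so the allocation below only runs
// when the size check passes.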
static void trim_tpl_stats(struct vpx_internal_error_info *error_info,
                           VpxTplGopStats *tpl_gop_stats, int extra_frames) {
  int i;
  VpxTplFrameStats *new_frame_stats;
  const int new_size = tpl_gop_stats->size - extra_frames;
  if (tpl_gop_stats->size <= extra_frames)
    vpx_internal_error(
        error_info, VPX_CODEC_ERROR,
        "The number of frames in VpxTplGopStats is fewer than expected.");
  CHECK_MEM_ERROR(error_info, new_frame_stats,
                  vpx_calloc(new_size, sizeof(*new_frame_stats)));
  for (i = 0; i < new_size; i++) {
    VpxTplFrameStats *frame_stats = &tpl_gop_stats->frame_stats_list[i];
    const int num_blocks = frame_stats->num_blocks;
    new_frame_stats[i].num_blocks = num_blocks;
    new_frame_stats[i].frame_width = frame_stats->frame_width;
    new_frame_stats[i].frame_height = frame_stats->frame_height;
    CHECK_MEM_ERROR(
        error_info, new_frame_stats[i].block_stats_list,
        vpx_calloc(num_blocks, sizeof(*new_frame_stats[i].block_stats_list)));
    memcpy(new_frame_stats[i].block_stats_list, frame_stats->block_stats_list,
           num_blocks * sizeof(*new_frame_stats[i].block_stats_list));
  }
  free_tpl_frame_stats_list(tpl_gop_stats);
  tpl_gop_stats->size = new_size;
  tpl_gop_stats->frame_stats_list = new_frame_stats;
}

#if CONFIG_NON_GREEDY_MV
#define DUMP_TPL_STATS 0
#if DUMP_TPL_STATS
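// Debug helpers, compiled in only when DUMP_TPL_STATS is set to 1: they print
// raw plane samples, per-block motion vectors, feature scores, and mv modes
// to stdout for offline inspection.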
static void dump_buf(uint8_t *buf, int stride, int row, int col, int h,
                     int w) {
  int i, j;
  printf("%d %d\n", h, w);
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      printf("%d ", buf[(row + i) * stride + col + j]);
    }
  }
  printf("\n");
}

static void dump_frame_buf(const YV12_BUFFER_CONFIG *frame_buf) {
  dump_buf(frame_buf->y_buffer, frame_buf->y_stride, 0, 0, frame_buf->y_height,
           frame_buf->y_width);
  dump_buf(frame_buf->u_buffer, frame_buf->uv_stride, 0, 0,
           frame_buf->uv_height, frame_buf->uv_width);
  dump_buf(frame_buf->v_buffer, frame_buf->uv_stride, 0, 0,
           frame_buf->uv_height, frame_buf->uv_width);
}

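// For every frame/reference pair in the GOP, print the motion field, the
// per-block feature scores and mv modes, and the raw frame buffers of both
// the current frame and its reference.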
static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
                           const GF_GROUP *gf_group,
                           const GF_PICTURE *gf_picture, BLOCK_SIZE bsize) {
  int frame_idx;
  const VP9_COMMON *cm = &cpi->common;
  int rf_idx;
  for (frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) {
    for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
      const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
      int mi_row, mi_col;
      int ref_frame_idx;
      const int mi_height = num_8x8_blocks_high_lookup[bsize];
      const int mi_width = num_8x8_blocks_wide_lookup[bsize];
      ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
      if (ref_frame_idx != -1) {
        YV12_BUFFER_CONFIG *ref_frame_buf = gf_picture[ref_frame_idx].frame;
        const int gf_frame_offset = gf_group->frame_gop_index[frame_idx];
        const int ref_gf_frame_offset =
            gf_group->frame_gop_index[ref_frame_idx];
        printf("=\n");
        printf(
            "frame_idx %d mi_rows %d mi_cols %d bsize %d ref_frame_idx %d "
            "rf_idx %d gf_frame_offset %d ref_gf_frame_offset %d\n",
            frame_idx, cm->mi_rows, cm->mi_cols, mi_width * MI_SIZE,
            ref_frame_idx, rf_idx, gf_frame_offset, ref_gf_frame_offset);
        for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
          for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
            if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
              int_mv mv = vp9_motion_field_info_get_mv(&cpi->motion_field_info,
                                                       frame_idx, rf_idx,
                                                       bsize, mi_row, mi_col);
              printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row,
                     mv.as_mv.col);
            }
          }
        }
        for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
          for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
            if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
              const TplDepStats *tpl_ptr =
                  &tpl_frame
                       ->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
              printf("%f ", tpl_ptr->feature_score);
            }
          }
        }
        printf("\n");

        for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
          for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
            const int mv_mode =
                tpl_frame
                    ->mv_mode_arr[rf_idx][mi_row * tpl_frame->stride + mi_col];
            printf("%d ", mv_mode);
          }
        }
        printf("\n");

        dump_frame_buf(gf_picture[frame_idx].frame);
        dump_frame_buf(ref_frame_buf);
      }
    }
  }
}
#endif // DUMP_TPL_STATS
#endif // CONFIG_NON_GREEDY_MV

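// (Re)allocate the per-frame TPL stats buffers for up to MAX_ARF_GOP_SIZE
// frames, sized to the superblock-aligned mi grid; buffers that are already
// large enough are reused. Note that mi_cols_aligned_to_sb(), despite its
// name, simply rounds up to a superblock multiple, which is why it is applied
// to both dimensions below.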
void vp9_init_tpl_buffer(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  int frame;

  const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
  const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
#if CONFIG_NON_GREEDY_MV
  int rf_idx;

  vpx_free(cpi->select_mv_arr);
  CHECK_MEM_ERROR(
      &cm->error, cpi->select_mv_arr,
      vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->select_mv_arr)));
#endif

  // TODO(jingning): Reduce the actual memory use for tpl model build up.
  for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
    if (cpi->tpl_stats[frame].width >= mi_cols &&
        cpi->tpl_stats[frame].height >= mi_rows &&
        cpi->tpl_stats[frame].tpl_stats_ptr)
      continue;

#if CONFIG_NON_GREEDY_MV
    for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
      vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
      CHECK_MEM_ERROR(
          &cm->error, cpi->tpl_stats[frame].mv_mode_arr[rf_idx],
          vpx_calloc(mi_rows * mi_cols * 4,
                     sizeof(*cpi->tpl_stats[frame].mv_mode_arr[rf_idx])));
      vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
      CHECK_MEM_ERROR(
          &cm->error, cpi->tpl_stats[frame].rd_diff_arr[rf_idx],
          vpx_calloc(mi_rows * mi_cols * 4,
                     sizeof(*cpi->tpl_stats[frame].rd_diff_arr[rf_idx])));
    }
#endif
    vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
    CHECK_MEM_ERROR(&cm->error, cpi->tpl_stats[frame].tpl_stats_ptr,
                    vpx_calloc(mi_rows * mi_cols,
                               sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
    cpi->tpl_stats[frame].is_valid = 0;
    cpi->tpl_stats[frame].width = mi_cols;
    cpi->tpl_stats[frame].height = mi_rows;
    cpi->tpl_stats[frame].stride = mi_cols;
    cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
    cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
  }

  for (frame = 0; frame < REF_FRAMES; ++frame) {
    cpi->enc_frame_buf[frame].mem_valid = 0;
    cpi->enc_frame_buf[frame].released = 1;
  }
}

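// Release everything vp9_init_tpl_buffer() allocated, plus the GOP-level
// stats list used for external rate control.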
void vp9_free_tpl_buffer(VP9_COMP *cpi) {
  int frame;
#if CONFIG_NON_GREEDY_MV
  vp9_free_motion_field_info(&cpi->motion_field_info);
  vpx_free(cpi->select_mv_arr);
#endif
  for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
#if CONFIG_NON_GREEDY_MV
    int rf_idx;
    for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
      vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
      vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
    }
#endif
    vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
    cpi->tpl_stats[frame].is_valid = 0;
  }
  free_tpl_frame_stats_list(&cpi->tpl_gop_stats);
}

#if CONFIG_RATE_CTRL
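// Sum the per-block tpl stats of every valid frame in the group into
// per-show-frame totals, consumed via tpl_stats_info by the simple encode
// API.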
static void accumulate_frame_tpl_stats(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const GF_GROUP *gf_group = &cpi->twopass.gf_group;
  int show_frame_count = 0;
  int frame_idx;
  // Accumulate tpl stats for each frame in the current group of pictures.
  for (frame_idx = 1; frame_idx < gf_group->gf_group_size; ++frame_idx) {
    TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
    TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
    const int tpl_stride = tpl_frame->stride;
    int64_t intra_cost_base = 0;
    int64_t inter_cost_base = 0;
    int64_t mc_dep_cost_base = 0;
    int64_t mc_ref_cost_base = 0;
    int64_t mc_flow_base = 0;
    int row, col;

    if (!tpl_frame->is_valid) continue;

    for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) {
      for (col = 0; col < cm->mi_cols; ++col) {
        TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
        intra_cost_base += this_stats->intra_cost;
        inter_cost_base += this_stats->inter_cost;
        mc_dep_cost_base += this_stats->mc_dep_cost;
        mc_ref_cost_base += this_stats->mc_ref_cost;
        mc_flow_base += this_stats->mc_flow;
      }
    }

    cpi->tpl_stats_info[show_frame_count].intra_cost = intra_cost_base;
    cpi->tpl_stats_info[show_frame_count].inter_cost = inter_cost_base;
    cpi->tpl_stats_info[show_frame_count].mc_dep_cost = mc_dep_cost_base;
    cpi->tpl_stats_info[show_frame_count].mc_ref_cost = mc_ref_cost_base;
    cpi->tpl_stats_info[show_frame_count].mc_flow = mc_flow_base;

    ++show_frame_count;
  }
}
#endif // CONFIG_RATE_CTRL

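// Estimate a base q index for each frame in the GOP before the tpl model is
// built: either ask the external rate controller (when it supplies QP
// decisions) or fall back to the two-pass rate control. gf_group.index and
// the buffer update configuration are temporarily overwritten per frame and
// restored at the end.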
void vp9_estimate_tpl_qp_gop(VP9_COMP *cpi) {
  int gop_length = cpi->twopass.gf_group.gf_group_size;
  int bottom_index, top_index;
  int idx;
  const int gf_index = cpi->twopass.gf_group.index;
  const int is_src_frame_alt_ref = cpi->rc.is_src_frame_alt_ref;
  const int refresh_frame_context = cpi->common.refresh_frame_context;

  for (idx = gf_index; idx <= gop_length; ++idx) {
    TplDepFrame *tpl_frame = &cpi->tpl_stats[idx];
    int target_rate = cpi->twopass.gf_group.bit_allocation[idx];
    cpi->twopass.gf_group.index = idx;
    vp9_rc_set_frame_target(cpi, target_rate);
    vp9_configure_buffer_updates(cpi, idx);
    if (cpi->ext_ratectrl.ready &&
        (cpi->ext_ratectrl.funcs.rc_type & VPX_RC_QP) != 0 &&
        cpi->ext_ratectrl.funcs.get_encodeframe_decision != NULL) {
      VP9_COMMON *cm = &cpi->common;
      vpx_codec_err_t codec_status;
      const GF_GROUP *gf_group = &cpi->twopass.gf_group;
      vpx_rc_encodeframe_decision_t encode_frame_decision;
      if (idx == gop_length) break;
      codec_status = vp9_extrc_get_encodeframe_decision(
          &cpi->ext_ratectrl, gf_group->index, &encode_frame_decision);
      if (codec_status != VPX_CODEC_OK) {
        vpx_internal_error(&cm->error, codec_status,
                           "vp9_extrc_get_encodeframe_decision() failed");
      }
      tpl_frame->base_qindex = encode_frame_decision.q_index;
    } else {
      tpl_frame->base_qindex = vp9_rc_pick_q_and_bounds_two_pass(
          cpi, &bottom_index, &top_index, idx);
      tpl_frame->base_qindex = VPXMAX(tpl_frame->base_qindex, 1);
    }
  }
  // Restore the actual index and frame update configuration.
  cpi->twopass.gf_group.index = gf_index;
  cpi->rc.is_src_frame_alt_ref = is_src_frame_alt_ref;
  cpi->common.refresh_frame_context = refresh_frame_context;
  vp9_configure_buffer_updates(cpi, gf_index);
}

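// Top-level entry point of the tpl model. The flow, mirroring the code below:
// build the GOP picture list, reset the stats buffers, run
// mc_flow_dispenser() backwards over the group so that each frame's
// importance propagates to its references, and, if an external rate
// controller is attached, trim the pre-propagation stats to this GOP and send
// them out.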
void vp9_setup_tpl_stats(VP9_COMP *cpi) {
  GF_PICTURE gf_picture_buf[MAX_ARF_GOP_SIZE + REFS_PER_FRAME];
  GF_PICTURE *gf_picture = &gf_picture_buf[REFS_PER_FRAME];
  const GF_GROUP *gf_group = &cpi->twopass.gf_group;
  int tpl_group_frames = 0;
  int frame_idx;
  int extended_frame_count;
  cpi->tpl_bsize = BLOCK_32X32;

  memset(gf_picture_buf, 0, sizeof(gf_picture_buf));
  extended_frame_count =
      init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames);

  init_tpl_stats(cpi);

  init_tpl_stats_before_propagation(&cpi->common.error, &cpi->tpl_gop_stats,
                                    cpi->tpl_stats, tpl_group_frames,
                                    cpi->common.width, cpi->common.height);

  // Backward propagation from tpl_group_frames to 1.
  for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) {
    if (gf_picture[frame_idx].update_type == USE_BUF_FRAME) continue;
    mc_flow_dispenser(cpi, gf_picture, frame_idx, cpi->tpl_bsize);
  }

  if (cpi->ext_ratectrl.ready &&
      cpi->ext_ratectrl.funcs.send_tpl_gop_stats != NULL) {
    // Intra search on key frame
    if (gf_picture[0].update_type != OVERLAY_UPDATE) {
      mc_flow_dispenser(cpi, gf_picture, 0, cpi->tpl_bsize);
    }
    // TPL stats have extra frames from the next GOP. Trim those extra frames
    // for Q mode.
    trim_tpl_stats(&cpi->common.error, &cpi->tpl_gop_stats,
                   extended_frame_count);
    const vpx_codec_err_t codec_status =
        vp9_extrc_send_tpl_stats(&cpi->ext_ratectrl, &cpi->tpl_gop_stats);
    if (codec_status != VPX_CODEC_OK) {
      vpx_internal_error(&cpi->common.error, codec_status,
                         "vp9_extrc_send_tpl_stats() failed");
    }
  }

#if CONFIG_NON_GREEDY_MV
  cpi->tpl_ready = 1;
#if DUMP_TPL_STATS
  dump_tpl_stats(cpi, tpl_group_frames, gf_group, gf_picture, cpi->tpl_bsize);
#endif // DUMP_TPL_STATS
#endif // CONFIG_NON_GREEDY_MV

#if CONFIG_RATE_CTRL
  if (cpi->oxcf.use_simple_encode_api) {
    accumulate_frame_tpl_stats(cpi);
  }
#endif // CONFIG_RATE_CTRL
}