xref: /aosp_15_r20/external/libvpx/vp9/simple_encode.cc (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1 /*
2  *  Copyright (c) 2019 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <stdio.h>
12 #include <stdlib.h>
13 
14 #include <memory>
15 #include <vector>
16 
17 #include "./ivfenc.h"
18 #include "vp9/common/vp9_entropymode.h"
19 #include "vp9/common/vp9_enums.h"
20 #include "vp9/common/vp9_onyxc_int.h"
21 #include "vp9/vp9_iface_common.h"
22 #include "vp9/encoder/vp9_encoder.h"
23 #include "vp9/encoder/vp9_firstpass.h"
24 #include "vp9/simple_encode.h"
25 #include "vp9/vp9_cx_iface.h"
26 
27 namespace vp9 {
28 
get_plane_height(vpx_img_fmt_t img_fmt,int frame_height,int plane)29 static int get_plane_height(vpx_img_fmt_t img_fmt, int frame_height,
30                             int plane) {
31   assert(plane < 3);
32   if (plane == 0) {
33     return frame_height;
34   }
35   switch (img_fmt) {
36     case VPX_IMG_FMT_I420:
37     case VPX_IMG_FMT_I440:
38     case VPX_IMG_FMT_YV12:
39     case VPX_IMG_FMT_I42016:
40     case VPX_IMG_FMT_I44016: return (frame_height + 1) >> 1;
41     default: return frame_height;
42   }
43 }
44 
get_plane_width(vpx_img_fmt_t img_fmt,int frame_width,int plane)45 static int get_plane_width(vpx_img_fmt_t img_fmt, int frame_width, int plane) {
46   assert(plane < 3);
47   if (plane == 0) {
48     return frame_width;
49   }
50   switch (img_fmt) {
51     case VPX_IMG_FMT_I420:
52     case VPX_IMG_FMT_YV12:
53     case VPX_IMG_FMT_I422:
54     case VPX_IMG_FMT_I42016:
55     case VPX_IMG_FMT_I42216: return (frame_width + 1) >> 1;
56     default: return frame_width;
57   }
58 }
59 
60 // TODO(angiebird): Merge this function with vpx_img_plane_width()
img_plane_width(const vpx_image_t * img,int plane)61 static int img_plane_width(const vpx_image_t *img, int plane) {
62   if (plane > 0 && img->x_chroma_shift > 0)
63     return (img->d_w + 1) >> img->x_chroma_shift;
64   else
65     return img->d_w;
66 }
67 
68 // TODO(angiebird): Merge this function with vpx_img_plane_height()
img_plane_height(const vpx_image_t * img,int plane)69 static int img_plane_height(const vpx_image_t *img, int plane) {
70   if (plane > 0 && img->y_chroma_shift > 0)
71     return (img->d_h + 1) >> img->y_chroma_shift;
72   else
73     return img->d_h;
74 }
75 
76 // TODO(angiebird): Merge this function with vpx_img_read()
img_read(vpx_image_t * img,FILE * file)77 static int img_read(vpx_image_t *img, FILE *file) {
78   int plane;
79 
80   for (plane = 0; plane < 3; ++plane) {
81     unsigned char *buf = img->planes[plane];
82     const int stride = img->stride[plane];
83     const int w = img_plane_width(img, plane) *
84                   ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1);
85     const int h = img_plane_height(img, plane);
86     int y;
87 
88     for (y = 0; y < h; ++y) {
89       if (fread(buf, 1, w, file) != (size_t)w) return 0;
90       buf += stride;
91     }
92   }
93 
94   return 1;
95 }
96 
// Size, in chars, of each text buffer in EncodeConfig.
// Assume every config in VP9EncoderConfig is less than 100 characters.
#define ENCODE_CONFIG_BUF_SIZE 100
// One configuration entry held as two fixed-size character buffers.
struct EncodeConfig {
  // Buffer for the entry's name.
  char name[ENCODE_CONFIG_BUF_SIZE];
  // Buffer for the entry's value.
  char value[ENCODE_CONFIG_BUF_SIZE];
};
103 
104 class SimpleEncode::EncodeImpl {
105  public:
106   VP9_COMP *cpi;
107   vpx_img_fmt_t img_fmt;
108   vpx_image_t tmp_img;
109   std::vector<FIRSTPASS_STATS> first_pass_stats;
110   std::vector<EncodeConfig> encode_config_list;
111 };
112 
init_encoder(const VP9EncoderConfig * oxcf,vpx_img_fmt_t img_fmt)113 static VP9_COMP *init_encoder(const VP9EncoderConfig *oxcf,
114                               vpx_img_fmt_t img_fmt) {
115   VP9_COMP *cpi;
116   BufferPool *buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(*buffer_pool));
117   if (!buffer_pool) return NULL;
118   vp9_initialize_enc();
119   cpi = vp9_create_compressor(oxcf, buffer_pool);
120   vp9_update_compressor_with_img_fmt(cpi, img_fmt);
121   return cpi;
122 }
123 
free_encoder(VP9_COMP * cpi)124 static void free_encoder(VP9_COMP *cpi) {
125   BufferPool *buffer_pool = cpi->common.buffer_pool;
126   vp9_remove_compressor(cpi);
127   // buffer_pool needs to be free after cpi because buffer_pool contains
128   // allocated buffers that will be free in vp9_remove_compressor()
129   vpx_free(buffer_pool);
130 }
131 
make_vpx_rational(int num,int den)132 static INLINE vpx_rational_t make_vpx_rational(int num, int den) {
133   vpx_rational_t v;
134   v.num = num;
135   v.den = den;
136   return v;
137 }
138 
139 static INLINE FrameType
get_frame_type_from_update_type(FRAME_UPDATE_TYPE update_type)140 get_frame_type_from_update_type(FRAME_UPDATE_TYPE update_type) {
141   switch (update_type) {
142     case KF_UPDATE: return kFrameTypeKey;
143     case ARF_UPDATE: return kFrameTypeAltRef;
144     case GF_UPDATE: return kFrameTypeGolden;
145     case OVERLAY_UPDATE: return kFrameTypeOverlay;
146     case LF_UPDATE: return kFrameTypeInter;
147     default:
148       fprintf(stderr, "Unsupported update_type %d\n", update_type);
149       abort();
150   }
151 }
152 
update_partition_info(const PARTITION_INFO * input_partition_info,const int num_rows_4x4,const int num_cols_4x4,PartitionInfo * output_partition_info)153 static void update_partition_info(const PARTITION_INFO *input_partition_info,
154                                   const int num_rows_4x4,
155                                   const int num_cols_4x4,
156                                   PartitionInfo *output_partition_info) {
157   const int num_units_4x4 = num_rows_4x4 * num_cols_4x4;
158   for (int i = 0; i < num_units_4x4; ++i) {
159     output_partition_info[i].row = input_partition_info[i].row;
160     output_partition_info[i].column = input_partition_info[i].column;
161     output_partition_info[i].row_start = input_partition_info[i].row_start;
162     output_partition_info[i].column_start =
163         input_partition_info[i].column_start;
164     output_partition_info[i].width = input_partition_info[i].width;
165     output_partition_info[i].height = input_partition_info[i].height;
166   }
167 }
168 
169 // translate MV_REFERENCE_FRAME to RefFrameType
mv_ref_frame_to_ref_frame_type(MV_REFERENCE_FRAME mv_ref_frame)170 static RefFrameType mv_ref_frame_to_ref_frame_type(
171     MV_REFERENCE_FRAME mv_ref_frame) {
172   switch (mv_ref_frame) {
173     case LAST_FRAME: return kRefFrameTypeLast;
174     case GOLDEN_FRAME: return kRefFrameTypePast;
175     case ALTREF_FRAME: return kRefFrameTypeFuture;
176     default: return kRefFrameTypeNone;
177   }
178 }
179 
update_motion_vector_info(const MOTION_VECTOR_INFO * input_motion_vector_info,const int num_rows_4x4,const int num_cols_4x4,MotionVectorInfo * output_motion_vector_info,int motion_vector_scale)180 static void update_motion_vector_info(
181     const MOTION_VECTOR_INFO *input_motion_vector_info, const int num_rows_4x4,
182     const int num_cols_4x4, MotionVectorInfo *output_motion_vector_info,
183     int motion_vector_scale) {
184   const int num_units_4x4 = num_rows_4x4 * num_cols_4x4;
185   for (int i = 0; i < num_units_4x4; ++i) {
186     const MV_REFERENCE_FRAME *in_ref_frame =
187         input_motion_vector_info[i].ref_frame;
188     output_motion_vector_info[i].mv_count =
189         (in_ref_frame[0] == INTRA_FRAME)
190             ? 0
191             : ((in_ref_frame[1] == NO_REF_FRAME) ? 1 : 2);
192     if (in_ref_frame[0] == NO_REF_FRAME) {
193       fprintf(stderr, "in_ref_frame[0] shouldn't be NO_REF_FRAME\n");
194       abort();
195     }
196     output_motion_vector_info[i].ref_frame[0] =
197         mv_ref_frame_to_ref_frame_type(in_ref_frame[0]);
198     output_motion_vector_info[i].ref_frame[1] =
199         mv_ref_frame_to_ref_frame_type(in_ref_frame[1]);
200     output_motion_vector_info[i].mv_row[0] =
201         (double)input_motion_vector_info[i].mv[0].as_mv.row /
202         motion_vector_scale;
203     output_motion_vector_info[i].mv_column[0] =
204         (double)input_motion_vector_info[i].mv[0].as_mv.col /
205         motion_vector_scale;
206     output_motion_vector_info[i].mv_row[1] =
207         (double)input_motion_vector_info[i].mv[1].as_mv.row /
208         motion_vector_scale;
209     output_motion_vector_info[i].mv_column[1] =
210         (double)input_motion_vector_info[i].mv[1].as_mv.col /
211         motion_vector_scale;
212   }
213 }
214 
update_tpl_stats_info(const TplDepStats * input_tpl_stats_info,const int show_frame_count,TplStatsInfo * output_tpl_stats_info)215 static void update_tpl_stats_info(const TplDepStats *input_tpl_stats_info,
216                                   const int show_frame_count,
217                                   TplStatsInfo *output_tpl_stats_info) {
218   int frame_idx;
219   for (frame_idx = 0; frame_idx < show_frame_count; ++frame_idx) {
220     output_tpl_stats_info[frame_idx].intra_cost =
221         input_tpl_stats_info[frame_idx].intra_cost;
222     output_tpl_stats_info[frame_idx].inter_cost =
223         input_tpl_stats_info[frame_idx].inter_cost;
224     output_tpl_stats_info[frame_idx].mc_flow =
225         input_tpl_stats_info[frame_idx].mc_flow;
226     output_tpl_stats_info[frame_idx].mc_dep_cost =
227         input_tpl_stats_info[frame_idx].mc_dep_cost;
228     output_tpl_stats_info[frame_idx].mc_ref_cost =
229         input_tpl_stats_info[frame_idx].mc_ref_cost;
230   }
231 }
232 
update_frame_counts(const FRAME_COUNTS * input_counts,FrameCounts * output_counts)233 static void update_frame_counts(const FRAME_COUNTS *input_counts,
234                                 FrameCounts *output_counts) {
235   // Init array sizes.
236   output_counts->y_mode.resize(BLOCK_SIZE_GROUPS);
237   for (int i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
238     output_counts->y_mode[i].resize(INTRA_MODES);
239   }
240 
241   output_counts->uv_mode.resize(INTRA_MODES);
242   for (int i = 0; i < INTRA_MODES; ++i) {
243     output_counts->uv_mode[i].resize(INTRA_MODES);
244   }
245 
246   output_counts->partition.resize(PARTITION_CONTEXTS);
247   for (int i = 0; i < PARTITION_CONTEXTS; ++i) {
248     output_counts->partition[i].resize(PARTITION_TYPES);
249   }
250 
251   output_counts->coef.resize(TX_SIZES);
252   output_counts->eob_branch.resize(TX_SIZES);
253   for (int i = 0; i < TX_SIZES; ++i) {
254     output_counts->coef[i].resize(PLANE_TYPES);
255     output_counts->eob_branch[i].resize(PLANE_TYPES);
256     for (int j = 0; j < PLANE_TYPES; ++j) {
257       output_counts->coef[i][j].resize(REF_TYPES);
258       output_counts->eob_branch[i][j].resize(REF_TYPES);
259       for (int k = 0; k < REF_TYPES; ++k) {
260         output_counts->coef[i][j][k].resize(COEF_BANDS);
261         output_counts->eob_branch[i][j][k].resize(COEF_BANDS);
262         for (int l = 0; l < COEF_BANDS; ++l) {
263           output_counts->coef[i][j][k][l].resize(COEFF_CONTEXTS);
264           output_counts->eob_branch[i][j][k][l].resize(COEFF_CONTEXTS);
265           for (int m = 0; m < COEFF_CONTEXTS; ++m) {
266             output_counts->coef[i][j][k][l][m].resize(UNCONSTRAINED_NODES + 1);
267           }
268         }
269       }
270     }
271   }
272 
273   output_counts->switchable_interp.resize(SWITCHABLE_FILTER_CONTEXTS);
274   for (int i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
275     output_counts->switchable_interp[i].resize(SWITCHABLE_FILTERS);
276   }
277 
278   output_counts->inter_mode.resize(INTER_MODE_CONTEXTS);
279   for (int i = 0; i < INTER_MODE_CONTEXTS; ++i) {
280     output_counts->inter_mode[i].resize(INTER_MODES);
281   }
282 
283   output_counts->intra_inter.resize(INTRA_INTER_CONTEXTS);
284   for (int i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
285     output_counts->intra_inter[i].resize(2);
286   }
287 
288   output_counts->comp_inter.resize(COMP_INTER_CONTEXTS);
289   for (int i = 0; i < COMP_INTER_CONTEXTS; ++i) {
290     output_counts->comp_inter[i].resize(2);
291   }
292 
293   output_counts->single_ref.resize(REF_CONTEXTS);
294   for (int i = 0; i < REF_CONTEXTS; ++i) {
295     output_counts->single_ref[i].resize(2);
296     for (int j = 0; j < 2; ++j) {
297       output_counts->single_ref[i][j].resize(2);
298     }
299   }
300 
301   output_counts->comp_ref.resize(REF_CONTEXTS);
302   for (int i = 0; i < REF_CONTEXTS; ++i) {
303     output_counts->comp_ref[i].resize(2);
304   }
305 
306   output_counts->skip.resize(SKIP_CONTEXTS);
307   for (int i = 0; i < SKIP_CONTEXTS; ++i) {
308     output_counts->skip[i].resize(2);
309   }
310 
311   output_counts->tx.p32x32.resize(TX_SIZE_CONTEXTS);
312   output_counts->tx.p16x16.resize(TX_SIZE_CONTEXTS);
313   output_counts->tx.p8x8.resize(TX_SIZE_CONTEXTS);
314   for (int i = 0; i < TX_SIZE_CONTEXTS; i++) {
315     output_counts->tx.p32x32[i].resize(TX_SIZES);
316     output_counts->tx.p16x16[i].resize(TX_SIZES - 1);
317     output_counts->tx.p8x8[i].resize(TX_SIZES - 2);
318   }
319   output_counts->tx.tx_totals.resize(TX_SIZES);
320 
321   output_counts->mv.joints.resize(MV_JOINTS);
322   output_counts->mv.comps.resize(2);
323   for (int i = 0; i < 2; ++i) {
324     output_counts->mv.comps[i].sign.resize(2);
325     output_counts->mv.comps[i].classes.resize(MV_CLASSES);
326     output_counts->mv.comps[i].class0.resize(CLASS0_SIZE);
327     output_counts->mv.comps[i].bits.resize(MV_OFFSET_BITS);
328     for (int j = 0; j < MV_OFFSET_BITS; ++j) {
329       output_counts->mv.comps[i].bits[j].resize(2);
330     }
331     output_counts->mv.comps[i].class0_fp.resize(CLASS0_SIZE);
332     for (int j = 0; j < CLASS0_SIZE; ++j) {
333       output_counts->mv.comps[i].class0_fp[j].resize(MV_FP_SIZE);
334     }
335     output_counts->mv.comps[i].fp.resize(MV_FP_SIZE);
336     output_counts->mv.comps[i].class0_hp.resize(2);
337     output_counts->mv.comps[i].hp.resize(2);
338   }
339 
340   // Populate counts.
341   for (int i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
342     for (int j = 0; j < INTRA_MODES; ++j) {
343       output_counts->y_mode[i][j] = input_counts->y_mode[i][j];
344     }
345   }
346   for (int i = 0; i < INTRA_MODES; ++i) {
347     for (int j = 0; j < INTRA_MODES; ++j) {
348       output_counts->uv_mode[i][j] = input_counts->uv_mode[i][j];
349     }
350   }
351   for (int i = 0; i < PARTITION_CONTEXTS; ++i) {
352     for (int j = 0; j < PARTITION_TYPES; ++j) {
353       output_counts->partition[i][j] = input_counts->partition[i][j];
354     }
355   }
356   for (int i = 0; i < TX_SIZES; ++i) {
357     for (int j = 0; j < PLANE_TYPES; ++j) {
358       for (int k = 0; k < REF_TYPES; ++k) {
359         for (int l = 0; l < COEF_BANDS; ++l) {
360           for (int m = 0; m < COEFF_CONTEXTS; ++m) {
361             output_counts->eob_branch[i][j][k][l][m] =
362                 input_counts->eob_branch[i][j][k][l][m];
363             for (int n = 0; n < UNCONSTRAINED_NODES + 1; n++) {
364               output_counts->coef[i][j][k][l][m][n] =
365                   input_counts->coef[i][j][k][l][m][n];
366             }
367           }
368         }
369       }
370     }
371   }
372   for (int i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
373     for (int j = 0; j < SWITCHABLE_FILTERS; ++j) {
374       output_counts->switchable_interp[i][j] =
375           input_counts->switchable_interp[i][j];
376     }
377   }
378   for (int i = 0; i < INTER_MODE_CONTEXTS; ++i) {
379     for (int j = 0; j < INTER_MODES; ++j) {
380       output_counts->inter_mode[i][j] = input_counts->inter_mode[i][j];
381     }
382   }
383   for (int i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
384     for (int j = 0; j < 2; ++j) {
385       output_counts->intra_inter[i][j] = input_counts->intra_inter[i][j];
386     }
387   }
388   for (int i = 0; i < COMP_INTER_CONTEXTS; ++i) {
389     for (int j = 0; j < 2; ++j) {
390       output_counts->comp_inter[i][j] = input_counts->comp_inter[i][j];
391     }
392   }
393   for (int i = 0; i < REF_CONTEXTS; ++i) {
394     for (int j = 0; j < 2; ++j) {
395       for (int k = 0; k < 2; ++k) {
396         output_counts->single_ref[i][j][k] = input_counts->single_ref[i][j][k];
397       }
398     }
399   }
400   for (int i = 0; i < REF_CONTEXTS; ++i) {
401     for (int j = 0; j < 2; ++j) {
402       output_counts->comp_ref[i][j] = input_counts->comp_ref[i][j];
403     }
404   }
405   for (int i = 0; i < SKIP_CONTEXTS; ++i) {
406     for (int j = 0; j < 2; ++j) {
407       output_counts->skip[i][j] = input_counts->skip[i][j];
408     }
409   }
410   for (int i = 0; i < TX_SIZE_CONTEXTS; i++) {
411     for (int j = 0; j < TX_SIZES; j++) {
412       output_counts->tx.p32x32[i][j] = input_counts->tx.p32x32[i][j];
413     }
414     for (int j = 0; j < TX_SIZES - 1; j++) {
415       output_counts->tx.p16x16[i][j] = input_counts->tx.p16x16[i][j];
416     }
417     for (int j = 0; j < TX_SIZES - 2; j++) {
418       output_counts->tx.p8x8[i][j] = input_counts->tx.p8x8[i][j];
419     }
420   }
421   for (int i = 0; i < TX_SIZES; i++) {
422     output_counts->tx.tx_totals[i] = input_counts->tx.tx_totals[i];
423   }
424   for (int i = 0; i < MV_JOINTS; i++) {
425     output_counts->mv.joints[i] = input_counts->mv.joints[i];
426   }
427   for (int k = 0; k < 2; k++) {
428     const nmv_component_counts *const comps_t = &input_counts->mv.comps[k];
429     for (int i = 0; i < 2; i++) {
430       output_counts->mv.comps[k].sign[i] = comps_t->sign[i];
431       output_counts->mv.comps[k].class0_hp[i] = comps_t->class0_hp[i];
432       output_counts->mv.comps[k].hp[i] = comps_t->hp[i];
433     }
434     for (int i = 0; i < MV_CLASSES; i++) {
435       output_counts->mv.comps[k].classes[i] = comps_t->classes[i];
436     }
437     for (int i = 0; i < CLASS0_SIZE; i++) {
438       output_counts->mv.comps[k].class0[i] = comps_t->class0[i];
439       for (int j = 0; j < MV_FP_SIZE; j++) {
440         output_counts->mv.comps[k].class0_fp[i][j] = comps_t->class0_fp[i][j];
441       }
442     }
443     for (int i = 0; i < MV_OFFSET_BITS; i++) {
444       for (int j = 0; j < 2; j++) {
445         output_counts->mv.comps[k].bits[i][j] = comps_t->bits[i][j];
446       }
447     }
448     for (int i = 0; i < MV_FP_SIZE; i++) {
449       output_counts->mv.comps[k].fp[i] = comps_t->fp[i];
450     }
451   }
452 }
453 
output_image_buffer(const ImageBuffer & image_buffer,std::FILE * out_file)454 void output_image_buffer(const ImageBuffer &image_buffer, std::FILE *out_file) {
455   for (int plane = 0; plane < 3; ++plane) {
456     const int w = image_buffer.plane_width[plane];
457     const int h = image_buffer.plane_height[plane];
458     const uint8_t *buf = image_buffer.plane_buffer[plane].get();
459     fprintf(out_file, "%d %d\n", h, w);
460     for (int i = 0; i < w * h; ++i) {
461       fprintf(out_file, "%d ", (int)buf[i]);
462     }
463     fprintf(out_file, "\n");
464   }
465 }
466 
init_image_buffer(ImageBuffer * image_buffer,int frame_width,int frame_height,vpx_img_fmt_t img_fmt)467 static bool init_image_buffer(ImageBuffer *image_buffer, int frame_width,
468                               int frame_height, vpx_img_fmt_t img_fmt) {
469   for (int plane = 0; plane < 3; ++plane) {
470     const int w = get_plane_width(img_fmt, frame_width, plane);
471     const int h = get_plane_height(img_fmt, frame_height, plane);
472     image_buffer->plane_width[plane] = w;
473     image_buffer->plane_height[plane] = h;
474     image_buffer->plane_buffer[plane].reset(new (std::nothrow) uint8_t[w * h]);
475     if (image_buffer->plane_buffer[plane].get() == nullptr) {
476       return false;
477     }
478   }
479   return true;
480 }
481 
ImageBuffer_to_IMAGE_BUFFER(const ImageBuffer & image_buffer,IMAGE_BUFFER * image_buffer_c)482 static void ImageBuffer_to_IMAGE_BUFFER(const ImageBuffer &image_buffer,
483                                         IMAGE_BUFFER *image_buffer_c) {
484   image_buffer_c->allocated = 1;
485   for (int plane = 0; plane < 3; ++plane) {
486     image_buffer_c->plane_width[plane] = image_buffer.plane_width[plane];
487     image_buffer_c->plane_height[plane] = image_buffer.plane_height[plane];
488     image_buffer_c->plane_buffer[plane] =
489         image_buffer.plane_buffer[plane].get();
490   }
491 }
492 
// Returns the size, in bytes, of the buffer reserved for the coded data of
// one frame: three bytes for every pixel of a |frame_width| x |frame_height|
// frame.
static size_t get_max_coding_data_byte_size(int frame_width, int frame_height) {
  // Widen before multiplying; the product overflows int for large frames
  // (e.g. 32768 x 32768).
  return static_cast<size_t>(frame_width) * static_cast<size_t>(frame_height) *
         3;
}
496 
init_encode_frame_result(EncodeFrameResult * encode_frame_result,int frame_width,int frame_height,vpx_img_fmt_t img_fmt)497 static bool init_encode_frame_result(EncodeFrameResult *encode_frame_result,
498                                      int frame_width, int frame_height,
499                                      vpx_img_fmt_t img_fmt) {
500   const size_t max_coding_data_byte_size =
501       get_max_coding_data_byte_size(frame_width, frame_height);
502 
503   encode_frame_result->coding_data.reset(
504       new (std::nothrow) uint8_t[max_coding_data_byte_size]);
505   encode_frame_result->max_coding_data_byte_size = max_coding_data_byte_size;
506 
507   encode_frame_result->num_rows_4x4 = get_num_unit_4x4(frame_height);
508   encode_frame_result->num_cols_4x4 = get_num_unit_4x4(frame_width);
509   encode_frame_result->partition_info.resize(encode_frame_result->num_rows_4x4 *
510                                              encode_frame_result->num_cols_4x4);
511   encode_frame_result->motion_vector_info.resize(
512       encode_frame_result->num_rows_4x4 * encode_frame_result->num_cols_4x4);
513   encode_frame_result->tpl_stats_info.resize(MAX_LAG_BUFFERS);
514 
515   if (encode_frame_result->coding_data.get() == nullptr) {
516     encode_frame_result->max_coding_data_byte_size = 0;
517     return false;
518   }
519   return init_image_buffer(&encode_frame_result->coded_frame, frame_width,
520                            frame_height, img_fmt);
521 }
522 
encode_frame_result_update_rq_history(const RATE_QINDEX_HISTORY * rq_history,EncodeFrameResult * encode_frame_result)523 static void encode_frame_result_update_rq_history(
524     const RATE_QINDEX_HISTORY *rq_history,
525     EncodeFrameResult *encode_frame_result) {
526   encode_frame_result->recode_count = rq_history->recode_count;
527   for (int i = 0; i < encode_frame_result->recode_count; ++i) {
528     const int q_index = rq_history->q_index_history[i];
529     const int rate = rq_history->rate_history[i];
530     encode_frame_result->q_index_history.push_back(q_index);
531     encode_frame_result->rate_history.push_back(rate);
532   }
533 }
534 
update_encode_frame_result(EncodeFrameResult * encode_frame_result,const int show_frame_count,const ENCODE_FRAME_RESULT * encode_frame_info)535 static void update_encode_frame_result(
536     EncodeFrameResult *encode_frame_result, const int show_frame_count,
537     const ENCODE_FRAME_RESULT *encode_frame_info) {
538   encode_frame_result->coding_data_bit_size =
539       encode_frame_result->coding_data_byte_size * 8;
540   encode_frame_result->show_idx = encode_frame_info->show_idx;
541   encode_frame_result->coding_idx = encode_frame_info->frame_coding_index;
542   assert(kRefFrameTypeMax == MAX_INTER_REF_FRAMES);
543   for (int i = 0; i < kRefFrameTypeMax; ++i) {
544     encode_frame_result->ref_frame_info.coding_indexes[i] =
545         encode_frame_info->ref_frame_coding_indexes[i];
546     encode_frame_result->ref_frame_info.valid_list[i] =
547         encode_frame_info->ref_frame_valid_list[i];
548   }
549   encode_frame_result->frame_type =
550       get_frame_type_from_update_type(encode_frame_info->update_type);
551   encode_frame_result->psnr = encode_frame_info->psnr;
552   encode_frame_result->sse = encode_frame_info->sse;
553   encode_frame_result->quantize_index = encode_frame_info->quantize_index;
554   update_partition_info(encode_frame_info->partition_info,
555                         encode_frame_result->num_rows_4x4,
556                         encode_frame_result->num_cols_4x4,
557                         &encode_frame_result->partition_info[0]);
558   update_motion_vector_info(encode_frame_info->motion_vector_info,
559                             encode_frame_result->num_rows_4x4,
560                             encode_frame_result->num_cols_4x4,
561                             &encode_frame_result->motion_vector_info[0],
562                             kMotionVectorSubPixelPrecision);
563   update_frame_counts(&encode_frame_info->frame_counts,
564                       &encode_frame_result->frame_counts);
565   if (encode_frame_result->frame_type == kFrameTypeAltRef) {
566     update_tpl_stats_info(encode_frame_info->tpl_stats_info, show_frame_count,
567                           &encode_frame_result->tpl_stats_info[0]);
568   }
569   encode_frame_result_update_rq_history(&encode_frame_info->rq_history,
570                                         encode_frame_result);
571 }
572 
IncreaseGroupOfPictureIndex(GroupOfPicture * group_of_picture)573 static void IncreaseGroupOfPictureIndex(GroupOfPicture *group_of_picture) {
574   ++group_of_picture->next_encode_frame_index;
575 }
576 
IsGroupOfPictureFinished(const GroupOfPicture & group_of_picture)577 static int IsGroupOfPictureFinished(const GroupOfPicture &group_of_picture) {
578   return static_cast<size_t>(group_of_picture.next_encode_frame_index) ==
579          group_of_picture.encode_frame_list.size();
580 }
581 
operator ==(const RefFrameInfo & a,const RefFrameInfo & b)582 bool operator==(const RefFrameInfo &a, const RefFrameInfo &b) {
583   bool match = true;
584   for (int i = 0; i < kRefFrameTypeMax; ++i) {
585     match &= a.coding_indexes[i] == b.coding_indexes[i];
586     match &= a.valid_list[i] == b.valid_list[i];
587   }
588   return match;
589 }
590 
InitRefFrameInfo(RefFrameInfo * ref_frame_info)591 static void InitRefFrameInfo(RefFrameInfo *ref_frame_info) {
592   for (int i = 0; i < kRefFrameTypeMax; ++i) {
593     ref_frame_info->coding_indexes[i] = -1;
594     ref_frame_info->valid_list[i] = 0;
595   }
596 }
597 
598 // After finishing coding a frame, this function will update the coded frame
599 // into the ref_frame_info based on the frame_type and the coding_index.
PostUpdateRefFrameInfo(FrameType frame_type,int frame_coding_index,RefFrameInfo * ref_frame_info)600 static void PostUpdateRefFrameInfo(FrameType frame_type, int frame_coding_index,
601                                    RefFrameInfo *ref_frame_info) {
602   // This part is written based on the logics in vp9_configure_buffer_updates()
603   // and update_ref_frames()
604   int *ref_frame_coding_indexes = ref_frame_info->coding_indexes;
605   switch (frame_type) {
606     case kFrameTypeKey:
607       ref_frame_coding_indexes[kRefFrameTypeLast] = frame_coding_index;
608       ref_frame_coding_indexes[kRefFrameTypePast] = frame_coding_index;
609       ref_frame_coding_indexes[kRefFrameTypeFuture] = frame_coding_index;
610       break;
611     case kFrameTypeInter:
612       ref_frame_coding_indexes[kRefFrameTypeLast] = frame_coding_index;
613       break;
614     case kFrameTypeAltRef:
615       ref_frame_coding_indexes[kRefFrameTypeFuture] = frame_coding_index;
616       break;
617     case kFrameTypeOverlay:
618       // Reserve the past coding_index in the future slot. This logic is from
619       // update_ref_frames() with condition vp9_preserve_existing_gf() == 1
620       // TODO(angiebird): Invetegate why we need this.
621       ref_frame_coding_indexes[kRefFrameTypeFuture] =
622           ref_frame_coding_indexes[kRefFrameTypePast];
623       ref_frame_coding_indexes[kRefFrameTypePast] = frame_coding_index;
624       break;
625     case kFrameTypeGolden:
626       ref_frame_coding_indexes[kRefFrameTypePast] = frame_coding_index;
627       ref_frame_coding_indexes[kRefFrameTypeLast] = frame_coding_index;
628       break;
629   }
630 
631   //  This part is written based on the logics in get_ref_frame_flags() but we
632   //  rename the flags alt, golden to future, past respectively. Mark
633   //  non-duplicated reference frames as valid. The priorities are
634   //  kRefFrameTypeLast > kRefFrameTypePast > kRefFrameTypeFuture.
635   const int last_index = ref_frame_coding_indexes[kRefFrameTypeLast];
636   const int past_index = ref_frame_coding_indexes[kRefFrameTypePast];
637   const int future_index = ref_frame_coding_indexes[kRefFrameTypeFuture];
638 
639   int *ref_frame_valid_list = ref_frame_info->valid_list;
640   for (int ref_frame_idx = 0; ref_frame_idx < kRefFrameTypeMax;
641        ++ref_frame_idx) {
642     ref_frame_valid_list[ref_frame_idx] = 1;
643   }
644 
645   if (past_index == last_index) {
646     ref_frame_valid_list[kRefFrameTypePast] = 0;
647   }
648 
649   if (future_index == last_index) {
650     ref_frame_valid_list[kRefFrameTypeFuture] = 0;
651   }
652 
653   if (future_index == past_index) {
654     ref_frame_valid_list[kRefFrameTypeFuture] = 0;
655   }
656 }
657 
SetGroupOfPicture(int first_is_key_frame,int use_alt_ref,int coding_frame_count,int first_show_idx,int last_gop_use_alt_ref,int start_coding_index,const RefFrameInfo & start_ref_frame_info,GroupOfPicture * group_of_picture)658 static void SetGroupOfPicture(int first_is_key_frame, int use_alt_ref,
659                               int coding_frame_count, int first_show_idx,
660                               int last_gop_use_alt_ref, int start_coding_index,
661                               const RefFrameInfo &start_ref_frame_info,
662                               GroupOfPicture *group_of_picture) {
663   // Clean up the state of previous group of picture.
664   group_of_picture->encode_frame_list.clear();
665   group_of_picture->next_encode_frame_index = 0;
666   group_of_picture->show_frame_count = coding_frame_count - use_alt_ref;
667   group_of_picture->start_show_index = first_show_idx;
668   group_of_picture->start_coding_index = start_coding_index;
669   group_of_picture->first_is_key_frame = first_is_key_frame;
670   group_of_picture->use_alt_ref = use_alt_ref;
671   group_of_picture->last_gop_use_alt_ref = last_gop_use_alt_ref;
672 
673   // We need to make a copy of start reference frame info because we
674   // use it to simulate the ref frame update.
675   RefFrameInfo ref_frame_info = start_ref_frame_info;
676 
677   {
678     // First frame in the group of pictures. It's either key frame or show inter
679     // frame.
680     EncodeFrameInfo encode_frame_info;
681     // Set frame_type
682     if (first_is_key_frame) {
683       encode_frame_info.frame_type = kFrameTypeKey;
684     } else {
685       if (last_gop_use_alt_ref) {
686         encode_frame_info.frame_type = kFrameTypeOverlay;
687       } else {
688         encode_frame_info.frame_type = kFrameTypeGolden;
689       }
690     }
691 
692     encode_frame_info.show_idx = first_show_idx;
693     encode_frame_info.coding_index = start_coding_index;
694 
695     encode_frame_info.ref_frame_info = ref_frame_info;
696     PostUpdateRefFrameInfo(encode_frame_info.frame_type,
697                            encode_frame_info.coding_index, &ref_frame_info);
698 
699     group_of_picture->encode_frame_list.push_back(encode_frame_info);
700   }
701 
702   const int show_frame_count = coding_frame_count - use_alt_ref;
703   if (use_alt_ref) {
704     // If there is alternate reference, it is always coded at the second place.
705     // Its show index (or timestamp) is at the last of this group
706     EncodeFrameInfo encode_frame_info;
707     encode_frame_info.frame_type = kFrameTypeAltRef;
708     encode_frame_info.show_idx = first_show_idx + show_frame_count;
709     encode_frame_info.coding_index = start_coding_index + 1;
710 
711     encode_frame_info.ref_frame_info = ref_frame_info;
712     PostUpdateRefFrameInfo(encode_frame_info.frame_type,
713                            encode_frame_info.coding_index, &ref_frame_info);
714 
715     group_of_picture->encode_frame_list.push_back(encode_frame_info);
716   }
717 
718   // Encode the rest show inter frames.
719   for (int i = 1; i < show_frame_count; ++i) {
720     EncodeFrameInfo encode_frame_info;
721     encode_frame_info.frame_type = kFrameTypeInter;
722     encode_frame_info.show_idx = first_show_idx + i;
723     encode_frame_info.coding_index = start_coding_index + use_alt_ref + i;
724 
725     encode_frame_info.ref_frame_info = ref_frame_info;
726     PostUpdateRefFrameInfo(encode_frame_info.frame_type,
727                            encode_frame_info.coding_index, &ref_frame_info);
728 
729     group_of_picture->encode_frame_list.push_back(encode_frame_info);
730   }
731 }
732 
733 // Gets group of picture information from VP9's decision, and update
734 // |group_of_picture| accordingly.
735 // This is called at the starting of encoding of each group of picture.
UpdateGroupOfPicture(const VP9_COMP * cpi,int start_coding_index,const RefFrameInfo & start_ref_frame_info,GroupOfPicture * group_of_picture)736 static void UpdateGroupOfPicture(const VP9_COMP *cpi, int start_coding_index,
737                                  const RefFrameInfo &start_ref_frame_info,
738                                  GroupOfPicture *group_of_picture) {
739   int first_is_key_frame;
740   int use_alt_ref;
741   int coding_frame_count;
742   int first_show_idx;
743   int last_gop_use_alt_ref;
744   vp9_get_next_group_of_picture(cpi, &first_is_key_frame, &use_alt_ref,
745                                 &coding_frame_count, &first_show_idx,
746                                 &last_gop_use_alt_ref);
747   SetGroupOfPicture(first_is_key_frame, use_alt_ref, coding_frame_count,
748                     first_show_idx, last_gop_use_alt_ref, start_coding_index,
749                     start_ref_frame_info, group_of_picture);
750 }
751 
// If |config|.name equals the spelling of |field|, parses |config|.value with
// atoi(), stores the result in |structure|->field and sets |ret| to 1.
// Otherwise nothing is modified.
// Every macro argument is parenthesized so that expressions such as `&obj` or
// `*ptr` can be passed safely.
#define SET_STRUCT_VALUE(config, structure, ret, field) \
  do {                                                  \
    if (strcmp((config).name, #field) == 0) {           \
      (structure)->field = atoi((config).value);        \
      (ret) = 1;                                        \
    }                                                   \
  } while (false)
759 
UpdateEncodeConfig(const EncodeConfig & config,VP9EncoderConfig * oxcf)760 static void UpdateEncodeConfig(const EncodeConfig &config,
761                                VP9EncoderConfig *oxcf) {
762   int ret = 0;
763   SET_STRUCT_VALUE(config, oxcf, ret, key_freq);
764   SET_STRUCT_VALUE(config, oxcf, ret, two_pass_vbrmin_section);
765   SET_STRUCT_VALUE(config, oxcf, ret, two_pass_vbrmax_section);
766   SET_STRUCT_VALUE(config, oxcf, ret, under_shoot_pct);
767   SET_STRUCT_VALUE(config, oxcf, ret, over_shoot_pct);
768   SET_STRUCT_VALUE(config, oxcf, ret, max_threads);
769   SET_STRUCT_VALUE(config, oxcf, ret, frame_parallel_decoding_mode);
770   SET_STRUCT_VALUE(config, oxcf, ret, tile_columns);
771   SET_STRUCT_VALUE(config, oxcf, ret, arnr_max_frames);
772   SET_STRUCT_VALUE(config, oxcf, ret, arnr_strength);
773   SET_STRUCT_VALUE(config, oxcf, ret, lag_in_frames);
774   SET_STRUCT_VALUE(config, oxcf, ret, encode_breakout);
775   SET_STRUCT_VALUE(config, oxcf, ret, enable_tpl_model);
776   SET_STRUCT_VALUE(config, oxcf, ret, enable_auto_arf);
777   if (strcmp(config.name, "rc_mode") == 0) {
778     int rc_mode = atoi(config.value);
779     if (rc_mode >= VPX_VBR && rc_mode <= VPX_Q) {
780       oxcf->rc_mode = (enum vpx_rc_mode)rc_mode;
781       ret = 1;
782     } else {
783       fprintf(stderr, "Invalid rc_mode value: %d\n", rc_mode);
784     }
785   }
786   SET_STRUCT_VALUE(config, oxcf, ret, cq_level);
787   if (ret == 0) {
788     fprintf(stderr, "Ignored unsupported encode_config %s\n", config.name);
789   }
790 }
791 
GetEncodeConfig(int frame_width,int frame_height,vpx_rational_t frame_rate,int target_bitrate,int encode_speed,int target_level,vpx_enc_pass enc_pass,const std::vector<EncodeConfig> & encode_config_list)792 static VP9EncoderConfig GetEncodeConfig(
793     int frame_width, int frame_height, vpx_rational_t frame_rate,
794     int target_bitrate, int encode_speed, int target_level,
795     vpx_enc_pass enc_pass,
796     const std::vector<EncodeConfig> &encode_config_list) {
797   VP9EncoderConfig oxcf = vp9_get_encoder_config(
798       frame_width, frame_height, frame_rate, target_bitrate, encode_speed,
799       target_level, enc_pass);
800   for (const auto &config : encode_config_list) {
801     UpdateEncodeConfig(config, &oxcf);
802   }
803   if (enc_pass == VPX_RC_FIRST_PASS) {
804     oxcf.lag_in_frames = 0;
805   }
806   oxcf.use_simple_encode_api = 1;
807   return oxcf;
808 }
809 
SimpleEncode(int frame_width,int frame_height,int frame_rate_num,int frame_rate_den,int target_bitrate,int num_frames,int target_level,const char * infile_path,const char * outfile_path)810 SimpleEncode::SimpleEncode(int frame_width, int frame_height,
811                            int frame_rate_num, int frame_rate_den,
812                            int target_bitrate, int num_frames, int target_level,
813                            const char *infile_path, const char *outfile_path) {
814   impl_ptr_ = std::unique_ptr<EncodeImpl>(new EncodeImpl());
815   frame_width_ = frame_width;
816   frame_height_ = frame_height;
817   frame_rate_num_ = frame_rate_num;
818   frame_rate_den_ = frame_rate_den;
819   target_bitrate_ = target_bitrate;
820   num_frames_ = num_frames;
821   encode_speed_ = 0;
822   target_level_ = target_level;
823 
824   frame_coding_index_ = 0;
825   show_frame_count_ = 0;
826 
827   key_frame_group_index_ = 0;
828   key_frame_group_size_ = 0;
829 
830   // TODO(angirbid): Should we keep a file pointer here or keep the file_path?
831   assert(infile_path != nullptr);
832   in_file_ = fopen(infile_path, "r");
833   if (outfile_path != nullptr) {
834     out_file_ = fopen(outfile_path, "w");
835   } else {
836     out_file_ = nullptr;
837   }
838   impl_ptr_->cpi = nullptr;
839   impl_ptr_->img_fmt = VPX_IMG_FMT_I420;
840 
841   InitRefFrameInfo(&ref_frame_info_);
842 }
843 
SetEncodeSpeed(int encode_speed)844 void SimpleEncode::SetEncodeSpeed(int encode_speed) {
845   encode_speed_ = encode_speed;
846 }
847 
SetEncodeConfig(const char * name,const char * value)848 StatusCode SimpleEncode::SetEncodeConfig(const char *name, const char *value) {
849   if (name == nullptr || value == nullptr) {
850     fprintf(stderr, "SetEncodeConfig: null pointer, name %p value %p\n", name,
851             value);
852     return StatusError;
853   }
854   EncodeConfig config;
855   snprintf(config.name, ENCODE_CONFIG_BUF_SIZE, "%s", name);
856   snprintf(config.value, ENCODE_CONFIG_BUF_SIZE, "%s", value);
857   impl_ptr_->encode_config_list.push_back(config);
858   return StatusOk;
859 }
860 
DumpEncodeConfigs(int pass,FILE * fp)861 StatusCode SimpleEncode::DumpEncodeConfigs(int pass, FILE *fp) {
862   if (fp == nullptr) {
863     fprintf(stderr, "DumpEncodeConfigs: null pointer, fp %p\n", fp);
864     return StatusError;
865   }
866   vpx_enc_pass enc_pass;
867   if (pass == 1) {
868     enc_pass = VPX_RC_FIRST_PASS;
869   } else {
870     enc_pass = VPX_RC_LAST_PASS;
871   }
872   const vpx_rational_t frame_rate =
873       make_vpx_rational(frame_rate_num_, frame_rate_den_);
874   const VP9EncoderConfig oxcf = GetEncodeConfig(
875       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
876       target_level_, enc_pass, impl_ptr_->encode_config_list);
877   vp9_dump_encoder_config(&oxcf, fp);
878   return StatusOk;
879 }
880 
ComputeFirstPassStats()881 void SimpleEncode::ComputeFirstPassStats() {
882   vpx_rational_t frame_rate =
883       make_vpx_rational(frame_rate_num_, frame_rate_den_);
884   const VP9EncoderConfig oxcf = GetEncodeConfig(
885       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
886       target_level_, VPX_RC_FIRST_PASS, impl_ptr_->encode_config_list);
887   impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
888   struct lookahead_ctx *lookahead = impl_ptr_->cpi->lookahead;
889   int i;
890   int use_highbitdepth = 0;
891   const int num_rows_16x16 = get_num_unit_16x16(frame_height_);
892   const int num_cols_16x16 = get_num_unit_16x16(frame_width_);
893 #if CONFIG_VP9_HIGHBITDEPTH
894   use_highbitdepth = impl_ptr_->cpi->common.use_highbitdepth;
895 #endif
896   vpx_image_t img;
897   if (impl_ptr_->img_fmt == VPX_IMG_FMT_NV12) {
898     fprintf(stderr, "VPX_IMG_FMT_NV12 is not supported\n");
899     abort();
900   }
901   vpx_img_alloc(&img, impl_ptr_->img_fmt, frame_width_, frame_height_, 1);
902   rewind(in_file_);
903   impl_ptr_->first_pass_stats.clear();
904   for (i = 0; i < num_frames_; ++i) {
905     assert(!vp9_lookahead_full(lookahead));
906     if (img_read(&img, in_file_)) {
907       int next_show_idx = vp9_lookahead_next_show_idx(lookahead);
908       int64_t ts_start =
909           timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx);
910       int64_t ts_end =
911           timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx + 1);
912       YV12_BUFFER_CONFIG sd;
913       image2yuvconfig(&img, &sd);
914       vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0);
915       {
916         int64_t time_stamp;
917         int64_t time_end;
918         int flush = 1;  // Makes vp9_get_compressed_data process a frame
919         size_t size;
920         unsigned int frame_flags = 0;
921         ENCODE_FRAME_RESULT encode_frame_info;
922         vp9_init_encode_frame_result(&encode_frame_info);
923         // TODO(angiebird): Call vp9_first_pass directly
924         vp9_get_compressed_data(impl_ptr_->cpi, &frame_flags, &size, nullptr, 0,
925                                 &time_stamp, &time_end, flush,
926                                 &encode_frame_info);
927         // vp9_get_compressed_data only generates first pass stats not
928         // compresses data
929         assert(size == 0);
930         // Get vp9 first pass motion vector info.
931         std::vector<MotionVectorInfo> mv_info(num_rows_16x16 * num_cols_16x16);
932         update_motion_vector_info(
933             impl_ptr_->cpi->fp_motion_vector_info, num_rows_16x16,
934             num_cols_16x16, mv_info.data(), kMotionVectorFullPixelPrecision);
935         fp_motion_vector_info_.push_back(mv_info);
936       }
937       impl_ptr_->first_pass_stats.push_back(
938           vp9_get_frame_stats(&impl_ptr_->cpi->twopass));
939     }
940   }
941   // TODO(angiebird): Store the total_stats apart form first_pass_stats
942   impl_ptr_->first_pass_stats.push_back(
943       vp9_get_total_stats(&impl_ptr_->cpi->twopass));
944   vp9_end_first_pass(impl_ptr_->cpi);
945 
946   // Generate key_frame_map based on impl_ptr_->first_pass_stats.
947   key_frame_map_ = ComputeKeyFrameMap();
948 
949   free_encoder(impl_ptr_->cpi);
950   impl_ptr_->cpi = nullptr;
951   rewind(in_file_);
952   vpx_img_free(&img);
953 }
954 
ObserveFirstPassStats()955 std::vector<std::vector<double>> SimpleEncode::ObserveFirstPassStats() {
956   std::vector<std::vector<double>> output_stats;
957   // TODO(angiebird): This function make several assumptions of
958   // FIRSTPASS_STATS. 1) All elements in FIRSTPASS_STATS are double except the
959   // last one. 2) The last entry of first_pass_stats is the total_stats.
960   // Change the code structure, so that we don't have to make these assumptions
961 
962   // Note the last entry of first_pass_stats is the total_stats, we don't need
963   // it.
964   for (size_t i = 0; i < impl_ptr_->first_pass_stats.size() - 1; ++i) {
965     double *buf_start =
966         reinterpret_cast<double *>(&impl_ptr_->first_pass_stats[i]);
967     // We use - 1 here because the last member in FIRSTPASS_STATS is not double
968     double *buf_end =
969         buf_start + sizeof(impl_ptr_->first_pass_stats[i]) / sizeof(*buf_end) -
970         1;
971     std::vector<double> this_stats(buf_start, buf_end);
972     output_stats.push_back(this_stats);
973   }
974   return output_stats;
975 }
976 
977 std::vector<std::vector<MotionVectorInfo>>
ObserveFirstPassMotionVectors()978 SimpleEncode::ObserveFirstPassMotionVectors() {
979   return fp_motion_vector_info_;
980 }
981 
SetExternalGroupOfPicturesMap(int * gop_map,int gop_map_size)982 void SimpleEncode::SetExternalGroupOfPicturesMap(int *gop_map,
983                                                  int gop_map_size) {
984   for (int i = 0; i < gop_map_size; ++i) {
985     gop_map_.push_back(gop_map[i]);
986   }
987   // The following will check and modify gop_map_ to make sure the
988   // gop_map_ satisfies the constraints.
989   // 1) Each key frame position should be at the start of a gop.
990   // 2) The last gop should not use an alt ref.
991   assert(gop_map_.size() == key_frame_map_.size());
992   int last_gop_start = 0;
993   for (int i = 0; static_cast<size_t>(i) < gop_map_.size(); ++i) {
994     if (key_frame_map_[i] == 1 && gop_map_[i] == 0) {
995       fprintf(stderr, "Add an extra gop start at show_idx %d\n", i);
996       // Insert a gop start at key frame location.
997       gop_map_[i] |= kGopMapFlagStart;
998       gop_map_[i] |= kGopMapFlagUseAltRef;
999     }
1000     if (gop_map_[i] & kGopMapFlagStart) {
1001       last_gop_start = i;
1002     }
1003   }
1004   if (gop_map_[last_gop_start] & kGopMapFlagUseAltRef) {
1005     fprintf(stderr,
1006             "Last group of pictures starting at show_idx %d shouldn't use alt "
1007             "ref\n",
1008             last_gop_start);
1009     gop_map_[last_gop_start] &= ~kGopMapFlagUseAltRef;
1010   }
1011 }
1012 
ObserveExternalGroupOfPicturesMap()1013 std::vector<int> SimpleEncode::ObserveExternalGroupOfPicturesMap() {
1014   return gop_map_;
1015 }
1016 
// Returns a mutable pointer to the first element of |v|, or nullptr when |v|
// is empty. The const qualifier is cast away so the pointer can be handed to
// interfaces that take a non-const pointer.
template <typename T>
T *GetVectorData(const std::vector<T> &v) {
  return v.empty() ? nullptr : const_cast<T *>(v.data());
}
1024 
GetGopCommand(const std::vector<int> & gop_map,int start_show_index)1025 static GOP_COMMAND GetGopCommand(const std::vector<int> &gop_map,
1026                                  int start_show_index) {
1027   GOP_COMMAND gop_command;
1028   if (static_cast<size_t>(start_show_index) < gop_map.size()) {
1029     assert((gop_map[start_show_index] & kGopMapFlagStart) != 0);
1030     int end_show_index = start_show_index + 1;
1031     // gop_map[end_show_index] & kGopMapFlagStart == 0 means this is
1032     // the start of a gop.
1033     while (static_cast<size_t>(end_show_index) < gop_map.size() &&
1034            (gop_map[end_show_index] & kGopMapFlagStart) == 0) {
1035       ++end_show_index;
1036     }
1037     const int show_frame_count = end_show_index - start_show_index;
1038     int use_alt_ref = (gop_map[start_show_index] & kGopMapFlagUseAltRef) != 0;
1039     if (static_cast<size_t>(end_show_index) == gop_map.size()) {
1040       // This is the last gop group, there must be no altref.
1041       use_alt_ref = 0;
1042     }
1043     gop_command_on(&gop_command, show_frame_count, use_alt_ref);
1044   } else {
1045     gop_command_off(&gop_command);
1046   }
1047   return gop_command;
1048 }
1049 
StartEncode()1050 void SimpleEncode::StartEncode() {
1051   assert(impl_ptr_->first_pass_stats.size() > 0);
1052   vpx_rational_t frame_rate =
1053       make_vpx_rational(frame_rate_num_, frame_rate_den_);
1054   VP9EncoderConfig oxcf = GetEncodeConfig(
1055       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
1056       target_level_, VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
1057 
1058   vpx_fixed_buf_t stats;
1059   stats.buf = GetVectorData(impl_ptr_->first_pass_stats);
1060   stats.sz = sizeof(impl_ptr_->first_pass_stats[0]) *
1061              impl_ptr_->first_pass_stats.size();
1062 
1063   vp9_set_first_pass_stats(&oxcf, &stats);
1064   assert(impl_ptr_->cpi == nullptr);
1065   impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
1066   if (impl_ptr_->img_fmt == VPX_IMG_FMT_NV12) {
1067     fprintf(stderr, "VPX_IMG_FMT_NV12 is not supported\n");
1068     abort();
1069   }
1070   vpx_img_alloc(&impl_ptr_->tmp_img, impl_ptr_->img_fmt, frame_width_,
1071                 frame_height_, 1);
1072 
1073   frame_coding_index_ = 0;
1074   show_frame_count_ = 0;
1075 
1076   assert(impl_ptr_->cpi != nullptr);
1077   FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
1078   unsigned int screen_area = frame_info.frame_width * frame_info.frame_height;
1079   vp9_init_vizier_params(&impl_ptr_->cpi->twopass, screen_area);
1080 
1081   UpdateKeyFrameGroup(show_frame_count_);
1082 
1083   const GOP_COMMAND gop_command = GetGopCommand(gop_map_, show_frame_count_);
1084   encode_command_set_gop_command(&impl_ptr_->cpi->encode_command, gop_command);
1085   UpdateGroupOfPicture(impl_ptr_->cpi, frame_coding_index_, ref_frame_info_,
1086                        &group_of_picture_);
1087   rewind(in_file_);
1088 
1089   if (out_file_ != nullptr) {
1090     const char *fourcc = "VP90";
1091     // In SimpleEncode, we use time_base = 1 / TICKS_PER_SEC.
1092     // Based on that, the ivf_timestamp for each image is set to
1093     // show_idx * TICKS_PER_SEC / frame_rate
1094     // such that each image's actual timestamp in seconds can be computed as
1095     // ivf_timestamp * time_base == show_idx / frame_rate
1096     // TODO(angiebird): 1) Add unit test for ivf timestamp.
1097     // 2) Simplify the frame_rate setting process.
1098     vpx_rational_t time_base = make_vpx_rational(1, TICKS_PER_SEC);
1099     ivf_write_file_header_with_video_info(out_file_, *(const uint32_t *)fourcc,
1100                                           num_frames_, frame_width_,
1101                                           frame_height_, time_base);
1102   }
1103 }
1104 
EndEncode()1105 void SimpleEncode::EndEncode() {
1106   free_encoder(impl_ptr_->cpi);
1107   impl_ptr_->cpi = nullptr;
1108   vpx_img_free(&impl_ptr_->tmp_img);
1109   rewind(in_file_);
1110 }
1111 
UpdateKeyFrameGroup(int key_frame_show_index)1112 void SimpleEncode::UpdateKeyFrameGroup(int key_frame_show_index) {
1113   const VP9_COMP *cpi = impl_ptr_->cpi;
1114   key_frame_group_index_ = 0;
1115   key_frame_group_size_ = vp9_get_frames_to_next_key(
1116       &cpi->oxcf, &cpi->twopass, key_frame_show_index, cpi->rc.min_gf_interval);
1117   assert(key_frame_group_size_ > 0);
1118   // Init the reference frame info when a new key frame group appears.
1119   InitRefFrameInfo(&ref_frame_info_);
1120 }
1121 
PostUpdateKeyFrameGroupIndex(FrameType frame_type)1122 void SimpleEncode::PostUpdateKeyFrameGroupIndex(FrameType frame_type) {
1123   if (frame_type != kFrameTypeAltRef) {
1124     // key_frame_group_index_ only counts show frames
1125     ++key_frame_group_index_;
1126   }
1127 }
1128 
GetKeyFrameGroupSize() const1129 int SimpleEncode::GetKeyFrameGroupSize() const { return key_frame_group_size_; }
1130 
ObserveGroupOfPicture() const1131 GroupOfPicture SimpleEncode::ObserveGroupOfPicture() const {
1132   return group_of_picture_;
1133 }
1134 
GetNextEncodeFrameInfo() const1135 EncodeFrameInfo SimpleEncode::GetNextEncodeFrameInfo() const {
1136   return group_of_picture_
1137       .encode_frame_list[group_of_picture_.next_encode_frame_index];
1138 }
1139 
PostUpdateState(const EncodeFrameResult & encode_frame_result)1140 void SimpleEncode::PostUpdateState(
1141     const EncodeFrameResult &encode_frame_result) {
1142   // This function needs to be called before the increament of
1143   // frame_coding_index_
1144   PostUpdateRefFrameInfo(encode_frame_result.frame_type, frame_coding_index_,
1145                          &ref_frame_info_);
1146   ++frame_coding_index_;
1147   if (encode_frame_result.frame_type != kFrameTypeAltRef) {
1148     // Only kFrameTypeAltRef is not a show frame
1149     ++show_frame_count_;
1150   }
1151 
1152   PostUpdateKeyFrameGroupIndex(encode_frame_result.frame_type);
1153   if (key_frame_group_index_ == key_frame_group_size_) {
1154     UpdateKeyFrameGroup(show_frame_count_);
1155   }
1156 
1157   IncreaseGroupOfPictureIndex(&group_of_picture_);
1158   if (IsGroupOfPictureFinished(group_of_picture_)) {
1159     const GOP_COMMAND gop_command = GetGopCommand(gop_map_, show_frame_count_);
1160     encode_command_set_gop_command(&impl_ptr_->cpi->encode_command,
1161                                    gop_command);
1162     // This function needs to be called after ref_frame_info_ is updated
1163     // properly in PostUpdateRefFrameInfo() and UpdateKeyFrameGroup().
1164     UpdateGroupOfPicture(impl_ptr_->cpi, frame_coding_index_, ref_frame_info_,
1165                          &group_of_picture_);
1166   }
1167 }
1168 
EncodeFrame(EncodeFrameResult * encode_frame_result)1169 void SimpleEncode::EncodeFrame(EncodeFrameResult *encode_frame_result) {
1170   VP9_COMP *cpi = impl_ptr_->cpi;
1171   struct lookahead_ctx *lookahead = cpi->lookahead;
1172   int use_highbitdepth = 0;
1173 #if CONFIG_VP9_HIGHBITDEPTH
1174   use_highbitdepth = cpi->common.use_highbitdepth;
1175 #endif
1176   // The lookahead's size is set to oxcf->lag_in_frames.
1177   // We want to fill lookahead to it's max capacity if possible so that the
1178   // encoder can construct alt ref frame in time.
1179   // In the other words, we hope vp9_get_compressed_data to encode a frame
1180   // every time in the function
1181   while (!vp9_lookahead_full(lookahead)) {
1182     // TODO(angiebird): Check whether we can move this file read logics to
1183     // lookahead
1184     if (img_read(&impl_ptr_->tmp_img, in_file_)) {
1185       int next_show_idx = vp9_lookahead_next_show_idx(lookahead);
1186       int64_t ts_start =
1187           timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts, next_show_idx);
1188       int64_t ts_end = timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts,
1189                                                next_show_idx + 1);
1190       YV12_BUFFER_CONFIG sd;
1191       image2yuvconfig(&impl_ptr_->tmp_img, &sd);
1192       vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0);
1193     } else {
1194       break;
1195     }
1196   }
1197 
1198   if (init_encode_frame_result(encode_frame_result, frame_width_, frame_height_,
1199                                impl_ptr_->img_fmt)) {
1200     int64_t time_stamp;
1201     int64_t time_end;
1202     int flush = 1;  // Make vp9_get_compressed_data encode a frame
1203     unsigned int frame_flags = 0;
1204     ENCODE_FRAME_RESULT encode_frame_info;
1205     vp9_init_encode_frame_result(&encode_frame_info);
1206     ImageBuffer_to_IMAGE_BUFFER(encode_frame_result->coded_frame,
1207                                 &encode_frame_info.coded_frame);
1208     vp9_get_compressed_data(cpi, &frame_flags,
1209                             &encode_frame_result->coding_data_byte_size,
1210                             encode_frame_result->coding_data.get(),
1211                             encode_frame_result->max_coding_data_byte_size,
1212                             &time_stamp, &time_end, flush, &encode_frame_info);
1213     if (out_file_ != nullptr) {
1214       ivf_write_frame_header(out_file_, time_stamp,
1215                              encode_frame_result->coding_data_byte_size);
1216       fwrite(encode_frame_result->coding_data.get(), 1,
1217              encode_frame_result->coding_data_byte_size, out_file_);
1218     }
1219 
1220     // vp9_get_compressed_data is expected to encode a frame every time, so the
1221     // data size should be greater than zero.
1222     if (encode_frame_result->coding_data_byte_size <= 0) {
1223       fprintf(stderr, "Coding data size <= 0\n");
1224       abort();
1225     }
1226     if (encode_frame_result->coding_data_byte_size >
1227         encode_frame_result->max_coding_data_byte_size) {
1228       fprintf(stderr, "Coding data size exceeds the maximum.\n");
1229       abort();
1230     }
1231 
1232     const GroupOfPicture group_of_picture = this->ObserveGroupOfPicture();
1233     const int show_frame_count = group_of_picture.show_frame_count;
1234     update_encode_frame_result(encode_frame_result, show_frame_count,
1235                                &encode_frame_info);
1236     PostUpdateState(*encode_frame_result);
1237   } else {
1238     // TODO(angiebird): Clean up encode_frame_result.
1239     fprintf(stderr, "init_encode_frame_result() failed.\n");
1240     this->EndEncode();
1241   }
1242 }
1243 
EncodeFrameWithQuantizeIndex(EncodeFrameResult * encode_frame_result,int quantize_index)1244 void SimpleEncode::EncodeFrameWithQuantizeIndex(
1245     EncodeFrameResult *encode_frame_result, int quantize_index) {
1246   encode_command_set_external_quantize_index(&impl_ptr_->cpi->encode_command,
1247                                              quantize_index);
1248   EncodeFrame(encode_frame_result);
1249   encode_command_reset_external_quantize_index(&impl_ptr_->cpi->encode_command);
1250 }
1251 
EncodeFrameWithTargetFrameBits(EncodeFrameResult * encode_frame_result,int target_frame_bits,double percent_diff)1252 void SimpleEncode::EncodeFrameWithTargetFrameBits(
1253     EncodeFrameResult *encode_frame_result, int target_frame_bits,
1254     double percent_diff) {
1255   encode_command_set_target_frame_bits(&impl_ptr_->cpi->encode_command,
1256                                        target_frame_bits, percent_diff);
1257   EncodeFrame(encode_frame_result);
1258   encode_command_reset_target_frame_bits(&impl_ptr_->cpi->encode_command);
1259 }
1260 
GetCodingFrameNumFromGopMap(const std::vector<int> & gop_map)1261 static int GetCodingFrameNumFromGopMap(const std::vector<int> &gop_map) {
1262   int start_show_index = 0;
1263   int coding_frame_count = 0;
1264   while (static_cast<size_t>(start_show_index) < gop_map.size()) {
1265     const GOP_COMMAND gop_command = GetGopCommand(gop_map, start_show_index);
1266     start_show_index += gop_command.show_frame_count;
1267     coding_frame_count += gop_command_coding_frame_count(&gop_command);
1268   }
1269   assert(static_cast<size_t>(start_show_index) == gop_map.size());
1270   return coding_frame_count;
1271 }
1272 
GetCodingFrameNum() const1273 int SimpleEncode::GetCodingFrameNum() const {
1274   assert(impl_ptr_->first_pass_stats.size() > 0);
1275   if (gop_map_.size() > 0) {
1276     return GetCodingFrameNumFromGopMap(gop_map_);
1277   }
1278 
1279   // These are the default settings for now.
1280   TWO_PASS twopass;
1281   const int multi_layer_arf = 0;
1282   const int allow_alt_ref = 1;
1283   vpx_rational_t frame_rate =
1284       make_vpx_rational(frame_rate_num_, frame_rate_den_);
1285   const VP9EncoderConfig oxcf = GetEncodeConfig(
1286       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
1287       target_level_, VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
1288   FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
1289   fps_init_first_pass_info(&twopass.first_pass_info,
1290                            GetVectorData(impl_ptr_->first_pass_stats),
1291                            num_frames_);
1292   unsigned int screen_area = frame_info.frame_width * frame_info.frame_height;
1293   vp9_init_vizier_params(&twopass, screen_area);
1294   return vp9_get_coding_frame_num(&oxcf, &twopass, &frame_info, multi_layer_arf,
1295                                   allow_alt_ref);
1296 }
1297 
ComputeKeyFrameMap() const1298 std::vector<int> SimpleEncode::ComputeKeyFrameMap() const {
1299   // The last entry of first_pass_stats is the overall stats.
1300   assert(impl_ptr_->first_pass_stats.size() ==
1301          static_cast<size_t>(num_frames_) + 1);
1302   vpx_rational_t frame_rate =
1303       make_vpx_rational(frame_rate_num_, frame_rate_den_);
1304   const VP9EncoderConfig oxcf = GetEncodeConfig(
1305       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
1306       target_level_, VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
1307   TWO_PASS twopass;
1308   fps_init_first_pass_info(&twopass.first_pass_info,
1309                            GetVectorData(impl_ptr_->first_pass_stats),
1310                            num_frames_);
1311   std::vector<int> key_frame_map(num_frames_, 0);
1312   vp9_get_key_frame_map(&oxcf, &twopass, GetVectorData(key_frame_map));
1313   return key_frame_map;
1314 }
1315 
ObserveKeyFrameMap() const1316 std::vector<int> SimpleEncode::ObserveKeyFrameMap() const {
1317   return key_frame_map_;
1318 }
1319 
GetFramePixelCount() const1320 uint64_t SimpleEncode::GetFramePixelCount() const {
1321   assert(frame_width_ % 2 == 0);
1322   assert(frame_height_ % 2 == 0);
1323   switch (impl_ptr_->img_fmt) {
1324     case VPX_IMG_FMT_I420: return frame_width_ * frame_height_ * 3 / 2;
1325     case VPX_IMG_FMT_I422: return frame_width_ * frame_height_ * 2;
1326     case VPX_IMG_FMT_I444: return frame_width_ * frame_height_ * 3;
1327     case VPX_IMG_FMT_I440: return frame_width_ * frame_height_ * 2;
1328     case VPX_IMG_FMT_I42016: return frame_width_ * frame_height_ * 3 / 2;
1329     case VPX_IMG_FMT_I42216: return frame_width_ * frame_height_ * 2;
1330     case VPX_IMG_FMT_I44416: return frame_width_ * frame_height_ * 3;
1331     case VPX_IMG_FMT_I44016: return frame_width_ * frame_height_ * 2;
1332     default: return 0;
1333   }
1334 }
1335 
~SimpleEncode()1336 SimpleEncode::~SimpleEncode() {
1337   if (in_file_ != nullptr) {
1338     fclose(in_file_);
1339   }
1340   if (out_file_ != nullptr) {
1341     fclose(out_file_);
1342   }
1343 }
1344 
1345 }  // namespace vp9
1346