xref: /aosp_15_r20/external/libaom/examples/svc_encoder_rtc.cc (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 //  This is an example demonstrating how to implement a multi-layer AOM
13 //  encoding scheme for RTC video applications.
14 
15 #include <assert.h>
16 #include <limits.h>
17 #include <math.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 
22 #include <memory>
23 
24 #include "config/aom_config.h"
25 
26 #if CONFIG_AV1_DECODER
27 #include "aom/aom_decoder.h"
28 #endif
29 #include "aom/aom_encoder.h"
30 #include "aom/aomcx.h"
31 #include "common/args.h"
32 #include "common/tools_common.h"
33 #include "common/video_writer.h"
34 #include "examples/encoder_util.h"
35 #include "aom_ports/aom_timer.h"
36 #include "av1/ratectrl_rtc.h"
37 
38 #define OPTION_BUFFER_SIZE 1024
39 
// Application-level settings collected by parse_command_line().
typedef struct {
  // Output path, taken from -o/--output.
  const char *output_filename;
  // Free-form option string buffer (OPTION_BUFFER_SIZE bytes).
  char options[OPTION_BUFFER_SIZE];
  // Input file state; filename is set by parse_command_line() and the rest
  // is filled in by open_input_file().
  struct AvxInputContext input_ctx;
  // Value of -sp/--speed.
  int speed;
  // Value of -aq/--aqmode.
  int aq_mode;
  // Value of -lm/--layering-mode (defaults to 0).
  int layering_mode;
  // 1 when --output-obu=1 was given, otherwise 0 (default).
  int output_obu;
  // Value of --test-decode, restricted to 0 or 1 (defaults to 1).
  int decode;
  // Value of --tune-content.
  int tune_content;
  // Set to 1 when --psnr is present.
  int show_psnr;
  // Set to true when --use-ext-rc is present.
  bool use_external_rc;
} AppInput;
53 
// Kinds of comma-separated per-layer option lists understood by
// extract_option() / parse_layer_options_from_string(). The enumerator value
// is also the index into option_min_values[] and option_max_values[].
typedef enum {
  QUANTIZER = 0,
  // One value per (spatial layer, temporal layer) pair.
  BITRATE,
  // One "num/den" pair per spatial layer.
  SCALE_FACTOR,
  AUTO_ALT_REF,
  // Number of option types; sizes the min/max tables.
  ALL_OPTION_TYPES
} LAYER_OPTION_TYPE;
61 
// Command-line option descriptors. Each one is matched with arg_match() in
// parse_command_line(); the last ARG_DEF string is the help text shown by
// arg_show_usage().
static const arg_def_t outputfile =
    ARG_DEF("o", "output", 1, "Output filename");
static const arg_def_t frames_arg =
    ARG_DEF("f", "frames", 1, "Number of frames to encode");
static const arg_def_t threads_arg =
    ARG_DEF("th", "threads", 1, "Number of threads to use");
static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width");
static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height");
static const arg_def_t timebase_arg =
    ARG_DEF("t", "timebase", 1, "Timebase (num/den)");
static const arg_def_t bitrate_arg = ARG_DEF(
    "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second");
static const arg_def_t spatial_layers_arg =
    ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers");
static const arg_def_t temporal_layers_arg =
    ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers");
static const arg_def_t layering_mode_arg =
    ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme.");
static const arg_def_t kf_dist_arg =
    ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes");
static const arg_def_t scale_factors_arg =
    ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)");
static const arg_def_t min_q_arg =
    ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
static const arg_def_t max_q_arg =
    ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
static const arg_def_t speed_arg =
    ARG_DEF("sp", "speed", 1, "Speed configuration");
static const arg_def_t aqmode_arg =
    ARG_DEF("aq", "aqmode", 1, "AQ mode off/on");
static const arg_def_t bitrates_arg =
    ARG_DEF("bl", "bitrates", 1,
            "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
static const arg_def_t dropframe_thresh_arg =
    ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
static const arg_def_t error_resilient_arg =
    ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag");
static const arg_def_t output_obu_arg =
    ARG_DEF(NULL, "output-obu", 1,
            "Write OBUs when set to 1. Otherwise write IVF files.");
static const arg_def_t test_decode_arg =
    ARG_DEF(NULL, "test-decode", 1,
            "Attempt to test decoding the output when set to 1. Default is 1.");
static const arg_def_t psnr_arg =
    ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line.");
static const arg_def_t ext_rc_arg =
    ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control.");
// Symbolic names accepted by --tune-content; the list is NULL-terminated.
static const struct arg_enum_list tune_content_enum[] = {
  { "default", AOM_CONTENT_DEFAULT },
  { "screen", AOM_CONTENT_SCREEN },
  { "film", AOM_CONTENT_FILM },
  { NULL, 0 }
};
static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
    NULL, "tune-content", 1, "Tune content type", tune_content_enum);

#if CONFIG_AV1_HIGHBITDEPTH
// Bit depths accepted by -d/--bit-depth; only available in high-bitdepth
// builds.
static const struct arg_enum_list bitdepth_enum[] = { { "8", AOM_BITS_8 },
                                                      { "10", AOM_BITS_10 },
                                                      { NULL, 0 } };

static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
    "d", "bit-depth", 1, "Bit depth for codec 8 or 10. ", bitdepth_enum);
#endif  // CONFIG_AV1_HIGHBITDEPTH
126 
127 static const arg_def_t *svc_args[] = {
128   &frames_arg,          &outputfile,     &width_arg,
129   &height_arg,          &timebase_arg,   &bitrate_arg,
130   &spatial_layers_arg,  &kf_dist_arg,    &scale_factors_arg,
131   &min_q_arg,           &max_q_arg,      &temporal_layers_arg,
132   &layering_mode_arg,   &threads_arg,    &aqmode_arg,
133 #if CONFIG_AV1_HIGHBITDEPTH
134   &bitdepth_arg,
135 #endif
136   &speed_arg,           &bitrates_arg,   &dropframe_thresh_arg,
137   &error_resilient_arg, &output_obu_arg, &test_decode_arg,
138   &tune_content_arg,    &psnr_arg,       NULL,
139 };
140 
// Zero-fills the object Dest. sizeof is applied to Dest itself, so pass the
// object (struct or array), not a pointer to it.
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))

// Program name printed by usage_exit(). It is not assigned in this part of
// the file.
static const char *exec_name;
144 
usage_exit(void)145 void usage_exit(void) {
146   fprintf(stderr, "Usage: %s <options> input_filename -o output_filename\n",
147           exec_name);
148   fprintf(stderr, "Options:\n");
149   arg_show_usage(stderr, svc_args);
150   exit(EXIT_FAILURE);
151 }
152 
// Returns 1 when the first four bytes of |detect| are the Y4M signature
// "YUV4", otherwise 0.
static int file_is_y4m(const char detect[4]) {
  static const char kY4mMagic[4] = { 'Y', 'U', 'V', '4' };
  for (int i = 0; i < 4; ++i) {
    if (detect[i] != kY4mMagic[i]) return 0;
  }
  return 1;
}
156 
// Returns 1 when the first four bytes of |detect| are the IVF signature
// "DKIF", otherwise 0.
static int fourcc_is_ivf(const char detect[4]) {
  return memcmp(detect, "DKIF", 4) == 0 ? 1 : 0;
}
163 
// Inclusive upper bound for each LAYER_OPTION_TYPE, in enum order:
// QUANTIZER, BITRATE, SCALE_FACTOR, AUTO_ALT_REF.
static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
                                                         1 };

// Inclusive lower bound for each LAYER_OPTION_TYPE, in the same order.
static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
168 
open_input_file(struct AvxInputContext * input,aom_chroma_sample_position_t csp)169 static void open_input_file(struct AvxInputContext *input,
170                             aom_chroma_sample_position_t csp) {
171   /* Parse certain options from the input file, if possible */
172   input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
173                                              : set_binary_mode(stdin);
174 
175   if (!input->file) fatal("Failed to open input file");
176 
177   if (!fseeko(input->file, 0, SEEK_END)) {
178     /* Input file is seekable. Figure out how long it is, so we can get
179      * progress info.
180      */
181     input->length = ftello(input->file);
182     rewind(input->file);
183   }
184 
185   /* Default to 1:1 pixel aspect ratio. */
186   input->pixel_aspect_ratio.numerator = 1;
187   input->pixel_aspect_ratio.denominator = 1;
188 
189   /* For RAW input sources, these bytes will applied on the first frame
190    *  in read_frame().
191    */
192   input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
193   input->detect.position = 0;
194 
195   if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
196     if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
197                        input->only_i420) >= 0) {
198       input->file_type = FILE_TYPE_Y4M;
199       input->width = input->y4m.pic_w;
200       input->height = input->y4m.pic_h;
201       input->pixel_aspect_ratio.numerator = input->y4m.par_n;
202       input->pixel_aspect_ratio.denominator = input->y4m.par_d;
203       input->framerate.numerator = input->y4m.fps_n;
204       input->framerate.denominator = input->y4m.fps_d;
205       input->fmt = input->y4m.aom_fmt;
206       input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
207     } else {
208       fatal("Unsupported Y4M stream.");
209     }
210   } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
211     fatal("IVF is not supported as input.");
212   } else {
213     input->file_type = FILE_TYPE_RAW;
214   }
215 }
216 
extract_option(LAYER_OPTION_TYPE type,char * input,int * value0,int * value1)217 static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input,
218                                       int *value0, int *value1) {
219   if (type == SCALE_FACTOR) {
220     *value0 = (int)strtol(input, &input, 10);
221     if (*input++ != '/') return AOM_CODEC_INVALID_PARAM;
222     *value1 = (int)strtol(input, &input, 10);
223 
224     if (*value0 < option_min_values[SCALE_FACTOR] ||
225         *value1 < option_min_values[SCALE_FACTOR] ||
226         *value0 > option_max_values[SCALE_FACTOR] ||
227         *value1 > option_max_values[SCALE_FACTOR] ||
228         *value0 > *value1)  // num shouldn't be greater than den
229       return AOM_CODEC_INVALID_PARAM;
230   } else {
231     *value0 = atoi(input);
232     if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
233       return AOM_CODEC_INVALID_PARAM;
234   }
235   return AOM_CODEC_OK;
236 }
237 
parse_layer_options_from_string(aom_svc_params_t * svc_params,LAYER_OPTION_TYPE type,const char * input,int * option0,int * option1)238 static aom_codec_err_t parse_layer_options_from_string(
239     aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input,
240     int *option0, int *option1) {
241   aom_codec_err_t res = AOM_CODEC_OK;
242   char *input_string;
243   char *token;
244   const char *delim = ",";
245   int num_layers = svc_params->number_spatial_layers;
246   int i = 0;
247 
248   if (type == BITRATE)
249     num_layers =
250         svc_params->number_spatial_layers * svc_params->number_temporal_layers;
251 
252   if (input == NULL || option0 == NULL ||
253       (option1 == NULL && type == SCALE_FACTOR))
254     return AOM_CODEC_INVALID_PARAM;
255 
256   const size_t input_length = strlen(input);
257   input_string = reinterpret_cast<char *>(malloc(input_length + 1));
258   if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
259   memcpy(input_string, input, input_length + 1);
260   token = strtok(input_string, delim);  // NOLINT
261   for (i = 0; i < num_layers; ++i) {
262     if (token != NULL) {
263       res = extract_option(type, token, option0 + i, option1 + i);
264       if (res != AOM_CODEC_OK) break;
265       token = strtok(NULL, delim);  // NOLINT
266     } else {
267       res = AOM_CODEC_INVALID_PARAM;
268       break;
269     }
270   }
271   free(input_string);
272   return res;
273 }
274 
parse_command_line(int argc,const char ** argv_,AppInput * app_input,aom_svc_params_t * svc_params,aom_codec_enc_cfg_t * enc_cfg)275 static void parse_command_line(int argc, const char **argv_,
276                                AppInput *app_input,
277                                aom_svc_params_t *svc_params,
278                                aom_codec_enc_cfg_t *enc_cfg) {
279   struct arg arg;
280   char **argv = NULL;
281   char **argi = NULL;
282   char **argj = NULL;
283   char string_options[1024] = { 0 };
284 
285   // Default settings
286   svc_params->number_spatial_layers = 1;
287   svc_params->number_temporal_layers = 1;
288   app_input->layering_mode = 0;
289   app_input->output_obu = 0;
290   app_input->decode = 1;
291   enc_cfg->g_threads = 1;
292   enc_cfg->rc_end_usage = AOM_CBR;
293 
294   // process command line options
295   argv = argv_dup(argc - 1, argv_ + 1);
296   if (!argv) {
297     fprintf(stderr, "Error allocating argument list\n");
298     exit(EXIT_FAILURE);
299   }
300   for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
301     arg.argv_step = 1;
302 
303     if (arg_match(&arg, &outputfile, argi)) {
304       app_input->output_filename = arg.val;
305     } else if (arg_match(&arg, &width_arg, argi)) {
306       enc_cfg->g_w = arg_parse_uint(&arg);
307     } else if (arg_match(&arg, &height_arg, argi)) {
308       enc_cfg->g_h = arg_parse_uint(&arg);
309     } else if (arg_match(&arg, &timebase_arg, argi)) {
310       enc_cfg->g_timebase = arg_parse_rational(&arg);
311     } else if (arg_match(&arg, &bitrate_arg, argi)) {
312       enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
313     } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
314       svc_params->number_spatial_layers = arg_parse_uint(&arg);
315     } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
316       svc_params->number_temporal_layers = arg_parse_uint(&arg);
317     } else if (arg_match(&arg, &speed_arg, argi)) {
318       app_input->speed = arg_parse_uint(&arg);
319       if (app_input->speed > 11) {
320         aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed);
321       }
322     } else if (arg_match(&arg, &aqmode_arg, argi)) {
323       app_input->aq_mode = arg_parse_uint(&arg);
324     } else if (arg_match(&arg, &threads_arg, argi)) {
325       enc_cfg->g_threads = arg_parse_uint(&arg);
326     } else if (arg_match(&arg, &layering_mode_arg, argi)) {
327       app_input->layering_mode = arg_parse_int(&arg);
328     } else if (arg_match(&arg, &kf_dist_arg, argi)) {
329       enc_cfg->kf_min_dist = arg_parse_uint(&arg);
330       enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
331     } else if (arg_match(&arg, &scale_factors_arg, argi)) {
332       aom_codec_err_t res = parse_layer_options_from_string(
333           svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num,
334           svc_params->scaling_factor_den);
335       if (res != AOM_CODEC_OK) {
336         die("Failed to parse scale factors: %s\n",
337             aom_codec_err_to_string(res));
338       }
339     } else if (arg_match(&arg, &min_q_arg, argi)) {
340       enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
341     } else if (arg_match(&arg, &max_q_arg, argi)) {
342       enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
343 #if CONFIG_AV1_HIGHBITDEPTH
344     } else if (arg_match(&arg, &bitdepth_arg, argi)) {
345       enc_cfg->g_bit_depth =
346           static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg));
347       switch (enc_cfg->g_bit_depth) {
348         case AOM_BITS_8:
349           enc_cfg->g_input_bit_depth = 8;
350           enc_cfg->g_profile = 0;
351           break;
352         case AOM_BITS_10:
353           enc_cfg->g_input_bit_depth = 10;
354           enc_cfg->g_profile = 0;
355           break;
356         default:
357           die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
358       }
359 #endif  // CONFIG_VP9_HIGHBITDEPTH
360     } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
361       enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
362     } else if (arg_match(&arg, &error_resilient_arg, argi)) {
363       enc_cfg->g_error_resilient = arg_parse_uint(&arg);
364       if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1)
365         die("Invalid value for error resilient (0, 1): %d.",
366             enc_cfg->g_error_resilient);
367     } else if (arg_match(&arg, &output_obu_arg, argi)) {
368       app_input->output_obu = arg_parse_uint(&arg);
369       if (app_input->output_obu != 0 && app_input->output_obu != 1)
370         die("Invalid value for obu output flag (0, 1): %d.",
371             app_input->output_obu);
372     } else if (arg_match(&arg, &test_decode_arg, argi)) {
373       app_input->decode = arg_parse_uint(&arg);
374       if (app_input->decode != 0 && app_input->decode != 1)
375         die("Invalid value for test decode flag (0, 1): %d.",
376             app_input->decode);
377     } else if (arg_match(&arg, &tune_content_arg, argi)) {
378       app_input->tune_content = arg_parse_enum_or_int(&arg);
379       printf("tune content %d\n", app_input->tune_content);
380     } else if (arg_match(&arg, &psnr_arg, argi)) {
381       app_input->show_psnr = 1;
382     } else if (arg_match(&arg, &ext_rc_arg, argi)) {
383       app_input->use_external_rc = true;
384     } else {
385       ++argj;
386     }
387   }
388 
389   // Total bitrate needs to be parsed after the number of layers.
390   for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
391     arg.argv_step = 1;
392     if (arg_match(&arg, &bitrates_arg, argi)) {
393       aom_codec_err_t res = parse_layer_options_from_string(
394           svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL);
395       if (res != AOM_CODEC_OK) {
396         die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res));
397       }
398     } else {
399       ++argj;
400     }
401   }
402 
403   // There will be a space in front of the string options
404   if (strlen(string_options) > 0)
405     strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
406 
407   // Check for unrecognized options
408   for (argi = argv; *argi; ++argi)
409     if (argi[0][0] == '-' && strlen(argi[0]) > 1)
410       die("Error: Unrecognized option %s\n", *argi);
411 
412   if (argv[0] == NULL) {
413     usage_exit();
414   }
415 
416   app_input->input_ctx.filename = argv[0];
417   free(argv);
418 
419   open_input_file(&app_input->input_ctx, AOM_CSP_UNKNOWN);
420   if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) {
421     enc_cfg->g_w = app_input->input_ctx.width;
422     enc_cfg->g_h = app_input->input_ctx.height;
423   }
424 
425   if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
426       enc_cfg->g_h % 2)
427     die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
428 
429   printf(
430       "Codec %s\n"
431       "layers: %d\n"
432       "width %u, height: %u\n"
433       "num: %d, den: %d, bitrate: %u\n"
434       "gop size: %u\n",
435       aom_codec_iface_name(aom_codec_av1_cx()),
436       svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
437       enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
438       enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
439 }
440 
// Number of temporal layers for each of the 12 table entries.
// NOTE(review): presumably indexed by AppInput::layering_mode (0-11); the
// lookup itself is outside this part of the file - confirm against the caller.
static int mode_to_num_temporal_layers[12] = {
  1, 2, 3, 3, 2, 1, 1, 3, 3, 3, 3, 3,
};
// Number of spatial layers for each of the 12 table entries (same indexing
// as the temporal table above).
static int mode_to_num_spatial_layers[12] = {
  1, 1, 1, 1, 1, 2, 3, 2, 3, 3, 3, 3,
};
447 
// For rate control encoding stats.
// Arrays sized AOM_MAX_TS_LAYERS are indexed by temporal layer; arrays sized
// AOM_MAX_LAYERS are indexed by
// spatial_layer * number_of_temporal_layers + temporal_layer.
struct RateControlMetrics {
  // Number of input frames per layer.
  int layer_input_frames[AOM_MAX_TS_LAYERS];
  // Number of encoded non-key frames per layer.
  int layer_enc_frames[AOM_MAX_TS_LAYERS];
  // Framerate per layer (cumulative).
  double layer_framerate[AOM_MAX_TS_LAYERS];
  // Target average frame size per layer (per-frame-bandwidth per layer).
  double layer_pfb[AOM_MAX_LAYERS];
  // Actual average frame size per layer.
  double layer_avg_frame_size[AOM_MAX_LAYERS];
  // Average rate mismatch per layer (|target - actual| / target).
  double layer_avg_rate_mismatch[AOM_MAX_LAYERS];
  // Actual encoding bitrate per layer (cumulative across temporal layers).
  double layer_encoding_bitrate[AOM_MAX_LAYERS];
  // Average of the short-time encoder actual bitrate.
  // TODO(marpan): Should we add these short-time stats for each layer?
  double avg_st_encoding_bitrate;
  // Variance of the short-time encoder actual bitrate.
  double variance_st_encoding_bitrate;
  // Window (number of frames) for computing short-time encoding bitrate.
  int window_size;
  // Number of window measurements.
  int window_count;
  // Target bitrate per layer; read by set_rate_control_metrics() to derive
  // layer_pfb.
  int layer_target_bitrate[AOM_MAX_LAYERS];
};
475 
// Number of reference buffer slots; bounds the refresh[] array of the
// reference frame config.
static const int REF_FRAMES = 8;

// Number of inter references per frame; bounds the ref_idx[] and reference[]
// arrays of the reference frame config.
static const int INTER_REFS_PER_FRAME = 7;
479 
// Reference frames used in this example encoder.
// The values index the ref_idx[] and reference[] arrays of the reference
// frame config: LAST (0), LAST2 (1), LAST3 (2), GOLDEN (3), BWDREF (4),
// ALTREF2 (5), ALTREF (6).
enum {
  SVC_LAST_FRAME = 0,
  SVC_LAST2_FRAME,
  SVC_LAST3_FRAME,
  SVC_GOLDEN_FRAME,
  SVC_BWDREF_FRAME,
  SVC_ALTREF2_FRAME,
  SVC_ALTREF_FRAME
};
490 
read_frame(struct AvxInputContext * input_ctx,aom_image_t * img)491 static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
492   FILE *f = input_ctx->file;
493   y4m_input *y4m = &input_ctx->y4m;
494   int shortread = 0;
495 
496   if (input_ctx->file_type == FILE_TYPE_Y4M) {
497     if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
498   } else {
499     shortread = read_yuv_frame(input_ctx, img);
500   }
501 
502   return !shortread;
503 }
504 
close_input_file(struct AvxInputContext * input)505 static void close_input_file(struct AvxInputContext *input) {
506   fclose(input->file);
507   if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
508 }
509 
510 // Note: these rate control metrics assume only 1 key frame in the
511 // sequence (i.e., first frame only). So for temporal pattern# 7
512 // (which has key frame for every frame on base layer), the metrics
513 // computation will be off/wrong.
514 // TODO(marpan): Update these metrics to account for multiple key frames
515 // in the stream.
set_rate_control_metrics(struct RateControlMetrics * rc,double framerate,int ss_number_layers,int ts_number_layers)516 static void set_rate_control_metrics(struct RateControlMetrics *rc,
517                                      double framerate, int ss_number_layers,
518                                      int ts_number_layers) {
519   int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 };
520   ts_rate_decimator[0] = 1;
521   if (ts_number_layers == 2) {
522     ts_rate_decimator[0] = 2;
523     ts_rate_decimator[1] = 1;
524   }
525   if (ts_number_layers == 3) {
526     ts_rate_decimator[0] = 4;
527     ts_rate_decimator[1] = 2;
528     ts_rate_decimator[2] = 1;
529   }
530   // Set the layer (cumulative) framerate and the target layer (non-cumulative)
531   // per-frame-bandwidth, for the rate control encoding stats below.
532   for (int sl = 0; sl < ss_number_layers; ++sl) {
533     int i = sl * ts_number_layers;
534     rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
535     rc->layer_pfb[i] =
536         1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
537     for (int tl = 0; tl < ts_number_layers; ++tl) {
538       i = sl * ts_number_layers + tl;
539       if (tl > 0) {
540         rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
541         rc->layer_pfb[i] =
542             1000.0 *
543             (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
544             (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
545       }
546       rc->layer_input_frames[tl] = 0;
547       rc->layer_enc_frames[tl] = 0;
548       rc->layer_encoding_bitrate[i] = 0.0;
549       rc->layer_avg_frame_size[i] = 0.0;
550       rc->layer_avg_rate_mismatch[i] = 0.0;
551     }
552   }
553   rc->window_count = 0;
554   rc->window_size = 15;
555   rc->avg_st_encoding_bitrate = 0.0;
556   rc->variance_st_encoding_bitrate = 0.0;
557 }
558 
printout_rate_control_summary(struct RateControlMetrics * rc,int frame_cnt,int ss_number_layers,int ts_number_layers)559 static void printout_rate_control_summary(struct RateControlMetrics *rc,
560                                           int frame_cnt, int ss_number_layers,
561                                           int ts_number_layers) {
562   int tot_num_frames = 0;
563   double perc_fluctuation = 0.0;
564   printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
565   printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
566   for (int sl = 0; sl < ss_number_layers; ++sl) {
567     tot_num_frames = 0;
568     for (int tl = 0; tl < ts_number_layers; ++tl) {
569       int i = sl * ts_number_layers + tl;
570       const int num_dropped =
571           tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
572                  : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
573       tot_num_frames += rc->layer_input_frames[tl];
574       rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
575                                       rc->layer_encoding_bitrate[i] /
576                                       tot_num_frames;
577       rc->layer_avg_frame_size[i] =
578           rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
579       rc->layer_avg_rate_mismatch[i] =
580           100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
581       printf("For layer#: %d %d \n", sl, tl);
582       printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
583              rc->layer_encoding_bitrate[i]);
584       printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
585              rc->layer_avg_frame_size[i]);
586       printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
587       printf(
588           "Number of input frames, encoded (non-key) frames, "
589           "and perc dropped frames: %d %d %f\n",
590           rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
591           100.0 * num_dropped / rc->layer_input_frames[tl]);
592       printf("\n");
593     }
594   }
595   rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
596   rc->variance_st_encoding_bitrate =
597       rc->variance_st_encoding_bitrate / rc->window_count -
598       (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
599   perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
600                      rc->avg_st_encoding_bitrate;
601   printf("Short-time stats, for window of %d frames:\n", rc->window_size);
602   printf("Average, rms-variance, and percent-fluct: %f %f %f\n",
603          rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
604          perc_fluctuation);
605   if (frame_cnt - 1 != tot_num_frames)
606     die("Error: Number of input frames not equal to output!\n");
607 }
608 
609 // Layer pattern configuration.
set_layer_pattern(int layering_mode,int superframe_cnt,aom_svc_layer_id_t * layer_id,aom_svc_ref_frame_config_t * ref_frame_config,aom_svc_ref_frame_comp_pred_t * ref_frame_comp_pred,int * use_svc_control,int spatial_layer_id,int is_key_frame,int ksvc_mode,int speed)610 static void set_layer_pattern(
611     int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
612     aom_svc_ref_frame_config_t *ref_frame_config,
613     aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
614     int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed) {
615   // Setting this flag to 1 enables simplex example of
616   // RPS (Reference Picture Selection) for 1 layer.
617   int use_rps_example = 0;
618   int i;
619   int enable_longterm_temporal_ref = 1;
620   int shift = (layering_mode == 8) ? 2 : 0;
621   int simulcast_mode = (layering_mode == 11);
622   *use_svc_control = 1;
623   layer_id->spatial_layer_id = spatial_layer_id;
624   int lag_index = 0;
625   int base_count = superframe_cnt >> 2;
626   ref_frame_comp_pred->use_comp_pred[0] = 0;  // GOLDEN_LAST
627   ref_frame_comp_pred->use_comp_pred[1] = 0;  // LAST2_LAST
628   ref_frame_comp_pred->use_comp_pred[2] = 0;  // ALTREF_LAST
629   // Set the reference map buffer idx for the 7 references:
630   // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
631   // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
632   for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
633   for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0;
634   for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
635 
636   if (ksvc_mode) {
637     // Same pattern as case 9, but the reference strucutre will be constrained
638     // below.
639     layering_mode = 9;
640   }
641   switch (layering_mode) {
642     case 0:
643       if (use_rps_example == 0) {
644         // 1-layer: update LAST on every frame, reference LAST.
645         layer_id->temporal_layer_id = 0;
646         layer_id->spatial_layer_id = 0;
647         ref_frame_config->refresh[0] = 1;
648         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
649       } else {
650         // Pattern of 2 references (ALTREF and GOLDEN) trailing
651         // LAST by 4 and 8 frames, with some switching logic to
652         // sometimes only predict from the longer-term reference
653         //(golden here). This is simple example to test RPS
654         // (reference picture selection).
655         int last_idx = 0;
656         int last_idx_refresh = 0;
657         int gld_idx = 0;
658         int alt_ref_idx = 0;
659         int lag_alt = 4;
660         int lag_gld = 8;
661         layer_id->temporal_layer_id = 0;
662         layer_id->spatial_layer_id = 0;
663         int sh = 8;  // slots 0 - 7.
664         // Moving index slot for last: 0 - (sh - 1)
665         if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
666         // Moving index for refresh of last: one ahead for next frame.
667         last_idx_refresh = superframe_cnt % sh;
668         // Moving index for gld_ref, lag behind current by lag_gld
669         if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
670         // Moving index for alt_ref, lag behind LAST by lag_alt frames.
671         if (superframe_cnt > lag_alt)
672           alt_ref_idx = (superframe_cnt - lag_alt) % sh;
673         // Set the ref_idx.
674         // Default all references to slot for last.
675         for (i = 0; i < INTER_REFS_PER_FRAME; i++)
676           ref_frame_config->ref_idx[i] = last_idx;
677         // Set the ref_idx for the relevant references.
678         ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
679         ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
680         ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
681         ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
682         // Refresh this slot, which will become LAST on next frame.
683         ref_frame_config->refresh[last_idx_refresh] = 1;
684         // Reference LAST, ALTREF, and GOLDEN
685         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
686         ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
687         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
688         // Switch to only GOLDEN every 300 frames.
689         if (superframe_cnt % 200 == 0 && superframe_cnt > 0) {
690           ref_frame_config->reference[SVC_LAST_FRAME] = 0;
691           ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
692           ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
693           // Test if the long-term is LAST instead, this is just a renaming
694           // but its tests if encoder behaves the same, whether its
695           // LAST or GOLDEN.
696           if (superframe_cnt % 400 == 0 && superframe_cnt > 0) {
697             ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx;
698             ref_frame_config->reference[SVC_LAST_FRAME] = 1;
699             ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
700             ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
701           }
702         }
703       }
704       break;
705     case 1:
706       // 2-temporal layer.
707       //    1    3    5
708       //  0    2    4
709       // Keep golden fixed at slot 3.
710       base_count = superframe_cnt >> 1;
711       ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
712       // Cyclically refresh slots 5, 6, 7, for lag alt ref.
713       lag_index = 5;
714       if (base_count > 0) {
715         lag_index = 5 + (base_count % 3);
716         if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
717       }
718       // Set the altref slot to lag_index.
719       ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
720       if (superframe_cnt % 2 == 0) {
721         layer_id->temporal_layer_id = 0;
722         // Update LAST on layer 0, reference LAST.
723         ref_frame_config->refresh[0] = 1;
724         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
725         // Refresh lag_index slot, needed for lagging altref.
726         ref_frame_config->refresh[lag_index] = 1;
727         // Refresh GOLDEN every x base layer frames.
728         if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
729       } else {
730         layer_id->temporal_layer_id = 1;
731         // No updates on layer 1, reference LAST (TL0).
732         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
733       }
734       // Always reference golden and altref on TL0.
735       if (layer_id->temporal_layer_id == 0) {
736         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
737         ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
738       }
739       break;
740     case 2:
741       // 3-temporal layer:
742       //   1    3   5    7
743       //     2        6
744       // 0        4        8
745       if (superframe_cnt % 4 == 0) {
746         // Base layer.
747         layer_id->temporal_layer_id = 0;
748         // Update LAST on layer 0, reference LAST.
749         ref_frame_config->refresh[0] = 1;
750         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
751       } else if ((superframe_cnt - 1) % 4 == 0) {
752         layer_id->temporal_layer_id = 2;
753         // First top layer: no updates, only reference LAST (TL0).
754         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
755       } else if ((superframe_cnt - 2) % 4 == 0) {
756         layer_id->temporal_layer_id = 1;
757         // Middle layer (TL1): update LAST2, only reference LAST (TL0).
758         ref_frame_config->refresh[1] = 1;
759         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
760       } else if ((superframe_cnt - 3) % 4 == 0) {
761         layer_id->temporal_layer_id = 2;
762         // Second top layer: no updates, only reference LAST.
763         // Set buffer idx for LAST to slot 1, since that was the slot
764         // updated in previous frame. So LAST is TL1 frame.
765         ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
766         ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
767         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
768       }
769       break;
770     case 3:
771       // 3 TL, same as above, except allow for predicting
772       // off 2 more references (GOLDEN and ALTREF), with
773       // GOLDEN updated periodically, and ALTREF lagging behind
774       // LAST by ~4 frames. Both GOLDEN and ALTREF
775       // can only be updated on base temporal layer.
776 
777       // Keep golden fixed at slot 3.
778       ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
779       // Cyclically refresh slots 5, 6, 7, for lag altref.
780       lag_index = 5;
781       if (base_count > 0) {
782         lag_index = 5 + (base_count % 3);
783         if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
784       }
785       // Set the altref slot to lag_index.
786       ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
787       if (superframe_cnt % 4 == 0) {
788         // Base layer.
789         layer_id->temporal_layer_id = 0;
790         // Update LAST on layer 0, reference LAST.
791         ref_frame_config->refresh[0] = 1;
792         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
793         // Refresh GOLDEN every x ~10 base layer frames.
794         if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
795         // Refresh lag_index slot, needed for lagging altref.
796         ref_frame_config->refresh[lag_index] = 1;
797       } else if ((superframe_cnt - 1) % 4 == 0) {
798         layer_id->temporal_layer_id = 2;
799         // First top layer: no updates, only reference LAST (TL0).
800         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
801       } else if ((superframe_cnt - 2) % 4 == 0) {
802         layer_id->temporal_layer_id = 1;
803         // Middle layer (TL1): update LAST2, only reference LAST (TL0).
804         ref_frame_config->refresh[1] = 1;
805         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
806       } else if ((superframe_cnt - 3) % 4 == 0) {
807         layer_id->temporal_layer_id = 2;
808         // Second top layer: no updates, only reference LAST.
809         // Set buffer idx for LAST to slot 1, since that was the slot
810         // updated in previous frame. So LAST is TL1 frame.
811         ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
812         ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
813         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
814       }
815       // Every frame can reference GOLDEN AND ALTREF.
816       ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
817       ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
818       // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN.
819       if (speed >= 7) {
820         ref_frame_comp_pred->use_comp_pred[2] = 1;
821         ref_frame_comp_pred->use_comp_pred[0] = 1;
822       }
823       break;
824     case 4:
825       // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
826       // only reference GF (not LAST). Other frames only reference LAST.
827       //   1    3   5    7
828       //     2        6
829       // 0        4        8
830       if (superframe_cnt % 4 == 0) {
831         // Base layer.
832         layer_id->temporal_layer_id = 0;
833         // Update LAST on layer 0, only reference LAST.
834         ref_frame_config->refresh[0] = 1;
835         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
836       } else if ((superframe_cnt - 1) % 4 == 0) {
837         layer_id->temporal_layer_id = 2;
838         // First top layer: no updates, only reference LAST (TL0).
839         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
840       } else if ((superframe_cnt - 2) % 4 == 0) {
841         layer_id->temporal_layer_id = 1;
842         // Middle layer (TL1): update GF, only reference LAST (TL0).
843         ref_frame_config->refresh[3] = 1;
844         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
845       } else if ((superframe_cnt - 3) % 4 == 0) {
846         layer_id->temporal_layer_id = 2;
847         // Second top layer: no updates, only reference GF.
848         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
849       }
850       break;
851     case 5:
852       // 2 spatial layers, 1 temporal.
853       layer_id->temporal_layer_id = 0;
854       if (layer_id->spatial_layer_id == 0) {
855         // Reference LAST, update LAST.
856         ref_frame_config->refresh[0] = 1;
857         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
858       } else if (layer_id->spatial_layer_id == 1) {
859         // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
860         // and GOLDEN to slot 0. Update slot 1 (LAST).
861         ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
862         ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0;
863         ref_frame_config->refresh[1] = 1;
864         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
865         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
866       }
867       break;
868     case 6:
869       // 3 spatial layers, 1 temporal.
870       // Note for this case, we set the buffer idx for all references to be
871       // either LAST or GOLDEN, which are always valid references, since decoder
872       // will check if any of the 7 references is valid scale in
873       // valid_ref_frame_size().
874       layer_id->temporal_layer_id = 0;
875       if (layer_id->spatial_layer_id == 0) {
876         // Reference LAST, update LAST. Set all buffer_idx to 0.
877         for (i = 0; i < INTER_REFS_PER_FRAME; i++)
878           ref_frame_config->ref_idx[i] = 0;
879         ref_frame_config->refresh[0] = 1;
880         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
881       } else if (layer_id->spatial_layer_id == 1) {
882         // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
883         // and GOLDEN (and all other refs) to slot 0.
884         // Update slot 1 (LAST).
885         for (i = 0; i < INTER_REFS_PER_FRAME; i++)
886           ref_frame_config->ref_idx[i] = 0;
887         ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
888         ref_frame_config->refresh[1] = 1;
889         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
890         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
891       } else if (layer_id->spatial_layer_id == 2) {
892         // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
893         // and GOLDEN (and all other refs) to slot 1.
894         // Update slot 2 (LAST).
895         for (i = 0; i < INTER_REFS_PER_FRAME; i++)
896           ref_frame_config->ref_idx[i] = 1;
897         ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
898         ref_frame_config->refresh[2] = 1;
899         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
900         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
901         // For 3 spatial layer case: allow for top spatial layer to use
902         // additional temporal reference. Update every 10 frames.
903         if (enable_longterm_temporal_ref) {
904           ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
905           ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
906           if (base_count % 10 == 0)
907             ref_frame_config->refresh[REF_FRAMES - 1] = 1;
908         }
909       }
910       break;
911     case 7:
912       // 2 spatial and 3 temporal layer.
913       ref_frame_config->reference[SVC_LAST_FRAME] = 1;
914       if (superframe_cnt % 4 == 0) {
915         // Base temporal layer
916         layer_id->temporal_layer_id = 0;
917         if (layer_id->spatial_layer_id == 0) {
918           // Reference LAST, update LAST
919           // Set all buffer_idx to 0
920           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
921             ref_frame_config->ref_idx[i] = 0;
922           ref_frame_config->refresh[0] = 1;
923         } else if (layer_id->spatial_layer_id == 1) {
924           // Reference LAST and GOLDEN.
925           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
926             ref_frame_config->ref_idx[i] = 0;
927           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
928           ref_frame_config->refresh[1] = 1;
929         }
930       } else if ((superframe_cnt - 1) % 4 == 0) {
931         // First top temporal enhancement layer.
932         layer_id->temporal_layer_id = 2;
933         if (layer_id->spatial_layer_id == 0) {
934           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
935             ref_frame_config->ref_idx[i] = 0;
936           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
937           ref_frame_config->refresh[3] = 1;
938         } else if (layer_id->spatial_layer_id == 1) {
939           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
940           // GOLDEN (and all other refs) to slot 3.
941           // No update.
942           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
943             ref_frame_config->ref_idx[i] = 3;
944           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
945         }
946       } else if ((superframe_cnt - 2) % 4 == 0) {
947         // Middle temporal enhancement layer.
948         layer_id->temporal_layer_id = 1;
949         if (layer_id->spatial_layer_id == 0) {
950           // Reference LAST.
951           // Set all buffer_idx to 0.
952           // Set GOLDEN to slot 5 and update slot 5.
953           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
954             ref_frame_config->ref_idx[i] = 0;
955           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
956           ref_frame_config->refresh[5 - shift] = 1;
957         } else if (layer_id->spatial_layer_id == 1) {
958           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
959           // GOLDEN (and all other refs) to slot 5.
960           // Set LAST3 to slot 6 and update slot 6.
961           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
962             ref_frame_config->ref_idx[i] = 5 - shift;
963           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
964           ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
965           ref_frame_config->refresh[6 - shift] = 1;
966         }
967       } else if ((superframe_cnt - 3) % 4 == 0) {
968         // Second top temporal enhancement layer.
969         layer_id->temporal_layer_id = 2;
970         if (layer_id->spatial_layer_id == 0) {
971           // Set LAST to slot 5 and reference LAST.
972           // Set GOLDEN to slot 3 and update slot 3.
973           // Set all other buffer_idx to 0.
974           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
975             ref_frame_config->ref_idx[i] = 0;
976           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
977           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
978           ref_frame_config->refresh[3] = 1;
979         } else if (layer_id->spatial_layer_id == 1) {
980           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
981           // GOLDEN to slot 3. No update.
982           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
983             ref_frame_config->ref_idx[i] = 0;
984           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
985           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
986         }
987       }
988       break;
989     case 8:
990       // 3 spatial and 3 temporal layer.
991       // Same as case 9 but with overlap in the buffer slot updates
992       // (shift = 2). The slots 3 and 4 updated by first TL2 are
993       // reused for update in TL1 superframe.
994       // Note for this case, frame order hint must be disabled for
995       // lower resolutions (operating points > 0) to be decodable.
996     case 9:
997       // 3 spatial and 3 temporal layer.
998       // No overlap in buffer updates between TL2 and TL1.
999       // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7.
1000       // Set the references via the svc_ref_frame_config control.
1001       // Always reference LAST.
1002       ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1003       if (superframe_cnt % 4 == 0) {
1004         // Base temporal layer.
1005         layer_id->temporal_layer_id = 0;
1006         if (layer_id->spatial_layer_id == 0) {
1007           // Reference LAST, update LAST.
1008           // Set all buffer_idx to 0.
1009           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1010             ref_frame_config->ref_idx[i] = 0;
1011           ref_frame_config->refresh[0] = 1;
1012         } else if (layer_id->spatial_layer_id == 1) {
1013           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1014           // GOLDEN (and all other refs) to slot 0.
1015           // Update slot 1 (LAST).
1016           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1017             ref_frame_config->ref_idx[i] = 0;
1018           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1019           ref_frame_config->refresh[1] = 1;
1020         } else if (layer_id->spatial_layer_id == 2) {
1021           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1022           // GOLDEN (and all other refs) to slot 1.
1023           // Update slot 2 (LAST).
1024           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1025             ref_frame_config->ref_idx[i] = 1;
1026           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1027           ref_frame_config->refresh[2] = 1;
1028         }
1029       } else if ((superframe_cnt - 1) % 4 == 0) {
1030         // First top temporal enhancement layer.
1031         layer_id->temporal_layer_id = 2;
1032         if (layer_id->spatial_layer_id == 0) {
1033           // Reference LAST (slot 0).
1034           // Set GOLDEN to slot 3 and update slot 3.
1035           // Set all other buffer_idx to slot 0.
1036           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1037             ref_frame_config->ref_idx[i] = 0;
1038           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1039           ref_frame_config->refresh[3] = 1;
1040         } else if (layer_id->spatial_layer_id == 1) {
1041           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1042           // GOLDEN (and all other refs) to slot 3.
1043           // Set LAST2 to slot 4 and Update slot 4.
1044           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1045             ref_frame_config->ref_idx[i] = 3;
1046           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1047           ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1048           ref_frame_config->refresh[4] = 1;
1049         } else if (layer_id->spatial_layer_id == 2) {
1050           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1051           // GOLDEN (and all other refs) to slot 4.
1052           // No update.
1053           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1054             ref_frame_config->ref_idx[i] = 4;
1055           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1056         }
1057       } else if ((superframe_cnt - 2) % 4 == 0) {
1058         // Middle temporal enhancement layer.
1059         layer_id->temporal_layer_id = 1;
1060         if (layer_id->spatial_layer_id == 0) {
1061           // Reference LAST.
1062           // Set all buffer_idx to 0.
1063           // Set GOLDEN to slot 5 and update slot 5.
1064           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1065             ref_frame_config->ref_idx[i] = 0;
1066           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1067           ref_frame_config->refresh[5 - shift] = 1;
1068         } else if (layer_id->spatial_layer_id == 1) {
1069           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1070           // GOLDEN (and all other refs) to slot 5.
1071           // Set LAST3 to slot 6 and update slot 6.
1072           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1073             ref_frame_config->ref_idx[i] = 5 - shift;
1074           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1075           ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1076           ref_frame_config->refresh[6 - shift] = 1;
1077         } else if (layer_id->spatial_layer_id == 2) {
1078           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1079           // GOLDEN (and all other refs) to slot 6.
1080           // Set LAST3 to slot 7 and update slot 7.
1081           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1082             ref_frame_config->ref_idx[i] = 6 - shift;
1083           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1084           ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift;
1085           ref_frame_config->refresh[7 - shift] = 1;
1086         }
1087       } else if ((superframe_cnt - 3) % 4 == 0) {
1088         // Second top temporal enhancement layer.
1089         layer_id->temporal_layer_id = 2;
1090         if (layer_id->spatial_layer_id == 0) {
1091           // Set LAST to slot 5 and reference LAST.
1092           // Set GOLDEN to slot 3 and update slot 3.
1093           // Set all other buffer_idx to 0.
1094           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1095             ref_frame_config->ref_idx[i] = 0;
1096           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1097           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1098           ref_frame_config->refresh[3] = 1;
1099         } else if (layer_id->spatial_layer_id == 1) {
1100           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1101           // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
1102           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1103             ref_frame_config->ref_idx[i] = 0;
1104           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1105           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1106           ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1107           ref_frame_config->refresh[4] = 1;
1108         } else if (layer_id->spatial_layer_id == 2) {
1109           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
1110           // GOLDEN to slot 4. No update.
1111           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1112             ref_frame_config->ref_idx[i] = 0;
1113           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift;
1114           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4;
1115         }
1116       }
1117       break;
1118     case 11:
1119       // Simulcast mode for 3 spatial and 3 temporal layers.
1120       // No inter-layer prediction, only prediction is temporal and single
1121       // reference (LAST).
1122       // No overlap in buffer slots between spatial layers. So for example,
1123       // SL0 only uses slots 0 and 1.
1124       // SL1 only uses slots 2 and 3.
1125       // SL2 only uses slots 4 and 5.
1126       // All 7 references for each inter-frame must only access buffer slots
1127       // for that spatial layer.
1128       // On key (super)frames: SL1 and SL2 must have no references set
1129       // and must refresh all the slots for that layer only (so 2 and 3
1130       // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
1131       // as a Key frame (refresh all slots). SL1/SL2 will be labelled
1132       // internally as Intra-only frames that allow that stream to be decoded.
1133       // These conditions will allow for each spatial stream to be
1134       // independently decodable.
1135 
1136       // Initialize all references to 0 (don't use reference).
1137       for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1138         ref_frame_config->reference[i] = 0;
1139       // Initialize as no refresh/update for all slots.
1140       for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
1141       for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1142         ref_frame_config->ref_idx[i] = 0;
1143 
1144       if (is_key_frame) {
1145         if (layer_id->spatial_layer_id == 0) {
1146           // Assign LAST/GOLDEN to slot 0/1.
1147           // Refresh slots 0 and 1 for SL0.
1148           // SL0: this will get set to KEY frame internally.
1149           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1150           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1;
1151           ref_frame_config->refresh[0] = 1;
1152           ref_frame_config->refresh[1] = 1;
1153         } else if (layer_id->spatial_layer_id == 1) {
1154           // Assign LAST/GOLDEN to slot 2/3.
1155           // Refresh slots 2 and 3 for SL1.
1156           // This will get set to Intra-only frame internally.
1157           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1158           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1159           ref_frame_config->refresh[2] = 1;
1160           ref_frame_config->refresh[3] = 1;
1161         } else if (layer_id->spatial_layer_id == 2) {
1162           // Assign LAST/GOLDEN to slot 4/5.
1163           // Refresh slots 4 and 5 for SL2.
1164           // This will get set to Intra-only frame internally.
1165           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1166           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5;
1167           ref_frame_config->refresh[4] = 1;
1168           ref_frame_config->refresh[5] = 1;
1169         }
1170       } else if (superframe_cnt % 4 == 0) {
1171         // Base temporal layer: TL0
1172         layer_id->temporal_layer_id = 0;
1173         if (layer_id->spatial_layer_id == 0) {  // SL0
1174           // Reference LAST. Assign all references to either slot
1175           // 0 or 1. Here we assign LAST to slot 0, all others to 1.
1176           // Update slot 0 (LAST).
1177           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1178           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1179             ref_frame_config->ref_idx[i] = 1;
1180           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1181           ref_frame_config->refresh[0] = 1;
1182         } else if (layer_id->spatial_layer_id == 1) {  // SL1
1183           // Reference LAST. Assign all references to either slot
1184           // 2 or 3. Here we assign LAST to slot 2, all others to 3.
1185           // Update slot 2 (LAST).
1186           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1187           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1188             ref_frame_config->ref_idx[i] = 3;
1189           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1190           ref_frame_config->refresh[2] = 1;
1191         } else if (layer_id->spatial_layer_id == 2) {  // SL2
1192           // Reference LAST. Assign all references to either slot
1193           // 4 or 5. Here we assign LAST to slot 4, all others to 5.
1194           // Update slot 4 (LAST).
1195           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1196           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1197             ref_frame_config->ref_idx[i] = 5;
1198           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1199           ref_frame_config->refresh[4] = 1;
1200         }
1201       } else if ((superframe_cnt - 1) % 4 == 0) {
1202         // First top temporal enhancement layer: TL2
1203         layer_id->temporal_layer_id = 2;
1204         if (layer_id->spatial_layer_id == 0) {  // SL0
1205           // Reference LAST (slot 0). Assign other references to slot 1.
1206           // No update/refresh on any slots.
1207           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1208           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1209             ref_frame_config->ref_idx[i] = 1;
1210           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1211         } else if (layer_id->spatial_layer_id == 1) {  // SL1
1212           // Reference LAST (slot 2). Assign other references to slot 3.
1213           // No update/refresh on any slots.
1214           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1215           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1216             ref_frame_config->ref_idx[i] = 3;
1217           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1218         } else if (layer_id->spatial_layer_id == 2) {  // SL2
1219           // Reference LAST (slot 4). Assign other references to slot 5.
1220           // No update/refresh on any slots.
1221           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1222           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1223             ref_frame_config->ref_idx[i] = 5;
1224           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1225         }
1226       } else if ((superframe_cnt - 2) % 4 == 0) {
1227         // Middle temporal enhancement layer: TL1
1228         layer_id->temporal_layer_id = 1;
1229         if (layer_id->spatial_layer_id == 0) {  // SL0
1230           // Reference LAST (slot 0).
1231           // Set GOLDEN to slot 1 and update slot 1.
1232           // This will be used as reference for next TL2.
1233           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1234           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1235             ref_frame_config->ref_idx[i] = 1;
1236           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1237           ref_frame_config->refresh[1] = 1;
1238         } else if (layer_id->spatial_layer_id == 1) {  // SL1
1239           // Reference LAST (slot 2).
1240           // Set GOLDEN to slot 3 and update slot 3.
1241           // This will be used as reference for next TL2.
1242           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1243           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1244             ref_frame_config->ref_idx[i] = 3;
1245           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1246           ref_frame_config->refresh[3] = 1;
1247         } else if (layer_id->spatial_layer_id == 2) {  // SL2
1248           // Reference LAST (slot 4).
1249           // Set GOLDEN to slot 5 and update slot 5.
1250           // This will be used as reference for next TL2.
1251           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1252           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1253             ref_frame_config->ref_idx[i] = 5;
1254           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1255           ref_frame_config->refresh[5] = 1;
1256         }
1257       } else if ((superframe_cnt - 3) % 4 == 0) {
1258         // Second top temporal enhancement layer: TL2
1259         layer_id->temporal_layer_id = 2;
1260         if (layer_id->spatial_layer_id == 0) {  // SL0
1261           // Reference LAST (slot 1). Assign other references to slot 0.
1262           // No update/refresh on any slots.
1263           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1264           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1265             ref_frame_config->ref_idx[i] = 0;
1266           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1267         } else if (layer_id->spatial_layer_id == 1) {  // SL1
1268           // Reference LAST (slot 3). Assign other references to slot 2.
1269           // No update/refresh on any slots.
1270           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1271           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1272             ref_frame_config->ref_idx[i] = 2;
1273           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3;
1274         } else if (layer_id->spatial_layer_id == 2) {  // SL2
1275           // Reference LAST (slot 5). Assign other references to slot 4.
1276           // No update/refresh on any slots.
1277           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1278           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1279             ref_frame_config->ref_idx[i] = 4;
1280           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5;
1281         }
1282       }
1283       if (!simulcast_mode && layer_id->spatial_layer_id > 0) {
1284         // Always reference GOLDEN (inter-layer prediction).
1285         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1286         if (ksvc_mode) {
1287           // KSVC: only keep the inter-layer reference (GOLDEN) for
1288           // superframes whose base is key.
1289           if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
1290         }
1291         if (is_key_frame && layer_id->spatial_layer_id > 1) {
1292           // On superframes whose base is key: remove LAST to avoid prediction
1293           // off layer two levels below.
1294           ref_frame_config->reference[SVC_LAST_FRAME] = 0;
1295         }
1296       }
1297       // For 3 spatial layer case 8 (where there is free buffer slot):
1298       // allow for top spatial layer to use additional temporal reference.
1299       // Additional reference is only updated on base temporal layer, every
1300       // 10 TL0 frames here.
1301       if (!simulcast_mode && enable_longterm_temporal_ref &&
1302           layer_id->spatial_layer_id == 2 && layering_mode == 8) {
1303         ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1304         if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1305         if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
1306           ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1307       }
1308       break;
1309     default: assert(0); die("Error: Unsupported temporal layering mode!\n");
1310   }
1311 }
1312 
1313 #if CONFIG_AV1_DECODER
1314 // Returns whether there is a mismatch between the encoder's new frame and the
1315 // decoder's new frame.
1316 static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
1317                        const int frames_out) {
1318   aom_image_t enc_img, dec_img;
1319   int mismatch = 0;
1320 
1321   /* Get the internal new frame */
1322   AOM_CODEC_CONTROL_TYPECHECKED(encoder, AV1_GET_NEW_FRAME_IMAGE, &enc_img);
1323   AOM_CODEC_CONTROL_TYPECHECKED(decoder, AV1_GET_NEW_FRAME_IMAGE, &dec_img);
1324 
1325 #if CONFIG_AV1_HIGHBITDEPTH
1326   if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
1327       (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
1328     if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1329       aom_image_t enc_hbd_img;
1330       aom_img_alloc(
1331           &enc_hbd_img,
1332           static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1333           enc_img.d_w, enc_img.d_h, 16);
1334       aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
1335       enc_img = enc_hbd_img;
1336     }
1337     if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1338       aom_image_t dec_hbd_img;
1339       aom_img_alloc(
1340           &dec_hbd_img,
1341           static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1342           dec_img.d_w, dec_img.d_h, 16);
1343       aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
1344       dec_img = dec_hbd_img;
1345     }
1346   }
1347 #endif
1348 
1349   if (!aom_compare_img(&enc_img, &dec_img)) {
1350     int y[4], u[4], v[4];
1351 #if CONFIG_AV1_HIGHBITDEPTH
1352     if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1353       aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
1354     } else {
1355       aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1356     }
1357 #else
1358     aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1359 #endif
1360     fprintf(stderr,
1361             "Encode/decode mismatch on frame %d at"
1362             " Y[%d, %d] {%d/%d},"
1363             " U[%d, %d] {%d/%d},"
1364             " V[%d, %d] {%d/%d}\n",
1365             frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
1366             v[1], v[2], v[3]);
1367     mismatch = 1;
1368   }
1369 
1370   aom_img_free(&enc_img);
1371   aom_img_free(&dec_img);
1372   return mismatch;
1373 }
1374 #endif  // CONFIG_AV1_DECODER
1375 
// Running PSNR totals, accumulated from the encoder's PSNR packets and printed
// by show_psnr(). Only index 0 of each array is used by the code in this file.
struct psnr_stats {
  // The second element of these arrays is reserved for high bitdepth.
  // Sum of the sse[0] value of every PSNR packet.
  uint64_t psnr_sse_total[2];
  // Sum of the samples[0] value of every PSNR packet.
  uint64_t psnr_samples_total[2];
  // Element-wise sum of the four psnr[] values of every PSNR packet.
  double psnr_totals[2][4];
  // Number of PSNR packets accumulated.
  int psnr_count[2];
};
1383 
show_psnr(struct psnr_stats * psnr_stream,double peak)1384 static void show_psnr(struct psnr_stats *psnr_stream, double peak) {
1385   double ovpsnr;
1386 
1387   if (!psnr_stream->psnr_count[0]) return;
1388 
1389   fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
1390   ovpsnr = sse_to_psnr((double)psnr_stream->psnr_samples_total[0], peak,
1391                        (double)psnr_stream->psnr_sse_total[0]);
1392   fprintf(stderr, " %.3f", ovpsnr);
1393 
1394   for (int i = 0; i < 4; i++) {
1395     fprintf(stderr, " %.3f",
1396             psnr_stream->psnr_totals[0][i] / psnr_stream->psnr_count[0]);
1397   }
1398   fprintf(stderr, "\n");
1399 }
1400 
create_rtc_rc_config(const aom_codec_enc_cfg_t & cfg,const AppInput & app_input)1401 static aom::AV1RateControlRtcConfig create_rtc_rc_config(
1402     const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) {
1403   aom::AV1RateControlRtcConfig rc_cfg;
1404   rc_cfg.width = cfg.g_w;
1405   rc_cfg.height = cfg.g_h;
1406   rc_cfg.max_quantizer = cfg.rc_max_quantizer;
1407   rc_cfg.min_quantizer = cfg.rc_min_quantizer;
1408   rc_cfg.target_bandwidth = cfg.rc_target_bitrate;
1409   rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz;
1410   rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz;
1411   rc_cfg.buf_sz = cfg.rc_buf_sz;
1412   rc_cfg.overshoot_pct = cfg.rc_overshoot_pct;
1413   rc_cfg.undershoot_pct = cfg.rc_undershoot_pct;
1414   // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT
1415   rc_cfg.max_intra_bitrate_pct = 300;
1416   rc_cfg.framerate = cfg.g_timebase.den;
1417   // TODO(jianj): Add suppor for SVC.
1418   rc_cfg.ss_number_layers = 1;
1419   rc_cfg.ts_number_layers = 1;
1420   rc_cfg.scaling_factor_num[0] = 1;
1421   rc_cfg.scaling_factor_den[0] = 1;
1422   rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth);
1423   rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer;
1424   rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer;
1425   rc_cfg.aq_mode = app_input.aq_mode;
1426 
1427   return rc_cfg;
1428 }
1429 
// Maps a qindex on the internal 0-255 scale to a quantizer on the external
// 0-63 scale.
//
// Returns the smallest quantizer whose table entry is >= qindex, or 63 when
// qindex is larger than every table entry.
static int qindex_to_quantizer(int qindex) {
  // Entry q is the internal qindex that corresponds to external quantizer q.
  static const int kQindexOfQuantizer[] = {
    0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
    52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
    104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
    156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
    208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
  };
  const int last_level =
      (int)(sizeof(kQindexOfQuantizer) / sizeof(kQindexOfQuantizer[0])) - 1;

  // Walk up while the entry is still below qindex; stop at the last level so
  // that out-of-range inputs saturate there.
  int level = 0;
  while (level < last_level && kQindexOfQuantizer[level] < qindex) {
    ++level;
  }
  return level;
}
1445 
set_active_map(const aom_codec_enc_cfg_t * cfg,aom_codec_ctx_t * codec,int frame_cnt)1446 static void set_active_map(const aom_codec_enc_cfg_t *cfg,
1447                            aom_codec_ctx_t *codec, int frame_cnt) {
1448   aom_active_map_t map = { 0, 0, 0 };
1449 
1450   map.rows = (cfg->g_h + 15) / 16;
1451   map.cols = (cfg->g_w + 15) / 16;
1452 
1453   map.active_map = (uint8_t *)malloc(map.rows * map.cols);
1454   if (!map.active_map) die("Failed to allocate active map");
1455 
1456   // Example map for testing.
1457   for (unsigned int i = 0; i < map.rows; ++i) {
1458     for (unsigned int j = 0; j < map.cols; ++j) {
1459       int index = map.cols * i + j;
1460       map.active_map[index] = 1;
1461       if (frame_cnt < 300) {
1462         if (i < map.rows / 2 && j < map.cols / 2) map.active_map[index] = 0;
1463       } else if (frame_cnt >= 300) {
1464         if (i < map.rows / 2 && j >= map.cols / 2) map.active_map[index] = 0;
1465       }
1466     }
1467   }
1468 
1469   if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
1470     die_codec(codec, "Failed to set active map");
1471 
1472   free(map.active_map);
1473 }
1474 
main(int argc,const char ** argv)1475 int main(int argc, const char **argv) {
1476   AppInput app_input;
1477   AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
1478   FILE *obu_files[AOM_MAX_LAYERS] = { NULL };
1479   AvxVideoWriter *total_layer_file = NULL;
1480   FILE *total_layer_obu_file = NULL;
1481   aom_codec_enc_cfg_t cfg;
1482   int frame_cnt = 0;
1483   aom_image_t raw;
1484   int frame_avail;
1485   int got_data = 0;
1486   int flags = 0;
1487   int i;
1488   int pts = 0;             // PTS starts at 0.
1489   int frame_duration = 1;  // 1 timebase tick per frame.
1490   aom_svc_layer_id_t layer_id;
1491   aom_svc_params_t svc_params;
1492   aom_svc_ref_frame_config_t ref_frame_config;
1493   aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred;
1494 
1495 #if CONFIG_INTERNAL_STATS
1496   FILE *stats_file = fopen("opsnr.stt", "a");
1497   if (stats_file == NULL) {
1498     die("Cannot open opsnr.stt\n");
1499   }
1500 #endif
1501 #if CONFIG_AV1_DECODER
1502   aom_codec_ctx_t decoder;
1503 #endif
1504 
1505   struct RateControlMetrics rc;
1506   int64_t cx_time = 0;
1507   int64_t cx_time_layer[AOM_MAX_LAYERS];  // max number of layers.
1508   int frame_cnt_layer[AOM_MAX_LAYERS];
1509   double sum_bitrate = 0.0;
1510   double sum_bitrate2 = 0.0;
1511   double framerate = 30.0;
1512   int use_svc_control = 1;
1513   int set_err_resil_frame = 0;
1514   int test_changing_bitrate = 0;
1515   zero(rc.layer_target_bitrate);
1516   memset(&layer_id, 0, sizeof(aom_svc_layer_id_t));
1517   memset(&app_input, 0, sizeof(AppInput));
1518   memset(&svc_params, 0, sizeof(svc_params));
1519 
1520   // Flag to test dynamic scaling of source frames for single
1521   // spatial stream, using the scaling_mode control.
1522   const int test_dynamic_scaling_single_layer = 0;
1523 
1524   // Flag to test setting speed per layer.
1525   const int test_speed_per_layer = 0;
1526 
1527   // Flag for testing active maps.
1528   const int test_active_maps = 0;
1529 
1530   /* Setup default input stream settings */
1531   app_input.input_ctx.framerate.numerator = 30;
1532   app_input.input_ctx.framerate.denominator = 1;
1533   app_input.input_ctx.only_i420 = 0;
1534   app_input.input_ctx.bit_depth = AOM_BITS_8;
1535   app_input.speed = 7;
1536   exec_name = argv[0];
1537 
1538   // start with default encoder configuration
1539   aom_codec_err_t res = aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg,
1540                                                      AOM_USAGE_REALTIME);
1541   if (res != AOM_CODEC_OK) {
1542     die("Failed to get config: %s\n", aom_codec_err_to_string(res));
1543   }
1544 
1545   // Real time parameters.
1546   cfg.g_usage = AOM_USAGE_REALTIME;
1547 
1548   cfg.rc_end_usage = AOM_CBR;
1549   cfg.rc_min_quantizer = 2;
1550   cfg.rc_max_quantizer = 52;
1551   cfg.rc_undershoot_pct = 50;
1552   cfg.rc_overshoot_pct = 50;
1553   cfg.rc_buf_initial_sz = 600;
1554   cfg.rc_buf_optimal_sz = 600;
1555   cfg.rc_buf_sz = 1000;
1556   cfg.rc_resize_mode = 0;  // Set to RESIZE_DYNAMIC for dynamic resize.
1557   cfg.g_lag_in_frames = 0;
1558   cfg.kf_mode = AOM_KF_AUTO;
1559 
1560   parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
1561 
1562   int ts_number_layers = svc_params.number_temporal_layers;
1563   int ss_number_layers = svc_params.number_spatial_layers;
1564 
1565   unsigned int width = cfg.g_w;
1566   unsigned int height = cfg.g_h;
1567 
1568   if (app_input.layering_mode >= 0) {
1569     if (ts_number_layers !=
1570             mode_to_num_temporal_layers[app_input.layering_mode] ||
1571         ss_number_layers !=
1572             mode_to_num_spatial_layers[app_input.layering_mode]) {
1573       die("Number of layers doesn't match layering mode.");
1574     }
1575   }
1576 
1577   // Y4M reader has its own allocation.
1578   if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
1579     if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) {
1580       die("Failed to allocate image (%dx%d)", width, height);
1581     }
1582   }
1583 
1584   aom_codec_iface_t *encoder = aom_codec_av1_cx();
1585 
1586   memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0],
1587          sizeof(svc_params.layer_target_bitrate));
1588 
1589   unsigned int total_rate = 0;
1590   for (i = 0; i < ss_number_layers; i++) {
1591     total_rate +=
1592         svc_params
1593             .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1];
1594   }
1595   if (total_rate != cfg.rc_target_bitrate) {
1596     die("Incorrect total target bitrate");
1597   }
1598 
1599   svc_params.framerate_factor[0] = 1;
1600   if (ts_number_layers == 2) {
1601     svc_params.framerate_factor[0] = 2;
1602     svc_params.framerate_factor[1] = 1;
1603   } else if (ts_number_layers == 3) {
1604     svc_params.framerate_factor[0] = 4;
1605     svc_params.framerate_factor[1] = 2;
1606     svc_params.framerate_factor[2] = 1;
1607   }
1608 
1609   if (app_input.input_ctx.file_type == FILE_TYPE_Y4M) {
1610     // Override these settings with the info from Y4M file.
1611     cfg.g_w = app_input.input_ctx.width;
1612     cfg.g_h = app_input.input_ctx.height;
1613     // g_timebase is the reciprocal of frame rate.
1614     cfg.g_timebase.num = app_input.input_ctx.framerate.denominator;
1615     cfg.g_timebase.den = app_input.input_ctx.framerate.numerator;
1616   }
1617   framerate = cfg.g_timebase.den / cfg.g_timebase.num;
1618   set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
1619 
1620   AvxVideoInfo info;
1621   info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
1622   info.frame_width = cfg.g_w;
1623   info.frame_height = cfg.g_h;
1624   info.time_base.numerator = cfg.g_timebase.num;
1625   info.time_base.denominator = cfg.g_timebase.den;
1626   // Open an output file for each stream.
1627   for (int sl = 0; sl < ss_number_layers; ++sl) {
1628     for (int tl = 0; tl < ts_number_layers; ++tl) {
1629       i = sl * ts_number_layers + tl;
1630       char file_name[PATH_MAX];
1631       snprintf(file_name, sizeof(file_name), "%s_%d.av1",
1632                app_input.output_filename, i);
1633       if (app_input.output_obu) {
1634         obu_files[i] = fopen(file_name, "wb");
1635         if (!obu_files[i]) die("Failed to open %s for writing", file_name);
1636       } else {
1637         outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
1638         if (!outfile[i]) die("Failed to open %s for writing", file_name);
1639       }
1640     }
1641   }
1642   if (app_input.output_obu) {
1643     total_layer_obu_file = fopen(app_input.output_filename, "wb");
1644     if (!total_layer_obu_file)
1645       die("Failed to open %s for writing", app_input.output_filename);
1646   } else {
1647     total_layer_file =
1648         aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
1649     if (!total_layer_file)
1650       die("Failed to open %s for writing", app_input.output_filename);
1651   }
1652 
1653   // Initialize codec.
1654   aom_codec_ctx_t codec;
1655   aom_codec_flags_t flag = 0;
1656   flag |= cfg.g_input_bit_depth == AOM_BITS_8 ? 0 : AOM_CODEC_USE_HIGHBITDEPTH;
1657   flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0;
1658   if (aom_codec_enc_init(&codec, encoder, &cfg, flag))
1659     die_codec(&codec, "Failed to initialize encoder");
1660 
1661 #if CONFIG_AV1_DECODER
1662   if (app_input.decode) {
1663     if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0))
1664       die_codec(&decoder, "Failed to initialize decoder");
1665   }
1666 #endif
1667 
1668   aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
1669   aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
1670   aom_codec_control(&codec, AV1E_SET_GF_CBR_BOOST_PCT, 0);
1671   aom_codec_control(&codec, AV1E_SET_ENABLE_CDEF, 1);
1672   aom_codec_control(&codec, AV1E_SET_LOOPFILTER_CONTROL, 1);
1673   aom_codec_control(&codec, AV1E_SET_ENABLE_WARPED_MOTION, 0);
1674   aom_codec_control(&codec, AV1E_SET_ENABLE_OBMC, 0);
1675   aom_codec_control(&codec, AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
1676   aom_codec_control(&codec, AV1E_SET_ENABLE_ORDER_HINT, 0);
1677   aom_codec_control(&codec, AV1E_SET_ENABLE_TPL_MODEL, 0);
1678   aom_codec_control(&codec, AV1E_SET_DELTAQ_MODE, 0);
1679   aom_codec_control(&codec, AV1E_SET_COEFF_COST_UPD_FREQ, 3);
1680   aom_codec_control(&codec, AV1E_SET_MODE_COST_UPD_FREQ, 3);
1681   aom_codec_control(&codec, AV1E_SET_MV_COST_UPD_FREQ, 3);
1682   aom_codec_control(&codec, AV1E_SET_DV_COST_UPD_FREQ, 3);
1683   aom_codec_control(&codec, AV1E_SET_CDF_UPDATE_MODE, 1);
1684 
1685   // Settings to reduce key frame encoding time.
1686   aom_codec_control(&codec, AV1E_SET_ENABLE_CFL_INTRA, 0);
1687   aom_codec_control(&codec, AV1E_SET_ENABLE_SMOOTH_INTRA, 0);
1688   aom_codec_control(&codec, AV1E_SET_ENABLE_ANGLE_DELTA, 0);
1689   aom_codec_control(&codec, AV1E_SET_ENABLE_FILTER_INTRA, 0);
1690   aom_codec_control(&codec, AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1);
1691 
1692   aom_codec_control(&codec, AV1E_SET_AUTO_TILES, 1);
1693 
1694   aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
1695   if (app_input.tune_content == AOM_CONTENT_SCREEN) {
1696     aom_codec_control(&codec, AV1E_SET_ENABLE_PALETTE, 1);
1697     // INTRABC is currently disabled for rt mode, as it's too slow.
1698     aom_codec_control(&codec, AV1E_SET_ENABLE_INTRABC, 0);
1699   }
1700 
1701   if (app_input.use_external_rc) {
1702     aom_codec_control(&codec, AV1E_SET_RTC_EXTERNAL_RC, 1);
1703   }
1704 
1705   aom_codec_control(&codec, AV1E_SET_MAX_CONSEC_FRAME_DROP_MS_CBR, INT_MAX);
1706 
1707   aom_codec_control(&codec, AV1E_SET_SVC_FRAME_DROP_MODE,
1708                     AOM_FULL_SUPERFRAME_DROP);
1709 
1710   aom_codec_control(&codec, AV1E_SET_POSTENCODE_DROP_RTC, 1);
1711 
1712   svc_params.number_spatial_layers = ss_number_layers;
1713   svc_params.number_temporal_layers = ts_number_layers;
1714   for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
1715     svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
1716     svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
1717   }
1718   for (i = 0; i < ss_number_layers; ++i) {
1719     svc_params.scaling_factor_num[i] = 1;
1720     svc_params.scaling_factor_den[i] = 1;
1721   }
1722   if (ss_number_layers == 2) {
1723     svc_params.scaling_factor_num[0] = 1;
1724     svc_params.scaling_factor_den[0] = 2;
1725   } else if (ss_number_layers == 3) {
1726     svc_params.scaling_factor_num[0] = 1;
1727     svc_params.scaling_factor_den[0] = 4;
1728     svc_params.scaling_factor_num[1] = 1;
1729     svc_params.scaling_factor_den[1] = 2;
1730   }
1731   aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
1732   // TODO(aomedia:3032): Configure KSVC in fixed mode.
1733 
1734   // This controls the maximum target size of the key frame.
1735   // For generating smaller key frames, use a smaller max_intra_size_pct
1736   // value, like 100 or 200.
1737   {
1738     const int max_intra_size_pct = 300;
1739     aom_codec_control(&codec, AOME_SET_MAX_INTRA_BITRATE_PCT,
1740                       max_intra_size_pct);
1741   }
1742 
1743   for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
1744     cx_time_layer[lx] = 0;
1745     frame_cnt_layer[lx] = 0;
1746   }
1747 
1748   std::unique_ptr<aom::AV1RateControlRTC> rc_api;
1749   if (app_input.use_external_rc) {
1750     const aom::AV1RateControlRtcConfig rc_cfg =
1751         create_rtc_rc_config(cfg, app_input);
1752     rc_api = aom::AV1RateControlRTC::Create(rc_cfg);
1753   }
1754 
1755   frame_avail = 1;
1756   struct psnr_stats psnr_stream;
1757   memset(&psnr_stream, 0, sizeof(psnr_stream));
1758   while (frame_avail || got_data) {
1759     struct aom_usec_timer timer;
1760     frame_avail = read_frame(&(app_input.input_ctx), &raw);
1761     // Loop over spatial layers.
1762     for (int slx = 0; slx < ss_number_layers; slx++) {
1763       aom_codec_iter_t iter = NULL;
1764       const aom_codec_cx_pkt_t *pkt;
1765       int layer = 0;
1766       // Flag for superframe whose base is key.
1767       int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0;
1768       // For flexible mode:
1769       if (app_input.layering_mode >= 0) {
1770         // Set the reference/update flags, layer_id, and reference_map
1771         // buffer index.
1772         set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
1773                           &ref_frame_config, &ref_frame_comp_pred,
1774                           &use_svc_control, slx, is_key_frame,
1775                           (app_input.layering_mode == 10), app_input.speed);
1776         aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
1777         if (use_svc_control) {
1778           aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG,
1779                             &ref_frame_config);
1780           aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED,
1781                             &ref_frame_comp_pred);
1782         }
1783         // Set the speed per layer.
1784         if (test_speed_per_layer) {
1785           int speed_per_layer = 10;
1786           if (layer_id.spatial_layer_id == 0) {
1787             if (layer_id.temporal_layer_id == 0) speed_per_layer = 6;
1788             if (layer_id.temporal_layer_id == 1) speed_per_layer = 7;
1789             if (layer_id.temporal_layer_id == 2) speed_per_layer = 8;
1790           } else if (layer_id.spatial_layer_id == 1) {
1791             if (layer_id.temporal_layer_id == 0) speed_per_layer = 7;
1792             if (layer_id.temporal_layer_id == 1) speed_per_layer = 8;
1793             if (layer_id.temporal_layer_id == 2) speed_per_layer = 9;
1794           } else if (layer_id.spatial_layer_id == 2) {
1795             if (layer_id.temporal_layer_id == 0) speed_per_layer = 8;
1796             if (layer_id.temporal_layer_id == 1) speed_per_layer = 9;
1797             if (layer_id.temporal_layer_id == 2) speed_per_layer = 10;
1798           }
1799           aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer);
1800         }
1801       } else {
1802         // Only up to 3 temporal layers supported in fixed mode.
1803         // Only need to set spatial and temporal layer_id: reference
1804         // prediction, refresh, and buffer_idx are set internally.
1805         layer_id.spatial_layer_id = slx;
1806         layer_id.temporal_layer_id = 0;
1807         if (ts_number_layers == 2) {
1808           layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
1809         } else if (ts_number_layers == 3) {
1810           if (frame_cnt % 2 != 0)
1811             layer_id.temporal_layer_id = 2;
1812           else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
1813             layer_id.temporal_layer_id = 1;
1814         }
1815         aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
1816       }
1817 
1818       if (set_err_resil_frame && cfg.g_error_resilient == 0) {
1819         // Set error_resilient per frame: off/0 for base layer and
1820         // on/1 for enhancement layer frames.
1821         // Note that this is can only be done on the fly/per-frame/layer
1822         // if the config error_resilience is off/0. See the logic for updating
1823         // in set_encoder_config():
1824         // tool_cfg->error_resilient_mode =
1825         //     cfg->g_error_resilient | extra_cfg->error_resilient_mode;
1826         const int err_resil_mode =
1827             layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0;
1828         aom_codec_control(&codec, AV1E_SET_ERROR_RESILIENT_MODE,
1829                           err_resil_mode);
1830       }
1831 
1832       layer = slx * ts_number_layers + layer_id.temporal_layer_id;
1833       if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
1834 
1835       if (test_dynamic_scaling_single_layer) {
1836         // Example to scale source down by 2x2, then 4x4, and then back up to
1837         // 2x2, and then back to original.
1838         int frame_2x2 = 200;
1839         int frame_4x4 = 400;
1840         int frame_2x2up = 600;
1841         int frame_orig = 800;
1842         if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
1843           // Scale source down by 2x2.
1844           struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
1845           aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1846         } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
1847           // Scale source down by 4x4.
1848           struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
1849           aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1850         } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
1851           // Source back up to 2x2.
1852           struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
1853           aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1854         } else if (frame_cnt >= frame_orig) {
1855           // Source back up to original resolution (no scaling).
1856           struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
1857           aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1858         }
1859         if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
1860             frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
1861           // For dynamic resize testing on single layer: refresh all references
1862           // on the resized frame: this is to avoid decode error:
1863           // if resize goes down by >= 4x4 then libaom decoder will throw an
1864           // error that some reference (even though not used) is beyond the
1865           // limit size (must be smaller than 4x4).
1866           for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
1867           if (use_svc_control) {
1868             aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG,
1869                               &ref_frame_config);
1870             aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED,
1871                               &ref_frame_comp_pred);
1872           }
1873         }
1874       }
1875 
1876       // Change target_bitrate every other frame.
1877       if (test_changing_bitrate && frame_cnt % 2 == 0) {
1878         if (frame_cnt < 500)
1879           cfg.rc_target_bitrate += 10;
1880         else
1881           cfg.rc_target_bitrate -= 10;
1882         // Do big increase and decrease.
1883         if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1;
1884         if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1;
1885         if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100;
1886         // Call change_config, or bypass with new control.
1887         // res = aom_codec_enc_config_set(&codec, &cfg);
1888         if (aom_codec_control(&codec, AV1E_SET_BITRATE_ONE_PASS_CBR,
1889                               cfg.rc_target_bitrate))
1890           die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR");
1891       }
1892 
1893       if (rc_api) {
1894         aom::AV1FrameParamsRTC frame_params;
1895         // TODO(jianj): Add support for SVC.
1896         frame_params.spatial_layer_id = 0;
1897         frame_params.temporal_layer_id = 0;
1898         frame_params.frame_type =
1899             is_key_frame ? aom::kKeyFrame : aom::kInterFrame;
1900         rc_api->ComputeQP(frame_params);
1901         const int current_qp = rc_api->GetQP();
1902         if (aom_codec_control(&codec, AV1E_SET_QUANTIZER_ONE_PASS,
1903                               qindex_to_quantizer(current_qp))) {
1904           die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS");
1905         }
1906       }
1907 
1908       if (test_active_maps) set_active_map(&cfg, &codec, frame_cnt);
1909 
1910       // Do the layer encode.
1911       aom_usec_timer_start(&timer);
1912       if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags))
1913         die_codec(&codec, "Failed to encode frame");
1914       aom_usec_timer_mark(&timer);
1915       cx_time += aom_usec_timer_elapsed(&timer);
1916       cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
1917       frame_cnt_layer[layer] += 1;
1918 
1919       // Get the high motion content flag.
1920       int content_flag = 0;
1921       if (aom_codec_control(&codec, AV1E_GET_HIGH_MOTION_CONTENT_SCREEN_RTC,
1922                             &content_flag)) {
1923         die_codec(&codec, "Failed to GET_HIGH_MOTION_CONTENT_SCREEN_RTC");
1924       }
1925 
1926       got_data = 0;
1927       // For simulcast (mode 11): write out each spatial layer to the file.
1928       int ss_layers_write = (app_input.layering_mode == 11)
1929                                 ? layer_id.spatial_layer_id + 1
1930                                 : ss_number_layers;
1931       while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
1932         switch (pkt->kind) {
1933           case AOM_CODEC_CX_FRAME_PKT:
1934             for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write;
1935                  ++sl) {
1936               for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers;
1937                    ++tl) {
1938                 int j = sl * ts_number_layers + tl;
1939                 if (app_input.output_obu) {
1940                   fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
1941                          obu_files[j]);
1942                 } else {
1943                   aom_video_writer_write_frame(
1944                       outfile[j],
1945                       reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1946                       pkt->data.frame.sz, pts);
1947                 }
1948                 if (sl == layer_id.spatial_layer_id)
1949                   rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz;
1950               }
1951             }
1952             got_data = 1;
1953             // Write everything into the top layer.
1954             if (app_input.output_obu) {
1955               fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
1956                      total_layer_obu_file);
1957             } else {
1958               aom_video_writer_write_frame(
1959                   total_layer_file,
1960                   reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1961                   pkt->data.frame.sz, pts);
1962             }
1963             // Keep count of rate control stats per layer (for non-key).
1964             if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) {
1965               int j = layer_id.spatial_layer_id * ts_number_layers +
1966                       layer_id.temporal_layer_id;
1967               assert(j >= 0);
1968               rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz;
1969               rc.layer_avg_rate_mismatch[j] +=
1970                   fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) /
1971                   rc.layer_pfb[j];
1972               if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id];
1973             }
1974 
1975             if (rc_api) {
1976               rc_api->PostEncodeUpdate(pkt->data.frame.sz);
1977             }
1978             // Update for short-time encoding bitrate states, for moving window
1979             // of size rc->window, shifted by rc->window / 2.
1980             // Ignore first window segment, due to key frame.
1981             // For spatial layers: only do this for top/highest SL.
1982             if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
1983               sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
1984               rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
1985               if (frame_cnt % rc.window_size == 0) {
1986                 rc.window_count += 1;
1987                 rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
1988                 rc.variance_st_encoding_bitrate +=
1989                     (sum_bitrate / rc.window_size) *
1990                     (sum_bitrate / rc.window_size);
1991                 sum_bitrate = 0.0;
1992               }
1993             }
1994             // Second shifted window.
1995             if (frame_cnt > rc.window_size + rc.window_size / 2 &&
1996                 slx == ss_number_layers - 1) {
1997               sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
1998               if (frame_cnt > 2 * rc.window_size &&
1999                   frame_cnt % rc.window_size == 0) {
2000                 rc.window_count += 1;
2001                 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
2002                 rc.variance_st_encoding_bitrate +=
2003                     (sum_bitrate2 / rc.window_size) *
2004                     (sum_bitrate2 / rc.window_size);
2005                 sum_bitrate2 = 0.0;
2006               }
2007             }
2008 
2009 #if CONFIG_AV1_DECODER
2010             if (app_input.decode) {
2011               if (aom_codec_decode(
2012                       &decoder,
2013                       reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2014                       pkt->data.frame.sz, NULL))
2015                 die_codec(&decoder, "Failed to decode frame");
2016             }
2017 #endif
2018 
2019             break;
2020           case AOM_CODEC_PSNR_PKT:
2021             if (app_input.show_psnr) {
2022               psnr_stream.psnr_sse_total[0] += pkt->data.psnr.sse[0];
2023               psnr_stream.psnr_samples_total[0] += pkt->data.psnr.samples[0];
2024               for (int plane = 0; plane < 4; plane++) {
2025                 psnr_stream.psnr_totals[0][plane] += pkt->data.psnr.psnr[plane];
2026               }
2027               psnr_stream.psnr_count[0]++;
2028             }
2029             break;
2030           default: break;
2031         }
2032       }
2033 #if CONFIG_AV1_DECODER
2034       if (got_data && app_input.decode) {
2035         // Don't look for mismatch on top spatial and top temporal layers as
2036         // they are non reference frames.
2037         if ((ss_number_layers > 1 || ts_number_layers > 1) &&
2038             !(layer_id.temporal_layer_id > 0 &&
2039               layer_id.temporal_layer_id == ts_number_layers - 1)) {
2040           if (test_decode(&codec, &decoder, frame_cnt)) {
2041 #if CONFIG_INTERNAL_STATS
2042             fprintf(stats_file, "First mismatch occurred in frame %d\n",
2043                     frame_cnt);
2044             fclose(stats_file);
2045 #endif
2046             fatal("Mismatch seen");
2047           }
2048         }
2049       }
2050 #endif
2051     }  // loop over spatial layers
2052     ++frame_cnt;
2053     pts += frame_duration;
2054   }
2055 
2056   close_input_file(&(app_input.input_ctx));
2057   printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
2058                                 ts_number_layers);
2059 
2060   printf("\n");
2061   for (int slx = 0; slx < ss_number_layers; slx++)
2062     for (int tlx = 0; tlx < ts_number_layers; tlx++) {
2063       int lx = slx * ts_number_layers + tlx;
2064       printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
2065              slx, tlx, frame_cnt_layer[lx],
2066              (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
2067              1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
2068     }
2069 
2070   printf("\n");
2071   printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
2072          frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
2073          1000000 * (double)frame_cnt / (double)cx_time);
2074 
2075   if (app_input.show_psnr) {
2076     show_psnr(&psnr_stream, 255.0);
2077   }
2078 
2079   if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder");
2080 
2081 #if CONFIG_AV1_DECODER
2082   if (app_input.decode) {
2083     if (aom_codec_destroy(&decoder))
2084       die_codec(&decoder, "Failed to destroy decoder");
2085   }
2086 #endif
2087 
2088 #if CONFIG_INTERNAL_STATS
2089   fprintf(stats_file, "No mismatch detected in recon buffers\n");
2090   fclose(stats_file);
2091 #endif
2092 
2093   // Try to rewrite the output file headers with the actual frame count.
2094   for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
2095     aom_video_writer_close(outfile[i]);
2096   aom_video_writer_close(total_layer_file);
2097 
2098   if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
2099     aom_img_free(&raw);
2100   }
2101   return EXIT_SUCCESS;
2102 }
2103