1 /*
2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 // This is an example demonstrating how to implement a multi-layer AOM
13 // encoding scheme for RTC video applications.
14
15 #include <assert.h>
16 #include <limits.h>
17 #include <math.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21
22 #include <memory>
23
24 #include "config/aom_config.h"
25
26 #if CONFIG_AV1_DECODER
27 #include "aom/aom_decoder.h"
28 #endif
29 #include "aom/aom_encoder.h"
30 #include "aom/aomcx.h"
31 #include "common/args.h"
32 #include "common/tools_common.h"
33 #include "common/video_writer.h"
34 #include "examples/encoder_util.h"
35 #include "aom_ports/aom_timer.h"
36 #include "av1/ratectrl_rtc.h"
37
38 #define OPTION_BUFFER_SIZE 1024
39
// Application-level settings collected by parse_command_line(), separate from
// the encoder configuration (aom_codec_enc_cfg_t) and the SVC parameters.
typedef struct {
  // Value of -o/--output.
  const char *output_filename;
  // Free-form option string; parse_command_line() copies its local
  // string_options buffer here when that buffer is non-empty.
  char options[OPTION_BUFFER_SIZE];
  // Input file state; filename is set from the first non-option argument and
  // the rest is filled in by open_input_file().
  struct AvxInputContext input_ctx;
  // Value of -sp/--speed.
  int speed;
  // Value of -aq/--aqmode.
  int aq_mode;
  // Value of -lm/--layering-mode (defaults to 0); selects the case in
  // set_layer_pattern().
  int layering_mode;
  // 0 or 1, from --output-obu (defaults to 0).
  int output_obu;
  // 0 or 1, from --test-decode (defaults to 1).
  int decode;
  // Value of --tune-content (one of tune_content_enum, or a raw integer).
  int tune_content;
  // Set to 1 when --psnr is given.
  int show_psnr;
  // Set to true when --use-ext-rc is given.
  bool use_external_rc;
} AppInput;
53
// Kinds of per-layer option strings understood by extract_option() and
// parse_layer_options_from_string(). The enumerator values are used as indices
// into option_min_values[] / option_max_values[], so their order must match
// those tables.
typedef enum {
  QUANTIZER = 0,  // Single integer per layer.
  BITRATE,        // Single integer per (spatial, temporal) layer.
  SCALE_FACTOR,   // "num/den" pair per spatial layer.
  AUTO_ALT_REF,   // Single integer per layer.
  ALL_OPTION_TYPES  // Number of option types; sizes the min/max tables.
} LAYER_OPTION_TYPE;
61
// Command-line option definitions. The ARG_DEF arguments are presumably
// (short name, long name, value-count flag, help text) -- confirm against
// common/args.h. These definitions are matched in parse_command_line() and
// listed for the usage message through svc_args[].
static const arg_def_t outputfile =
    ARG_DEF("o", "output", 1, "Output filename");
static const arg_def_t frames_arg =
    ARG_DEF("f", "frames", 1, "Number of frames to encode");
static const arg_def_t threads_arg =
    ARG_DEF("th", "threads", 1, "Number of threads to use");
static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width");
static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height");
static const arg_def_t timebase_arg =
    ARG_DEF("t", "timebase", 1, "Timebase (num/den)");
static const arg_def_t bitrate_arg = ARG_DEF(
    "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second");
static const arg_def_t spatial_layers_arg =
    ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers");
static const arg_def_t temporal_layers_arg =
    ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers");
static const arg_def_t layering_mode_arg =
    ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme.");
// Sets both kf_min_dist and kf_max_dist in parse_command_line().
static const arg_def_t kf_dist_arg =
    ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes");
// Comma-separated "num/den" list, one entry per spatial layer.
static const arg_def_t scale_factors_arg =
    ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)");
static const arg_def_t min_q_arg =
    ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
static const arg_def_t max_q_arg =
    ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
static const arg_def_t speed_arg =
    ARG_DEF("sp", "speed", 1, "Speed configuration");
static const arg_def_t aqmode_arg =
    ARG_DEF("aq", "aqmode", 1, "AQ mode off/on");
// Comma-separated list with one entry per (spatial, temporal) layer; parsed in
// a second pass so that the layer counts are already known.
static const arg_def_t bitrates_arg =
    ARG_DEF("bl", "bitrates", 1,
            "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
static const arg_def_t dropframe_thresh_arg =
    ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
static const arg_def_t error_resilient_arg =
    ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag");
static const arg_def_t output_obu_arg =
    ARG_DEF(NULL, "output-obu", 1,
            "Write OBUs when set to 1. Otherwise write IVF files.");
static const arg_def_t test_decode_arg =
    ARG_DEF(NULL, "test-decode", 1,
            "Attempt to test decoding the output when set to 1. Default is 1.");
// parse_command_line() treats the mere presence of --psnr as "on".
static const arg_def_t psnr_arg =
    ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line.");
// Flag without a value; presence enables external rate control.
static const arg_def_t ext_rc_arg =
    ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control.");
// Symbolic names accepted by --tune-content.
static const struct arg_enum_list tune_content_enum[] = {
  { "default", AOM_CONTENT_DEFAULT },
  { "screen", AOM_CONTENT_SCREEN },
  { "film", AOM_CONTENT_FILM },
  { NULL, 0 }
};
static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
    NULL, "tune-content", 1, "Tune content type", tune_content_enum);
117
#if CONFIG_AV1_HIGHBITDEPTH
// Bit depths selectable with -d/--bit-depth; only available in builds with
// high bit depth support compiled in.
static const struct arg_enum_list bitdepth_enum[] = { { "8", AOM_BITS_8 },
                                                      { "10", AOM_BITS_10 },
                                                      { NULL, 0 } };

static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
    "d", "bit-depth", 1, "Bit depth for codec 8 or 10. ", bitdepth_enum);
#endif  // CONFIG_AV1_HIGHBITDEPTH
126
127 static const arg_def_t *svc_args[] = {
128 &frames_arg, &outputfile, &width_arg,
129 &height_arg, &timebase_arg, &bitrate_arg,
130 &spatial_layers_arg, &kf_dist_arg, &scale_factors_arg,
131 &min_q_arg, &max_q_arg, &temporal_layers_arg,
132 &layering_mode_arg, &threads_arg, &aqmode_arg,
133 #if CONFIG_AV1_HIGHBITDEPTH
134 &bitdepth_arg,
135 #endif
136 &speed_arg, &bitrates_arg, &dropframe_thresh_arg,
137 &error_resilient_arg, &output_obu_arg, &test_decode_arg,
138 &tune_content_arg, &psnr_arg, NULL,
139 };
140
// Zero-fills an object in place. Dest must be an lvalue whose sizeof is the
// full object size (i.e. not a pointer to the object or a decayed array).
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))

// Program name printed by usage_exit(); it is not assigned anywhere in the
// code above, so it is presumably set by main() -- confirm.
static const char *exec_name;
144
usage_exit(void)145 void usage_exit(void) {
146 fprintf(stderr, "Usage: %s <options> input_filename -o output_filename\n",
147 exec_name);
148 fprintf(stderr, "Options:\n");
149 arg_show_usage(stderr, svc_args);
150 exit(EXIT_FAILURE);
151 }
152
// Returns 1 when the four bytes in `detect` are the Y4M signature "YUV4",
// 0 otherwise.
static int file_is_y4m(const char detect[4]) {
  static const char kY4mSignature[4] = { 'Y', 'U', 'V', '4' };
  if (memcmp(detect, kY4mSignature, sizeof(kY4mSignature)) != 0) return 0;
  return 1;
}
156
// Returns 1 when the four bytes in `detect` are the IVF signature "DKIF",
// 0 otherwise.
static int fourcc_is_ivf(const char detect[4]) {
  return memcmp(detect, "DKIF", 4) == 0 ? 1 : 0;
}
163
// Inclusive upper bound for each LAYER_OPTION_TYPE, indexed by the enumerator
// value: QUANTIZER, BITRATE, SCALE_FACTOR, AUTO_ALT_REF.
static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
                                                         1 };

// Inclusive lower bound for each LAYER_OPTION_TYPE, in the same order. The
// SCALE_FACTOR minimum of 1 also rules out a zero denominator.
static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
168
open_input_file(struct AvxInputContext * input,aom_chroma_sample_position_t csp)169 static void open_input_file(struct AvxInputContext *input,
170 aom_chroma_sample_position_t csp) {
171 /* Parse certain options from the input file, if possible */
172 input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
173 : set_binary_mode(stdin);
174
175 if (!input->file) fatal("Failed to open input file");
176
177 if (!fseeko(input->file, 0, SEEK_END)) {
178 /* Input file is seekable. Figure out how long it is, so we can get
179 * progress info.
180 */
181 input->length = ftello(input->file);
182 rewind(input->file);
183 }
184
185 /* Default to 1:1 pixel aspect ratio. */
186 input->pixel_aspect_ratio.numerator = 1;
187 input->pixel_aspect_ratio.denominator = 1;
188
189 /* For RAW input sources, these bytes will applied on the first frame
190 * in read_frame().
191 */
192 input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
193 input->detect.position = 0;
194
195 if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
196 if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
197 input->only_i420) >= 0) {
198 input->file_type = FILE_TYPE_Y4M;
199 input->width = input->y4m.pic_w;
200 input->height = input->y4m.pic_h;
201 input->pixel_aspect_ratio.numerator = input->y4m.par_n;
202 input->pixel_aspect_ratio.denominator = input->y4m.par_d;
203 input->framerate.numerator = input->y4m.fps_n;
204 input->framerate.denominator = input->y4m.fps_d;
205 input->fmt = input->y4m.aom_fmt;
206 input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
207 } else {
208 fatal("Unsupported Y4M stream.");
209 }
210 } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
211 fatal("IVF is not supported as input.");
212 } else {
213 input->file_type = FILE_TYPE_RAW;
214 }
215 }
216
extract_option(LAYER_OPTION_TYPE type,char * input,int * value0,int * value1)217 static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input,
218 int *value0, int *value1) {
219 if (type == SCALE_FACTOR) {
220 *value0 = (int)strtol(input, &input, 10);
221 if (*input++ != '/') return AOM_CODEC_INVALID_PARAM;
222 *value1 = (int)strtol(input, &input, 10);
223
224 if (*value0 < option_min_values[SCALE_FACTOR] ||
225 *value1 < option_min_values[SCALE_FACTOR] ||
226 *value0 > option_max_values[SCALE_FACTOR] ||
227 *value1 > option_max_values[SCALE_FACTOR] ||
228 *value0 > *value1) // num shouldn't be greater than den
229 return AOM_CODEC_INVALID_PARAM;
230 } else {
231 *value0 = atoi(input);
232 if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
233 return AOM_CODEC_INVALID_PARAM;
234 }
235 return AOM_CODEC_OK;
236 }
237
parse_layer_options_from_string(aom_svc_params_t * svc_params,LAYER_OPTION_TYPE type,const char * input,int * option0,int * option1)238 static aom_codec_err_t parse_layer_options_from_string(
239 aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input,
240 int *option0, int *option1) {
241 aom_codec_err_t res = AOM_CODEC_OK;
242 char *input_string;
243 char *token;
244 const char *delim = ",";
245 int num_layers = svc_params->number_spatial_layers;
246 int i = 0;
247
248 if (type == BITRATE)
249 num_layers =
250 svc_params->number_spatial_layers * svc_params->number_temporal_layers;
251
252 if (input == NULL || option0 == NULL ||
253 (option1 == NULL && type == SCALE_FACTOR))
254 return AOM_CODEC_INVALID_PARAM;
255
256 const size_t input_length = strlen(input);
257 input_string = reinterpret_cast<char *>(malloc(input_length + 1));
258 if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
259 memcpy(input_string, input, input_length + 1);
260 token = strtok(input_string, delim); // NOLINT
261 for (i = 0; i < num_layers; ++i) {
262 if (token != NULL) {
263 res = extract_option(type, token, option0 + i, option1 + i);
264 if (res != AOM_CODEC_OK) break;
265 token = strtok(NULL, delim); // NOLINT
266 } else {
267 res = AOM_CODEC_INVALID_PARAM;
268 break;
269 }
270 }
271 free(input_string);
272 return res;
273 }
274
parse_command_line(int argc,const char ** argv_,AppInput * app_input,aom_svc_params_t * svc_params,aom_codec_enc_cfg_t * enc_cfg)275 static void parse_command_line(int argc, const char **argv_,
276 AppInput *app_input,
277 aom_svc_params_t *svc_params,
278 aom_codec_enc_cfg_t *enc_cfg) {
279 struct arg arg;
280 char **argv = NULL;
281 char **argi = NULL;
282 char **argj = NULL;
283 char string_options[1024] = { 0 };
284
285 // Default settings
286 svc_params->number_spatial_layers = 1;
287 svc_params->number_temporal_layers = 1;
288 app_input->layering_mode = 0;
289 app_input->output_obu = 0;
290 app_input->decode = 1;
291 enc_cfg->g_threads = 1;
292 enc_cfg->rc_end_usage = AOM_CBR;
293
294 // process command line options
295 argv = argv_dup(argc - 1, argv_ + 1);
296 if (!argv) {
297 fprintf(stderr, "Error allocating argument list\n");
298 exit(EXIT_FAILURE);
299 }
300 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
301 arg.argv_step = 1;
302
303 if (arg_match(&arg, &outputfile, argi)) {
304 app_input->output_filename = arg.val;
305 } else if (arg_match(&arg, &width_arg, argi)) {
306 enc_cfg->g_w = arg_parse_uint(&arg);
307 } else if (arg_match(&arg, &height_arg, argi)) {
308 enc_cfg->g_h = arg_parse_uint(&arg);
309 } else if (arg_match(&arg, &timebase_arg, argi)) {
310 enc_cfg->g_timebase = arg_parse_rational(&arg);
311 } else if (arg_match(&arg, &bitrate_arg, argi)) {
312 enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
313 } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
314 svc_params->number_spatial_layers = arg_parse_uint(&arg);
315 } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
316 svc_params->number_temporal_layers = arg_parse_uint(&arg);
317 } else if (arg_match(&arg, &speed_arg, argi)) {
318 app_input->speed = arg_parse_uint(&arg);
319 if (app_input->speed > 11) {
320 aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed);
321 }
322 } else if (arg_match(&arg, &aqmode_arg, argi)) {
323 app_input->aq_mode = arg_parse_uint(&arg);
324 } else if (arg_match(&arg, &threads_arg, argi)) {
325 enc_cfg->g_threads = arg_parse_uint(&arg);
326 } else if (arg_match(&arg, &layering_mode_arg, argi)) {
327 app_input->layering_mode = arg_parse_int(&arg);
328 } else if (arg_match(&arg, &kf_dist_arg, argi)) {
329 enc_cfg->kf_min_dist = arg_parse_uint(&arg);
330 enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
331 } else if (arg_match(&arg, &scale_factors_arg, argi)) {
332 aom_codec_err_t res = parse_layer_options_from_string(
333 svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num,
334 svc_params->scaling_factor_den);
335 if (res != AOM_CODEC_OK) {
336 die("Failed to parse scale factors: %s\n",
337 aom_codec_err_to_string(res));
338 }
339 } else if (arg_match(&arg, &min_q_arg, argi)) {
340 enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
341 } else if (arg_match(&arg, &max_q_arg, argi)) {
342 enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
343 #if CONFIG_AV1_HIGHBITDEPTH
344 } else if (arg_match(&arg, &bitdepth_arg, argi)) {
345 enc_cfg->g_bit_depth =
346 static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg));
347 switch (enc_cfg->g_bit_depth) {
348 case AOM_BITS_8:
349 enc_cfg->g_input_bit_depth = 8;
350 enc_cfg->g_profile = 0;
351 break;
352 case AOM_BITS_10:
353 enc_cfg->g_input_bit_depth = 10;
354 enc_cfg->g_profile = 0;
355 break;
356 default:
357 die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
358 }
359 #endif // CONFIG_VP9_HIGHBITDEPTH
360 } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
361 enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
362 } else if (arg_match(&arg, &error_resilient_arg, argi)) {
363 enc_cfg->g_error_resilient = arg_parse_uint(&arg);
364 if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1)
365 die("Invalid value for error resilient (0, 1): %d.",
366 enc_cfg->g_error_resilient);
367 } else if (arg_match(&arg, &output_obu_arg, argi)) {
368 app_input->output_obu = arg_parse_uint(&arg);
369 if (app_input->output_obu != 0 && app_input->output_obu != 1)
370 die("Invalid value for obu output flag (0, 1): %d.",
371 app_input->output_obu);
372 } else if (arg_match(&arg, &test_decode_arg, argi)) {
373 app_input->decode = arg_parse_uint(&arg);
374 if (app_input->decode != 0 && app_input->decode != 1)
375 die("Invalid value for test decode flag (0, 1): %d.",
376 app_input->decode);
377 } else if (arg_match(&arg, &tune_content_arg, argi)) {
378 app_input->tune_content = arg_parse_enum_or_int(&arg);
379 printf("tune content %d\n", app_input->tune_content);
380 } else if (arg_match(&arg, &psnr_arg, argi)) {
381 app_input->show_psnr = 1;
382 } else if (arg_match(&arg, &ext_rc_arg, argi)) {
383 app_input->use_external_rc = true;
384 } else {
385 ++argj;
386 }
387 }
388
389 // Total bitrate needs to be parsed after the number of layers.
390 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
391 arg.argv_step = 1;
392 if (arg_match(&arg, &bitrates_arg, argi)) {
393 aom_codec_err_t res = parse_layer_options_from_string(
394 svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL);
395 if (res != AOM_CODEC_OK) {
396 die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res));
397 }
398 } else {
399 ++argj;
400 }
401 }
402
403 // There will be a space in front of the string options
404 if (strlen(string_options) > 0)
405 strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
406
407 // Check for unrecognized options
408 for (argi = argv; *argi; ++argi)
409 if (argi[0][0] == '-' && strlen(argi[0]) > 1)
410 die("Error: Unrecognized option %s\n", *argi);
411
412 if (argv[0] == NULL) {
413 usage_exit();
414 }
415
416 app_input->input_ctx.filename = argv[0];
417 free(argv);
418
419 open_input_file(&app_input->input_ctx, AOM_CSP_UNKNOWN);
420 if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) {
421 enc_cfg->g_w = app_input->input_ctx.width;
422 enc_cfg->g_h = app_input->input_ctx.height;
423 }
424
425 if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
426 enc_cfg->g_h % 2)
427 die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
428
429 printf(
430 "Codec %s\n"
431 "layers: %d\n"
432 "width %u, height: %u\n"
433 "num: %d, den: %d, bitrate: %u\n"
434 "gop size: %u\n",
435 aom_codec_iface_name(aom_codec_av1_cx()),
436 svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
437 enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
438 enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
439 }
440
// Number of temporal layers for each of the 12 layering modes; presumably
// indexed by the --layering-mode value (entries 0-2 agree with the 1-, 2- and
// 3-temporal-layer patterns in set_layer_pattern()) -- confirm against the
// caller.
static int mode_to_num_temporal_layers[12] = {
  1, 2, 3, 3, 2, 1, 1, 3, 3, 3, 3, 3,
};
// Number of spatial layers for each of the 12 layering modes; presumably
// indexed the same way.
static int mode_to_num_spatial_layers[12] = {
  1, 1, 1, 1, 1, 2, 3, 2, 3, 3, 3, 3,
};
447
448 // For rate control encoding stats.
// Accumulators for the rate control statistics printed at the end of the run.
// Arrays sized AOM_MAX_TS_LAYERS are indexed by temporal layer; arrays sized
// AOM_MAX_LAYERS are indexed by
// spatial_layer * number_temporal_layers + temporal_layer.
struct RateControlMetrics {
  // Number of input frames per layer.
  int layer_input_frames[AOM_MAX_TS_LAYERS];
  // Number of encoded non-key frames per layer.
  int layer_enc_frames[AOM_MAX_TS_LAYERS];
  // Framerate per layer (cumulative).
  double layer_framerate[AOM_MAX_TS_LAYERS];
  // Target average frame size per layer (per-frame-bandwidth per layer).
  double layer_pfb[AOM_MAX_LAYERS];
  // Actual average frame size per layer.
  double layer_avg_frame_size[AOM_MAX_LAYERS];
  // Average rate mismatch per layer (|target - actual| / target).
  double layer_avg_rate_mismatch[AOM_MAX_LAYERS];
  // Actual encoding bitrate per layer (cumulative across temporal layers).
  double layer_encoding_bitrate[AOM_MAX_LAYERS];
  // Average of the short-time encoder actual bitrate.
  // TODO(marpan): Should we add these short-time stats for each layer?
  double avg_st_encoding_bitrate;
  // Variance of the short-time encoder actual bitrate.
  double variance_st_encoding_bitrate;
  // Window (number of frames) for computing short-time encoding bitrate.
  int window_size;
  // Number of window measurements.
  int window_count;
  // Target bitrate per layer; read by set_rate_control_metrics() to derive
  // layer_pfb, so it must be filled in before that call.
  int layer_target_bitrate[AOM_MAX_LAYERS];
};
475
// Number of reference buffer slots; set_layer_pattern() clears this many
// entries of ref_frame_config->refresh[].
static const int REF_FRAMES = 8;

// Number of inter reference types (LAST .. ALTREF); set_layer_pattern()
// initializes this many entries of ref_idx[] and reference[].
static const int INTER_REFS_PER_FRAME = 7;
479
480 // Reference frames used in this example encoder.
// Reference frames used in this example encoder. The values (0..6) are used
// as indices into ref_frame_config->ref_idx[] and ->reference[].
enum {
  SVC_LAST_FRAME = 0,
  SVC_LAST2_FRAME,
  SVC_LAST3_FRAME,
  SVC_GOLDEN_FRAME,
  SVC_BWDREF_FRAME,
  SVC_ALTREF2_FRAME,
  SVC_ALTREF_FRAME
};
490
read_frame(struct AvxInputContext * input_ctx,aom_image_t * img)491 static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
492 FILE *f = input_ctx->file;
493 y4m_input *y4m = &input_ctx->y4m;
494 int shortread = 0;
495
496 if (input_ctx->file_type == FILE_TYPE_Y4M) {
497 if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
498 } else {
499 shortread = read_yuv_frame(input_ctx, img);
500 }
501
502 return !shortread;
503 }
504
close_input_file(struct AvxInputContext * input)505 static void close_input_file(struct AvxInputContext *input) {
506 fclose(input->file);
507 if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
508 }
509
510 // Note: these rate control metrics assume only 1 key frame in the
511 // sequence (i.e., first frame only). So for temporal pattern# 7
512 // (which has key frame for every frame on base layer), the metrics
513 // computation will be off/wrong.
514 // TODO(marpan): Update these metrics to account for multiple key frames
515 // in the stream.
set_rate_control_metrics(struct RateControlMetrics * rc,double framerate,int ss_number_layers,int ts_number_layers)516 static void set_rate_control_metrics(struct RateControlMetrics *rc,
517 double framerate, int ss_number_layers,
518 int ts_number_layers) {
519 int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 };
520 ts_rate_decimator[0] = 1;
521 if (ts_number_layers == 2) {
522 ts_rate_decimator[0] = 2;
523 ts_rate_decimator[1] = 1;
524 }
525 if (ts_number_layers == 3) {
526 ts_rate_decimator[0] = 4;
527 ts_rate_decimator[1] = 2;
528 ts_rate_decimator[2] = 1;
529 }
530 // Set the layer (cumulative) framerate and the target layer (non-cumulative)
531 // per-frame-bandwidth, for the rate control encoding stats below.
532 for (int sl = 0; sl < ss_number_layers; ++sl) {
533 int i = sl * ts_number_layers;
534 rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
535 rc->layer_pfb[i] =
536 1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
537 for (int tl = 0; tl < ts_number_layers; ++tl) {
538 i = sl * ts_number_layers + tl;
539 if (tl > 0) {
540 rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
541 rc->layer_pfb[i] =
542 1000.0 *
543 (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
544 (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
545 }
546 rc->layer_input_frames[tl] = 0;
547 rc->layer_enc_frames[tl] = 0;
548 rc->layer_encoding_bitrate[i] = 0.0;
549 rc->layer_avg_frame_size[i] = 0.0;
550 rc->layer_avg_rate_mismatch[i] = 0.0;
551 }
552 }
553 rc->window_count = 0;
554 rc->window_size = 15;
555 rc->avg_st_encoding_bitrate = 0.0;
556 rc->variance_st_encoding_bitrate = 0.0;
557 }
558
printout_rate_control_summary(struct RateControlMetrics * rc,int frame_cnt,int ss_number_layers,int ts_number_layers)559 static void printout_rate_control_summary(struct RateControlMetrics *rc,
560 int frame_cnt, int ss_number_layers,
561 int ts_number_layers) {
562 int tot_num_frames = 0;
563 double perc_fluctuation = 0.0;
564 printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
565 printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
566 for (int sl = 0; sl < ss_number_layers; ++sl) {
567 tot_num_frames = 0;
568 for (int tl = 0; tl < ts_number_layers; ++tl) {
569 int i = sl * ts_number_layers + tl;
570 const int num_dropped =
571 tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
572 : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
573 tot_num_frames += rc->layer_input_frames[tl];
574 rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
575 rc->layer_encoding_bitrate[i] /
576 tot_num_frames;
577 rc->layer_avg_frame_size[i] =
578 rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
579 rc->layer_avg_rate_mismatch[i] =
580 100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
581 printf("For layer#: %d %d \n", sl, tl);
582 printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
583 rc->layer_encoding_bitrate[i]);
584 printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
585 rc->layer_avg_frame_size[i]);
586 printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
587 printf(
588 "Number of input frames, encoded (non-key) frames, "
589 "and perc dropped frames: %d %d %f\n",
590 rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
591 100.0 * num_dropped / rc->layer_input_frames[tl]);
592 printf("\n");
593 }
594 }
595 rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
596 rc->variance_st_encoding_bitrate =
597 rc->variance_st_encoding_bitrate / rc->window_count -
598 (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
599 perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
600 rc->avg_st_encoding_bitrate;
601 printf("Short-time stats, for window of %d frames:\n", rc->window_size);
602 printf("Average, rms-variance, and percent-fluct: %f %f %f\n",
603 rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
604 perc_fluctuation);
605 if (frame_cnt - 1 != tot_num_frames)
606 die("Error: Number of input frames not equal to output!\n");
607 }
608
609 // Layer pattern configuration.
set_layer_pattern(int layering_mode,int superframe_cnt,aom_svc_layer_id_t * layer_id,aom_svc_ref_frame_config_t * ref_frame_config,aom_svc_ref_frame_comp_pred_t * ref_frame_comp_pred,int * use_svc_control,int spatial_layer_id,int is_key_frame,int ksvc_mode,int speed)610 static void set_layer_pattern(
611 int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
612 aom_svc_ref_frame_config_t *ref_frame_config,
613 aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
614 int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed) {
615 // Setting this flag to 1 enables simplex example of
616 // RPS (Reference Picture Selection) for 1 layer.
617 int use_rps_example = 0;
618 int i;
619 int enable_longterm_temporal_ref = 1;
620 int shift = (layering_mode == 8) ? 2 : 0;
621 int simulcast_mode = (layering_mode == 11);
622 *use_svc_control = 1;
623 layer_id->spatial_layer_id = spatial_layer_id;
624 int lag_index = 0;
625 int base_count = superframe_cnt >> 2;
626 ref_frame_comp_pred->use_comp_pred[0] = 0; // GOLDEN_LAST
627 ref_frame_comp_pred->use_comp_pred[1] = 0; // LAST2_LAST
628 ref_frame_comp_pred->use_comp_pred[2] = 0; // ALTREF_LAST
629 // Set the reference map buffer idx for the 7 references:
630 // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
631 // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
632 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
633 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0;
634 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
635
636 if (ksvc_mode) {
    // Same pattern as case 9, but the reference structure will be constrained
638 // below.
639 layering_mode = 9;
640 }
641 switch (layering_mode) {
642 case 0:
643 if (use_rps_example == 0) {
644 // 1-layer: update LAST on every frame, reference LAST.
645 layer_id->temporal_layer_id = 0;
646 layer_id->spatial_layer_id = 0;
647 ref_frame_config->refresh[0] = 1;
648 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
649 } else {
650 // Pattern of 2 references (ALTREF and GOLDEN) trailing
651 // LAST by 4 and 8 frames, with some switching logic to
652 // sometimes only predict from the longer-term reference
653 //(golden here). This is simple example to test RPS
654 // (reference picture selection).
655 int last_idx = 0;
656 int last_idx_refresh = 0;
657 int gld_idx = 0;
658 int alt_ref_idx = 0;
659 int lag_alt = 4;
660 int lag_gld = 8;
661 layer_id->temporal_layer_id = 0;
662 layer_id->spatial_layer_id = 0;
663 int sh = 8; // slots 0 - 7.
664 // Moving index slot for last: 0 - (sh - 1)
665 if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
666 // Moving index for refresh of last: one ahead for next frame.
667 last_idx_refresh = superframe_cnt % sh;
668 // Moving index for gld_ref, lag behind current by lag_gld
669 if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
670 // Moving index for alt_ref, lag behind LAST by lag_alt frames.
671 if (superframe_cnt > lag_alt)
672 alt_ref_idx = (superframe_cnt - lag_alt) % sh;
673 // Set the ref_idx.
674 // Default all references to slot for last.
675 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
676 ref_frame_config->ref_idx[i] = last_idx;
677 // Set the ref_idx for the relevant references.
678 ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
679 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
680 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
681 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
682 // Refresh this slot, which will become LAST on next frame.
683 ref_frame_config->refresh[last_idx_refresh] = 1;
684 // Reference LAST, ALTREF, and GOLDEN
685 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
686 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
687 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
        // Switch to only GOLDEN every 200 frames.
689 if (superframe_cnt % 200 == 0 && superframe_cnt > 0) {
690 ref_frame_config->reference[SVC_LAST_FRAME] = 0;
691 ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
692 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
693 // Test if the long-term is LAST instead, this is just a renaming
694 // but its tests if encoder behaves the same, whether its
695 // LAST or GOLDEN.
696 if (superframe_cnt % 400 == 0 && superframe_cnt > 0) {
697 ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx;
698 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
699 ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
700 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
701 }
702 }
703 }
704 break;
705 case 1:
706 // 2-temporal layer.
707 // 1 3 5
708 // 0 2 4
709 // Keep golden fixed at slot 3.
710 base_count = superframe_cnt >> 1;
711 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
712 // Cyclically refresh slots 5, 6, 7, for lag alt ref.
713 lag_index = 5;
714 if (base_count > 0) {
715 lag_index = 5 + (base_count % 3);
716 if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
717 }
718 // Set the altref slot to lag_index.
719 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
720 if (superframe_cnt % 2 == 0) {
721 layer_id->temporal_layer_id = 0;
722 // Update LAST on layer 0, reference LAST.
723 ref_frame_config->refresh[0] = 1;
724 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
        // Refresh lag_index slot, needed for lagging golden.
726 ref_frame_config->refresh[lag_index] = 1;
727 // Refresh GOLDEN every x base layer frames.
728 if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
729 } else {
730 layer_id->temporal_layer_id = 1;
731 // No updates on layer 1, reference LAST (TL0).
732 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
733 }
734 // Always reference golden and altref on TL0.
735 if (layer_id->temporal_layer_id == 0) {
736 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
737 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
738 }
739 break;
740 case 2:
741 // 3-temporal layer:
742 // 1 3 5 7
743 // 2 6
744 // 0 4 8
745 if (superframe_cnt % 4 == 0) {
746 // Base layer.
747 layer_id->temporal_layer_id = 0;
748 // Update LAST on layer 0, reference LAST.
749 ref_frame_config->refresh[0] = 1;
750 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
751 } else if ((superframe_cnt - 1) % 4 == 0) {
752 layer_id->temporal_layer_id = 2;
753 // First top layer: no updates, only reference LAST (TL0).
754 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
755 } else if ((superframe_cnt - 2) % 4 == 0) {
756 layer_id->temporal_layer_id = 1;
757 // Middle layer (TL1): update LAST2, only reference LAST (TL0).
758 ref_frame_config->refresh[1] = 1;
759 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
760 } else if ((superframe_cnt - 3) % 4 == 0) {
761 layer_id->temporal_layer_id = 2;
762 // Second top layer: no updates, only reference LAST.
763 // Set buffer idx for LAST to slot 1, since that was the slot
764 // updated in previous frame. So LAST is TL1 frame.
765 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
766 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
767 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
768 }
769 break;
770 case 3:
771 // 3 TL, same as above, except allow for predicting
772 // off 2 more references (GOLDEN and ALTREF), with
773 // GOLDEN updated periodically, and ALTREF lagging from
774 // LAST from ~4 frames. Both GOLDEN and ALTREF
775 // can only be updated on base temporal layer.
776
777 // Keep golden fixed at slot 3.
778 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
779 // Cyclically refresh slots 5, 6, 7, for lag altref.
780 lag_index = 5;
781 if (base_count > 0) {
782 lag_index = 5 + (base_count % 3);
783 if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
784 }
785 // Set the altref slot to lag_index.
786 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
787 if (superframe_cnt % 4 == 0) {
788 // Base layer.
789 layer_id->temporal_layer_id = 0;
790 // Update LAST on layer 0, reference LAST.
791 ref_frame_config->refresh[0] = 1;
792 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
793 // Refresh GOLDEN every x ~10 base layer frames.
794 if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
795 // Refresh lag_index slot, needed for lagging altref.
796 ref_frame_config->refresh[lag_index] = 1;
797 } else if ((superframe_cnt - 1) % 4 == 0) {
798 layer_id->temporal_layer_id = 2;
799 // First top layer: no updates, only reference LAST (TL0).
800 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
801 } else if ((superframe_cnt - 2) % 4 == 0) {
802 layer_id->temporal_layer_id = 1;
803 // Middle layer (TL1): update LAST2, only reference LAST (TL0).
804 ref_frame_config->refresh[1] = 1;
805 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
806 } else if ((superframe_cnt - 3) % 4 == 0) {
807 layer_id->temporal_layer_id = 2;
808 // Second top layer: no updates, only reference LAST.
809 // Set buffer idx for LAST to slot 1, since that was the slot
810 // updated in previous frame. So LAST is TL1 frame.
811 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
812 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
813 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
814 }
815 // Every frame can reference GOLDEN AND ALTREF.
816 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
817 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
818 // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN.
819 if (speed >= 7) {
820 ref_frame_comp_pred->use_comp_pred[2] = 1;
821 ref_frame_comp_pred->use_comp_pred[0] = 1;
822 }
823 break;
824 case 4:
825 // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
826 // only reference GF (not LAST). Other frames only reference LAST.
827 // 1 3 5 7
828 // 2 6
829 // 0 4 8
830 if (superframe_cnt % 4 == 0) {
831 // Base layer.
832 layer_id->temporal_layer_id = 0;
833 // Update LAST on layer 0, only reference LAST.
834 ref_frame_config->refresh[0] = 1;
835 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
836 } else if ((superframe_cnt - 1) % 4 == 0) {
837 layer_id->temporal_layer_id = 2;
838 // First top layer: no updates, only reference LAST (TL0).
839 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
840 } else if ((superframe_cnt - 2) % 4 == 0) {
841 layer_id->temporal_layer_id = 1;
842 // Middle layer (TL1): update GF, only reference LAST (TL0).
843 ref_frame_config->refresh[3] = 1;
844 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
845 } else if ((superframe_cnt - 3) % 4 == 0) {
846 layer_id->temporal_layer_id = 2;
847 // Second top layer: no updates, only reference GF.
848 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
849 }
850 break;
851 case 5:
852 // 2 spatial layers, 1 temporal.
853 layer_id->temporal_layer_id = 0;
854 if (layer_id->spatial_layer_id == 0) {
855 // Reference LAST, update LAST.
856 ref_frame_config->refresh[0] = 1;
857 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
858 } else if (layer_id->spatial_layer_id == 1) {
859 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
860 // and GOLDEN to slot 0. Update slot 1 (LAST).
861 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
862 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0;
863 ref_frame_config->refresh[1] = 1;
864 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
865 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
866 }
867 break;
868 case 6:
869 // 3 spatial layers, 1 temporal.
870 // Note for this case, we set the buffer idx for all references to be
871 // either LAST or GOLDEN, which are always valid references, since decoder
872 // will check if any of the 7 references is valid scale in
873 // valid_ref_frame_size().
874 layer_id->temporal_layer_id = 0;
875 if (layer_id->spatial_layer_id == 0) {
876 // Reference LAST, update LAST. Set all buffer_idx to 0.
877 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
878 ref_frame_config->ref_idx[i] = 0;
879 ref_frame_config->refresh[0] = 1;
880 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
881 } else if (layer_id->spatial_layer_id == 1) {
882 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
883 // and GOLDEN (and all other refs) to slot 0.
884 // Update slot 1 (LAST).
885 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
886 ref_frame_config->ref_idx[i] = 0;
887 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
888 ref_frame_config->refresh[1] = 1;
889 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
890 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
891 } else if (layer_id->spatial_layer_id == 2) {
892 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
893 // and GOLDEN (and all other refs) to slot 1.
894 // Update slot 2 (LAST).
895 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
896 ref_frame_config->ref_idx[i] = 1;
897 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
898 ref_frame_config->refresh[2] = 1;
899 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
900 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
901 // For 3 spatial layer case: allow for top spatial layer to use
902 // additional temporal reference. Update every 10 frames.
903 if (enable_longterm_temporal_ref) {
904 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
905 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
906 if (base_count % 10 == 0)
907 ref_frame_config->refresh[REF_FRAMES - 1] = 1;
908 }
909 }
910 break;
911 case 7:
912 // 2 spatial and 3 temporal layer.
913 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
914 if (superframe_cnt % 4 == 0) {
915 // Base temporal layer
916 layer_id->temporal_layer_id = 0;
917 if (layer_id->spatial_layer_id == 0) {
918 // Reference LAST, update LAST
919 // Set all buffer_idx to 0
920 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
921 ref_frame_config->ref_idx[i] = 0;
922 ref_frame_config->refresh[0] = 1;
923 } else if (layer_id->spatial_layer_id == 1) {
924 // Reference LAST and GOLDEN.
925 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
926 ref_frame_config->ref_idx[i] = 0;
927 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
928 ref_frame_config->refresh[1] = 1;
929 }
930 } else if ((superframe_cnt - 1) % 4 == 0) {
931 // First top temporal enhancement layer.
932 layer_id->temporal_layer_id = 2;
933 if (layer_id->spatial_layer_id == 0) {
934 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
935 ref_frame_config->ref_idx[i] = 0;
936 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
937 ref_frame_config->refresh[3] = 1;
938 } else if (layer_id->spatial_layer_id == 1) {
939 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
940 // GOLDEN (and all other refs) to slot 3.
941 // No update.
942 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
943 ref_frame_config->ref_idx[i] = 3;
944 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
945 }
946 } else if ((superframe_cnt - 2) % 4 == 0) {
947 // Middle temporal enhancement layer.
948 layer_id->temporal_layer_id = 1;
949 if (layer_id->spatial_layer_id == 0) {
950 // Reference LAST.
951 // Set all buffer_idx to 0.
952 // Set GOLDEN to slot 5 and update slot 5.
953 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
954 ref_frame_config->ref_idx[i] = 0;
955 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
956 ref_frame_config->refresh[5 - shift] = 1;
957 } else if (layer_id->spatial_layer_id == 1) {
958 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
959 // GOLDEN (and all other refs) to slot 5.
960 // Set LAST3 to slot 6 and update slot 6.
961 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
962 ref_frame_config->ref_idx[i] = 5 - shift;
963 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
964 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
965 ref_frame_config->refresh[6 - shift] = 1;
966 }
967 } else if ((superframe_cnt - 3) % 4 == 0) {
968 // Second top temporal enhancement layer.
969 layer_id->temporal_layer_id = 2;
970 if (layer_id->spatial_layer_id == 0) {
971 // Set LAST to slot 5 and reference LAST.
972 // Set GOLDEN to slot 3 and update slot 3.
973 // Set all other buffer_idx to 0.
974 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
975 ref_frame_config->ref_idx[i] = 0;
976 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
977 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
978 ref_frame_config->refresh[3] = 1;
979 } else if (layer_id->spatial_layer_id == 1) {
980 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
981 // GOLDEN to slot 3. No update.
982 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
983 ref_frame_config->ref_idx[i] = 0;
984 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
985 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
986 }
987 }
988 break;
989 case 8:
990 // 3 spatial and 3 temporal layer.
      // Same as case 9 but with overlap in the buffer slot updates
      // (shift = 2). The slots 3 and 4 updated by first TL2 are
      // reused for update in TL1 superframe.
      // Note for this case, frame order hint must be disabled for
      // lower resolutions (operating points > 0) to be decodable.
996 case 9:
997 // 3 spatial and 3 temporal layer.
998 // No overlap in buffer updates between TL2 and TL1.
999 // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7.
1000 // Set the references via the svc_ref_frame_config control.
1001 // Always reference LAST.
1002 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1003 if (superframe_cnt % 4 == 0) {
1004 // Base temporal layer.
1005 layer_id->temporal_layer_id = 0;
1006 if (layer_id->spatial_layer_id == 0) {
1007 // Reference LAST, update LAST.
1008 // Set all buffer_idx to 0.
1009 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1010 ref_frame_config->ref_idx[i] = 0;
1011 ref_frame_config->refresh[0] = 1;
1012 } else if (layer_id->spatial_layer_id == 1) {
1013 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1014 // GOLDEN (and all other refs) to slot 0.
1015 // Update slot 1 (LAST).
1016 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1017 ref_frame_config->ref_idx[i] = 0;
1018 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1019 ref_frame_config->refresh[1] = 1;
1020 } else if (layer_id->spatial_layer_id == 2) {
1021 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1022 // GOLDEN (and all other refs) to slot 1.
1023 // Update slot 2 (LAST).
1024 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1025 ref_frame_config->ref_idx[i] = 1;
1026 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1027 ref_frame_config->refresh[2] = 1;
1028 }
1029 } else if ((superframe_cnt - 1) % 4 == 0) {
1030 // First top temporal enhancement layer.
1031 layer_id->temporal_layer_id = 2;
1032 if (layer_id->spatial_layer_id == 0) {
1033 // Reference LAST (slot 0).
1034 // Set GOLDEN to slot 3 and update slot 3.
1035 // Set all other buffer_idx to slot 0.
1036 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1037 ref_frame_config->ref_idx[i] = 0;
1038 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1039 ref_frame_config->refresh[3] = 1;
1040 } else if (layer_id->spatial_layer_id == 1) {
1041 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1042 // GOLDEN (and all other refs) to slot 3.
1043 // Set LAST2 to slot 4 and Update slot 4.
1044 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1045 ref_frame_config->ref_idx[i] = 3;
1046 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1047 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1048 ref_frame_config->refresh[4] = 1;
1049 } else if (layer_id->spatial_layer_id == 2) {
1050 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1051 // GOLDEN (and all other refs) to slot 4.
1052 // No update.
1053 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1054 ref_frame_config->ref_idx[i] = 4;
1055 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1056 }
1057 } else if ((superframe_cnt - 2) % 4 == 0) {
1058 // Middle temporal enhancement layer.
1059 layer_id->temporal_layer_id = 1;
1060 if (layer_id->spatial_layer_id == 0) {
1061 // Reference LAST.
1062 // Set all buffer_idx to 0.
1063 // Set GOLDEN to slot 5 and update slot 5.
1064 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1065 ref_frame_config->ref_idx[i] = 0;
1066 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1067 ref_frame_config->refresh[5 - shift] = 1;
1068 } else if (layer_id->spatial_layer_id == 1) {
1069 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1070 // GOLDEN (and all other refs) to slot 5.
1071 // Set LAST3 to slot 6 and update slot 6.
1072 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1073 ref_frame_config->ref_idx[i] = 5 - shift;
1074 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1075 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1076 ref_frame_config->refresh[6 - shift] = 1;
1077 } else if (layer_id->spatial_layer_id == 2) {
1078 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1079 // GOLDEN (and all other refs) to slot 6.
1080 // Set LAST3 to slot 7 and update slot 7.
1081 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1082 ref_frame_config->ref_idx[i] = 6 - shift;
1083 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1084 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift;
1085 ref_frame_config->refresh[7 - shift] = 1;
1086 }
1087 } else if ((superframe_cnt - 3) % 4 == 0) {
1088 // Second top temporal enhancement layer.
1089 layer_id->temporal_layer_id = 2;
1090 if (layer_id->spatial_layer_id == 0) {
1091 // Set LAST to slot 5 and reference LAST.
1092 // Set GOLDEN to slot 3 and update slot 3.
1093 // Set all other buffer_idx to 0.
1094 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1095 ref_frame_config->ref_idx[i] = 0;
1096 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1097 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1098 ref_frame_config->refresh[3] = 1;
1099 } else if (layer_id->spatial_layer_id == 1) {
1100 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1101 // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
1102 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1103 ref_frame_config->ref_idx[i] = 0;
1104 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1105 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1106 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1107 ref_frame_config->refresh[4] = 1;
1108 } else if (layer_id->spatial_layer_id == 2) {
1109 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
1110 // GOLDEN to slot 4. No update.
1111 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1112 ref_frame_config->ref_idx[i] = 0;
1113 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift;
1114 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4;
1115 }
1116 }
1117 break;
1118 case 11:
1119 // Simulcast mode for 3 spatial and 3 temporal layers.
      // No inter-layer prediction, only prediction is temporal and single
1121 // reference (LAST).
1122 // No overlap in buffer slots between spatial layers. So for example,
1123 // SL0 only uses slots 0 and 1.
1124 // SL1 only uses slots 2 and 3.
1125 // SL2 only uses slots 4 and 5.
1126 // All 7 references for each inter-frame must only access buffer slots
1127 // for that spatial layer.
1128 // On key (super)frames: SL1 and SL2 must have no references set
1129 // and must refresh all the slots for that layer only (so 2 and 3
1130 // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
1131 // as a Key frame (refresh all slots). SL1/SL2 will be labelled
1132 // internally as Intra-only frames that allow that stream to be decoded.
1133 // These conditions will allow for each spatial stream to be
1134 // independently decodeable.
1135
1136 // Initialize all references to 0 (don't use reference).
1137 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1138 ref_frame_config->reference[i] = 0;
1139 // Initialize as no refresh/update for all slots.
1140 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
1141 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1142 ref_frame_config->ref_idx[i] = 0;
1143
1144 if (is_key_frame) {
1145 if (layer_id->spatial_layer_id == 0) {
1146 // Assign LAST/GOLDEN to slot 0/1.
          // Refresh slots 0 and 1 for SL0.
1148 // SL0: this will get set to KEY frame internally.
1149 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1150 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1;
1151 ref_frame_config->refresh[0] = 1;
1152 ref_frame_config->refresh[1] = 1;
1153 } else if (layer_id->spatial_layer_id == 1) {
1154 // Assign LAST/GOLDEN to slot 2/3.
          // Refresh slots 2 and 3 for SL1.
1156 // This will get set to Intra-only frame internally.
1157 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1158 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1159 ref_frame_config->refresh[2] = 1;
1160 ref_frame_config->refresh[3] = 1;
1161 } else if (layer_id->spatial_layer_id == 2) {
1162 // Assign LAST/GOLDEN to slot 4/5.
1163 // Refresh slots 4 and 5 for SL2.
1164 // This will get set to Intra-only frame internally.
1165 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1166 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5;
1167 ref_frame_config->refresh[4] = 1;
1168 ref_frame_config->refresh[5] = 1;
1169 }
1170 } else if (superframe_cnt % 4 == 0) {
1171 // Base temporal layer: TL0
1172 layer_id->temporal_layer_id = 0;
1173 if (layer_id->spatial_layer_id == 0) { // SL0
1174 // Reference LAST. Assign all references to either slot
1175 // 0 or 1. Here we assign LAST to slot 0, all others to 1.
1176 // Update slot 0 (LAST).
1177 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1178 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1179 ref_frame_config->ref_idx[i] = 1;
1180 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1181 ref_frame_config->refresh[0] = 1;
1182 } else if (layer_id->spatial_layer_id == 1) { // SL1
1183 // Reference LAST. Assign all references to either slot
1184 // 2 or 3. Here we assign LAST to slot 2, all others to 3.
1185 // Update slot 2 (LAST).
1186 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1187 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1188 ref_frame_config->ref_idx[i] = 3;
1189 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1190 ref_frame_config->refresh[2] = 1;
1191 } else if (layer_id->spatial_layer_id == 2) { // SL2
1192 // Reference LAST. Assign all references to either slot
1193 // 4 or 5. Here we assign LAST to slot 4, all others to 5.
1194 // Update slot 4 (LAST).
1195 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1196 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1197 ref_frame_config->ref_idx[i] = 5;
1198 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1199 ref_frame_config->refresh[4] = 1;
1200 }
1201 } else if ((superframe_cnt - 1) % 4 == 0) {
1202 // First top temporal enhancement layer: TL2
1203 layer_id->temporal_layer_id = 2;
1204 if (layer_id->spatial_layer_id == 0) { // SL0
1205 // Reference LAST (slot 0). Assign other references to slot 1.
1206 // No update/refresh on any slots.
1207 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1208 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1209 ref_frame_config->ref_idx[i] = 1;
1210 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1211 } else if (layer_id->spatial_layer_id == 1) { // SL1
1212 // Reference LAST (slot 2). Assign other references to slot 3.
1213 // No update/refresh on any slots.
1214 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1215 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1216 ref_frame_config->ref_idx[i] = 3;
1217 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1218 } else if (layer_id->spatial_layer_id == 2) { // SL2
          // Reference LAST (slot 4). Assign other references to slot 5.
1220 // No update/refresh on any slots.
1221 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1222 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1223 ref_frame_config->ref_idx[i] = 5;
1224 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1225 }
1226 } else if ((superframe_cnt - 2) % 4 == 0) {
1227 // Middle temporal enhancement layer: TL1
1228 layer_id->temporal_layer_id = 1;
1229 if (layer_id->spatial_layer_id == 0) { // SL0
1230 // Reference LAST (slot 0).
1231 // Set GOLDEN to slot 1 and update slot 1.
1232 // This will be used as reference for next TL2.
1233 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1234 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1235 ref_frame_config->ref_idx[i] = 1;
1236 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1237 ref_frame_config->refresh[1] = 1;
1238 } else if (layer_id->spatial_layer_id == 1) { // SL1
1239 // Reference LAST (slot 2).
1240 // Set GOLDEN to slot 3 and update slot 3.
1241 // This will be used as reference for next TL2.
1242 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1243 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1244 ref_frame_config->ref_idx[i] = 3;
1245 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1246 ref_frame_config->refresh[3] = 1;
1247 } else if (layer_id->spatial_layer_id == 2) { // SL2
1248 // Reference LAST (slot 4).
1249 // Set GOLDEN to slot 5 and update slot 5.
1250 // This will be used as reference for next TL2.
1251 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1252 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1253 ref_frame_config->ref_idx[i] = 5;
1254 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1255 ref_frame_config->refresh[5] = 1;
1256 }
1257 } else if ((superframe_cnt - 3) % 4 == 0) {
1258 // Second top temporal enhancement layer: TL2
1259 layer_id->temporal_layer_id = 2;
1260 if (layer_id->spatial_layer_id == 0) { // SL0
1261 // Reference LAST (slot 1). Assign other references to slot 0.
1262 // No update/refresh on any slots.
1263 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1264 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1265 ref_frame_config->ref_idx[i] = 0;
1266 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1267 } else if (layer_id->spatial_layer_id == 1) { // SL1
1268 // Reference LAST (slot 3). Assign other references to slot 2.
1269 // No update/refresh on any slots.
1270 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1271 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1272 ref_frame_config->ref_idx[i] = 2;
1273 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3;
1274 } else if (layer_id->spatial_layer_id == 2) { // SL2
1275 // Reference LAST (slot 5). Assign other references to slot 4.
1276 // No update/refresh on any slots.
1277 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1278 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1279 ref_frame_config->ref_idx[i] = 4;
1280 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5;
1281 }
1282 }
1283 if (!simulcast_mode && layer_id->spatial_layer_id > 0) {
1284 // Always reference GOLDEN (inter-layer prediction).
1285 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1286 if (ksvc_mode) {
1287 // KSVC: only keep the inter-layer reference (GOLDEN) for
1288 // superframes whose base is key.
1289 if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
1290 }
1291 if (is_key_frame && layer_id->spatial_layer_id > 1) {
1292 // On superframes whose base is key: remove LAST to avoid prediction
1293 // off layer two levels below.
1294 ref_frame_config->reference[SVC_LAST_FRAME] = 0;
1295 }
1296 }
1297 // For 3 spatial layer case 8 (where there is free buffer slot):
1298 // allow for top spatial layer to use additional temporal reference.
1299 // Additional reference is only updated on base temporal layer, every
1300 // 10 TL0 frames here.
1301 if (!simulcast_mode && enable_longterm_temporal_ref &&
1302 layer_id->spatial_layer_id == 2 && layering_mode == 8) {
1303 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1304 if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1305 if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
1306 ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1307 }
1308 break;
1309 default: assert(0); die("Error: Unsupported temporal layering mode!\n");
1310 }
1311 }
1312
1313 #if CONFIG_AV1_DECODER
1314 // Returns whether there is a mismatch between the encoder's new frame and the
1315 // decoder's new frame.
test_decode(aom_codec_ctx_t * encoder,aom_codec_ctx_t * decoder,const int frames_out)1316 static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
1317 const int frames_out) {
1318 aom_image_t enc_img, dec_img;
1319 int mismatch = 0;
1320
1321 /* Get the internal new frame */
1322 AOM_CODEC_CONTROL_TYPECHECKED(encoder, AV1_GET_NEW_FRAME_IMAGE, &enc_img);
1323 AOM_CODEC_CONTROL_TYPECHECKED(decoder, AV1_GET_NEW_FRAME_IMAGE, &dec_img);
1324
1325 #if CONFIG_AV1_HIGHBITDEPTH
1326 if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
1327 (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
1328 if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1329 aom_image_t enc_hbd_img;
1330 aom_img_alloc(
1331 &enc_hbd_img,
1332 static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1333 enc_img.d_w, enc_img.d_h, 16);
1334 aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
1335 enc_img = enc_hbd_img;
1336 }
1337 if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1338 aom_image_t dec_hbd_img;
1339 aom_img_alloc(
1340 &dec_hbd_img,
1341 static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1342 dec_img.d_w, dec_img.d_h, 16);
1343 aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
1344 dec_img = dec_hbd_img;
1345 }
1346 }
1347 #endif
1348
1349 if (!aom_compare_img(&enc_img, &dec_img)) {
1350 int y[4], u[4], v[4];
1351 #if CONFIG_AV1_HIGHBITDEPTH
1352 if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1353 aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
1354 } else {
1355 aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1356 }
1357 #else
1358 aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1359 #endif
1360 fprintf(stderr,
1361 "Encode/decode mismatch on frame %d at"
1362 " Y[%d, %d] {%d/%d},"
1363 " U[%d, %d] {%d/%d},"
1364 " V[%d, %d] {%d/%d}\n",
1365 frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
1366 v[1], v[2], v[3]);
1367 mismatch = 1;
1368 }
1369
1370 aom_img_free(&enc_img);
1371 aom_img_free(&dec_img);
1372 return mismatch;
1373 }
1374 #endif // CONFIG_AV1_DECODER
1375
// Accumulators used to report PSNR for the encoded stream. show_psnr() reads
// index 0 of each array to print the summary line.
struct psnr_stats {
  // The second element of these arrays is reserved for high bitdepth.
  // Total SSE; passed to sse_to_psnr() by show_psnr() for the overall PSNR.
  uint64_t psnr_sse_total[2];
  // Total sample count; passed to sse_to_psnr() alongside the total SSE.
  uint64_t psnr_samples_total[2];
  // Four running sums per entry; show_psnr() divides each by psnr_count and
  // prints them under the "Avg/Y/U/V" columns.
  double psnr_totals[2][4];
  // Divisor for psnr_totals; show_psnr() prints nothing when entry 0 is zero.
  int psnr_count[2];
};
1383
show_psnr(struct psnr_stats * psnr_stream,double peak)1384 static void show_psnr(struct psnr_stats *psnr_stream, double peak) {
1385 double ovpsnr;
1386
1387 if (!psnr_stream->psnr_count[0]) return;
1388
1389 fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
1390 ovpsnr = sse_to_psnr((double)psnr_stream->psnr_samples_total[0], peak,
1391 (double)psnr_stream->psnr_sse_total[0]);
1392 fprintf(stderr, " %.3f", ovpsnr);
1393
1394 for (int i = 0; i < 4; i++) {
1395 fprintf(stderr, " %.3f",
1396 psnr_stream->psnr_totals[0][i] / psnr_stream->psnr_count[0]);
1397 }
1398 fprintf(stderr, "\n");
1399 }
1400
create_rtc_rc_config(const aom_codec_enc_cfg_t & cfg,const AppInput & app_input)1401 static aom::AV1RateControlRtcConfig create_rtc_rc_config(
1402 const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) {
1403 aom::AV1RateControlRtcConfig rc_cfg;
1404 rc_cfg.width = cfg.g_w;
1405 rc_cfg.height = cfg.g_h;
1406 rc_cfg.max_quantizer = cfg.rc_max_quantizer;
1407 rc_cfg.min_quantizer = cfg.rc_min_quantizer;
1408 rc_cfg.target_bandwidth = cfg.rc_target_bitrate;
1409 rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz;
1410 rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz;
1411 rc_cfg.buf_sz = cfg.rc_buf_sz;
1412 rc_cfg.overshoot_pct = cfg.rc_overshoot_pct;
1413 rc_cfg.undershoot_pct = cfg.rc_undershoot_pct;
1414 // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT
1415 rc_cfg.max_intra_bitrate_pct = 300;
1416 rc_cfg.framerate = cfg.g_timebase.den;
1417 // TODO(jianj): Add suppor for SVC.
1418 rc_cfg.ss_number_layers = 1;
1419 rc_cfg.ts_number_layers = 1;
1420 rc_cfg.scaling_factor_num[0] = 1;
1421 rc_cfg.scaling_factor_den[0] = 1;
1422 rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth);
1423 rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer;
1424 rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer;
1425 rc_cfg.aq_mode = app_input.aq_mode;
1426
1427 return rc_cfg;
1428 }
1429
// Converts an internal qindex (0-255 range) to the external quantizer scale
// (0-63 range).
//
// Returns the smallest quantizer whose table entry is at least |qindex|, or
// 63 when |qindex| is larger than every entry.
static int qindex_to_quantizer(int qindex) {
  // Entry q is the internal qindex that the external quantizer value q maps
  // to.
  static const int qindex_for_quantizer[] = {
    0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
    52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
    104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
    156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
    208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
  };
  const int num_levels =
      (int)(sizeof(qindex_for_quantizer) / sizeof(qindex_for_quantizer[0]));

  // Walk past every entry that is still below the requested qindex.
  int level = 0;
  while (level < num_levels && qindex_for_quantizer[level] < qindex) ++level;

  // Ran off the end of the table: clamp to the last quantizer.
  return level < num_levels ? level : num_levels - 1;
}
1445
set_active_map(const aom_codec_enc_cfg_t * cfg,aom_codec_ctx_t * codec,int frame_cnt)1446 static void set_active_map(const aom_codec_enc_cfg_t *cfg,
1447 aom_codec_ctx_t *codec, int frame_cnt) {
1448 aom_active_map_t map = { 0, 0, 0 };
1449
1450 map.rows = (cfg->g_h + 15) / 16;
1451 map.cols = (cfg->g_w + 15) / 16;
1452
1453 map.active_map = (uint8_t *)malloc(map.rows * map.cols);
1454 if (!map.active_map) die("Failed to allocate active map");
1455
1456 // Example map for testing.
1457 for (unsigned int i = 0; i < map.rows; ++i) {
1458 for (unsigned int j = 0; j < map.cols; ++j) {
1459 int index = map.cols * i + j;
1460 map.active_map[index] = 1;
1461 if (frame_cnt < 300) {
1462 if (i < map.rows / 2 && j < map.cols / 2) map.active_map[index] = 0;
1463 } else if (frame_cnt >= 300) {
1464 if (i < map.rows / 2 && j >= map.cols / 2) map.active_map[index] = 0;
1465 }
1466 }
1467 }
1468
1469 if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
1470 die_codec(codec, "Failed to set active map");
1471
1472 free(map.active_map);
1473 }
1474
main(int argc,const char ** argv)1475 int main(int argc, const char **argv) {
1476 AppInput app_input;
1477 AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
1478 FILE *obu_files[AOM_MAX_LAYERS] = { NULL };
1479 AvxVideoWriter *total_layer_file = NULL;
1480 FILE *total_layer_obu_file = NULL;
1481 aom_codec_enc_cfg_t cfg;
1482 int frame_cnt = 0;
1483 aom_image_t raw;
1484 int frame_avail;
1485 int got_data = 0;
1486 int flags = 0;
1487 int i;
1488 int pts = 0; // PTS starts at 0.
1489 int frame_duration = 1; // 1 timebase tick per frame.
1490 aom_svc_layer_id_t layer_id;
1491 aom_svc_params_t svc_params;
1492 aom_svc_ref_frame_config_t ref_frame_config;
1493 aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred;
1494
1495 #if CONFIG_INTERNAL_STATS
1496 FILE *stats_file = fopen("opsnr.stt", "a");
1497 if (stats_file == NULL) {
1498 die("Cannot open opsnr.stt\n");
1499 }
1500 #endif
1501 #if CONFIG_AV1_DECODER
1502 aom_codec_ctx_t decoder;
1503 #endif
1504
1505 struct RateControlMetrics rc;
1506 int64_t cx_time = 0;
1507 int64_t cx_time_layer[AOM_MAX_LAYERS]; // max number of layers.
1508 int frame_cnt_layer[AOM_MAX_LAYERS];
1509 double sum_bitrate = 0.0;
1510 double sum_bitrate2 = 0.0;
1511 double framerate = 30.0;
1512 int use_svc_control = 1;
1513 int set_err_resil_frame = 0;
1514 int test_changing_bitrate = 0;
1515 zero(rc.layer_target_bitrate);
1516 memset(&layer_id, 0, sizeof(aom_svc_layer_id_t));
1517 memset(&app_input, 0, sizeof(AppInput));
1518 memset(&svc_params, 0, sizeof(svc_params));
1519
1520 // Flag to test dynamic scaling of source frames for single
1521 // spatial stream, using the scaling_mode control.
1522 const int test_dynamic_scaling_single_layer = 0;
1523
1524 // Flag to test setting speed per layer.
1525 const int test_speed_per_layer = 0;
1526
1527 // Flag for testing active maps.
1528 const int test_active_maps = 0;
1529
1530 /* Setup default input stream settings */
1531 app_input.input_ctx.framerate.numerator = 30;
1532 app_input.input_ctx.framerate.denominator = 1;
1533 app_input.input_ctx.only_i420 = 0;
1534 app_input.input_ctx.bit_depth = AOM_BITS_8;
1535 app_input.speed = 7;
1536 exec_name = argv[0];
1537
1538 // start with default encoder configuration
1539 aom_codec_err_t res = aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg,
1540 AOM_USAGE_REALTIME);
1541 if (res != AOM_CODEC_OK) {
1542 die("Failed to get config: %s\n", aom_codec_err_to_string(res));
1543 }
1544
1545 // Real time parameters.
1546 cfg.g_usage = AOM_USAGE_REALTIME;
1547
1548 cfg.rc_end_usage = AOM_CBR;
1549 cfg.rc_min_quantizer = 2;
1550 cfg.rc_max_quantizer = 52;
1551 cfg.rc_undershoot_pct = 50;
1552 cfg.rc_overshoot_pct = 50;
1553 cfg.rc_buf_initial_sz = 600;
1554 cfg.rc_buf_optimal_sz = 600;
1555 cfg.rc_buf_sz = 1000;
1556 cfg.rc_resize_mode = 0; // Set to RESIZE_DYNAMIC for dynamic resize.
1557 cfg.g_lag_in_frames = 0;
1558 cfg.kf_mode = AOM_KF_AUTO;
1559
1560 parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
1561
1562 int ts_number_layers = svc_params.number_temporal_layers;
1563 int ss_number_layers = svc_params.number_spatial_layers;
1564
1565 unsigned int width = cfg.g_w;
1566 unsigned int height = cfg.g_h;
1567
1568 if (app_input.layering_mode >= 0) {
1569 if (ts_number_layers !=
1570 mode_to_num_temporal_layers[app_input.layering_mode] ||
1571 ss_number_layers !=
1572 mode_to_num_spatial_layers[app_input.layering_mode]) {
1573 die("Number of layers doesn't match layering mode.");
1574 }
1575 }
1576
1577 // Y4M reader has its own allocation.
1578 if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
1579 if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) {
1580 die("Failed to allocate image (%dx%d)", width, height);
1581 }
1582 }
1583
1584 aom_codec_iface_t *encoder = aom_codec_av1_cx();
1585
1586 memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0],
1587 sizeof(svc_params.layer_target_bitrate));
1588
1589 unsigned int total_rate = 0;
1590 for (i = 0; i < ss_number_layers; i++) {
1591 total_rate +=
1592 svc_params
1593 .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1];
1594 }
1595 if (total_rate != cfg.rc_target_bitrate) {
1596 die("Incorrect total target bitrate");
1597 }
1598
1599 svc_params.framerate_factor[0] = 1;
1600 if (ts_number_layers == 2) {
1601 svc_params.framerate_factor[0] = 2;
1602 svc_params.framerate_factor[1] = 1;
1603 } else if (ts_number_layers == 3) {
1604 svc_params.framerate_factor[0] = 4;
1605 svc_params.framerate_factor[1] = 2;
1606 svc_params.framerate_factor[2] = 1;
1607 }
1608
1609 if (app_input.input_ctx.file_type == FILE_TYPE_Y4M) {
1610 // Override these settings with the info from Y4M file.
1611 cfg.g_w = app_input.input_ctx.width;
1612 cfg.g_h = app_input.input_ctx.height;
1613 // g_timebase is the reciprocal of frame rate.
1614 cfg.g_timebase.num = app_input.input_ctx.framerate.denominator;
1615 cfg.g_timebase.den = app_input.input_ctx.framerate.numerator;
1616 }
1617 framerate = cfg.g_timebase.den / cfg.g_timebase.num;
1618 set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
1619
1620 AvxVideoInfo info;
1621 info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
1622 info.frame_width = cfg.g_w;
1623 info.frame_height = cfg.g_h;
1624 info.time_base.numerator = cfg.g_timebase.num;
1625 info.time_base.denominator = cfg.g_timebase.den;
1626 // Open an output file for each stream.
1627 for (int sl = 0; sl < ss_number_layers; ++sl) {
1628 for (int tl = 0; tl < ts_number_layers; ++tl) {
1629 i = sl * ts_number_layers + tl;
1630 char file_name[PATH_MAX];
1631 snprintf(file_name, sizeof(file_name), "%s_%d.av1",
1632 app_input.output_filename, i);
1633 if (app_input.output_obu) {
1634 obu_files[i] = fopen(file_name, "wb");
1635 if (!obu_files[i]) die("Failed to open %s for writing", file_name);
1636 } else {
1637 outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
1638 if (!outfile[i]) die("Failed to open %s for writing", file_name);
1639 }
1640 }
1641 }
1642 if (app_input.output_obu) {
1643 total_layer_obu_file = fopen(app_input.output_filename, "wb");
1644 if (!total_layer_obu_file)
1645 die("Failed to open %s for writing", app_input.output_filename);
1646 } else {
1647 total_layer_file =
1648 aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
1649 if (!total_layer_file)
1650 die("Failed to open %s for writing", app_input.output_filename);
1651 }
1652
1653 // Initialize codec.
1654 aom_codec_ctx_t codec;
1655 aom_codec_flags_t flag = 0;
1656 flag |= cfg.g_input_bit_depth == AOM_BITS_8 ? 0 : AOM_CODEC_USE_HIGHBITDEPTH;
1657 flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0;
1658 if (aom_codec_enc_init(&codec, encoder, &cfg, flag))
1659 die_codec(&codec, "Failed to initialize encoder");
1660
1661 #if CONFIG_AV1_DECODER
1662 if (app_input.decode) {
1663 if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0))
1664 die_codec(&decoder, "Failed to initialize decoder");
1665 }
1666 #endif
1667
1668 aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
1669 aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
1670 aom_codec_control(&codec, AV1E_SET_GF_CBR_BOOST_PCT, 0);
1671 aom_codec_control(&codec, AV1E_SET_ENABLE_CDEF, 1);
1672 aom_codec_control(&codec, AV1E_SET_LOOPFILTER_CONTROL, 1);
1673 aom_codec_control(&codec, AV1E_SET_ENABLE_WARPED_MOTION, 0);
1674 aom_codec_control(&codec, AV1E_SET_ENABLE_OBMC, 0);
1675 aom_codec_control(&codec, AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
1676 aom_codec_control(&codec, AV1E_SET_ENABLE_ORDER_HINT, 0);
1677 aom_codec_control(&codec, AV1E_SET_ENABLE_TPL_MODEL, 0);
1678 aom_codec_control(&codec, AV1E_SET_DELTAQ_MODE, 0);
1679 aom_codec_control(&codec, AV1E_SET_COEFF_COST_UPD_FREQ, 3);
1680 aom_codec_control(&codec, AV1E_SET_MODE_COST_UPD_FREQ, 3);
1681 aom_codec_control(&codec, AV1E_SET_MV_COST_UPD_FREQ, 3);
1682 aom_codec_control(&codec, AV1E_SET_DV_COST_UPD_FREQ, 3);
1683 aom_codec_control(&codec, AV1E_SET_CDF_UPDATE_MODE, 1);
1684
1685 // Settings to reduce key frame encoding time.
1686 aom_codec_control(&codec, AV1E_SET_ENABLE_CFL_INTRA, 0);
1687 aom_codec_control(&codec, AV1E_SET_ENABLE_SMOOTH_INTRA, 0);
1688 aom_codec_control(&codec, AV1E_SET_ENABLE_ANGLE_DELTA, 0);
1689 aom_codec_control(&codec, AV1E_SET_ENABLE_FILTER_INTRA, 0);
1690 aom_codec_control(&codec, AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1);
1691
1692 aom_codec_control(&codec, AV1E_SET_AUTO_TILES, 1);
1693
1694 aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
1695 if (app_input.tune_content == AOM_CONTENT_SCREEN) {
1696 aom_codec_control(&codec, AV1E_SET_ENABLE_PALETTE, 1);
1697 // INTRABC is currently disabled for rt mode, as it's too slow.
1698 aom_codec_control(&codec, AV1E_SET_ENABLE_INTRABC, 0);
1699 }
1700
1701 if (app_input.use_external_rc) {
1702 aom_codec_control(&codec, AV1E_SET_RTC_EXTERNAL_RC, 1);
1703 }
1704
1705 aom_codec_control(&codec, AV1E_SET_MAX_CONSEC_FRAME_DROP_MS_CBR, INT_MAX);
1706
1707 aom_codec_control(&codec, AV1E_SET_SVC_FRAME_DROP_MODE,
1708 AOM_FULL_SUPERFRAME_DROP);
1709
1710 aom_codec_control(&codec, AV1E_SET_POSTENCODE_DROP_RTC, 1);
1711
1712 svc_params.number_spatial_layers = ss_number_layers;
1713 svc_params.number_temporal_layers = ts_number_layers;
1714 for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
1715 svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
1716 svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
1717 }
1718 for (i = 0; i < ss_number_layers; ++i) {
1719 svc_params.scaling_factor_num[i] = 1;
1720 svc_params.scaling_factor_den[i] = 1;
1721 }
1722 if (ss_number_layers == 2) {
1723 svc_params.scaling_factor_num[0] = 1;
1724 svc_params.scaling_factor_den[0] = 2;
1725 } else if (ss_number_layers == 3) {
1726 svc_params.scaling_factor_num[0] = 1;
1727 svc_params.scaling_factor_den[0] = 4;
1728 svc_params.scaling_factor_num[1] = 1;
1729 svc_params.scaling_factor_den[1] = 2;
1730 }
1731 aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
1732 // TODO(aomedia:3032): Configure KSVC in fixed mode.
1733
1734 // This controls the maximum target size of the key frame.
1735 // For generating smaller key frames, use a smaller max_intra_size_pct
1736 // value, like 100 or 200.
1737 {
1738 const int max_intra_size_pct = 300;
1739 aom_codec_control(&codec, AOME_SET_MAX_INTRA_BITRATE_PCT,
1740 max_intra_size_pct);
1741 }
1742
1743 for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
1744 cx_time_layer[lx] = 0;
1745 frame_cnt_layer[lx] = 0;
1746 }
1747
1748 std::unique_ptr<aom::AV1RateControlRTC> rc_api;
1749 if (app_input.use_external_rc) {
1750 const aom::AV1RateControlRtcConfig rc_cfg =
1751 create_rtc_rc_config(cfg, app_input);
1752 rc_api = aom::AV1RateControlRTC::Create(rc_cfg);
1753 }
1754
1755 frame_avail = 1;
1756 struct psnr_stats psnr_stream;
1757 memset(&psnr_stream, 0, sizeof(psnr_stream));
1758 while (frame_avail || got_data) {
1759 struct aom_usec_timer timer;
1760 frame_avail = read_frame(&(app_input.input_ctx), &raw);
1761 // Loop over spatial layers.
1762 for (int slx = 0; slx < ss_number_layers; slx++) {
1763 aom_codec_iter_t iter = NULL;
1764 const aom_codec_cx_pkt_t *pkt;
1765 int layer = 0;
1766 // Flag for superframe whose base is key.
1767 int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0;
1768 // For flexible mode:
1769 if (app_input.layering_mode >= 0) {
1770 // Set the reference/update flags, layer_id, and reference_map
1771 // buffer index.
1772 set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
1773 &ref_frame_config, &ref_frame_comp_pred,
1774 &use_svc_control, slx, is_key_frame,
1775 (app_input.layering_mode == 10), app_input.speed);
1776 aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
1777 if (use_svc_control) {
1778 aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG,
1779 &ref_frame_config);
1780 aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED,
1781 &ref_frame_comp_pred);
1782 }
1783 // Set the speed per layer.
1784 if (test_speed_per_layer) {
1785 int speed_per_layer = 10;
1786 if (layer_id.spatial_layer_id == 0) {
1787 if (layer_id.temporal_layer_id == 0) speed_per_layer = 6;
1788 if (layer_id.temporal_layer_id == 1) speed_per_layer = 7;
1789 if (layer_id.temporal_layer_id == 2) speed_per_layer = 8;
1790 } else if (layer_id.spatial_layer_id == 1) {
1791 if (layer_id.temporal_layer_id == 0) speed_per_layer = 7;
1792 if (layer_id.temporal_layer_id == 1) speed_per_layer = 8;
1793 if (layer_id.temporal_layer_id == 2) speed_per_layer = 9;
1794 } else if (layer_id.spatial_layer_id == 2) {
1795 if (layer_id.temporal_layer_id == 0) speed_per_layer = 8;
1796 if (layer_id.temporal_layer_id == 1) speed_per_layer = 9;
1797 if (layer_id.temporal_layer_id == 2) speed_per_layer = 10;
1798 }
1799 aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer);
1800 }
1801 } else {
1802 // Only up to 3 temporal layers supported in fixed mode.
1803 // Only need to set spatial and temporal layer_id: reference
1804 // prediction, refresh, and buffer_idx are set internally.
1805 layer_id.spatial_layer_id = slx;
1806 layer_id.temporal_layer_id = 0;
1807 if (ts_number_layers == 2) {
1808 layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
1809 } else if (ts_number_layers == 3) {
1810 if (frame_cnt % 2 != 0)
1811 layer_id.temporal_layer_id = 2;
1812 else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
1813 layer_id.temporal_layer_id = 1;
1814 }
1815 aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
1816 }
1817
1818 if (set_err_resil_frame && cfg.g_error_resilient == 0) {
1819 // Set error_resilient per frame: off/0 for base layer and
1820 // on/1 for enhancement layer frames.
1821 // Note that this can only be done on the fly/per-frame/layer
1822 // if the config error_resilience is off/0. See the logic for updating
1823 // in set_encoder_config():
1824 // tool_cfg->error_resilient_mode =
1825 // cfg->g_error_resilient | extra_cfg->error_resilient_mode;
1826 const int err_resil_mode =
1827 layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0;
1828 aom_codec_control(&codec, AV1E_SET_ERROR_RESILIENT_MODE,
1829 err_resil_mode);
1830 }
1831
1832 layer = slx * ts_number_layers + layer_id.temporal_layer_id;
1833 if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
1834
1835 if (test_dynamic_scaling_single_layer) {
1836 // Example to scale source down by 2x2, then 4x4, and then back up to
1837 // 2x2, and then back to original.
1838 int frame_2x2 = 200;
1839 int frame_4x4 = 400;
1840 int frame_2x2up = 600;
1841 int frame_orig = 800;
1842 if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
1843 // Scale source down by 2x2.
1844 struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
1845 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1846 } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
1847 // Scale source down by 4x4.
1848 struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
1849 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1850 } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
1851 // Source back up to 2x2.
1852 struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
1853 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1854 } else if (frame_cnt >= frame_orig) {
1855 // Source back up to original resolution (no scaling).
1856 struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
1857 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1858 }
1859 if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
1860 frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
1861 // For dynamic resize testing on single layer: refresh all references
1862 // on the resized frame: this is to avoid decode error:
1863 // if resize goes down by >= 4x4 then libaom decoder will throw an
1864 // error that some reference (even though not used) is beyond the
1865 // limit size (must be smaller than 4x4).
1866 for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
1867 if (use_svc_control) {
1868 aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG,
1869 &ref_frame_config);
1870 aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED,
1871 &ref_frame_comp_pred);
1872 }
1873 }
1874 }
1875
1876 // Change target_bitrate every other frame.
1877 if (test_changing_bitrate && frame_cnt % 2 == 0) {
1878 if (frame_cnt < 500)
1879 cfg.rc_target_bitrate += 10;
1880 else
1881 cfg.rc_target_bitrate -= 10;
1882 // Do big increase and decrease.
1883 if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1;
1884 if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1;
1885 if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100;
1886 // Call change_config, or bypass with new control.
1887 // res = aom_codec_enc_config_set(&codec, &cfg);
1888 if (aom_codec_control(&codec, AV1E_SET_BITRATE_ONE_PASS_CBR,
1889 cfg.rc_target_bitrate))
1890 die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR");
1891 }
1892
1893 if (rc_api) {
1894 aom::AV1FrameParamsRTC frame_params;
1895 // TODO(jianj): Add support for SVC.
1896 frame_params.spatial_layer_id = 0;
1897 frame_params.temporal_layer_id = 0;
1898 frame_params.frame_type =
1899 is_key_frame ? aom::kKeyFrame : aom::kInterFrame;
1900 rc_api->ComputeQP(frame_params);
1901 const int current_qp = rc_api->GetQP();
1902 if (aom_codec_control(&codec, AV1E_SET_QUANTIZER_ONE_PASS,
1903 qindex_to_quantizer(current_qp))) {
1904 die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS");
1905 }
1906 }
1907
1908 if (test_active_maps) set_active_map(&cfg, &codec, frame_cnt);
1909
1910 // Do the layer encode.
1911 aom_usec_timer_start(&timer);
1912 if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags))
1913 die_codec(&codec, "Failed to encode frame");
1914 aom_usec_timer_mark(&timer);
1915 cx_time += aom_usec_timer_elapsed(&timer);
1916 cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
1917 frame_cnt_layer[layer] += 1;
1918
1919 // Get the high motion content flag.
1920 int content_flag = 0;
1921 if (aom_codec_control(&codec, AV1E_GET_HIGH_MOTION_CONTENT_SCREEN_RTC,
1922 &content_flag)) {
1923 die_codec(&codec, "Failed to GET_HIGH_MOTION_CONTENT_SCREEN_RTC");
1924 }
1925
1926 got_data = 0;
1927 // For simulcast (mode 11): write out each spatial layer to the file.
1928 int ss_layers_write = (app_input.layering_mode == 11)
1929 ? layer_id.spatial_layer_id + 1
1930 : ss_number_layers;
1931 while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
1932 switch (pkt->kind) {
1933 case AOM_CODEC_CX_FRAME_PKT:
1934 for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write;
1935 ++sl) {
1936 for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers;
1937 ++tl) {
1938 int j = sl * ts_number_layers + tl;
1939 if (app_input.output_obu) {
1940 fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
1941 obu_files[j]);
1942 } else {
1943 aom_video_writer_write_frame(
1944 outfile[j],
1945 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1946 pkt->data.frame.sz, pts);
1947 }
1948 if (sl == layer_id.spatial_layer_id)
1949 rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz;
1950 }
1951 }
1952 got_data = 1;
1953 // Write everything into the top layer.
1954 if (app_input.output_obu) {
1955 fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
1956 total_layer_obu_file);
1957 } else {
1958 aom_video_writer_write_frame(
1959 total_layer_file,
1960 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1961 pkt->data.frame.sz, pts);
1962 }
1963 // Keep count of rate control stats per layer (for non-key).
1964 if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) {
1965 int j = layer_id.spatial_layer_id * ts_number_layers +
1966 layer_id.temporal_layer_id;
1967 assert(j >= 0);
1968 rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz;
1969 rc.layer_avg_rate_mismatch[j] +=
1970 fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) /
1971 rc.layer_pfb[j];
1972 if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id];
1973 }
1974
1975 if (rc_api) {
1976 rc_api->PostEncodeUpdate(pkt->data.frame.sz);
1977 }
1978 // Update the short-time encoding bitrate stats, for a moving window
1979 // of size rc.window_size, shifted by rc.window_size / 2.
1980 // Ignore first window segment, due to key frame.
1981 // For spatial layers: only do this for top/highest SL.
1982 if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
1983 sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
1984 rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
1985 if (frame_cnt % rc.window_size == 0) {
1986 rc.window_count += 1;
1987 rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
1988 rc.variance_st_encoding_bitrate +=
1989 (sum_bitrate / rc.window_size) *
1990 (sum_bitrate / rc.window_size);
1991 sum_bitrate = 0.0;
1992 }
1993 }
1994 // Second shifted window.
1995 if (frame_cnt > rc.window_size + rc.window_size / 2 &&
1996 slx == ss_number_layers - 1) {
1997 sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
1998 if (frame_cnt > 2 * rc.window_size &&
1999 frame_cnt % rc.window_size == 0) {
2000 rc.window_count += 1;
2001 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
2002 rc.variance_st_encoding_bitrate +=
2003 (sum_bitrate2 / rc.window_size) *
2004 (sum_bitrate2 / rc.window_size);
2005 sum_bitrate2 = 0.0;
2006 }
2007 }
2008
2009 #if CONFIG_AV1_DECODER
2010 if (app_input.decode) {
2011 if (aom_codec_decode(
2012 &decoder,
2013 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2014 pkt->data.frame.sz, NULL))
2015 die_codec(&decoder, "Failed to decode frame");
2016 }
2017 #endif
2018
2019 break;
2020 case AOM_CODEC_PSNR_PKT:
2021 if (app_input.show_psnr) {
2022 psnr_stream.psnr_sse_total[0] += pkt->data.psnr.sse[0];
2023 psnr_stream.psnr_samples_total[0] += pkt->data.psnr.samples[0];
2024 for (int plane = 0; plane < 4; plane++) {
2025 psnr_stream.psnr_totals[0][plane] += pkt->data.psnr.psnr[plane];
2026 }
2027 psnr_stream.psnr_count[0]++;
2028 }
2029 break;
2030 default: break;
2031 }
2032 }
2033 #if CONFIG_AV1_DECODER
2034 if (got_data && app_input.decode) {
2035 // Don't look for mismatch on top spatial and top temporal layers as
2036 // they are non reference frames.
2037 if ((ss_number_layers > 1 || ts_number_layers > 1) &&
2038 !(layer_id.temporal_layer_id > 0 &&
2039 layer_id.temporal_layer_id == ts_number_layers - 1)) {
2040 if (test_decode(&codec, &decoder, frame_cnt)) {
2041 #if CONFIG_INTERNAL_STATS
2042 fprintf(stats_file, "First mismatch occurred in frame %d\n",
2043 frame_cnt);
2044 fclose(stats_file);
2045 #endif
2046 fatal("Mismatch seen");
2047 }
2048 }
2049 }
2050 #endif
2051 } // loop over spatial layers
2052 ++frame_cnt;
2053 pts += frame_duration;
2054 }
2055
2056 close_input_file(&(app_input.input_ctx));
2057 printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
2058 ts_number_layers);
2059
2060 printf("\n");
2061 for (int slx = 0; slx < ss_number_layers; slx++)
2062 for (int tlx = 0; tlx < ts_number_layers; tlx++) {
2063 int lx = slx * ts_number_layers + tlx;
2064 printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
2065 slx, tlx, frame_cnt_layer[lx],
2066 (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
2067 1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
2068 }
2069
2070 printf("\n");
2071 printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
2072 frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
2073 1000000 * (double)frame_cnt / (double)cx_time);
2074
2075 if (app_input.show_psnr) {
2076 show_psnr(&psnr_stream, 255.0);
2077 }
2078
2079 if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder");
2080
2081 #if CONFIG_AV1_DECODER
2082 if (app_input.decode) {
2083 if (aom_codec_destroy(&decoder))
2084 die_codec(&decoder, "Failed to destroy decoder");
2085 }
2086 #endif
2087
2088 #if CONFIG_INTERNAL_STATS
2089 fprintf(stats_file, "No mismatch detected in recon buffers\n");
2090 fclose(stats_file);
2091 #endif
2092
2093 // Try to rewrite the output file headers with the actual frame count.
2094 for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
2095 aom_video_writer_close(outfile[i]);
2096 aom_video_writer_close(total_layer_file);
2097
2098 if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
2099 aom_img_free(&raw);
2100 }
2101 return EXIT_SUCCESS;
2102 }
2103