1 /*
2 * Copyright (c) 2021, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13
14 #include "config/aom_config.h"
15
16 #include "aom_util/aom_pthread.h"
17
18 #if CONFIG_TFLITE
19 #include "tensorflow/lite/c/c_api.h"
20 #include "av1/encoder/deltaq4_model.c"
21 #endif
22
23 #include "av1/common/common_data.h"
24 #include "av1/common/enums.h"
25 #include "av1/common/idct.h"
26 #include "av1/common/reconinter.h"
27 #include "av1/encoder/allintra_vis.h"
28 #include "av1/encoder/encoder.h"
29 #include "av1/encoder/ethread.h"
30 #include "av1/encoder/hybrid_fwd_txfm.h"
31 #include "av1/encoder/model_rd.h"
32 #include "av1/encoder/rdopt_utils.h"
33
34 #define MB_WIENER_PRED_BLOCK_SIZE BLOCK_128X128
35 #define MB_WIENER_PRED_BUF_STRIDE 128
36
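// Allocate the temporary intra prediction buffer used by the mb wiener
// variance pass. It holds one MB_WIENER_PRED_BLOCK_SIZE (128x128) prediction
// block; the byte size is doubled for high-bit-depth (16-bit) buffers.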
37 void av1_alloc_mb_wiener_var_pred_buf(AV1_COMMON *cm, ThreadData *td) {
38 const int is_high_bitdepth = is_cur_buf_hbd(&td->mb.e_mbd);
39 assert(MB_WIENER_PRED_BLOCK_SIZE < BLOCK_SIZES_ALL);
40 const int buf_width = block_size_wide[MB_WIENER_PRED_BLOCK_SIZE];
41 const int buf_height = block_size_high[MB_WIENER_PRED_BLOCK_SIZE];
42 assert(buf_width == MB_WIENER_PRED_BUF_STRIDE);
43 const size_t buf_size =
44 (buf_width * buf_height * sizeof(*td->wiener_tmp_pred_buf))
45 << is_high_bitdepth;
46 CHECK_MEM_ERROR(cm, td->wiener_tmp_pred_buf, aom_memalign(32, buf_size));
47 }
48
49 void av1_dealloc_mb_wiener_var_pred_buf(ThreadData *td) {
50 aom_free(td->wiener_tmp_pred_buf);
51 td->wiener_tmp_pred_buf = NULL;
52 }
53
54 void av1_init_mb_wiener_var_buffer(AV1_COMP *cpi) {
55 AV1_COMMON *cm = &cpi->common;
56
57 // This block size is also used to determine the number of workers in
58 // multi-threading. If it is changed, it must be changed accordingly in
59 // "compute_num_ai_workers()".
60 cpi->weber_bsize = BLOCK_8X8;
61
62 if (cpi->oxcf.enable_rate_guide_deltaq) {
63 if (cpi->mb_weber_stats && cpi->prep_rate_estimates &&
64 cpi->ext_rate_distribution)
65 return;
66 } else {
67 if (cpi->mb_weber_stats) return;
68 }
69
70 CHECK_MEM_ERROR(cm, cpi->mb_weber_stats,
71 aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols,
72 sizeof(*cpi->mb_weber_stats)));
73
74 if (cpi->oxcf.enable_rate_guide_deltaq) {
75 CHECK_MEM_ERROR(
76 cm, cpi->prep_rate_estimates,
77 aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols,
78 sizeof(*cpi->prep_rate_estimates)));
79
80 CHECK_MEM_ERROR(
81 cm, cpi->ext_rate_distribution,
82 aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols,
83 sizeof(*cpi->ext_rate_distribution)));
84 }
85 }
86
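// Returns the average SATD of the 8x8 (weber_bsize) sub-blocks covered by the
// given block, read from the precomputed mb_weber_stats table. The result is
// clamped to be at least 1.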
87 static int64_t get_satd(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
88 int mi_col) {
89 AV1_COMMON *const cm = &cpi->common;
90 const int mi_wide = mi_size_wide[bsize];
91 const int mi_high = mi_size_high[bsize];
92
93 const int mi_step = mi_size_wide[cpi->weber_bsize];
94 int mb_stride = cpi->frame_info.mi_cols;
95 int mb_count = 0;
96 int64_t satd = 0;
97
98 for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
99 for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
100 if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
101 continue;
102
103 satd += cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]
104 .satd;
105 ++mb_count;
106 }
107 }
108
109 if (mb_count) satd = (int)(satd / mb_count);
110 satd = AOMMAX(1, satd);
111
112 return (int)satd;
113 }
114
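// Same as get_satd(), but averages the stored per-sub-block distortion (SSE)
// instead of the SATD.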
115 static int64_t get_sse(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
116 int mi_col) {
117 AV1_COMMON *const cm = &cpi->common;
118 const int mi_wide = mi_size_wide[bsize];
119 const int mi_high = mi_size_high[bsize];
120
121 const int mi_step = mi_size_wide[cpi->weber_bsize];
122 int mb_stride = cpi->frame_info.mi_cols;
123 int mb_count = 0;
124 int64_t distortion = 0;
125
126 for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
127 for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
128 if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
129 continue;
130
131 distortion +=
132 cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]
133 .distortion;
134 ++mb_count;
135 }
136 }
137
138 if (mb_count) distortion = (int)(distortion / mb_count);
139 distortion = AOMMAX(1, distortion);
140
141 return (int)distortion;
142 }
143
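// Returns the smallest per-sub-block max_scale (largest quantized coefficient
// magnitude) within the block, skipping sub-blocks whose max_scale is below
// 1.0. The returned value is capped at 10.0.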
144 static double get_max_scale(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
145 int mi_col) {
146 AV1_COMMON *const cm = &cpi->common;
147 const int mi_wide = mi_size_wide[bsize];
148 const int mi_high = mi_size_high[bsize];
149 const int mi_step = mi_size_wide[cpi->weber_bsize];
150 int mb_stride = cpi->frame_info.mi_cols;
151 double min_max_scale = 10.0;
152
153 for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
154 for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
155 if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
156 continue;
157 WeberStats *weber_stats =
158 &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)];
159 if (weber_stats->max_scale < 1.0) continue;
160 if (weber_stats->max_scale < min_max_scale)
161 min_max_scale = weber_stats->max_scale;
162 }
163 }
164 return min_max_scale;
165 }
166
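// Estimates the wiener variance of a block as a regularized ratio: the
// numerator accumulates distortion weighted by the source standard deviation
// and the reconstructed pixel maximum, the denominator accumulates the
// mismatch between source and reconstructed statistics, and the ratio is
// averaged over the 8x8 sub-blocks and clamped to be at least 1.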
167 static int get_window_wiener_var(AV1_COMP *const cpi, BLOCK_SIZE bsize,
168 int mi_row, int mi_col) {
169 AV1_COMMON *const cm = &cpi->common;
170 const int mi_wide = mi_size_wide[bsize];
171 const int mi_high = mi_size_high[bsize];
172
173 const int mi_step = mi_size_wide[cpi->weber_bsize];
174 int sb_wiener_var = 0;
175 int mb_stride = cpi->frame_info.mi_cols;
176 int mb_count = 0;
177 double base_num = 1;
178 double base_den = 1;
179 double base_reg = 1;
180
181 for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
182 for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
183 if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
184 continue;
185
186 WeberStats *weber_stats =
187 &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)];
188
189 base_num += ((double)weber_stats->distortion) *
190 sqrt((double)weber_stats->src_variance) *
191 weber_stats->rec_pix_max;
192
193 base_den += fabs(
194 weber_stats->rec_pix_max * sqrt((double)weber_stats->src_variance) -
195 weber_stats->src_pix_max * sqrt((double)weber_stats->rec_variance));
196
197 base_reg += sqrt((double)weber_stats->distortion) *
198 sqrt((double)weber_stats->src_pix_max) * 0.1;
199 ++mb_count;
200 }
201 }
202
203 sb_wiener_var =
204 (int)(((base_num + base_reg) / (base_den + base_reg)) / mb_count);
205 sb_wiener_var = AOMMAX(1, sb_wiener_var);
206
207 return (int)sb_wiener_var;
208 }
209
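// Refines the window wiener variance by also evaluating windows shifted by
// half a block up, down, left and right (when they fit inside the frame) and
// taking the minimum of the estimates.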
210 static int get_var_perceptual_ai(AV1_COMP *const cpi, BLOCK_SIZE bsize,
211 int mi_row, int mi_col) {
212 AV1_COMMON *const cm = &cpi->common;
213 const int mi_wide = mi_size_wide[bsize];
214 const int mi_high = mi_size_high[bsize];
215
216 int sb_wiener_var = get_window_wiener_var(cpi, bsize, mi_row, mi_col);
217
218 if (mi_row >= (mi_high / 2)) {
219 sb_wiener_var =
220 AOMMIN(sb_wiener_var,
221 get_window_wiener_var(cpi, bsize, mi_row - mi_high / 2, mi_col));
222 }
223 if (mi_row <= (cm->mi_params.mi_rows - mi_high - (mi_high / 2))) {
224 sb_wiener_var =
225 AOMMIN(sb_wiener_var,
226 get_window_wiener_var(cpi, bsize, mi_row + mi_high / 2, mi_col));
227 }
228 if (mi_col >= (mi_wide / 2)) {
229 sb_wiener_var =
230 AOMMIN(sb_wiener_var,
231 get_window_wiener_var(cpi, bsize, mi_row, mi_col - mi_wide / 2));
232 }
233 if (mi_col <= (cm->mi_params.mi_cols - mi_wide - (mi_wide / 2))) {
234 sb_wiener_var =
235 AOMMIN(sb_wiener_var,
236 get_window_wiener_var(cpi, bsize, mi_row, mi_col + mi_wide / 2));
237 }
238
239 return sb_wiener_var;
240 }
241
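// Rough rate estimate for the quantized coefficients: roughly
// log2(1 + |level|) bits per coefficient plus a small per-coefficient
// overhead, scaled by AV1_PROB_COST_SHIFT.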
242 static int rate_estimator(const tran_low_t *qcoeff, int eob, TX_SIZE tx_size) {
243 const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT];
244
245 assert((1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]) >= eob);
246 int rate_cost = 1;
247
248 for (int idx = 0; idx < eob; ++idx) {
249 int abs_level = abs(qcoeff[scan_order->scan[idx]]);
250 rate_cost += (int)(log1p(abs_level) / log(2.0)) + 1 + (abs_level > 0);
251 }
252
253 return (rate_cost << AV1_PROB_COST_SHIFT);
254 }
255
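// Computes the wiener variance statistics for one row of 8x8 (weber_bsize)
// blocks. For each block, the best intra mode is selected by SATD using the
// source as the predictor, the residual is transformed, quantized and
// reconstructed, and the resulting Weber statistics (variances, distortion,
// pixel maxima, max quantized coefficient) are stored. Row synchronization
// hooks allow this to run under the all-intra multi-threading scheme.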
256 void av1_calc_mb_wiener_var_row(AV1_COMP *const cpi, MACROBLOCK *x,
257 MACROBLOCKD *xd, const int mi_row,
258 int16_t *src_diff, tran_low_t *coeff,
259 tran_low_t *qcoeff, tran_low_t *dqcoeff,
260 double *sum_rec_distortion,
261 double *sum_est_rate, uint8_t *pred_buffer) {
262 AV1_COMMON *const cm = &cpi->common;
263 uint8_t *buffer = cpi->source->y_buffer;
264 int buf_stride = cpi->source->y_stride;
265 MB_MODE_INFO mbmi;
266 memset(&mbmi, 0, sizeof(mbmi));
267 MB_MODE_INFO *mbmi_ptr = &mbmi;
268 xd->mi = &mbmi_ptr;
269 const BLOCK_SIZE bsize = cpi->weber_bsize;
270 const TX_SIZE tx_size = max_txsize_lookup[bsize];
271 const int block_size = tx_size_wide[tx_size];
272 const int coeff_count = block_size * block_size;
273 const int mb_step = mi_size_wide[bsize];
274 const BitDepthInfo bd_info = get_bit_depth_info(xd);
275 const MultiThreadInfo *const mt_info = &cpi->mt_info;
276 const AV1EncAllIntraMultiThreadInfo *const intra_mt = &mt_info->intra_mt;
277 AV1EncRowMultiThreadSync *const intra_row_mt_sync =
278 &cpi->ppi->intra_row_mt_sync;
279 const int mi_cols = cm->mi_params.mi_cols;
280 const int mt_thread_id = mi_row / mb_step;
281 // TODO(chengchen): test different unit step size
282 const int mt_unit_step = mi_size_wide[MB_WIENER_MT_UNIT_SIZE];
283 const int mt_unit_cols = (mi_cols + (mt_unit_step >> 1)) / mt_unit_step;
284 int mt_unit_col = 0;
285 const int is_high_bitdepth = is_cur_buf_hbd(xd);
286
287 uint8_t *dst_buffer = pred_buffer;
288 const int dst_buffer_stride = MB_WIENER_PRED_BUF_STRIDE;
289
290 if (is_high_bitdepth) {
291 uint16_t *pred_buffer_16 = (uint16_t *)pred_buffer;
292 dst_buffer = CONVERT_TO_BYTEPTR(pred_buffer_16);
293 }
294
295 for (int mi_col = 0; mi_col < mi_cols; mi_col += mb_step) {
296 if (mi_col % mt_unit_step == 0) {
297 intra_mt->intra_sync_read_ptr(intra_row_mt_sync, mt_thread_id,
298 mt_unit_col);
299 #if CONFIG_MULTITHREAD
300 const int num_workers =
301 AOMMIN(mt_info->num_mod_workers[MOD_AI], mt_info->num_workers);
302 if (num_workers > 1) {
303 const AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
304 pthread_mutex_lock(enc_row_mt->mutex_);
305 const bool exit = enc_row_mt->mb_wiener_mt_exit;
306 pthread_mutex_unlock(enc_row_mt->mutex_);
307 // Stop further processing in case any worker has encountered an error.
308 if (exit) break;
309 }
310 #endif
311 }
312
313 PREDICTION_MODE best_mode = DC_PRED;
314 int best_intra_cost = INT_MAX;
315 const int mi_width = mi_size_wide[bsize];
316 const int mi_height = mi_size_high[bsize];
317 set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd,
318 mi_row, mi_col);
319 set_mi_row_col(xd, &xd->tile, mi_row, mi_height, mi_col, mi_width,
320 AOMMIN(mi_row + mi_height, cm->mi_params.mi_rows),
321 AOMMIN(mi_col + mi_width, cm->mi_params.mi_cols));
322 set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize],
323 av1_num_planes(cm));
324 xd->mi[0]->bsize = bsize;
325 xd->mi[0]->motion_mode = SIMPLE_TRANSLATION;
326 // Set above and left mbmi to NULL as they are not available in the
327 // preprocessing stage.
328 // They are used to determine intra edge filter types in intra prediction.
329 if (xd->up_available) {
330 xd->above_mbmi = NULL;
331 }
332 if (xd->left_available) {
333 xd->left_mbmi = NULL;
334 }
335 uint8_t *mb_buffer =
336 buffer + mi_row * MI_SIZE * buf_stride + mi_col * MI_SIZE;
337 for (PREDICTION_MODE mode = INTRA_MODE_START; mode < INTRA_MODE_END;
338 ++mode) {
339 // TODO(chengchen): Here we use the source instead of the reconstructed frame
340 // as the intra predictor so that the single-threaded and multi-threaded
341 // versions match. Ideally we want to use the reconstructed frame.
342 av1_predict_intra_block(
343 xd, cm->seq_params->sb_size, cm->seq_params->enable_intra_edge_filter,
344 block_size, block_size, tx_size, mode, 0, 0, FILTER_INTRA_MODES,
345 mb_buffer, buf_stride, dst_buffer, dst_buffer_stride, 0, 0, 0);
346 av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size,
347 mb_buffer, buf_stride, dst_buffer, dst_buffer_stride);
348 av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);
349 int intra_cost = aom_satd(coeff, coeff_count);
350 if (intra_cost < best_intra_cost) {
351 best_intra_cost = intra_cost;
352 best_mode = mode;
353 }
354 }
355
356 av1_predict_intra_block(
357 xd, cm->seq_params->sb_size, cm->seq_params->enable_intra_edge_filter,
358 block_size, block_size, tx_size, best_mode, 0, 0, FILTER_INTRA_MODES,
359 mb_buffer, buf_stride, dst_buffer, dst_buffer_stride, 0, 0, 0);
360 av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size,
361 mb_buffer, buf_stride, dst_buffer, dst_buffer_stride);
362 av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);
363
364 const struct macroblock_plane *const p = &x->plane[0];
365 uint16_t eob;
366 const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT];
367 QUANT_PARAM quant_param;
368 int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
369 av1_setup_quant(tx_size, 0, AV1_XFORM_QUANT_FP, 0, &quant_param);
370 #if CONFIG_AV1_HIGHBITDEPTH
371 if (is_cur_buf_hbd(xd)) {
372 av1_highbd_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
373 scan_order, &quant_param);
374 } else {
375 av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
376 scan_order, &quant_param);
377 }
378 #else
379 av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob, scan_order,
380 &quant_param);
381 #endif // CONFIG_AV1_HIGHBITDEPTH
382
383 if (cpi->oxcf.enable_rate_guide_deltaq) {
384 const int rate_cost = rate_estimator(qcoeff, eob, tx_size);
385 cpi->prep_rate_estimates[(mi_row / mb_step) * cpi->frame_info.mi_cols +
386 (mi_col / mb_step)] = rate_cost;
387 }
388
389 av1_inverse_transform_block(xd, dqcoeff, 0, DCT_DCT, tx_size, dst_buffer,
390 dst_buffer_stride, eob, 0);
391 WeberStats *weber_stats =
392 &cpi->mb_weber_stats[(mi_row / mb_step) * cpi->frame_info.mi_cols +
393 (mi_col / mb_step)];
394
395 weber_stats->rec_pix_max = 1;
396 weber_stats->rec_variance = 0;
397 weber_stats->src_pix_max = 1;
398 weber_stats->src_variance = 0;
399 weber_stats->distortion = 0;
400
401 int64_t src_mean = 0;
402 int64_t rec_mean = 0;
403 int64_t dist_mean = 0;
404
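// Accumulate raw sums and sums of squares over the block; the mean terms are
// subtracted after the loop, so the stored variance/distortion values are
// unnormalized sums of squared deviations rather than per-pixel averages.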
405 for (int pix_row = 0; pix_row < block_size; ++pix_row) {
406 for (int pix_col = 0; pix_col < block_size; ++pix_col) {
407 int src_pix, rec_pix;
408 #if CONFIG_AV1_HIGHBITDEPTH
409 if (is_cur_buf_hbd(xd)) {
410 uint16_t *src = CONVERT_TO_SHORTPTR(mb_buffer);
411 uint16_t *rec = CONVERT_TO_SHORTPTR(dst_buffer);
412 src_pix = src[pix_row * buf_stride + pix_col];
413 rec_pix = rec[pix_row * dst_buffer_stride + pix_col];
414 } else {
415 src_pix = mb_buffer[pix_row * buf_stride + pix_col];
416 rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
417 }
418 #else
419 src_pix = mb_buffer[pix_row * buf_stride + pix_col];
420 rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
421 #endif
422 src_mean += src_pix;
423 rec_mean += rec_pix;
424 dist_mean += src_pix - rec_pix;
425 weber_stats->src_variance += src_pix * src_pix;
426 weber_stats->rec_variance += rec_pix * rec_pix;
427 weber_stats->src_pix_max = AOMMAX(weber_stats->src_pix_max, src_pix);
428 weber_stats->rec_pix_max = AOMMAX(weber_stats->rec_pix_max, rec_pix);
429 weber_stats->distortion += (src_pix - rec_pix) * (src_pix - rec_pix);
430 }
431 }
432
433 if (cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) {
434 *sum_rec_distortion += weber_stats->distortion;
435 int est_block_rate = 0;
436 int64_t est_block_dist = 0;
437 model_rd_sse_fn[MODELRD_LEGACY](cpi, x, bsize, 0, weber_stats->distortion,
438 pix_num, &est_block_rate,
439 &est_block_dist);
440 *sum_est_rate += est_block_rate;
441 }
442
443 weber_stats->src_variance -= (src_mean * src_mean) / pix_num;
444 weber_stats->rec_variance -= (rec_mean * rec_mean) / pix_num;
445 weber_stats->distortion -= (dist_mean * dist_mean) / pix_num;
446 weber_stats->satd = best_intra_cost;
447
448 qcoeff[0] = 0;
449 int max_scale = 0;
450 for (int idx = 1; idx < coeff_count; ++idx) {
451 const int abs_qcoeff = abs(qcoeff[idx]);
452 max_scale = AOMMAX(max_scale, abs_qcoeff);
453 }
454 weber_stats->max_scale = max_scale;
455
456 if ((mi_col + mb_step) % mt_unit_step == 0 ||
457 (mi_col + mb_step) >= mi_cols) {
458 intra_mt->intra_sync_write_ptr(intra_row_mt_sync, mt_thread_id,
459 mt_unit_col, mt_unit_cols);
460 ++mt_unit_col;
461 }
462 }
463 // Set the pointer to null since mbmi is only allocated inside this function.
464 xd->mi = NULL;
465 }
466
467 static void calc_mb_wiener_var(AV1_COMP *const cpi, double *sum_rec_distortion,
468 double *sum_est_rate) {
469 MACROBLOCK *x = &cpi->td.mb;
470 MACROBLOCKD *xd = &x->e_mbd;
471 const BLOCK_SIZE bsize = cpi->weber_bsize;
472 const int mb_step = mi_size_wide[bsize];
473 DECLARE_ALIGNED(32, int16_t, src_diff[32 * 32]);
474 DECLARE_ALIGNED(32, tran_low_t, coeff[32 * 32]);
475 DECLARE_ALIGNED(32, tran_low_t, qcoeff[32 * 32]);
476 DECLARE_ALIGNED(32, tran_low_t, dqcoeff[32 * 32]);
477 for (int mi_row = 0; mi_row < cpi->frame_info.mi_rows; mi_row += mb_step) {
478 av1_calc_mb_wiener_var_row(cpi, x, xd, mi_row, src_diff, coeff, qcoeff,
479 dqcoeff, sum_rec_distortion, sum_est_rate,
480 cpi->td.wiener_tmp_pred_buf);
481 }
482 }
483
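// Computes a frame-level normalization factor as a SATD-weighted geometric
// mean (exp of the weighted mean of logs) of the per-block wiener variances.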
484 static int64_t estimate_wiener_var_norm(AV1_COMP *const cpi,
485 const BLOCK_SIZE norm_block_size) {
486 const AV1_COMMON *const cm = &cpi->common;
487 int64_t norm_factor = 1;
488 assert(norm_block_size >= BLOCK_16X16 && norm_block_size <= BLOCK_128X128);
489 const int norm_step = mi_size_wide[norm_block_size];
490 double sb_wiener_log = 0;
491 double sb_count = 0;
492 for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) {
493 for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += norm_step) {
494 const int sb_wiener_var =
495 get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);
496 const int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
497 const int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
498 const double scaled_satd = (double)satd / sqrt((double)sse);
499 sb_wiener_log += scaled_satd * log(sb_wiener_var);
500 sb_count += scaled_satd;
501 }
502 }
503 if (sb_count > 0) norm_factor = (int64_t)(exp(sb_wiener_log / sb_count));
504 norm_factor = AOMMAX(1, norm_factor);
505
506 return norm_factor;
507 }
508
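// Disables several intra coding tools when the frame is estimated to be coded
// at high quality (low qindex, high estimated bpp, low distortion per pixel),
// where their benefit is assumed to be small.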
509 static void automatic_intra_tools_off(AV1_COMP *cpi,
510 const double sum_rec_distortion,
511 const double sum_est_rate) {
512 if (!cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) return;
513
514 // Thresholds
515 const int high_quality_qindex = 128;
516 const double high_quality_bpp = 2.0;
517 const double high_quality_dist_per_pix = 4.0;
518
519 AV1_COMMON *const cm = &cpi->common;
520 const int qindex = cm->quant_params.base_qindex;
521 const double dist_per_pix =
522 (double)sum_rec_distortion / (cm->width * cm->height);
523 // The estimated bpp is not accurate, so it is divided by an empirical
524 // constant of 100.
524 const double estimate_bpp = sum_est_rate / (cm->width * cm->height * 100);
525
526 if (qindex < high_quality_qindex && estimate_bpp > high_quality_bpp &&
527 dist_per_pix < high_quality_dist_per_pix) {
528 cpi->oxcf.intra_mode_cfg.enable_smooth_intra = 0;
529 cpi->oxcf.intra_mode_cfg.enable_paeth_intra = 0;
530 cpi->oxcf.intra_mode_cfg.enable_cfl_intra = 0;
531 cpi->oxcf.intra_mode_cfg.enable_diagonal_intra = 0;
532 }
533 }
534
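// Reads the externally provided rate distribution (one value per 16x16 block)
// from the file named by rate_distribution_info, stores it in
// ext_rate_distribution, and computes ext_rate_scale as the ratio between the
// encoder's own uniform-Q rate estimates and the external rate sum.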
535 static void ext_rate_guided_quantization(AV1_COMP *cpi) {
536 // The per-block rate estimates were computed on 8x8 (weber_bsize) blocks.
537 const int mb_step = mi_size_wide[cpi->weber_bsize];
538 // Accumulate them to 16x16 blocks; the step size is in units of mi.
539 const int block_step = 4;
540
541 const char *filename = cpi->oxcf.rate_distribution_info;
542 FILE *pfile = fopen(filename, "r");
543 if (pfile == NULL) {
544 assert(pfile != NULL);
545 return;
546 }
547
548 double ext_rate_sum = 0.0;
549 for (int row = 0; row < cpi->frame_info.mi_rows; row += block_step) {
550 for (int col = 0; col < cpi->frame_info.mi_cols; col += block_step) {
551 float val;
552 const int fields_converted = fscanf(pfile, "%f", &val);
553 if (fields_converted != 1) {
554 assert(fields_converted == 1);
555 fclose(pfile);
556 return;
557 }
558 ext_rate_sum += val;
559 cpi->ext_rate_distribution[(row / mb_step) * cpi->frame_info.mi_cols +
560 (col / mb_step)] = val;
561 }
562 }
563 fclose(pfile);
564
565 int uniform_rate_sum = 0;
566 for (int row = 0; row < cpi->frame_info.mi_rows; row += block_step) {
567 for (int col = 0; col < cpi->frame_info.mi_cols; col += block_step) {
568 int rate_sum = 0;
569 for (int r = 0; r < block_step; r += mb_step) {
570 for (int c = 0; c < block_step; c += mb_step) {
571 const int mi_row = row + r;
572 const int mi_col = col + c;
573 rate_sum += cpi->prep_rate_estimates[(mi_row / mb_step) *
574 cpi->frame_info.mi_cols +
575 (mi_col / mb_step)];
576 }
577 }
578 uniform_rate_sum += rate_sum;
579 }
580 }
581
582 const double scale = uniform_rate_sum / ext_rate_sum;
583 cpi->ext_rate_scale = scale;
584 }
585
586 void av1_set_mb_wiener_variance(AV1_COMP *cpi) {
587 AV1_COMMON *const cm = &cpi->common;
588 const SequenceHeader *const seq_params = cm->seq_params;
589 if (aom_realloc_frame_buffer(
590 &cm->cur_frame->buf, cm->width, cm->height, seq_params->subsampling_x,
591 seq_params->subsampling_y, seq_params->use_highbitdepth,
592 cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL,
593 NULL, cpi->alloc_pyramid, 0))
594 aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
595 "Failed to allocate frame buffer");
596 av1_alloc_mb_wiener_var_pred_buf(&cpi->common, &cpi->td);
597 cpi->norm_wiener_variance = 0;
598
599 MACROBLOCK *x = &cpi->td.mb;
600 MACROBLOCKD *xd = &x->e_mbd;
601 // xd->mi needs to be set up since it is used in av1_frame_init_quantizer.
602 MB_MODE_INFO mbmi;
603 memset(&mbmi, 0, sizeof(mbmi));
604 MB_MODE_INFO *mbmi_ptr = &mbmi;
605 xd->mi = &mbmi_ptr;
606 cm->quant_params.base_qindex = cpi->oxcf.rc_cfg.cq_level;
607 av1_frame_init_quantizer(cpi);
608
609 double sum_rec_distortion = 0.0;
610 double sum_est_rate = 0.0;
611
612 MultiThreadInfo *const mt_info = &cpi->mt_info;
613 const int num_workers =
614 AOMMIN(mt_info->num_mod_workers[MOD_AI], mt_info->num_workers);
615 AV1EncAllIntraMultiThreadInfo *const intra_mt = &mt_info->intra_mt;
616 intra_mt->intra_sync_read_ptr = av1_row_mt_sync_read_dummy;
617 intra_mt->intra_sync_write_ptr = av1_row_mt_sync_write_dummy;
618 // Calculate differential contrast for each block for the entire image.
619 // TODO(chengchen): properly accumulate the distortion and rate in
620 // av1_calc_mb_wiener_var_mt(). Until then, call calc_mb_wiener_var() if
621 // auto_intra_tools_off is true.
622 if (num_workers > 1 && !cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) {
623 intra_mt->intra_sync_read_ptr = av1_row_mt_sync_read;
624 intra_mt->intra_sync_write_ptr = av1_row_mt_sync_write;
625 av1_calc_mb_wiener_var_mt(cpi, num_workers, &sum_rec_distortion,
626 &sum_est_rate);
627 } else {
628 calc_mb_wiener_var(cpi, &sum_rec_distortion, &sum_est_rate);
629 }
630
631 // Determine whether to turn off several intra coding tools.
632 automatic_intra_tools_off(cpi, sum_rec_distortion, sum_est_rate);
633
634 // Read the external rate distribution and use it to guide delta quantization.
635 if (cpi->oxcf.enable_rate_guide_deltaq) ext_rate_guided_quantization(cpi);
636
637 const BLOCK_SIZE norm_block_size = cm->seq_params->sb_size;
638 cpi->norm_wiener_variance = estimate_wiener_var_norm(cpi, norm_block_size);
639 const int norm_step = mi_size_wide[norm_block_size];
640
641 double sb_wiener_log = 0;
642 double sb_count = 0;
643 for (int its_cnt = 0; its_cnt < 2; ++its_cnt) {
644 sb_wiener_log = 0;
645 sb_count = 0;
646 for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) {
647 for (int mi_col = 0; mi_col < cm->mi_params.mi_cols;
648 mi_col += norm_step) {
649 int sb_wiener_var =
650 get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);
651
652 double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
653 double min_max_scale = AOMMAX(
654 1.0, get_max_scale(cpi, cm->seq_params->sb_size, mi_row, mi_col));
655
656 beta = AOMMIN(beta, 4);
657 beta = AOMMAX(beta, 0.25);
658
659 if (beta < 1 / min_max_scale) continue;
660
661 sb_wiener_var = (int)(cpi->norm_wiener_variance / beta);
662
663 int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
664 int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
665 double scaled_satd = (double)satd / sqrt((double)sse);
666 sb_wiener_log += scaled_satd * log(sb_wiener_var);
667 sb_count += scaled_satd;
668 }
669 }
670
671 if (sb_count > 0)
672 cpi->norm_wiener_variance = (int64_t)(exp(sb_wiener_log / sb_count));
673 cpi->norm_wiener_variance = AOMMAX(1, cpi->norm_wiener_variance);
674 }
675
676 // Set the pointer to null since mbmi is only allocated inside this function.
677 xd->mi = NULL;
678 aom_free_frame_buffer(&cm->cur_frame->buf);
679 av1_dealloc_mb_wiener_var_pred_buf(&cpi->td);
680 }
681
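// Maps the difference between the external (guide) rate and the encoder's
// uniform-Q rate estimate for this block to a delta-q offset. Note that bsize
// here is expected to be the (square) superblock size, so iterating rows with
// mi_size_wide and columns with mi_size_high is equivalent.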
682 static int get_rate_guided_quantizer(AV1_COMP *const cpi, BLOCK_SIZE bsize,
683 int mi_row, int mi_col) {
684 // The per-block rate estimates were computed on 8x8 (weber_bsize) blocks.
685 const int mb_step = mi_size_wide[cpi->weber_bsize];
686 // Accumulate them to 16x16 blocks.
687 const int block_step = mi_size_wide[BLOCK_16X16];
688 double sb_rate_hific = 0.0;
689 double sb_rate_uniform = 0.0;
690 for (int row = mi_row; row < mi_row + mi_size_wide[bsize];
691 row += block_step) {
692 for (int col = mi_col; col < mi_col + mi_size_high[bsize];
693 col += block_step) {
694 sb_rate_hific +=
695 cpi->ext_rate_distribution[(row / mb_step) * cpi->frame_info.mi_cols +
696 (col / mb_step)];
697
698 for (int r = 0; r < block_step; r += mb_step) {
699 for (int c = 0; c < block_step; c += mb_step) {
700 const int this_row = row + r;
701 const int this_col = col + c;
702 sb_rate_uniform +=
703 cpi->prep_rate_estimates[(this_row / mb_step) *
704 cpi->frame_info.mi_cols +
705 (this_col / mb_step)];
706 }
707 }
708 }
709 }
710 sb_rate_hific *= cpi->ext_rate_scale;
711
712 const double weight = 1.0;
713 const double rate_diff =
714 weight * (sb_rate_hific - sb_rate_uniform) / sb_rate_uniform;
715 double scale = pow(2, rate_diff);
716
717 scale = scale * scale;
718 double min_max_scale = AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col));
719 scale = 1.0 / AOMMIN(1.0 / scale, min_max_scale);
720
721 AV1_COMMON *const cm = &cpi->common;
722 const int base_qindex = cm->quant_params.base_qindex;
723 int offset =
724 av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, scale);
725 const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
726 const int max_offset = delta_q_info->delta_q_res * 10;
727 offset = AOMMIN(offset, max_offset - 1);
728 offset = AOMMAX(offset, -max_offset + 1);
729 int qindex = cm->quant_params.base_qindex + offset;
730 qindex = AOMMIN(qindex, MAXQ);
731 qindex = AOMMAX(qindex, MINQ);
732 if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1);
733
734 return qindex;
735 }
736
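// Returns the superblock qindex for the perceptual all-intra mode: beta is the
// ratio of the frame-level norm_wiener_variance to the block's wiener
// variance, limited by the block's min-max scale and clamped to [0.25, 4],
// then converted to a qindex offset bounded by +/-(delta_q_res * 20 - 1).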
737 int av1_get_sbq_perceptual_ai(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
738 int mi_col) {
739 if (cpi->oxcf.enable_rate_guide_deltaq) {
740 return get_rate_guided_quantizer(cpi, bsize, mi_row, mi_col);
741 }
742
743 AV1_COMMON *const cm = &cpi->common;
744 const int base_qindex = cm->quant_params.base_qindex;
745 int sb_wiener_var = get_var_perceptual_ai(cpi, bsize, mi_row, mi_col);
746 int offset = 0;
747 double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
748 double min_max_scale = AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col));
749 beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale);
750
751 // Cap beta such that the delta q value is not too far away from the base q.
752 beta = AOMMIN(beta, 4);
753 beta = AOMMAX(beta, 0.25);
754 offset = av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, beta);
755 const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
756 offset = AOMMIN(offset, delta_q_info->delta_q_res * 20 - 1);
757 offset = AOMMAX(offset, -delta_q_info->delta_q_res * 20 + 1);
758 int qindex = cm->quant_params.base_qindex + offset;
759 qindex = AOMMIN(qindex, MAXQ);
760 qindex = AOMMAX(qindex, MINQ);
761 if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1);
762
763 return qindex;
764 }
765
766 void av1_init_mb_ur_var_buffer(AV1_COMP *cpi) {
767 AV1_COMMON *cm = &cpi->common;
768
769 if (cpi->mb_delta_q) return;
770
771 CHECK_MEM_ERROR(cm, cpi->mb_delta_q,
772 aom_calloc(cpi->frame_info.mb_rows * cpi->frame_info.mb_cols,
773 sizeof(*cpi->mb_delta_q)));
774 }
775
776 #if CONFIG_TFLITE
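// Runs the bundled deltaq4 TFLite model on each superblock of normalized luma
// samples and writes the two output channels into predicts0/predicts1.
// Returns 1 on any TFLite failure, 0 on success.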
777 static int model_predict(BLOCK_SIZE block_size, int num_cols, int num_rows,
778 int bit_depth, uint8_t *y_buffer, int y_stride,
779 float *predicts0, float *predicts1) {
780 // Create the model and interpreter options.
781 TfLiteModel *model =
782 TfLiteModelCreate(av1_deltaq4_model_file, av1_deltaq4_model_fsize);
783 if (model == NULL) return 1;
784
785 TfLiteInterpreterOptions *options = TfLiteInterpreterOptionsCreate();
786 TfLiteInterpreterOptionsSetNumThreads(options, 2);
787 if (options == NULL) {
788 TfLiteModelDelete(model);
789 return 1;
790 }
791
792 // Create the interpreter.
793 TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options);
794 if (interpreter == NULL) {
795 TfLiteInterpreterOptionsDelete(options);
796 TfLiteModelDelete(model);
797 return 1;
798 }
799
800 // Allocate tensors and populate the input tensor data.
801 TfLiteInterpreterAllocateTensors(interpreter);
802 TfLiteTensor *input_tensor = TfLiteInterpreterGetInputTensor(interpreter, 0);
803 if (input_tensor == NULL) {
804 TfLiteInterpreterDelete(interpreter);
805 TfLiteInterpreterOptionsDelete(options);
806 TfLiteModelDelete(model);
807 return 1;
808 }
809
810 size_t input_size = TfLiteTensorByteSize(input_tensor);
811 float *input_data = aom_calloc(input_size, 1);
812 if (input_data == NULL) {
813 TfLiteInterpreterDelete(interpreter);
814 TfLiteInterpreterOptionsDelete(options);
815 TfLiteModelDelete(model);
816 return 1;
817 }
818
819 const int num_mi_w = mi_size_wide[block_size];
820 const int num_mi_h = mi_size_high[block_size];
821 for (int row = 0; row < num_rows; ++row) {
822 for (int col = 0; col < num_cols; ++col) {
823 const int row_offset = (row * num_mi_h) << 2;
824 const int col_offset = (col * num_mi_w) << 2;
825
826 uint8_t *buf = y_buffer + row_offset * y_stride + col_offset;
827 int r = row_offset, pos = 0;
828 const float base = (float)((1 << bit_depth) - 1);
829 while (r < row_offset + (num_mi_h << 2)) {
830 for (int c = 0; c < (num_mi_w << 2); ++c) {
831 input_data[pos++] = bit_depth > 8
832 ? (float)*CONVERT_TO_SHORTPTR(buf + c) / base
833 : (float)*(buf + c) / base;
834 }
835 buf += y_stride;
836 ++r;
837 }
838 TfLiteTensorCopyFromBuffer(input_tensor, input_data, input_size);
839
840 // Execute inference.
841 if (TfLiteInterpreterInvoke(interpreter) != kTfLiteOk) {
842 TfLiteInterpreterDelete(interpreter);
843 TfLiteInterpreterOptionsDelete(options);
844 TfLiteModelDelete(model);
845 return 1;
846 }
847
848 // Extract the output tensor data.
849 const TfLiteTensor *output_tensor =
850 TfLiteInterpreterGetOutputTensor(interpreter, 0);
851 if (output_tensor == NULL) {
852 TfLiteInterpreterDelete(interpreter);
853 TfLiteInterpreterOptionsDelete(options);
854 TfLiteModelDelete(model);
855 return 1;
856 }
857
858 size_t output_size = TfLiteTensorByteSize(output_tensor);
859 float output_data[2];
860
861 TfLiteTensorCopyToBuffer(output_tensor, output_data, output_size);
862 predicts0[row * num_cols + col] = output_data[0];
863 predicts1[row * num_cols + col] = output_data[1];
864 }
865 }
866
867 // Dispose of the model and interpreter objects.
868 TfLiteInterpreterDelete(interpreter);
869 TfLiteInterpreterOptionsDelete(options);
870 TfLiteModelDelete(model);
871 aom_free(input_data);
872 return 0;
873 }
874
875 void av1_set_mb_ur_variance(AV1_COMP *cpi) {
876 const AV1_COMMON *cm = &cpi->common;
877 const CommonModeInfoParams *const mi_params = &cm->mi_params;
878 uint8_t *y_buffer = cpi->source->y_buffer;
879 const int y_stride = cpi->source->y_stride;
880 const int block_size = cpi->common.seq_params->sb_size;
881 const uint32_t bit_depth = cpi->td.mb.e_mbd.bd;
882
883 const int num_mi_w = mi_size_wide[block_size];
884 const int num_mi_h = mi_size_high[block_size];
885 const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
886 const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;
887
888 // TODO(sdeng): fit a better model_1; disable it at this time.
889 float *mb_delta_q0, *mb_delta_q1, delta_q_avg0 = 0.0f;
890 CHECK_MEM_ERROR(cm, mb_delta_q0,
891 aom_calloc(num_rows * num_cols, sizeof(float)));
892 CHECK_MEM_ERROR(cm, mb_delta_q1,
893 aom_calloc(num_rows * num_cols, sizeof(float)));
894
895 if (model_predict(block_size, num_cols, num_rows, bit_depth, y_buffer,
896 y_stride, mb_delta_q0, mb_delta_q1)) {
897 aom_internal_error(cm->error, AOM_CODEC_ERROR,
898 "Failed to call TFlite functions.");
899 }
900
901 // Loop through each SB block.
902 for (int row = 0; row < num_rows; ++row) {
903 for (int col = 0; col < num_cols; ++col) {
904 const int index = row * num_cols + col;
905 delta_q_avg0 += mb_delta_q0[index];
906 }
907 }
908
909 delta_q_avg0 /= (float)(num_rows * num_cols);
910
911 float scaling_factor;
912 const float cq_level = (float)cpi->oxcf.rc_cfg.cq_level / (float)MAXQ;
913 if (cq_level < delta_q_avg0) {
914 scaling_factor = cq_level / delta_q_avg0;
915 } else {
916 scaling_factor = 1.0f - (cq_level - delta_q_avg0) / (1.0f - delta_q_avg0);
917 }
918
919 for (int row = 0; row < num_rows; ++row) {
920 for (int col = 0; col < num_cols; ++col) {
921 const int index = row * num_cols + col;
922 cpi->mb_delta_q[index] =
923 RINT((float)cpi->oxcf.q_cfg.deltaq_strength / 100.0f * (float)MAXQ *
924 scaling_factor * (mb_delta_q0[index] - delta_q_avg0));
925 }
926 }
927
928 aom_free(mb_delta_q0);
929 aom_free(mb_delta_q1);
930 }
931 #else // !CONFIG_TFLITE
932 void av1_set_mb_ur_variance(AV1_COMP *cpi) {
933 const AV1_COMMON *cm = &cpi->common;
934 const CommonModeInfoParams *const mi_params = &cm->mi_params;
935 const MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
936 uint8_t *y_buffer = cpi->source->y_buffer;
937 const int y_stride = cpi->source->y_stride;
938 const int block_size = cpi->common.seq_params->sb_size;
939
940 const int num_mi_w = mi_size_wide[block_size];
941 const int num_mi_h = mi_size_high[block_size];
942 const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
943 const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;
944
945 int *mb_delta_q[2];
946 CHECK_MEM_ERROR(cm, mb_delta_q[0],
947 aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[0])));
948 CHECK_MEM_ERROR(cm, mb_delta_q[1],
949 aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[1])));
950
951 // Approximates the model change between the current version (Sept 2021) and
952 // the baseline (July 2021).
953 const double model_change[] = { 3.0, 3.0 };
954 // The following parameters are fitted from user-labeled data.
955 const double a[] = { -24.50 * 4.0, -17.20 * 4.0 };
956 const double b[] = { 0.004898, 0.003093 };
957 const double c[] = { (29.932 + model_change[0]) * 4.0,
958 (42.100 + model_change[1]) * 4.0 };
959 int delta_q_avg[2] = { 0, 0 };
960 // Loop through each SB block.
961 for (int row = 0; row < num_rows; ++row) {
962 for (int col = 0; col < num_cols; ++col) {
963 double var = 0.0, num_of_var = 0.0;
964 const int index = row * num_cols + col;
965
966 // Loop through each 8x8 block.
967 for (int mi_row = row * num_mi_h;
968 mi_row < mi_params->mi_rows && mi_row < (row + 1) * num_mi_h;
969 mi_row += 2) {
970 for (int mi_col = col * num_mi_w;
971 mi_col < mi_params->mi_cols && mi_col < (col + 1) * num_mi_w;
972 mi_col += 2) {
973 struct buf_2d buf;
974 const int row_offset_y = mi_row << 2;
975 const int col_offset_y = mi_col << 2;
976
977 buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
978 buf.stride = y_stride;
979
980 unsigned int block_variance;
981 block_variance = av1_get_perpixel_variance_facade(
982 cpi, xd, &buf, BLOCK_8X8, AOM_PLANE_Y);
983
984 block_variance = AOMMAX(block_variance, 1);
985 var += log((double)block_variance);
986 num_of_var += 1.0;
987 }
988 }
989 var = exp(var / num_of_var);
990 mb_delta_q[0][index] = RINT(a[0] * exp(-b[0] * var) + c[0]);
991 mb_delta_q[1][index] = RINT(a[1] * exp(-b[1] * var) + c[1]);
992 delta_q_avg[0] += mb_delta_q[0][index];
993 delta_q_avg[1] += mb_delta_q[1][index];
994 }
995 }
996
997 delta_q_avg[0] = RINT((double)delta_q_avg[0] / (num_rows * num_cols));
998 delta_q_avg[1] = RINT((double)delta_q_avg[1] / (num_rows * num_cols));
999
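// Pick which fitted model (or an interpolation between the two) to apply,
// based on where the configured cq_level falls relative to the two model
// averages, and derive a scaling factor for the per-block offsets.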
1000 int model_idx;
1001 double scaling_factor;
1002 const int cq_level = cpi->oxcf.rc_cfg.cq_level;
1003 if (cq_level < delta_q_avg[0]) {
1004 model_idx = 0;
1005 scaling_factor = (double)cq_level / delta_q_avg[0];
1006 } else if (cq_level < delta_q_avg[1]) {
1007 model_idx = 2;
1008 scaling_factor =
1009 (double)(cq_level - delta_q_avg[0]) / (delta_q_avg[1] - delta_q_avg[0]);
1010 } else {
1011 model_idx = 1;
1012 scaling_factor = (double)(MAXQ - cq_level) / (MAXQ - delta_q_avg[1]);
1013 }
1014
1015 const double new_delta_q_avg =
1016 delta_q_avg[0] + scaling_factor * (delta_q_avg[1] - delta_q_avg[0]);
1017 for (int row = 0; row < num_rows; ++row) {
1018 for (int col = 0; col < num_cols; ++col) {
1019 const int index = row * num_cols + col;
1020 if (model_idx == 2) {
1021 const double delta_q =
1022 mb_delta_q[0][index] +
1023 scaling_factor * (mb_delta_q[1][index] - mb_delta_q[0][index]);
1024 cpi->mb_delta_q[index] = RINT((double)cpi->oxcf.q_cfg.deltaq_strength /
1025 100.0 * (delta_q - new_delta_q_avg));
1026 } else {
1027 cpi->mb_delta_q[index] = RINT(
1028 (double)cpi->oxcf.q_cfg.deltaq_strength / 100.0 * scaling_factor *
1029 (mb_delta_q[model_idx][index] - delta_q_avg[model_idx]));
1030 }
1031 }
1032 }
1033
1034 aom_free(mb_delta_q[0]);
1035 aom_free(mb_delta_q[1]);
1036 }
1037 #endif  // CONFIG_TFLITE
1038
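// Returns the superblock qindex for the user-rating-based (variance-guided)
// mode by adding the precomputed per-superblock delta q to the base qindex.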
1039 int av1_get_sbq_user_rating_based(AV1_COMP *const cpi, int mi_row, int mi_col) {
1040 const BLOCK_SIZE bsize = cpi->common.seq_params->sb_size;
1041 const CommonModeInfoParams *const mi_params = &cpi->common.mi_params;
1042 AV1_COMMON *const cm = &cpi->common;
1043 const int base_qindex = cm->quant_params.base_qindex;
1044 if (base_qindex == MINQ || base_qindex == MAXQ) return base_qindex;
1045
1046 const int num_mi_w = mi_size_wide[bsize];
1047 const int num_mi_h = mi_size_high[bsize];
1048 const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
1049 const int index = (mi_row / num_mi_h) * num_cols + (mi_col / num_mi_w);
1050 const int delta_q = cpi->mb_delta_q[index];
1051
1052 int qindex = base_qindex + delta_q;
1053 qindex = AOMMIN(qindex, MAXQ);
1054 qindex = AOMMAX(qindex, MINQ + 1);
1055
1056 return qindex;
1057 }
1058