1 /*
2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <limits.h>
13 #include <math.h>
14 #include <stdbool.h>
15 #include <stdio.h>
16
17 #include "config/aom_config.h"
18 #include "config/aom_dsp_rtcd.h"
19 #include "config/av1_rtcd.h"
20
21 #include "aom_dsp/aom_dsp_common.h"
22 #include "aom_dsp/binary_codes_writer.h"
23 #include "aom_ports/mem.h"
24 #include "aom_ports/aom_timer.h"
25
26 #include "av1/common/reconinter.h"
27 #include "av1/common/blockd.h"
28
29 #include "av1/encoder/encodeframe.h"
30 #include "av1/encoder/encodeframe_utils.h"
31 #include "av1/encoder/var_based_part.h"
32 #include "av1/encoder/reconinter_enc.h"
33 #include "av1/encoder/rdopt_utils.h"
34
// Possible values for the force_split variable while evaluating variance based
// partitioning. The value is passed to set_vt_partitioning() as |force_split|.
enum {
  // Evaluate all partition types
  PART_EVAL_ALL = 0,
  // Force PARTITION_SPLIT: set_vt_partitioning() returns 0 without selecting
  // a partition at the current block size.
  PART_EVAL_ONLY_SPLIT = 1,
  // Force PARTITION_NONE: set_vt_partitioning() selects the current block size
  // as soon as the block passes the tile boundary check.
  PART_EVAL_ONLY_NONE = 2
} UENUM1BYTE(PART_EVAL_STATUS);
45
// Type-independent view of one node of the variance tree, filled in by
// tree_to_node() so that the same code can handle every tree level.
typedef struct {
  // Statistics of the node itself (none, horz[2] and vert[2] partitions).
  VPVariance *part_variances;
  // Statistics of the four children of the node. Children 0/1 make up horz[0]
  // and 2/3 make up horz[1]; children 0/2 make up vert[0] and 1/3 make up
  // vert[1] (see fill_variance_tree()).
  VPartVar *split[4];
} variance_node;
50
// Helper for tree_to_node(): interprets |data| as a tree of type |type| and
// stores in |node| the address of its part_variances and of the "none"
// statistics of each of its four children.
#define VT_FILL_NODE_FROM(type)                                       \
  do {                                                                \
    type *const tree = (type *)data;                                  \
    node->part_variances = &tree->part_variances;                     \
    for (int child = 0; child < 4; ++child)                           \
      node->split[child] = &tree->split[child].part_variances.none;   \
  } while (0)

// Builds a type-independent view (|node|) of the variance tree node stored at
// |data|. |bsize| selects the concrete tree type; any size other than the
// square sizes 128X128 .. 8X8 is treated as a 4X4 leaf, whose children are
// plain VPartVar entries rather than sub-trees.
static inline void tree_to_node(void *data, BLOCK_SIZE bsize,
                                variance_node *node) {
  node->part_variances = NULL;
  switch (bsize) {
    case BLOCK_128X128: VT_FILL_NODE_FROM(VP128x128); break;
    case BLOCK_64X64: VT_FILL_NODE_FROM(VP64x64); break;
    case BLOCK_32X32: VT_FILL_NODE_FROM(VP32x32); break;
    case BLOCK_16X16: VT_FILL_NODE_FROM(VP16x16); break;
    case BLOCK_8X8: VT_FILL_NODE_FROM(VP8x8); break;
    default: {
      assert(bsize == BLOCK_4X4);
      VP4x4 *const leaf = (VP4x4 *)data;
      node->part_variances = &leaf->part_variances;
      for (int child = 0; child < 4; ++child)
        node->split[child] = &leaf->split[child];
      break;
    }
  }
}

#undef VT_FILL_NODE_FROM
100
101 // Set variance values given sum square error, sum error, count.
fill_variance(uint32_t s2,int32_t s,int c,VPartVar * v)102 static inline void fill_variance(uint32_t s2, int32_t s, int c, VPartVar *v) {
103 v->sum_square_error = s2;
104 v->sum_error = s;
105 v->log2_count = c;
106 }
107
// Computes v->variance from the statistics stored in |v|:
//   variance = 256 * (sum_square_error - sum_error^2 / n) / n
// with n = 2^log2_count. Both divisions are done as right shifts and the
// subtraction is carried out in 32-bit unsigned arithmetic.
static inline void get_variance(VPartVar *v) {
  const int64_t squared_sum = (int64_t)v->sum_error * v->sum_error;
  const uint32_t squared_sum_over_n = (uint32_t)(squared_sum >> v->log2_count);
  v->variance = (int)(256 * (v->sum_square_error - squared_sum_over_n) >>
                      v->log2_count);
}
115
// Combines the statistics of two blocks |a| and |b| that hold the same number
// of samples and stores the result in |r|, which then describes twice as many
// samples (log2 count incremented by one).
static inline void sum_2_variances(const VPartVar *a, const VPartVar *b,
                                   VPartVar *r) {
  assert(a->log2_count == b->log2_count);
  const uint32_t total_sse = a->sum_square_error + b->sum_square_error;
  const int32_t total_sum = a->sum_error + b->sum_error;
  const int total_log2_count = a->log2_count + 1;
  fill_variance(total_sse, total_sum, total_log2_count, r);
}
122
// Derives the statistics of the horz, vert and none partitions of the |bsize|
// tree node stored at |data| from the statistics of its four children.
static inline void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
  variance_node node = { NULL, { NULL, NULL, NULL, NULL } };
  tree_to_node(data, bsize, &node);

  VPVariance *const pv = node.part_variances;
  VPartVar *const *const child = node.split;

  // Two horizontal halves: children 0+1 and children 2+3.
  sum_2_variances(child[0], child[1], &pv->horz[0]);
  sum_2_variances(child[2], child[3], &pv->horz[1]);
  // Two vertical halves: children 0+2 and children 1+3.
  sum_2_variances(child[0], child[2], &pv->vert[0]);
  sum_2_variances(child[1], child[3], &pv->vert[1]);
  // The whole block is the sum of its two vertical halves.
  sum_2_variances(&pv->vert[0], &pv->vert[1], &pv->none);
}
134
// Makes the mode-info grid entry at (mi_row, mi_col) point to its slot in the
// mode-info allocation and records |bsize| in that slot. Positions at or
// beyond mi_rows / mi_cols are ignored.
static inline void set_block_size(AV1_COMP *const cpi, int mi_row, int mi_col,
                                  BLOCK_SIZE bsize) {
  CommonModeInfoParams *const mi_params = &cpi->common.mi_params;
  if (mi_col >= mi_params->mi_cols || mi_row >= mi_params->mi_rows) return;

  const int grid_pos = get_mi_grid_idx(mi_params, mi_row, mi_col);
  const int alloc_pos = get_alloc_mi_idx(mi_params, mi_row, mi_col);
  MB_MODE_INFO *const slot = &mi_params->mi_alloc[alloc_pos];
  mi_params->mi_grid_base[grid_pos] = slot;
  slot->bsize = bsize;
}
147
// Tries to select the partition of the square |bsize| block at
// (mi_row, mi_col) from the variance tree node stored at |data|.
//
// The candidates are tried in this order: the whole block (PARTITION_NONE),
// a vertical split and a horizontal split. A candidate is taken when it passes
// the tile boundary check and its variance(s) are below |threshold|; the
// chosen block size(s) are then written with set_block_size().
//
// |force_split| can short-circuit the decision (see PART_EVAL_STATUS), and
// blocks smaller than |bsize_min| are never selected here.
//
// Returns 1 if a partition was selected, 0 otherwise.
static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCKD *const xd,
                               const TileInfo *const tile, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               PART_EVAL_STATUS force_split) {
  AV1_COMMON *const cm = &cpi->common;
  const int block_width = mi_size_wide[bsize];
  const int block_height = mi_size_high[bsize];
  assert(block_height == block_width);

  // Extents (in mi units) that have to fit inside the tile for the whole
  // block and for one half of it.
  int whole_w = block_width;
  int whole_h = block_height;
  int half_w = block_width >> 1;
  int half_h = block_height >> 1;
  // On the right and bottom boundary we only need to check
  // if half the bsize fits, because boundary is extended
  // up to 64. So do this check only for sb_size = 64X64.
  if (cm->seq_params->sb_size == BLOCK_64X64) {
    if (tile->mi_col_end == cm->mi_params.mi_cols) {
      whole_w = (block_width >> 1) + 1;
      half_w = (block_width >> 2) + 1;
    }
    if (tile->mi_row_end == cm->mi_params.mi_rows) {
      whole_h = (block_height >> 1) + 1;
      half_h = (block_height >> 2) + 1;
    }
  }
  const int cols_fit = mi_col + whole_w <= tile->mi_col_end;
  const int rows_fit = mi_row + whole_h <= tile->mi_row_end;
  const int block_fits = cols_fit && rows_fit;

  variance_node vt;
  tree_to_node(data, bsize, &vt);
  VPVariance *const pv = vt.part_variances;

  if (block_fits && force_split == PART_EVAL_ONLY_NONE) {
    set_block_size(cpi, mi_row, mi_col, bsize);
    return 1;
  }
  if (force_split == PART_EVAL_ONLY_SPLIT) return 0;
  if (bsize < bsize_min) return 0;

  // For inter frames the variance was already computed to set the
  // force_split; for intra-only frames it is computed here.
  const int intra_only = frame_is_intra_only(cm);
  if (intra_only) get_variance(&pv->none);

  // For key frame: take split for bsize above 32X32 or very high variance.
  if (bsize > bsize_min && intra_only &&
      (bsize > BLOCK_32X32 || pv->none.variance > (threshold << 4))) {
    return 0;
  }

  // If variance is low, take the bsize (no split).
  if (block_fits && pv->none.variance < threshold) {
    set_block_size(cpi, mi_row, mi_col, bsize);
    return 1;
  }

  // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling) there is no
  // check for vert/horiz split as too few samples for variance.
  if (bsize == bsize_min) return 0;

  // Check vertical split.
  if (rows_fit && mi_col + half_w <= tile->mi_col_end) {
    const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT);
    // The sub-size is rejected below if it has no valid chroma (U plane)
    // block size.
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(subsize, xd->plane[AOM_PLANE_U].subsampling_x,
                             xd->plane[AOM_PLANE_U].subsampling_y);
    get_variance(&pv->vert[0]);
    get_variance(&pv->vert[1]);
    if (pv->vert[0].variance < threshold &&
        pv->vert[1].variance < threshold && plane_bsize < BLOCK_INVALID) {
      set_block_size(cpi, mi_row, mi_col, subsize);
      set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
      return 1;
    }
  }

  // Check horizontal split.
  if (cols_fit && mi_row + half_h <= tile->mi_row_end) {
    const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ);
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(subsize, xd->plane[AOM_PLANE_U].subsampling_x,
                             xd->plane[AOM_PLANE_U].subsampling_y);
    get_variance(&pv->horz[0]);
    get_variance(&pv->horz[1]);
    if (pv->horz[0].variance < threshold &&
        pv->horz[1].variance < threshold && plane_bsize < BLOCK_INVALID) {
      set_block_size(cpi, mi_row, mi_col, subsize);
      set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
      return 1;
    }
  }

  return 0;
}
253
// Returns 1 if the top-left corner of each of the four 8x8 sub-blocks of the
// 16x16 block at (x16_idx, y16_idx) lies inside the
// |pixels_wide| x |pixels_high| area, 0 otherwise.
static inline int all_blks_inside(int x16_idx, int y16_idx, int pixels_wide,
                                  int pixels_high) {
  for (int blk = 0; blk < 4; ++blk) {
    if (x16_idx + GET_BLK_IDX_X(blk, 3) >= pixels_wide) return 0;
    if (y16_idx + GET_BLK_IDX_Y(blk, 3) >= pixels_high) return 0;
  }
  return 1;
}
263
#if CONFIG_AV1_HIGHBITDEPTH
// TODO(yunqingwang): Perform average of four 8x8 blocks similar to lowbd
// High bit-depth variant of fill_variance_8x8avg_lowbd(): for each 8x8
// sub-block of the 16x16 block at (x16_idx, y16_idx), stores the difference of
// the source and reference 8x8 averages (and its square) as a single-sample
// statistic in |vst|. Sub-blocks outside the picture get zero statistics.
static inline void fill_variance_8x8avg_highbd(
    const uint8_t *src_buf, int src_stride, const uint8_t *dst_buf,
    int dst_stride, int x16_idx, int y16_idx, VP16x16 *vst, int pixels_wide,
    int pixels_high) {
  for (int blk = 0; blk < 4; ++blk) {
    const int x8 = x16_idx + GET_BLK_IDX_X(blk, 3);
    const int y8 = y16_idx + GET_BLK_IDX_Y(blk, 3);
    int avg_diff = 0;
    if (x8 < pixels_wide && y8 < pixels_high) {
      const int src_avg =
          aom_highbd_avg_8x8(src_buf + y8 * src_stride + x8, src_stride);
      const int dst_avg =
          aom_highbd_avg_8x8(dst_buf + y8 * dst_stride + x8, dst_stride);
      avg_diff = src_avg - dst_avg;
    }
    const unsigned int sq_diff = avg_diff * avg_diff;
    fill_variance(sq_diff, avg_diff, 0, &vst->split[blk].part_variances.none);
  }
}
#endif
288
// For each 8x8 sub-block of the 16x16 block at (x16_idx, y16_idx), stores the
// difference of the source and reference 8x8 averages (and its square) as a
// single-sample statistic in |vst|. When all four sub-blocks are inside the
// picture the averages are obtained with one aom_avg_8x8_quad() call per
// buffer; otherwise each inside sub-block is averaged on its own and the
// outside ones get zero statistics.
static inline void fill_variance_8x8avg_lowbd(
    const uint8_t *src_buf, int src_stride, const uint8_t *dst_buf,
    int dst_stride, int x16_idx, int y16_idx, VP16x16 *vst, int pixels_wide,
    int pixels_high) {
  int avg_diff[4] = { 0 };

  if (all_blks_inside(x16_idx, y16_idx, pixels_wide, pixels_high)) {
    int src_avg[4];
    int dst_avg[4];
    aom_avg_8x8_quad(src_buf, src_stride, x16_idx, y16_idx, src_avg);
    aom_avg_8x8_quad(dst_buf, dst_stride, x16_idx, y16_idx, dst_avg);
    for (int blk = 0; blk < 4; ++blk) {
      avg_diff[blk] = src_avg[blk] - dst_avg[blk];
    }
  } else {
    for (int blk = 0; blk < 4; ++blk) {
      const int x8 = x16_idx + GET_BLK_IDX_X(blk, 3);
      const int y8 = y16_idx + GET_BLK_IDX_Y(blk, 3);
      if (x8 >= pixels_wide || y8 >= pixels_high) continue;
      const int src_avg =
          aom_avg_8x8(src_buf + y8 * src_stride + x8, src_stride);
      const int dst_avg =
          aom_avg_8x8(dst_buf + y8 * dst_stride + x8, dst_stride);
      avg_diff[blk] = src_avg - dst_avg;
    }
  }

  for (int blk = 0; blk < 4; ++blk) {
    const unsigned int sq_diff = avg_diff[blk] * avg_diff[blk];
    fill_variance(sq_diff, avg_diff[blk], 0,
                  &vst->split[blk].part_variances.none);
  }
}
324
325 // Obtain parameters required to calculate variance (such as sum, sse, etc,.)
326 // at 8x8 sub-block level for a given 16x16 block.
327 // The function can be called only when is_key_frame is false since sum is
328 // computed between source and reference frames.
fill_variance_8x8avg(const uint8_t * src_buf,int src_stride,const uint8_t * dst_buf,int dst_stride,int x16_idx,int y16_idx,VP16x16 * vst,int highbd_flag,int pixels_wide,int pixels_high)329 static inline void fill_variance_8x8avg(const uint8_t *src_buf, int src_stride,
330 const uint8_t *dst_buf, int dst_stride,
331 int x16_idx, int y16_idx, VP16x16 *vst,
332 int highbd_flag, int pixels_wide,
333 int pixels_high) {
334 #if CONFIG_AV1_HIGHBITDEPTH
335 if (highbd_flag) {
336 fill_variance_8x8avg_highbd(src_buf, src_stride, dst_buf, dst_stride,
337 x16_idx, y16_idx, vst, pixels_wide,
338 pixels_high);
339 return;
340 }
341 #else
342 (void)highbd_flag;
343 #endif // CONFIG_AV1_HIGHBITDEPTH
344 fill_variance_8x8avg_lowbd(src_buf, src_stride, dst_buf, dst_stride, x16_idx,
345 y16_idx, vst, pixels_wide, pixels_high);
346 }
347
// For every 8x8 sub-block of the 16x16 block at (x16_idx, y16_idx) that lies
// inside the picture, obtains a (min, max) pair from aom_minmax_8x8() (or its
// high bit-depth counterpart) for the source and reference blocks, and tracks
// the largest and the smallest (max - min) range seen.
// Returns the largest range minus the smallest range. The trackers start at 0
// and 255 respectively, so the result is -255 if no sub-block is inside.
static int compute_minmax_8x8(const uint8_t *src_buf, int src_stride,
                              const uint8_t *dst_buf, int dst_stride,
                              int x16_idx, int y16_idx,
#if CONFIG_AV1_HIGHBITDEPTH
                              int highbd_flag,
#endif
                              int pixels_wide, int pixels_high) {
  int largest_range = 0;
  int smallest_range = 255;
  // Loop over the 4 8x8 subblocks.
  for (int blk = 0; blk < 4; ++blk) {
    const int x8 = x16_idx + GET_BLK_IDX_X(blk, 3);
    const int y8 = y16_idx + GET_BLK_IDX_Y(blk, 3);
    if (x8 >= pixels_wide || y8 >= pixels_high) continue;

    const uint8_t *const src_blk = src_buf + y8 * src_stride + x8;
    const uint8_t *const dst_blk = dst_buf + y8 * dst_stride + x8;
    int blk_min = 0;
    int blk_max = 0;
#if CONFIG_AV1_HIGHBITDEPTH
    if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
      aom_highbd_minmax_8x8(src_blk, src_stride, dst_blk, dst_stride, &blk_min,
                            &blk_max);
    } else {
      aom_minmax_8x8(src_blk, src_stride, dst_blk, dst_stride, &blk_min,
                     &blk_max);
    }
#else
    aom_minmax_8x8(src_blk, src_stride, dst_blk, dst_stride, &blk_min,
                   &blk_max);
#endif
    const int range = blk_max - blk_min;
    if (range > largest_range) largest_range = range;
    if (range < smallest_range) smallest_range = range;
  }
  return largest_range - smallest_range;
}
385
386 // Function to compute average and variance of 4x4 sub-block.
387 // The function can be called only when is_key_frame is true since sum is
388 // computed using source frame only.
fill_variance_4x4avg(const uint8_t * src_buf,int src_stride,int x8_idx,int y8_idx,VP8x8 * vst,int highbd_flag,int pixels_wide,int pixels_high,int border_offset_4x4)389 static inline void fill_variance_4x4avg(const uint8_t *src_buf, int src_stride,
390 int x8_idx, int y8_idx, VP8x8 *vst,
391 #if CONFIG_AV1_HIGHBITDEPTH
392 int highbd_flag,
393 #endif
394 int pixels_wide, int pixels_high,
395 int border_offset_4x4) {
396 for (int idx = 0; idx < 4; idx++) {
397 const int x4_idx = x8_idx + GET_BLK_IDX_X(idx, 2);
398 const int y4_idx = y8_idx + GET_BLK_IDX_Y(idx, 2);
399 unsigned int sse = 0;
400 int sum = 0;
401 if (x4_idx < pixels_wide - border_offset_4x4 &&
402 y4_idx < pixels_high - border_offset_4x4) {
403 int src_avg;
404 int dst_avg = 128;
405 #if CONFIG_AV1_HIGHBITDEPTH
406 if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
407 src_avg = aom_highbd_avg_4x4(src_buf + y4_idx * src_stride + x4_idx,
408 src_stride);
409 } else {
410 src_avg =
411 aom_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, src_stride);
412 }
413 #else
414 src_avg = aom_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, src_stride);
415 #endif
416
417 sum = src_avg - dst_avg;
418 sse = sum * sum;
419 }
420 fill_variance(sse, sum, 0, &vst->split[idx].part_variances.none);
421 }
422 }
423
// Scales the base partition threshold:
//  - by 3/2 for a non-reference frame that is not static, then
//  - by 5/4 when |speed| is 8 or higher.
// Both scalings use integer arithmetic (multiply, then shift right).
static int64_t scale_part_thresh_content(int64_t threshold_base, int speed,
                                         int non_reference_frame,
                                         int is_static) {
  int64_t scaled = threshold_base;
  if (non_reference_frame && !is_static) {
    scaled = (3 * scaled) >> 1;
  }
  return (speed >= 8) ? ((5 * scaled) >> 2) : scaled;
}
434
435 // Tune thresholds less or more aggressively to prefer larger partitions
tune_thresh_based_on_qindex(AV1_COMP * cpi,int64_t thresholds[],uint64_t block_sad,int current_qindex,int num_pixels,bool is_segment_id_boosted,int source_sad_nonrd,int lighting_change)436 static inline void tune_thresh_based_on_qindex(
437 AV1_COMP *cpi, int64_t thresholds[], uint64_t block_sad, int current_qindex,
438 int num_pixels, bool is_segment_id_boosted, int source_sad_nonrd,
439 int lighting_change) {
440 double weight;
441 if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 3) {
442 const int win = 20;
443 if (current_qindex < QINDEX_LARGE_BLOCK_THR - win)
444 weight = 1.0;
445 else if (current_qindex > QINDEX_LARGE_BLOCK_THR + win)
446 weight = 0.0;
447 else
448 weight =
449 1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + win) / (2 * win);
450 if (num_pixels > RESOLUTION_480P) {
451 for (int i = 0; i < 4; i++) {
452 thresholds[i] <<= 1;
453 }
454 }
455 if (num_pixels <= RESOLUTION_288P) {
456 thresholds[3] = INT64_MAX;
457 if (is_segment_id_boosted == false) {
458 thresholds[1] <<= 2;
459 thresholds[2] <<= (source_sad_nonrd <= kLowSad) ? 5 : 4;
460 } else {
461 thresholds[1] <<= 1;
462 thresholds[2] <<= 3;
463 }
464 // Allow for split to 8x8 for superblocks where part of it has
465 // moving boundary. So allow for sb with source_sad above threshold,
466 // and avoid very large source_sad or high source content, to avoid
467 // too many 8x8 within superblock.
468 uint64_t avg_source_sad_thresh = 25000;
469 uint64_t block_sad_low = 25000;
470 uint64_t block_sad_high = 50000;
471 if (cpi->svc.temporal_layer_id == 0 &&
472 cpi->svc.number_temporal_layers > 1) {
473 // Increase the sad thresholds for base TL0, as reference/LAST is
474 // 2/4 frames behind (for 2/3 #TL).
475 avg_source_sad_thresh = 40000;
476 block_sad_high = 70000;
477 }
478 if (is_segment_id_boosted == false &&
479 cpi->rc.avg_source_sad < avg_source_sad_thresh &&
480 block_sad > block_sad_low && block_sad < block_sad_high &&
481 !lighting_change) {
482 thresholds[2] = (3 * thresholds[2]) >> 2;
483 thresholds[3] = thresholds[2] << 3;
484 }
485 // Condition the increase of partition thresholds on the segment
486 // and the content. Avoid the increase for superblocks which have
487 // high source sad, unless the whole frame has very high motion
488 // (i.e, cpi->rc.avg_source_sad is very large, in which case all blocks
489 // have high source sad).
490 } else if (num_pixels > RESOLUTION_480P && is_segment_id_boosted == false &&
491 (source_sad_nonrd != kHighSad ||
492 cpi->rc.avg_source_sad > 50000)) {
493 thresholds[0] = (3 * thresholds[0]) >> 1;
494 thresholds[3] = INT64_MAX;
495 if (current_qindex > QINDEX_LARGE_BLOCK_THR) {
496 thresholds[1] =
497 (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]);
498 thresholds[2] =
499 (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]);
500 }
501 } else if (current_qindex > QINDEX_LARGE_BLOCK_THR &&
502 is_segment_id_boosted == false &&
503 (source_sad_nonrd != kHighSad ||
504 cpi->rc.avg_source_sad > 50000)) {
505 thresholds[1] =
506 (int)((1 - weight) * (thresholds[1] << 2) + weight * thresholds[1]);
507 thresholds[2] =
508 (int)((1 - weight) * (thresholds[2] << 4) + weight * thresholds[2]);
509 thresholds[3] = INT64_MAX;
510 }
511 } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 2) {
512 thresholds[1] <<= (source_sad_nonrd <= kLowSad) ? 2 : 0;
513 thresholds[2] =
514 (source_sad_nonrd <= kLowSad) ? (3 * thresholds[2]) : thresholds[2];
515 } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 1) {
516 const int fac = (source_sad_nonrd <= kLowSad) ? 2 : 1;
517 if (current_qindex < QINDEX_LARGE_BLOCK_THR - 45)
518 weight = 1.0;
519 else if (current_qindex > QINDEX_LARGE_BLOCK_THR + 45)
520 weight = 0.0;
521 else
522 weight = 1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + 45) / (2 * 45);
523 thresholds[1] =
524 (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]);
525 thresholds[2] =
526 (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]);
527 thresholds[3] =
528 (int)((1 - weight) * (thresholds[3] << fac) + weight * thresholds[3]);
529 }
530 if (cpi->sf.part_sf.disable_8x8_part_based_on_qidx && (current_qindex < 128))
531 thresholds[3] = INT64_MAX;
532 }
533
// Fills thresholds[0..4] for an intra-only frame from |threshold_base|.
// With the force_large_partition_blocks_intra speed feature the base is first
// scaled up by 2^(threshold_left_shift - 7) in ALLINTRA mode and by
// 2^(threshold_left_shift - 8) otherwise, and at 720p and above entries 2 and
// 3 keep the full base instead of a quarter of it.
static void set_vbp_thresholds_key_frame(AV1_COMP *cpi, int64_t thresholds[],
                                         int64_t threshold_base,
                                         int threshold_left_shift,
                                         int num_pixels) {
  const int force_large =
      cpi->sf.rt_sf.force_large_partition_blocks_intra != 0;
  int64_t base = threshold_base;
  if (force_large) {
    const int ref_shift = (cpi->oxcf.mode == ALLINTRA) ? 7 : 8;
    const int shift_steps = threshold_left_shift - ref_shift;
    assert(shift_steps >= 0);
    base <<= shift_steps;
  }

  thresholds[0] = base;
  thresholds[1] = base;
  thresholds[4] = base << 2;

  if (num_pixels < RESOLUTION_720P) {
    thresholds[2] = base / 3;
    thresholds[3] = base >> 1;
    return;
  }

  const int shift_val = force_large ? 0 : 2;
  thresholds[2] = base >> shift_val;
  thresholds[3] = base >> shift_val;
}
560
// Adjusts thresholds[1..3] according to the frame resolution (|num_pixels|).
//  - At 720p and above thresholds[3] is doubled.
//  - Above 288p only thresholds[2] is replaced, by a resolution (and, from
//    1080p on, content / speed) dependent multiple of |threshold_base|.
//  - At 288p and below thresholds[1..3] are derived from |current_qindex|:
//    fixed multiples of the base below the low and at/above the high qindex
//    limit, and a linear interpolation between the two in between. The pair
//    of limits is picked from a table by the var_part_based_on_qidx speed
//    feature and |source_sad_rd|.
static inline void tune_thresh_based_on_resolution(
    AV1_COMP *cpi, int64_t thresholds[], int64_t threshold_base,
    int current_qindex, int source_sad_rd, int num_pixels) {
  if (num_pixels >= RESOLUTION_720P) thresholds[3] = thresholds[3] << 1;

  if (num_pixels > RESOLUTION_288P) {
    if (num_pixels < RESOLUTION_720P) {
      thresholds[2] = (5 * threshold_base) >> 2;
    } else if (num_pixels < RESOLUTION_1080P) {
      thresholds[2] = threshold_base << 1;
    } else if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
      // num_pixels >= RESOLUTION_1080P, screen content
      thresholds[2] = (num_pixels < RESOLUTION_1440P)
                          ? (5 * threshold_base) >> 1
                          : (7 * threshold_base) >> 1;
    } else {
      // num_pixels >= RESOLUTION_1080P, non-screen content
      thresholds[2] = (cpi->oxcf.speed > 7 ? 6 : 3) * threshold_base;
    }
    return;
  }

  // num_pixels <= RESOLUTION_288P
  // { low, high } qindex limits, one row per selection index.
  static const int qindex_thr[5][2] = {
    { 200, 220 }, { 140, 170 }, { 120, 150 }, { 200, 210 }, { 170, 220 },
  };
  const int qidx_mode = cpi->sf.rt_sf.var_part_based_on_qidx;
  int row = 0;
  // Modes 1 and 2 select their row only for low source SAD; modes 3 and up
  // always select their own row.
  if (qidx_mode >= 3 || (qidx_mode >= 1 && source_sad_rd <= kLowSad)) {
    row = qidx_mode;
  }
  const int qindex_low_thr = qindex_thr[row][0];
  const int qindex_high_thr = qindex_thr[row][1];

  if (current_qindex >= qindex_high_thr) {
    const int64_t base_high = (5 * threshold_base) >> 1;
    thresholds[1] = base_high >> 3;
    thresholds[2] = base_high << 2;
    thresholds[3] = base_high << 5;
    return;
  }
  if (current_qindex < qindex_low_thr) {
    thresholds[1] = threshold_base >> 3;
    thresholds[2] = threshold_base >> 1;
    thresholds[3] = threshold_base << 3;
    return;
  }

  // qindex_low_thr <= current_qindex < qindex_high_thr: interpolate.
  const int64_t dist_from_low = current_qindex - qindex_low_thr;
  const int64_t dist_to_high = qindex_high_thr - current_qindex;
  int64_t span = qindex_high_thr - qindex_low_thr;
  if (span <= 0) span = 1;
  const int64_t base_high = (5 * threshold_base) >> 1;
  // The interpolated base is itself the input of the three thresholds below.
  const int64_t base =
      (dist_from_low * base_high + dist_to_high * threshold_base) / span;
  thresholds[1] = base >> 3;
  thresholds[2] = ((dist_from_low * base) + dist_to_high * (base >> 1)) / span;
  thresholds[3] =
      ((dist_from_low * (base << 5)) + dist_to_high * (base << 3)) / span;
}
625
626 // Increase the base partition threshold, based on content and noise level.
tune_base_thresh_content(AV1_COMP * cpi,int64_t threshold_base,int content_lowsumdiff,int source_sad_nonrd,int num_pixels)627 static inline int64_t tune_base_thresh_content(AV1_COMP *cpi,
628 int64_t threshold_base,
629 int content_lowsumdiff,
630 int source_sad_nonrd,
631 int num_pixels) {
632 AV1_COMMON *const cm = &cpi->common;
633 int64_t updated_thresh_base = threshold_base;
634 if (cpi->noise_estimate.enabled && content_lowsumdiff &&
635 num_pixels > RESOLUTION_480P && cm->current_frame.frame_number > 60) {
636 NOISE_LEVEL noise_level =
637 av1_noise_estimate_extract_level(&cpi->noise_estimate);
638 if (noise_level == kHigh)
639 updated_thresh_base = (5 * updated_thresh_base) >> 1;
640 else if (noise_level == kMedium &&
641 !cpi->sf.rt_sf.prefer_large_partition_blocks)
642 updated_thresh_base = (5 * updated_thresh_base) >> 2;
643 }
644 updated_thresh_base = scale_part_thresh_content(
645 updated_thresh_base, cpi->oxcf.speed,
646 cpi->ppi->rtc_ref.non_reference_frame, cpi->rc.frame_source_sad == 0);
647 if (cpi->oxcf.speed >= 11 && source_sad_nonrd > kLowSad &&
648 cpi->rc.high_motion_content_screen_rtc)
649 updated_thresh_base = updated_thresh_base << 5;
650 return updated_thresh_base;
651 }
652
// Computes the variance partitioning thresholds for the current frame.
// The base threshold is the AC quantizer step for |qindex|, multiplied by 120
// on intra-only frames. Intra-only frames are handled entirely by
// set_vbp_thresholds_key_frame(); for the other frames the base is tuned by
// content, thresholds[0], [1] and [3] are seeded from it, and the result is
// further tuned by resolution and by the frame's base qindex.
static inline void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
                                      uint64_t blk_sad, int qindex,
                                      int content_lowsumdiff,
                                      int source_sad_nonrd, int source_sad_rd,
                                      bool is_segment_id_boosted,
                                      int lighting_change) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_pixels = cm->width * cm->height;
  const int threshold_left_shift = cpi->sf.rt_sf.var_part_split_threshold_shift;
  const int intra_only = frame_is_intra_only(cm);
  const int ac_q = av1_ac_quant_QTX(qindex, 0, cm->seq_params->bit_depth);
  const int multiplier = intra_only ? 120 : 1;
  const int64_t initial_base = (int64_t)(multiplier * ac_q);

  if (intra_only) {
    set_vbp_thresholds_key_frame(cpi, thresholds, initial_base,
                                 threshold_left_shift, num_pixels);
    return;
  }

  const int64_t threshold_base = tune_base_thresh_content(
      cpi, initial_base, content_lowsumdiff, source_sad_nonrd, num_pixels);
  thresholds[0] = threshold_base >> 1;
  thresholds[1] = threshold_base;
  thresholds[3] = threshold_base << threshold_left_shift;

  // Note that the tuning below uses the frame's base qindex, not |qindex|.
  const int current_qindex = cm->quant_params.base_qindex;
  tune_thresh_based_on_resolution(cpi, thresholds, threshold_base,
                                  current_qindex, source_sad_rd, num_pixels);
  tune_thresh_based_on_qindex(cpi, thresholds, blk_sad, current_qindex,
                              num_pixels, is_segment_id_boosted,
                              source_sad_nonrd, lighting_change);
}
687
688 // Set temporal variance low flag for superblock 64x64.
689 // Only first 25 in the array are used in this case.
set_low_temp_var_flag_64x64(CommonModeInfoParams * mi_params,PartitionSearchInfo * part_info,MACROBLOCKD * xd,VP64x64 * vt,const int64_t thresholds[],int mi_col,int mi_row)690 static inline void set_low_temp_var_flag_64x64(CommonModeInfoParams *mi_params,
691 PartitionSearchInfo *part_info,
692 MACROBLOCKD *xd, VP64x64 *vt,
693 const int64_t thresholds[],
694 int mi_col, int mi_row) {
695 if (xd->mi[0]->bsize == BLOCK_64X64) {
696 if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
697 part_info->variance_low[0] = 1;
698 } else if (xd->mi[0]->bsize == BLOCK_64X32) {
699 for (int part_idx = 0; part_idx < 2; part_idx++) {
700 if (vt->part_variances.horz[part_idx].variance < (thresholds[0] >> 2))
701 part_info->variance_low[part_idx + 1] = 1;
702 }
703 } else if (xd->mi[0]->bsize == BLOCK_32X64) {
704 for (int part_idx = 0; part_idx < 2; part_idx++) {
705 if (vt->part_variances.vert[part_idx].variance < (thresholds[0] >> 2))
706 part_info->variance_low[part_idx + 3] = 1;
707 }
708 } else {
709 static const int idx[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } };
710 for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
711 const int idx_str = mi_params->mi_stride * (mi_row + idx[lvl1_idx][0]) +
712 mi_col + idx[lvl1_idx][1];
713 MB_MODE_INFO **this_mi = mi_params->mi_grid_base + idx_str;
714
715 if (mi_params->mi_cols <= mi_col + idx[lvl1_idx][1] ||
716 mi_params->mi_rows <= mi_row + idx[lvl1_idx][0])
717 continue;
718
719 if (*this_mi == NULL) continue;
720
721 if ((*this_mi)->bsize == BLOCK_32X32) {
722 int64_t threshold_32x32 = (5 * thresholds[1]) >> 3;
723 if (vt->split[lvl1_idx].part_variances.none.variance < threshold_32x32)
724 part_info->variance_low[lvl1_idx + 5] = 1;
725 } else {
726 // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
727 // inside.
728 if ((*this_mi)->bsize == BLOCK_16X16 ||
729 (*this_mi)->bsize == BLOCK_32X16 ||
730 (*this_mi)->bsize == BLOCK_16X32) {
731 for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
732 if (vt->split[lvl1_idx]
733 .split[lvl2_idx]
734 .part_variances.none.variance < (thresholds[2] >> 8))
735 part_info->variance_low[(lvl1_idx << 2) + lvl2_idx + 9] = 1;
736 }
737 }
738 }
739 }
740 }
741 }
742
// Sets the low temporal variance flags in part_info->variance_low[] for a
// 128x128 superblock. The partition sizes are read from the mode info (xd->mi
// and the mi grid of |mi_params|) and the variances from the tree |vt|.
// A flag is set to 1 when the variance of the corresponding block is below a
// fraction of the matching entry in |thresholds|; flags are never cleared
// here.
//
// Index layout of variance_low[] written by this function:
//   [0]        128x128 block
//   [1..2]     128x64 partitions
//   [3..4]     64x128 partitions
//   [5..8]     64x64 blocks
//   [9..16]    64x32 partitions, two per 64x64 block
//   [17..24]   32x64 partitions, two per 64x64 block
//   [25..40]   32x32 blocks
//   [41..104]  16x16 blocks
static inline void set_low_temp_var_flag_128x128(
    CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info,
    MACROBLOCKD *xd, VP128x128 *vt, const int64_t thresholds[], int mi_col,
    int mi_row) {
  // Whole superblock coded as one 128x128 block.
  if (xd->mi[0]->bsize == BLOCK_128X128) {
    if (vt->part_variances.none.variance < (thresholds[0] >> 1))
      part_info->variance_low[0] = 1;
    return;
  }

  // Superblock coded as two 128x64 partitions.
  if (xd->mi[0]->bsize == BLOCK_128X64) {
    for (int half = 0; half < 2; half++) {
      if (vt->part_variances.horz[half].variance < (thresholds[0] >> 2))
        part_info->variance_low[1 + half] = 1;
    }
    return;
  }

  // Superblock coded as two 64x128 partitions.
  if (xd->mi[0]->bsize == BLOCK_64X128) {
    for (int half = 0; half < 2; half++) {
      if (vt->part_variances.vert[half].variance < (thresholds[0] >> 2))
        part_info->variance_low[3 + half] = 1;
    }
    return;
  }

  // { row, col } offsets (in mi units) of the four 64x64 blocks inside the
  // superblock and of the four 32x32 blocks inside a 64x64 block.
  static const int off64[4][2] = {
    { 0, 0 }, { 0, 16 }, { 16, 0 }, { 16, 16 }
  };
  static const int off32[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } };

  for (int b64 = 0; b64 < 4; b64++) {
    const int row64 = mi_row + off64[b64][0];
    const int col64 = mi_col + off64[b64][1];
    const int grid_pos64 = mi_params->mi_stride * row64 + col64;
    MB_MODE_INFO **mi_64 = mi_params->mi_grid_base + grid_pos64;
    if (*mi_64 == NULL) continue;
    // Ignore 64x64 blocks that start outside the frame.
    if (col64 >= mi_params->mi_cols || row64 >= mi_params->mi_rows) continue;

    const int64_t thresh64 = (5 * thresholds[1]) >> 3;
    switch ((*mi_64)->bsize) {
      case BLOCK_64X64:
        if (vt->split[b64].part_variances.none.variance < thresh64)
          part_info->variance_low[5 + b64] = 1;
        break;
      case BLOCK_64X32:
        for (int half = 0; half < 2; half++) {
          if (vt->split[b64].part_variances.horz[half].variance <
              (thresh64 >> 1))
            part_info->variance_low[9 + (b64 << 1) + half] = 1;
        }
        break;
      case BLOCK_32X64:
        for (int half = 0; half < 2; half++) {
          if (vt->split[b64].part_variances.vert[half].variance <
              (thresh64 >> 1))
            part_info->variance_low[17 + (b64 << 1) + half] = 1;
        }
        break;
      default:
        // Anything smaller: look at each 32x32 block of this 64x64 block.
        for (int b32 = 0; b32 < 4; b32++) {
          const int row32 = row64 + off32[b32][0];
          const int col32 = col64 + off32[b32][1];
          const int grid_pos32 =
              mi_params->mi_stride * off32[b32][0] + off32[b32][1];
          MB_MODE_INFO **mi_32 =
              mi_params->mi_grid_base + grid_pos64 + grid_pos32;
          if (*mi_32 == NULL) continue;
          // Ignore 32x32 blocks that start outside the frame.
          if (col32 >= mi_params->mi_cols || row32 >= mi_params->mi_rows)
            continue;

          const int64_t thresh32 = (5 * thresholds[2]) >> 3;
          if ((*mi_32)->bsize == BLOCK_32X32) {
            if (vt->split[b64].split[b32].part_variances.none.variance <
                thresh32)
              part_info->variance_low[25 + (b64 << 2) + b32] = 1;
            continue;
          }

          // For 16x16, 32x16 and 16x32 partitions the flag is kept per 16x16
          // block; other sizes get no flag.
          if ((*mi_32)->bsize != BLOCK_16X16 &&
              (*mi_32)->bsize != BLOCK_32X16 && (*mi_32)->bsize != BLOCK_16X32)
            continue;

          for (int b16 = 0; b16 < 4; b16++) {
            VPartVar *var16 =
                &vt->split[b64].split[b32].split[b16].part_variances.none;
            if (var16->variance < (thresholds[3] >> 8))
              part_info->variance_low[41 + (b64 << 4) + (b32 << 2) + b16] = 1;
          }
        }
        break;
    }
  }
}
827
// Checks the temporal variance for block sizes >= 16x16 and sets the
// variance_low flags of |part_info| for blocks whose temporal variance is
// small. This is only done when LAST_FRAME was selected as the partitioning
// reference. The variance thresholds can be adjusted: the higher, the more
// aggressive. |is_small_sb| selects the 64x64 superblock variant, which works
// on the first 64x64 subtree of |vt|.
static inline void set_low_temp_var_flag(
    AV1_COMP *cpi, PartitionSearchInfo *part_info, MACROBLOCKD *xd,
    VP128x128 *vt, int64_t thresholds[], MV_REFERENCE_FRAME ref_frame_partition,
    int mi_col, int mi_row, const bool is_small_sb) {
  if (ref_frame_partition != LAST_FRAME) return;

  AV1_COMMON *const cm = &cpi->common;
  if (!is_small_sb) {
    set_low_temp_var_flag_128x128(&cm->mi_params, part_info, xd, vt, thresholds,
                                  mi_col, mi_row);
    return;
  }
  set_low_temp_var_flag_64x64(&cm->mi_params, part_info, xd, &(vt->split[0]),
                              thresholds, mi_col, mi_row);
}
846
// Lookup table for 64x64 superblocks: maps the position of a 16x16 block,
// indexed as [row][col] in units of 16x16 blocks inside the superblock, to its
// entry in the variance_low[] array. The values follow the layout
// 9 + (index of the 32x32 block << 2) + (index of the 16x16 block inside that
// 32x32 block).
static const int pos_shift_16x16[4][4] = {
  { 9, 10, 13, 14 }, { 11, 12, 15, 16 }, { 17, 18, 21, 22 }, { 19, 20, 23, 24 }
};
850
// Returns the low temporal variance flag stored in |variance_low| for the
// block of size |bsize| located at (mi_row, mi_col), for 64x64 superblocks.
// Returns 0 for block sizes that carry no flag, and for 64x32 / 32x64 blocks
// whose position does not match one of the two partitions.
//
// variance_low[] layout read here:
//   [0] 64x64, [1..2] 64x32, [3..4] 32x64, [5..8] 32x32, [9..24] 16x16.
int av1_get_force_skip_low_temp_var_small_sb(const uint8_t *variance_low,
                                             int mi_row, int mi_col,
                                             BLOCK_SIZE bsize) {
  // Position of the block inside the superblock, in mi units.
  const int row_in_sb = mi_row & 0xF;
  const int col_in_sb = mi_col & 0xF;
  const int at_top = (row_in_sb == 0);
  const int at_left = (col_in_sb == 0);

  switch (bsize) {
    case BLOCK_64X64: return variance_low[0];
    case BLOCK_64X32:
      // Both partitions start at the left edge: [1] at the top, [2] below.
      if (!at_left) return 0;
      return variance_low[at_top ? 1 : 2];
    case BLOCK_32X64:
      // Both partitions start at the top edge: [3] at the left, [4] right.
      if (!at_top) return 0;
      return variance_low[at_left ? 3 : 4];
    case BLOCK_32X32:
      // [5] top-left, [6] top-right, [7] bottom-left, [8] bottom-right.
      return variance_low[5 + (at_top ? 0 : 2) + (at_left ? 0 : 1)];
    case BLOCK_32X16:
    case BLOCK_16X32:
    case BLOCK_16X16:
      // The flag is kept per 16x16 block; index the table by the 16x16 row
      // and column inside the superblock.
      return variance_low[pos_shift_16x16[row_in_sb >> 2][col_in_sb >> 2]];
    default: return 0;
  }
}
899
// Returns the low temporal variance flag stored in |variance_low| for the
// block of size |bsize| located at (mi_row, mi_col), for 128x128 superblocks.
// Returns 0 for block sizes that carry no flag.
//
// variance_low[] layout read here:
//   [0] 128x128, [1..2] 128x64, [3..4] 64x128, [5..8] 64x64,
//   [9..16] 64x32, [17..24] 32x64, [25..40] 32x32, [41..104] 16x16.
int av1_get_force_skip_low_temp_var(const uint8_t *variance_low, int mi_row,
                                    int mi_col, BLOCK_SIZE bsize) {
  // Position of the block inside the superblock, in mi units.
  const int row_in_sb = mi_row & 0x1F;
  const int col_in_sb = mi_col & 0x1F;
  // Which 32-pixel half of the enclosing 64x64 block the position falls in.
  const int row_half32 = (row_in_sb >> 3) & 1;
  const int col_half32 = (col_in_sb >> 3) & 1;
  // Raster index ((row << 1) + col) of the 64x64 block inside the superblock,
  // of the 32x32 block inside that 64x64 block, and of the 16x16 block inside
  // that 32x32 block.
  const int idx64 = ((row_in_sb >> 4) << 1) + (col_in_sb >> 4);
  const int idx32 = (row_half32 << 1) + col_half32;
  const int idx16 = (((row_in_sb >> 2) & 1) << 1) + ((col_in_sb >> 2) & 1);

  switch (bsize) {
    case BLOCK_128X128: return variance_low[0];
    case BLOCK_128X64:
      assert(col_in_sb == 0);
      return variance_low[1 + (row_in_sb != 0)];
    case BLOCK_64X128:
      assert(row_in_sb == 0);
      return variance_low[3 + (col_in_sb != 0)];
    case BLOCK_64X64:
      // Location of this 64x64 block inside the 128x128 superblock.
      return variance_low[5 + idx64];
    case BLOCK_64X32:
      /*
        With x the 64-pixel column and y the 32-pixel row of the block inside
        the superblock, the offset added to 9 is:
        .---------------.---------------.
        | x=0,y=0,idx=0 | x=1,y=0,idx=2 |
        :---------------+---------------:
        | x=0,y=1,idx=1 | x=1,y=1,idx=3 |
        :---------------+---------------:
        | x=0,y=2,idx=4 | x=1,y=2,idx=6 |
        :---------------+---------------:
        | x=0,y=3,idx=5 | x=1,y=3,idx=7 |
        '---------------'---------------'
      */
      return variance_low[9 + (idx64 << 1) + row_half32];
    case BLOCK_32X64:
      // Two entries per 64x64 block: left half first, then right half.
      return variance_low[17 + (idx64 << 1) + col_half32];
    case BLOCK_32X32: return variance_low[25 + (idx64 << 2) + idx32];
    case BLOCK_32X16:
    case BLOCK_16X32:
    case BLOCK_16X16:
      // The flag is kept per 16x16 block.
      return variance_low[41 + (idx64 << 4) + (idx32 << 2) + idx16];
    default: return 0;
  }
}
972
// Initializes the variance based partitioning thresholds stored in
// cpi->vbp_info for the given |qindex|. Does nothing unless the partition
// search type is VAR_BASED_PARTITION.
void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int qindex,
                                           int content_lowsumdiff) {
  if (cpi->sf.part_sf.partition_search_type != VAR_BASED_PARTITION) return;

  set_vbp_thresholds(cpi, cpi->vbp_info.thresholds, 0, qindex,
                     content_lowsumdiff, 0, 0, 0, 0);
  // The threshold below is not changed locally.
  cpi->vbp_info.threshold_minmax = 15 + (qindex >> 3);
}
985
// Compares the chroma SAD of the superblock against its luma SAD and stores
// the result in the color sensitivity flags of |x|:
//  - x->color_sensitivity_sb[]: 1 if the chroma SAD is above
//    y_sad >> shift_upper_limit, 0 if it is below y_sad >> shift_lower_limit,
//    2 for the borderline case in between.
//  - x->color_sensitivity_sb_g[] / _alt[]: whether the chroma SAD against
//    GOLDEN / ALTREF exceeds y_sad_g / y_sad_alt divided by fac_uv.
// |uv_sad| receives the chroma SADs of the U and V planes (index plane - 1).
// A y_sad_g / y_sad_alt equal to UINT_MAX means that reference is not
// evaluated. Returns without touching anything on key frames or when
// monochrome is enabled.
static inline void chroma_check(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                                unsigned int y_sad, unsigned int y_sad_g,
                                unsigned int y_sad_alt, bool is_key_frame,
                                bool zero_motion, unsigned int *uv_sad) {
  if (is_key_frame || cpi->oxcf.tool_cfg.enable_monochrome) return;

  MACROBLOCKD *xd = &x->e_mbd;
  const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
  const int is_screen = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;

  // Use lower threshold (more conservative in setting color flag) for
  // higher resolutions non-screen, which tend to have more camera noise.
  // Since this may be used to skip compound mode in nonrd pickmode, which
  // is generally more effective for higher resolutions, better to be more
  // conservative.
  int fac_uv = 6;
  if (!is_screen) {
    fac_uv =
        (cpi->common.width * cpi->common.height >= RESOLUTION_1080P) ? 3 : 5;
  }

  // Shifts applied to y_sad to derive the upper and lower chroma thresholds.
  int shift_upper_limit = 1;
  int shift_lower_limit = 3;
  if (is_screen && cpi->rc.high_source_sad) {
    shift_lower_limit = 7;
  } else if (is_screen && cpi->rc.percent_blocks_with_motion > 90 &&
             cpi->rc.frame_source_sad > 10000 && source_sad_nonrd > kLowSad) {
    shift_lower_limit = 8;
    shift_upper_limit = 3;
  } else if (source_sad_nonrd >= kMedSad && x->source_variance > 500 &&
             cpi->common.width * cpi->common.height >= 640 * 360) {
    shift_upper_limit = 2;
    shift_lower_limit = source_sad_nonrd > kMedSad ? 5 : 4;
  }

  MB_MODE_INFO *mi = xd->mi[0];
  const AV1_COMMON *const cm = &cpi->common;
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
  const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
  const YV12_BUFFER_CONFIG *yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME);
  // The LAST_FRAME scale factors are used for all three references below.
  const struct scale_factors *const sf =
      get_ref_scale_factors_const(cm, LAST_FRAME);
  struct buf_2d dst;
  unsigned int uv_sad_g = 0;
  unsigned int uv_sad_alt = 0;

  for (int plane = AOM_PLANE_U; plane < MAX_MB_PLANE; ++plane) {
    struct macroblock_plane *p = &x->plane[plane];
    struct macroblockd_plane *pd = &xd->plane[plane];
    const int uv_idx = plane - 1;
    const int is_u_plane = (plane == 1);
    const BLOCK_SIZE bs =
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);

    if (bs != BLOCK_INVALID) {
      // For last: select the buffer to compare the source against.
      const uint8_t *ref_buf;
      int ref_stride;
      if (!zero_motion) {
        ref_buf = pd->dst.buf;
        ref_stride = pd->dst.stride;
      } else if (mi->ref_frame[0] == LAST_FRAME) {
        ref_buf = pd->pre[0].buf;
        ref_stride = pd->pre[0].stride;
      } else {
        uint8_t *src = is_u_plane ? yv12->u_buffer : yv12->v_buffer;
        setup_pred_plane(&dst, mi->bsize, src, yv12->uv_crop_width,
                         yv12->uv_crop_height, yv12->uv_stride, xd->mi_row,
                         xd->mi_col, sf, pd->subsampling_x, pd->subsampling_y);
        ref_buf = dst.buf;
        ref_stride = dst.stride;
      }
      uv_sad[uv_idx] = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
                                                ref_buf, ref_stride);

      // For golden:
      if (y_sad_g != UINT_MAX) {
        uint8_t *src = is_u_plane ? yv12_g->u_buffer : yv12_g->v_buffer;
        setup_pred_plane(&dst, mi->bsize, src, yv12_g->uv_crop_width,
                         yv12_g->uv_crop_height, yv12_g->uv_stride, xd->mi_row,
                         xd->mi_col, sf, pd->subsampling_x, pd->subsampling_y);
        uv_sad_g = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, dst.buf,
                                            dst.stride);
      }

      // For altref:
      if (y_sad_alt != UINT_MAX) {
        uint8_t *src = is_u_plane ? yv12_alt->u_buffer : yv12_alt->v_buffer;
        setup_pred_plane(&dst, mi->bsize, src, yv12_alt->uv_crop_width,
                         yv12_alt->uv_crop_height, yv12_alt->uv_stride,
                         xd->mi_row, xd->mi_col, sf, pd->subsampling_x,
                         pd->subsampling_y);
        uv_sad_alt = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
                                              dst.buf, dst.stride);
      }
    }

    // Classify the plane. The borderline case (2) is to be refined at coding
    // block level in nonrd_pickmode, for coding block size < sb_size.
    int sensitivity = 2;
    if (uv_sad[uv_idx] > (y_sad >> shift_upper_limit))
      sensitivity = 1;
    else if (uv_sad[uv_idx] < (y_sad >> shift_lower_limit))
      sensitivity = 0;
    x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = sensitivity;

    x->color_sensitivity_sb_g[COLOR_SENS_IDX(plane)] =
        uv_sad_g > y_sad_g / fac_uv;
    x->color_sensitivity_sb_alt[COLOR_SENS_IDX(plane)] =
        uv_sad_alt > y_sad_alt / fac_uv;
  }
}
1099
// Fills the leaves of the variance tree |vt| for one superblock and
// initializes the per-block entries of |force_split|.
//
// For every 16x16 block of the superblock:
//  - On key frames, the four 8x8 sub-blocks are filled with
//    fill_variance_4x4avg() from the source only (dst_buf must be NULL).
//    avg_16x16 / maxvar_16x16 / minvar_16x16 keep their initial values
//    (0 / 0 / INT_MAX) in this case.
//  - Otherwise, fill_variance_8x8avg() is run on the source and |dst_buf|,
//    the 16x16 node is completed with fill_variance_tree() and
//    get_variance(), and its variance is accumulated into avg_16x16 (as a
//    sum) and tracked in minvar_16x16 / maxvar_16x16 of the enclosing 32x32
//    block. If the variance exceeds thresholds[3], a split is forced for the
//    16x16 block and all of its ancestors.
//
// force_split[] index layout used here:
//   [0]        whole superblock (only ever set to PART_EVAL_ONLY_SPLIT here,
//              never reset)
//   [1..4]     64x64 blocks
//   [5..20]    32x32 blocks
//   [21..84]   16x16 blocks
// The arrays avg_16x16, maxvar_16x16 and minvar_16x16 are indexed as
// [64x64 block][32x32 block].
static void fill_variance_tree_leaves(
    AV1_COMP *cpi, MACROBLOCK *x, VP128x128 *vt, PART_EVAL_STATUS *force_split,
    int avg_16x16[][4], int maxvar_16x16[][4], int minvar_16x16[][4],
    int64_t *thresholds, const uint8_t *src_buf, int src_stride,
    const uint8_t *dst_buf, int dst_stride, bool is_key_frame,
    const bool is_small_sb) {
  MACROBLOCKD *xd = &x->e_mbd;
  // A 64x64 superblock only uses the first 64x64 subtree of |vt|.
  const int num_64x64_blocks = is_small_sb ? 1 : 4;
  // TODO(kyslov) Bring back compute_minmax_variance with content type detection
  // While this is 0, the minmax branch further below is never taken.
  const int compute_minmax_variance = 0;
  const int segment_id = xd->mi[0]->segment_id;
  int pixels_wide = 128, pixels_high = 128;
  int border_offset_4x4 = 0;
  int temporal_denoising = cpi->sf.rt_sf.use_rtc_tf;
  // dst_buf pointer is not used for is_key_frame, so it should be NULL.
  assert(IMPLIES(is_key_frame, dst_buf == NULL));
  if (is_small_sb) {
    pixels_wide = 64;
    pixels_high = 64;
  }
  // Shrink the processed area when the superblock extends past the right or
  // bottom edge (negative edge distance).
  // NOTE(review): the >> 3 presumably converts from 1/8 pel units to pixels -
  // confirm against the definition of mb_to_right_edge / mb_to_bottom_edge.
  if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
  if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
#if CONFIG_AV1_TEMPORAL_DENOISING
  temporal_denoising |= cpi->oxcf.noise_sensitivity;
#endif
  // For temporal filtering or temporal denoiser enabled: since the source
  // is modified we need to avoid 4x4 avg along superblock boundary, since
  // simd code will load 8 pixels for 4x4 avg and so can access source
  // data outside superblock (while its being modified by temporal filter).
  // Temporal filtering is never done on key frames.
  if (!is_key_frame && temporal_denoising) border_offset_4x4 = 4;
  for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; blk64_idx++) {
    const int x64_idx = GET_BLK_IDX_X(blk64_idx, 6);
    const int y64_idx = GET_BLK_IDX_Y(blk64_idx, 6);
    const int blk64_scale_idx = blk64_idx << 2;
    force_split[blk64_idx + 1] = PART_EVAL_ALL;

    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
      const int x32_idx = x64_idx + GET_BLK_IDX_X(lvl1_idx, 5);
      const int y32_idx = y64_idx + GET_BLK_IDX_Y(lvl1_idx, 5);
      const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
      force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ALL;
      // Reset the per-32x32 statistics gathered over its 16x16 blocks.
      avg_16x16[blk64_idx][lvl1_idx] = 0;
      maxvar_16x16[blk64_idx][lvl1_idx] = 0;
      minvar_16x16[blk64_idx][lvl1_idx] = INT_MAX;
      for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
        const int x16_idx = x32_idx + GET_BLK_IDX_X(lvl2_idx, 4);
        const int y16_idx = y32_idx + GET_BLK_IDX_Y(lvl2_idx, 4);
        // Position of this 16x16 block in force_split[].
        const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
        VP16x16 *vst = &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
        force_split[split_index] = PART_EVAL_ALL;
        if (is_key_frame) {
          // Go down to 4x4 down-sampling for variance.
          for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++) {
            const int x8_idx = x16_idx + GET_BLK_IDX_X(lvl3_idx, 3);
            const int y8_idx = y16_idx + GET_BLK_IDX_Y(lvl3_idx, 3);
            VP8x8 *vst2 = &vst->split[lvl3_idx];
            fill_variance_4x4avg(src_buf, src_stride, x8_idx, y8_idx, vst2,
#if CONFIG_AV1_HIGHBITDEPTH
                                 xd->cur_buf->flags,
#endif
                                 pixels_wide, pixels_high, border_offset_4x4);
          }
        } else {
          fill_variance_8x8avg(src_buf, src_stride, dst_buf, dst_stride,
                               x16_idx, y16_idx, vst, is_cur_buf_hbd(xd),
                               pixels_wide, pixels_high);

          fill_variance_tree(vst, BLOCK_16X16);
          VPartVar *none_var = &vt->split[blk64_idx]
                                    .split[lvl1_idx]
                                    .split[lvl2_idx]
                                    .part_variances.none;
          get_variance(none_var);
          const int val_none_var = none_var->variance;
          // Accumulate the sum, minimum and maximum of the 16x16 variances
          // for the enclosing 32x32 block.
          avg_16x16[blk64_idx][lvl1_idx] += val_none_var;
          minvar_16x16[blk64_idx][lvl1_idx] =
              AOMMIN(minvar_16x16[blk64_idx][lvl1_idx], val_none_var);
          maxvar_16x16[blk64_idx][lvl1_idx] =
              AOMMAX(maxvar_16x16[blk64_idx][lvl1_idx], val_none_var);
          if (val_none_var > thresholds[3]) {
            // 16X16 variance is above threshold for split, so force split to
            // 8x8 for this 16x16 block (this also forces splits for upper
            // levels).
            force_split[split_index] = PART_EVAL_ONLY_SPLIT;
            force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
            force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
            force_split[0] = PART_EVAL_ONLY_SPLIT;
          } else if (!cyclic_refresh_segment_id_boosted(segment_id) &&
                     compute_minmax_variance && val_none_var > thresholds[2]) {
            // We have some nominal amount of 16x16 variance (based on average),
            // compute the minmax over the 8x8 sub-blocks, and if above
            // threshold, force split to 8x8 block for this 16x16 block.
            int minmax = compute_minmax_8x8(src_buf, src_stride, dst_buf,
                                            dst_stride, x16_idx, y16_idx,
#if CONFIG_AV1_HIGHBITDEPTH
                                            xd->cur_buf->flags,
#endif
                                            pixels_wide, pixels_high);
            const int thresh_minmax = (int)cpi->vbp_info.threshold_minmax;
            if (minmax > thresh_minmax) {
              force_split[split_index] = PART_EVAL_ONLY_SPLIT;
              force_split[5 + blk64_scale_idx + lvl1_idx] =
                  PART_EVAL_ONLY_SPLIT;
              force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
              force_split[0] = PART_EVAL_ONLY_SPLIT;
            }
          }
        }
      }
    }
  }
}
1213
// Picks the reference frame used for partitioning and stores it in
// |ref_frame_partition|. GOLDEN or ALTREF is chosen only when its SAD is
// below 0.9 times the current |y_sad| and strictly below the SAD of the other
// candidate; otherwise LAST_FRAME is used.
//
// When GOLDEN or ALTREF is chosen, the prediction planes of |xd| are set up
// for that reference, |mi| is switched to it with a zero motion vector,
// |y_sad| is replaced by the SAD of the chosen reference, and both
// x->nonrd_prune_ref_frame_search and x->sb_me_partition are cleared.
// When LAST_FRAME is kept, only x->nonrd_prune_ref_frame_search is updated
// (from the speed feature of the same name).
static inline void set_ref_frame_for_partition(
    AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
    MV_REFERENCE_FRAME *ref_frame_partition, MB_MODE_INFO *mi,
    unsigned int *y_sad, unsigned int *y_sad_g, unsigned int *y_sad_alt,
    const YV12_BUFFER_CONFIG *yv12_g, const YV12_BUFFER_CONFIG *yv12_alt,
    int mi_row, int mi_col, int num_planes) {
  AV1_COMMON *const cm = &cpi->common;

  // The two conditions are mutually exclusive, since each requires its SAD
  // to be strictly smaller than the other one.
  const bool pick_golden = *y_sad_g < 0.9 * *y_sad && *y_sad_g < *y_sad_alt;
  const bool pick_altref =
      !pick_golden && *y_sad_alt < 0.9 * *y_sad && *y_sad_alt < *y_sad_g;

  if (!pick_golden && !pick_altref) {
    *ref_frame_partition = LAST_FRAME;
    x->nonrd_prune_ref_frame_search =
        cpi->sf.rt_sf.nonrd_prune_ref_frame_search;
    return;
  }

  const MV_REFERENCE_FRAME chosen_ref = pick_golden ? GOLDEN_FRAME
                                                    : ALTREF_FRAME;
  av1_setup_pre_planes(xd, 0, pick_golden ? yv12_g : yv12_alt, mi_row, mi_col,
                       get_ref_scale_factors(cm, chosen_ref), num_planes);
  mi->ref_frame[0] = chosen_ref;
  mi->mv[0].as_int = 0;
  *y_sad = pick_golden ? *y_sad_g : *y_sad_alt;
  *ref_frame_partition = chosen_ref;
  x->nonrd_prune_ref_frame_search = 0;
  x->sb_me_partition = 0;
}
1250
mv_distance(const FULLPEL_MV * mv0,const FULLPEL_MV * mv1)1251 static AOM_FORCE_INLINE int mv_distance(const FULLPEL_MV *mv0,
1252 const FULLPEL_MV *mv1) {
1253 return abs(mv0->row - mv1->row) + abs(mv0->col - mv1->col);
1254 }
1255
// Tests whether the motion vector of the above or left neighbour gives a
// lower superblock SAD against the LAST_FRAME prediction buffer than the
// current motion vector of the superblock. A neighbour is only considered if
// it is inter coded with LAST_FRAME as its first reference and its clamped
// full-pel MV differs from the current best MV (the left MV must also differ
// from the above MV). The neighbour wins if its SAD is strictly lower than
// the other neighbour's and below multi/8 of |*y_sad|; in that case |*y_sad|
// and the MV in xd->mi[0] are updated.
static inline void evaluate_neighbour_mvs(AV1_COMP *cpi, MACROBLOCK *x,
                                          unsigned int *y_sad, bool is_small_sb,
                                          int est_motion) {
  const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
  // TODO: test if this condition works with other speeds.
  if (est_motion > 2 && source_sad_nonrd > kMedSad) return;

  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mi = xd->mi[0];
  const BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;

  SubpelMvLimits subpel_mv_limits;
  const MV dummy_mv = { 0, 0 };
  av1_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, &dummy_mv);

  // Current best MV
  const FULLPEL_MV best_mv = get_fullmv_from_mv(&mi->mv[0].as_mv);

  // SAD of the above neighbour's MV, UINT_MAX when it is not evaluated.
  unsigned int above_y_sad = UINT_MAX;
  FULLPEL_MV above_mv = kZeroFullMv;
  if (xd->up_available && xd->above_mbmi->mode >= INTRA_MODE_END &&
      xd->above_mbmi->ref_frame[0] == LAST_FRAME) {
    MV clamped = xd->above_mbmi->mv[0].as_mv;
    clamp_mv(&clamped, &subpel_mv_limits);
    above_mv = get_fullmv_from_mv(&clamped);

    if (mv_distance(&best_mv, &above_mv) > 0) {
      uint8_t const *ref_buf =
          get_buf_from_fullmv(&xd->plane[0].pre[0], &above_mv);
      above_y_sad = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, ref_buf,
          xd->plane[0].pre[0].stride);
    }
  }

  // SAD of the left neighbour's MV, UINT_MAX when it is not evaluated.
  unsigned int left_y_sad = UINT_MAX;
  FULLPEL_MV left_mv = kZeroFullMv;
  if (xd->left_available && xd->left_mbmi->mode >= INTRA_MODE_END &&
      xd->left_mbmi->ref_frame[0] == LAST_FRAME) {
    MV clamped = xd->left_mbmi->mv[0].as_mv;
    clamp_mv(&clamped, &subpel_mv_limits);
    left_mv = get_fullmv_from_mv(&clamped);

    // Skip the SAD when the MV equals one that is already covered.
    if (mv_distance(&best_mv, &left_mv) > 0 &&
        mv_distance(&above_mv, &left_mv) > 0) {
      uint8_t const *ref_buf =
          get_buf_from_fullmv(&xd->plane[0].pre[0], &left_mv);
      left_y_sad = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, ref_buf,
          xd->plane[0].pre[0].stride);
    }
  }

  // A neighbour must beat the current SAD scaled by multi / 8.
  const int multi = (est_motion > 2 && source_sad_nonrd > kLowSad) ? 7 : 8;
  const unsigned int sad_limit = (multi * *y_sad) >> 3;

  // At most one neighbour can win since each must be strictly better than
  // the other; on a tie neither is used.
  if (above_y_sad < sad_limit && above_y_sad < left_y_sad) {
    *y_sad = above_y_sad;
    mi->mv[0].as_mv = get_mv_from_fullmv(&above_mv);
    clamp_mv(&mi->mv[0].as_mv, &subpel_mv_limits);
  } else if (left_y_sad < sad_limit && left_y_sad < above_y_sad) {
    *y_sad = left_y_sad;
    mi->mv[0].as_mv = get_mv_from_fullmv(&left_mv);
    clamp_mv(&mi->mv[0].as_mv, &subpel_mv_limits);
  }
}
1327
// Sets up the prediction planes of the superblock at (mi_row, mi_col) for
// variance based partitioning and computes the luma SADs against the
// candidate reference frames.
//
// Outputs:
//  - *y_sad_g / *y_sad_alt: superblock SAD against GOLDEN / ALTREF at zero
//    motion. They are only written when the respective reference is
//    evaluated; otherwise the caller's value is left untouched.
//    NOTE(review): callers presumably initialize all SAD outputs to UINT_MAX
//    (see the *y_sad == UINT_MAX test below) - confirm at the call site.
//  - *y_sad: SAD for LAST_FRAME (possibly after motion estimation and
//    neighbour MV evaluation), then replaced by the golden/altref SAD if
//    set_ref_frame_for_partition() selects one of those references.
//  - *y_sad_last: copy of the LAST_FRAME SAD, written only if LAST is used.
//  - *ref_frame_partition: the reference selected for partitioning.
// The first mode info of the superblock (xd->mi[0]) is updated with the
// chosen reference frame and motion vector. If that motion vector is
// non-zero, the inter predictor is built for all planes.
//
// The order of the av1_setup_pre_planes() calls matters: each call
// overwrites the prediction planes of |xd|, so the LAST_FRAME setup must come
// after the golden/altref SAD computations.
static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
                         unsigned int *y_sad_g, unsigned int *y_sad_alt,
                         unsigned int *y_sad_last,
                         MV_REFERENCE_FRAME *ref_frame_partition,
                         struct scale_factors *sf_no_scale, int mi_row,
                         int mi_col, bool is_small_sb, bool scaled_ref_last) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int num_planes = av1_num_planes(cm);
  bool scaled_ref_golden = false;
  bool scaled_ref_alt = false;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
  MB_MODE_INFO *mi = xd->mi[0];
  // Use the scaled version of LAST when the caller indicates it is scaled.
  const YV12_BUFFER_CONFIG *yv12 =
      scaled_ref_last ? av1_get_scaled_ref_frame(cpi, LAST_FRAME)
                      : get_ref_frame_yv12_buf(cm, LAST_FRAME);
  assert(yv12 != NULL);
  const YV12_BUFFER_CONFIG *yv12_g = NULL;
  const YV12_BUFFER_CONFIG *yv12_alt = NULL;
  // Check if LAST is a reference. For spatial layers always use it as
  // reference scaling.
  int use_last_ref = (cpi->ref_frame_flags & AOM_LAST_FLAG) ||
                     cpi->svc.number_spatial_layers > 1;
  int use_golden_ref = cpi->ref_frame_flags & AOM_GOLD_FLAG;
  int use_alt_ref = cpi->ppi->rtc_ref.set_ref_frame_config ||
                    cpi->sf.rt_sf.use_nonrd_altref_frame ||
                    (cpi->sf.rt_sf.use_comp_ref_nonrd &&
                     cpi->sf.rt_sf.ref_frame_comp_nonrd[2] == 1);

  // For 1 spatial layer: GOLDEN is another temporal reference.
  // Check if it should be used as reference for partitioning.
  if (cpi->svc.number_spatial_layers == 1 && use_golden_ref &&
      (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) {
    yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
    // Switch to the scaled reference if its size differs from the frame size.
    if (yv12_g && (yv12_g->y_crop_height != cm->height ||
                   yv12_g->y_crop_width != cm->width)) {
      yv12_g = av1_get_scaled_ref_frame(cpi, GOLDEN_FRAME);
      scaled_ref_golden = true;
    }
    // Skip the SAD if GOLDEN is the same buffer as LAST.
    if (yv12_g && yv12_g != yv12) {
      av1_setup_pre_planes(
          xd, 0, yv12_g, mi_row, mi_col,
          scaled_ref_golden ? NULL : get_ref_scale_factors(cm, GOLDEN_FRAME),
          num_planes);
      *y_sad_g = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }
  }

  // For 1 spatial layer: ALTREF is another temporal reference.
  // Check if it should be used as reference for partitioning.
  if (cpi->svc.number_spatial_layers == 1 && use_alt_ref &&
      (cpi->ref_frame_flags & AOM_ALT_FLAG) &&
      (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) {
    yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME);
    // Switch to the scaled reference if its size differs from the frame size.
    if (yv12_alt && (yv12_alt->y_crop_height != cm->height ||
                     yv12_alt->y_crop_width != cm->width)) {
      yv12_alt = av1_get_scaled_ref_frame(cpi, ALTREF_FRAME);
      scaled_ref_alt = true;
    }
    // Skip the SAD if ALTREF is the same buffer as LAST.
    if (yv12_alt && yv12_alt != yv12) {
      av1_setup_pre_planes(
          xd, 0, yv12_alt, mi_row, mi_col,
          scaled_ref_alt ? NULL : get_ref_scale_factors(cm, ALTREF_FRAME),
          num_planes);
      *y_sad_alt = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }
  }

  if (use_last_ref) {
    const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
    av1_setup_pre_planes(
        xd, 0, yv12, mi_row, mi_col,
        scaled_ref_last ? NULL : get_ref_scale_factors(cm, LAST_FRAME),
        num_planes);
    // Start from a zero-motion, single-reference LAST_FRAME superblock.
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE_FRAME;
    mi->bsize = cm->seq_params->sb_size;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);

    int est_motion = cpi->sf.rt_sf.estimate_motion_for_var_based_partition;
    // TODO(b/290596301): Look into adjusting this condition.
    // There is regression on color content when
    // estimate_motion_for_var_based_partition = 3 and high motion,
    // so for now force it to 2 based on superblock sad.
    if (est_motion > 2 && source_sad_nonrd > kMedSad) est_motion = 2;

    if (est_motion == 1 || est_motion == 2) {
      // Only estimate motion for superblocks fully inside the frame.
      if (xd->mb_to_right_edge >= 0 && xd->mb_to_bottom_edge >= 0) {
        // For screen only do int_pro_motion for spatial variance above
        // threshold and motion level above LowSad.
        if (x->source_variance > 100 && source_sad_nonrd > kLowSad) {
          int is_screen = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
          int me_search_size_col =
              is_screen ? 96 : block_size_wide[cm->seq_params->sb_size] >> 1;
          // For screen use larger search size row motion to capture
          // vertical scroll, which can be larger motion.
          int me_search_size_row =
              is_screen ? 192 : block_size_high[cm->seq_params->sb_size] >> 1;
          unsigned int y_sad_zero;
          *y_sad = av1_int_pro_motion_estimation(
              cpi, x, cm->seq_params->sb_size, mi_row, mi_col, &kZeroMv,
              &y_sad_zero, me_search_size_col, me_search_size_row);
          // The logic below selects whether the motion estimated in the
          // int_pro_motion() will be used in nonrd_pickmode. Only do this
          // for screen for now.
          if (is_screen) {
            unsigned int thresh_sad =
                (cm->seq_params->sb_size == BLOCK_128X128) ? 50000 : 20000;
            if (*y_sad < (y_sad_zero >> 1) && *y_sad < thresh_sad) {
              x->sb_me_partition = 1;
              x->sb_me_mv.as_int = mi->mv[0].as_int;
            } else {
              x->sb_me_partition = 0;
              // Fall back to using zero motion.
              *y_sad = y_sad_zero;
              mi->mv[0].as_int = 0;
            }
          }
        }
      }
    }

    // No SAD was computed above: use the SAD against the current prediction
    // buffer for LAST_FRAME.
    if (*y_sad == UINT_MAX) {
      *y_sad = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }

    // Evaluate if neighbours' MVs give better predictions. Zero MV is tested
    // already, so only non-zero MVs are tested here. Here the neighbour blocks
    // are the first block above or left to this superblock.
    if (est_motion >= 2 && (xd->up_available || xd->left_available))
      evaluate_neighbour_mvs(cpi, x, y_sad, is_small_sb, est_motion);

    *y_sad_last = *y_sad;
  }

  // Pick the ref frame for partitioning, use golden or altref frame only if
  // its lower sad, bias to LAST with factor 0.9.
  set_ref_frame_for_partition(cpi, x, xd, ref_frame_partition, mi, y_sad,
                              y_sad_g, y_sad_alt, yv12_g, yv12_alt, mi_row,
                              mi_col, num_planes);

  // Only calculate the predictor for non-zero MV.
  if (mi->mv[0].as_int != 0) {
    if (!scaled_ref_last) {
      set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    } else {
      // With a scaled LAST reference, use the caller-provided scale factors.
      xd->block_ref_scale_factors[0] = sf_no_scale;
      xd->block_ref_scale_factors[1] = sf_no_scale;
    }
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL,
                                  cm->seq_params->sb_size, AOM_PLANE_Y,
                                  num_planes - 1);
  }
}
1492
1493 // Decides whether to split or merge a 16x16 partition block in variance based
1494 // partitioning based on the 8x8 sub-block variances.
get_part_eval_based_on_sub_blk_var(VP16x16 * var_16x16_info,int64_t threshold16)1495 static inline PART_EVAL_STATUS get_part_eval_based_on_sub_blk_var(
1496 VP16x16 *var_16x16_info, int64_t threshold16) {
1497 int max_8x8_var = 0, min_8x8_var = INT_MAX;
1498 for (int split_idx = 0; split_idx < 4; split_idx++) {
1499 get_variance(&var_16x16_info->split[split_idx].part_variances.none);
1500 int this_8x8_var =
1501 var_16x16_info->split[split_idx].part_variances.none.variance;
1502 max_8x8_var = AOMMAX(this_8x8_var, max_8x8_var);
1503 min_8x8_var = AOMMIN(this_8x8_var, min_8x8_var);
1504 }
1505 // If the difference between maximum and minimum sub-block variances is high,
1506 // then only evaluate PARTITION_SPLIT for the 16x16 block. Otherwise, evaluate
1507 // only PARTITION_NONE. The shift factor for threshold16 has been derived
1508 // empirically.
1509 return ((max_8x8_var - min_8x8_var) > (threshold16 << 2))
1510 ? PART_EVAL_ONLY_SPLIT
1511 : PART_EVAL_ONLY_NONE;
1512 }
1513
// Returns whether the superblock source SAD level |source_sad_nonrd| is low
// enough to allow forcing zero-MV skip, for the given level of the
// set_zeromv_skip_based_on_source_sad setting:
//   level 1:   only kZeroSad qualifies,
//   level 2:   up to kVeryLowSad qualifies,
//   level >=3: up to kLowSad qualifies.
// Levels of 0 or below never qualify.
static inline bool is_set_force_zeromv_skip_based_on_src_sad(
    int set_zeromv_skip_based_on_source_sad, SOURCE_SAD source_sad_nonrd) {
  if (set_zeromv_skip_based_on_source_sad <= 0) return false;

  switch (set_zeromv_skip_based_on_source_sad) {
    case 1: return source_sad_nonrd == kZeroSad;
    case 2: return source_sad_nonrd <= kVeryLowSad;
    default: return source_sad_nonrd <= kLowSad;
  }
}
1527
// Tests whether the superblock at (mi_row, mi_col) can take the superblock
// sized partition and be flagged for forced zero-mv skip.
//
// cpi:            encoder context (speed features, SAD thresholds).
// x:              macroblock context; x->force_zeromv_skip_for_sb may be set
//                 to 1 or 2 by this function.
// tile:           tile bounds used to check the superblock lies inside it.
// vt:             variance tree owned by the caller; it is released here with
//                 aom_free() ONLY on the path that returns true.
// uv_sad:         two chroma SAD values (index 0 and 1 are read).
// y_sad:          luma SAD compared against the luma exit threshold.
// bsize:          block size assigned to the superblock on early exit.
//
// Returns true when the partition has been set at SB level and the caller
// must exit without using vt again; false otherwise.
static inline bool set_force_zeromv_skip_for_sb(
    AV1_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, VP128x128 *vt,
    unsigned int *uv_sad, int mi_row, int mi_col, unsigned int y_sad,
    BLOCK_SIZE bsize) {
  AV1_COMMON *const cm = &cpi->common;

  // Gate on the source SAD class according to the speed-feature level.
  if (!is_set_force_zeromv_skip_based_on_src_sad(
          cpi->sf.rt_sf.set_zeromv_skip_based_on_source_sad,
          x->content_state_sb.source_sad_nonrd)) {
    return false;
  }

  const int sb_mi_wide = mi_size_wide[cm->seq_params->sb_size];
  const int sb_mi_high = mi_size_high[cm->seq_params->sb_size];

  // Thresholds are doubled when increase_source_sad_thresh is enabled.
  const int thresh_shift = cpi->sf.rt_sf.increase_source_sad_thresh ? 1 : 0;
  const unsigned int y_thresh = cpi->zeromv_skip_thresh_exit_part[bsize]
                                << thresh_shift;
  unsigned int uv_thresh = CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(y_thresh)
                           << thresh_shift;

  // Use a much tighter chroma threshold when source_sad >= kVeryLowSad, to
  // suppress the visual artifact caused by the speed feature
  // set_zeromv_skip_based_on_source_sad = 2. For now this is applied only for
  // part_early_exit_zeromv = 1.
  if (x->content_state_sb.source_sad_nonrd >= kVeryLowSad &&
      cpi->sf.rt_sf.part_early_exit_zeromv == 1) {
    uv_thresh >>= 3;
  }

  const bool sb_inside_tile = mi_col + sb_mi_wide <= tile->mi_col_end &&
                              mi_row + sb_mi_high <= tile->mi_row_end;

  if (sb_inside_tile && y_sad < y_thresh && uv_sad[0] < uv_thresh &&
      uv_sad[1] < uv_thresh) {
    // The partition shape is fixed here at SB level; the actual exit has to
    // happen from av1_choose_var_based_partitioning().
    set_block_size(cpi, mi_row, mi_col, bsize);
    x->force_zeromv_skip_for_sb = 1;
    aom_free(vt);
    return true;
  }

  if (x->content_state_sb.source_sad_nonrd == kZeroSad &&
      cpi->sf.rt_sf.part_early_exit_zeromv >= 2) {
    x->force_zeromv_skip_for_sb = 2;
  }
  return false;
}
1565
// Chooses the partitioning of the superblock at (mi_row, mi_col) with the
// variance based partitioning scheme.
//
// A variance tree is filled from the luma source buffer and, for non-key
// frames, from the reference/predictor buffer selected by setup_planes(). The
// tree is then walked top-down (128x128 -> 64x64 -> 32x32 -> 16x16 -> 8x8) and
// block sizes are assigned through set_vt_partitioning() / set_block_size().
//
// cpi:     encoder context.
// tile:    tile containing the superblock; used for boundary checks.
// td:      thread data; td->vt64x64 provides the storage for the 64x64 nodes.
// x:       macroblock context. This function writes x->source_variance,
//          x->force_zeromv_skip_for_sb and x->part_search_info.variance_low.
// mi_row,
// mi_col:  position of the superblock in mode-info units.
//
// Returns 0 on every path. The top-level variance node is allocated with
// aom_malloc() under AOM_CHECK_MEM_ERROR (allocation failure is reported via
// xd->error_info) and released before returning, either here or inside
// set_force_zeromv_skip_for_sb() on its early-exit path.
int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
                                      ThreadData *td, MACROBLOCK *x, int mi_row,
                                      int mi_col) {
#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, choose_var_based_partitioning_time);
#endif
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int64_t *const vbp_thresholds = cpi->vbp_info.thresholds;
  // Layout of force_split (85 entries):
  //   0      : 128x128 block,
  //   1-4    : 64x64 blocks,
  //   5-20   : 32x32 blocks,
  //   21-84  : 16x16 blocks.
  PART_EVAL_STATUS force_split[85];
  int avg_64x64;
  int max_var_32x32[4];
  int min_var_32x32[4];
  int var_32x32;
  int var_64x64;
  int min_var_64x64 = INT_MAX;
  int max_var_64x64 = 0;
  int avg_16x16[4][4];
  int maxvar_16x16[4][4];
  int minvar_16x16[4][4];
  const uint8_t *src_buf;
  const uint8_t *dst_buf;
  int dst_stride;
  unsigned int uv_sad[MAX_MB_PLANE - 1];
  NOISE_LEVEL noise_level = kLow;
  bool is_zero_motion = true;
  bool scaled_ref_last = false;
  // Identity scale factors (frame size -> same frame size), handed to
  // setup_planes() below.
  struct scale_factors sf_no_scale;
  av1_setup_scale_factors_for_frame(&sf_no_scale, cm->width, cm->height,
                                    cm->width, cm->height);

  bool is_key_frame =
      (frame_is_intra_only(cm) ||
       (cpi->ppi->use_svc &&
        cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));

  assert(cm->seq_params->sb_size == BLOCK_64X64 ||
         cm->seq_params->sb_size == BLOCK_128X128);
  const bool is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
  const int num_64x64_blocks = is_small_sb ? 1 : 4;

  unsigned int y_sad = UINT_MAX;
  unsigned int y_sad_g = UINT_MAX;
  unsigned int y_sad_alt = UINT_MAX;
  unsigned int y_sad_last = UINT_MAX;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;

  // Force skip encoding for all superblocks on slide change for
  // non_reference_frames.
  if (cpi->sf.rt_sf.skip_encoding_non_reference_slide_change &&
      cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) {
    MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                        get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
    av1_set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
    x->force_zeromv_skip_for_sb = 1;
    // Close the interval opened by start_timing() at function entry, as the
    // normal exit does, so this early exit is accounted for as well.
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, choose_var_based_partitioning_time);
#endif
    return 0;
  }

  // Ref frame used in partitioning.
  MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;

  // Local copy of the frame-level thresholds; set_vbp_thresholds() below
  // receives this copy so the frame-level values stay untouched here.
  int64_t thresholds[5] = { vbp_thresholds[0], vbp_thresholds[1],
                            vbp_thresholds[2], vbp_thresholds[3],
                            vbp_thresholds[4] };

  const int segment_id = xd->mi[0]->segment_id;
  uint64_t blk_sad = 0;
  if (cpi->src_sad_blk_64x64 != NULL &&
      cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) {
    // src_sad_blk_64x64 is indexed on a 64x64 grid: for 128x128 superblocks
    // the step is half of mib_size.
    const int sb_size_by_mb = (cm->seq_params->sb_size == BLOCK_128X128)
                                  ? (cm->seq_params->mib_size >> 1)
                                  : cm->seq_params->mib_size;
    const int sb_cols =
        (cm->mi_params.mi_cols + sb_size_by_mb - 1) / sb_size_by_mb;
    const int sbi_col = mi_col / sb_size_by_mb;
    const int sbi_row = mi_row / sb_size_by_mb;
    blk_sad = cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols];
  }

  const bool is_segment_id_boosted =
      cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
      cyclic_refresh_segment_id_boosted(segment_id);
  const int qindex =
      is_segment_id_boosted
          ? av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex)
          : cm->quant_params.base_qindex;
  set_vbp_thresholds(
      cpi, thresholds, blk_sad, qindex, x->content_state_sb.low_sumdiff,
      x->content_state_sb.source_sad_nonrd, x->content_state_sb.source_sad_rd,
      is_segment_id_boosted, x->content_state_sb.lighting_change);

  src_buf = x->plane[AOM_PLANE_Y].src.buf;
  int src_stride = x->plane[AOM_PLANE_Y].src.stride;

  // Index for force_split: 0 for the 128x128 block, 1-4 for the 64x64 blocks,
  // 5-20 for the 32x32 blocks, 21-84 for the 16x16 blocks.
  force_split[0] = PART_EVAL_ALL;
  memset(x->part_search_info.variance_low, 0,
         sizeof(x->part_search_info.variance_low));

  // Check if LAST frame is NULL, and if so, treat this frame
  // as a key frame, for the purpose of the superblock partitioning.
  // LAST == NULL can happen in cases where enhancement spatial layers are
  // enabled dynamically and the only reference is the spatial(GOLDEN).
  // If LAST frame has a different resolution: set the scaled_ref_last flag
  // and check if ref_scaled is NULL.
  if (!frame_is_intra_only(cm)) {
    const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, LAST_FRAME);
    if (ref == NULL) {
      is_key_frame = true;
    } else if (ref->y_crop_height != cm->height ||
               ref->y_crop_width != cm->width) {
      scaled_ref_last = true;
      const YV12_BUFFER_CONFIG *ref_scaled =
          av1_get_scaled_ref_frame(cpi, LAST_FRAME);
      if (ref_scaled == NULL) is_key_frame = true;
    }
  }

  x->source_variance = UINT_MAX;
  // For nonrd_pickmode: compute source_variance, only for superblocks with
  // some motion for now. This input can then be used to bias the partitioning
  // or the chroma_check.
  if (cpi->sf.rt_sf.use_nonrd_pick_mode &&
      x->content_state_sb.source_sad_nonrd > kLowSad)
    x->source_variance = av1_get_perpixel_variance_facade(
        cpi, xd, &x->plane[0].src, cm->seq_params->sb_size, AOM_PLANE_Y);

  if (!is_key_frame) {
    setup_planes(cpi, x, &y_sad, &y_sad_g, &y_sad_alt, &y_sad_last,
                 &ref_frame_partition, &sf_no_scale, mi_row, mi_col,
                 is_small_sb, scaled_ref_last);

    MB_MODE_INFO *mi = xd->mi[0];
    // Use reference SB directly for zero mv.
    if (mi->mv[0].as_int != 0) {
      dst_buf = xd->plane[AOM_PLANE_Y].dst.buf;
      dst_stride = xd->plane[AOM_PLANE_Y].dst.stride;
      is_zero_motion = false;
    } else {
      dst_buf = xd->plane[AOM_PLANE_Y].pre[0].buf;
      dst_stride = xd->plane[AOM_PLANE_Y].pre[0].stride;
    }
  } else {
    // No reference buffer is used on (effective) key frames.
    dst_buf = NULL;
    dst_stride = 0;
  }

  // check and set the color sensitivity of sb.
  av1_zero(uv_sad);
  chroma_check(cpi, x, bsize, y_sad_last, y_sad_g, y_sad_alt, is_key_frame,
               is_zero_motion, uv_sad);

  x->force_zeromv_skip_for_sb = 0;

  // Top-level variance node; its 64x64 children live in per-thread storage.
  VP128x128 *vt;
  AOM_CHECK_MEM_ERROR(xd->error_info, vt, aom_malloc(sizeof(*vt)));
  vt->split = td->vt64x64;

  // If the superblock is completely static (zero source sad) and
  // the y_sad (relative to LAST ref) is very small, take the sb_size partition
  // and exit, and force zeromv_last skip mode for nonrd_pickmode.
  // Only do this on the base segment (so the QP-boosted segment, if applied,
  // can still continue cleaning/ramping up the quality).
  // Condition on color uv_sad is also added.
  if (!is_key_frame && cpi->sf.rt_sf.part_early_exit_zeromv &&
      cpi->rc.frames_since_key > 30 && segment_id == CR_SEGMENT_ID_BASE &&
      ref_frame_partition == LAST_FRAME && xd->mi[0]->mv[0].as_int == 0) {
    // Exit here, if zero mv skip flag is set at SB level. In that case vt has
    // already been freed by set_force_zeromv_skip_for_sb().
    if (set_force_zeromv_skip_for_sb(cpi, x, tile, vt, uv_sad, mi_row, mi_col,
                                     y_sad, bsize)) {
      // Close the interval opened by start_timing() at function entry, as the
      // normal exit does, so this early exit is accounted for as well.
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, choose_var_based_partitioning_time);
#endif
      return 0;
    }
  }

  if (cpi->noise_estimate.enabled)
    noise_level = av1_noise_estimate_extract_level(&cpi->noise_estimate);

  // Fill in the entire tree of 8x8 (for inter frames) or 4x4 (for key frames)
  // variances for splits.
  fill_variance_tree_leaves(cpi, x, vt, force_split, avg_16x16, maxvar_16x16,
                            minvar_16x16, thresholds, src_buf, src_stride,
                            dst_buf, dst_stride, is_key_frame, is_small_sb);

  avg_64x64 = 0;
  for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; ++blk64_idx) {
    max_var_32x32[blk64_idx] = 0;
    min_var_32x32[blk64_idx] = INT_MAX;
    const int blk64_scale_idx = blk64_idx << 2;
    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
      const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
      for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
        // The 8x8/16x16 levels are aggregated here only for key frames.
        if (!is_key_frame) continue;
        VP16x16 *vtemp = &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
        for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++)
          fill_variance_tree(&vtemp->split[lvl3_idx], BLOCK_8X8);
        fill_variance_tree(vtemp, BLOCK_16X16);
        // If variance of this 16x16 block is above the threshold, force block
        // to split. This also forces a split on the upper levels.
        get_variance(&vtemp->part_variances.none);
        if (vtemp->part_variances.none.variance > thresholds[3]) {
          const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
          force_split[split_index] =
              cpi->sf.rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var
                  ? get_part_eval_based_on_sub_blk_var(vtemp, thresholds[3])
                  : PART_EVAL_ONLY_SPLIT;
          force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
          force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
          force_split[0] = PART_EVAL_ONLY_SPLIT;
        }
      }
      fill_variance_tree(&vt->split[blk64_idx].split[lvl1_idx], BLOCK_32X32);
      // If variance of this 32x32 block is above the threshold, or if it is
      // above (some threshold of) the average variance over the sub-16x16
      // blocks, then force this block to split. This also forces a split on
      // the upper (64x64) level.
      uint64_t frame_sad_thresh = 20000;
      const int is_360p_or_smaller = cm->width * cm->height <= RESOLUTION_360P;
      if (cpi->svc.number_temporal_layers > 2 &&
          cpi->svc.temporal_layer_id == 0)
        frame_sad_thresh = frame_sad_thresh << 1;
      if (force_split[5 + blk64_scale_idx + lvl1_idx] == PART_EVAL_ALL) {
        get_variance(&vt->split[blk64_idx].split[lvl1_idx].part_variances.none);
        var_32x32 =
            vt->split[blk64_idx].split[lvl1_idx].part_variances.none.variance;
        max_var_32x32[blk64_idx] = AOMMAX(var_32x32, max_var_32x32[blk64_idx]);
        min_var_32x32[blk64_idx] = AOMMIN(var_32x32, min_var_32x32[blk64_idx]);
        const int max_min_var_16X16_diff = (maxvar_16x16[blk64_idx][lvl1_idx] -
                                            minvar_16x16[blk64_idx][lvl1_idx]);

        if (var_32x32 > thresholds[2] ||
            (!is_key_frame && var_32x32 > (thresholds[2] >> 1) &&
             var_32x32 > (avg_16x16[blk64_idx][lvl1_idx] >> 1))) {
          force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
          force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
          force_split[0] = PART_EVAL_ONLY_SPLIT;
        } else if (!is_key_frame && is_360p_or_smaller &&
                   ((max_min_var_16X16_diff > (thresholds[2] >> 1) &&
                     maxvar_16x16[blk64_idx][lvl1_idx] > thresholds[2]) ||
                    (cpi->sf.rt_sf.prefer_large_partition_blocks &&
                     x->content_state_sb.source_sad_nonrd > kLowSad &&
                     cpi->rc.frame_source_sad < frame_sad_thresh &&
                     maxvar_16x16[blk64_idx][lvl1_idx] > (thresholds[2] >> 4) &&
                     maxvar_16x16[blk64_idx][lvl1_idx] >
                         (minvar_16x16[blk64_idx][lvl1_idx] << 2)))) {
          force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
          force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
          force_split[0] = PART_EVAL_ONLY_SPLIT;
        }
      }
    }
    if (force_split[1 + blk64_idx] == PART_EVAL_ALL) {
      fill_variance_tree(&vt->split[blk64_idx], BLOCK_64X64);
      get_variance(&vt->split[blk64_idx].part_variances.none);
      var_64x64 = vt->split[blk64_idx].part_variances.none.variance;
      max_var_64x64 = AOMMAX(var_64x64, max_var_64x64);
      min_var_64x64 = AOMMIN(var_64x64, min_var_64x64);
      // If the difference of the max-min variances of the sub-blocks and the
      // max variance of a sub-block are both above their thresholds, then
      // force this block to split. Only checking this for noise level >=
      // medium, if encoder is in SVC or if we already forced large blocks.
      const int max_min_var_32x32_diff =
          max_var_32x32[blk64_idx] - min_var_32x32[blk64_idx];
      const int check_max_var = max_var_32x32[blk64_idx] > thresholds[1] >> 1;
      const bool check_noise_lvl = noise_level >= kMedium ||
                                   cpi->ppi->use_svc ||
                                   cpi->sf.rt_sf.prefer_large_partition_blocks;
      const int64_t set_threshold = 3 * (thresholds[1] >> 3);

      if (!is_key_frame && max_min_var_32x32_diff > set_threshold &&
          check_max_var && check_noise_lvl) {
        force_split[1 + blk64_idx] = PART_EVAL_ONLY_SPLIT;
        force_split[0] = PART_EVAL_ONLY_SPLIT;
      }
      avg_64x64 += var_64x64;
    }
    // With 64x64 superblocks there is no 128x128 level to evaluate.
    if (is_small_sb) force_split[0] = PART_EVAL_ONLY_SPLIT;
  }

  if (force_split[0] == PART_EVAL_ALL) {
    fill_variance_tree(vt, BLOCK_128X128);
    get_variance(&vt->part_variances.none);
    // 9/32 of the summed 64x64 variances.
    const int set_avg_64x64 = (9 * avg_64x64) >> 5;
    if (!is_key_frame && vt->part_variances.none.variance > set_avg_64x64)
      force_split[0] = PART_EVAL_ONLY_SPLIT;

    if (!is_key_frame &&
        (max_var_64x64 - min_var_64x64) > 3 * (thresholds[0] >> 3) &&
        max_var_64x64 > thresholds[0] >> 1)
      force_split[0] = PART_EVAL_ONLY_SPLIT;
  }

  // Descend into the 64x64 blocks when the superblock crosses the tile
  // boundary (32 mi units from its origin) or when the 128x128 level could not
  // be set as a whole.
  if (mi_col + 32 > tile->mi_col_end || mi_row + 32 > tile->mi_row_end ||
      !set_vt_partitioning(cpi, xd, tile, vt, BLOCK_128X128, mi_row, mi_col,
                           thresholds[0], BLOCK_16X16, force_split[0])) {
    for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; ++blk64_idx) {
      const int x64_idx = GET_BLK_IDX_X(blk64_idx, 4);
      const int y64_idx = GET_BLK_IDX_Y(blk64_idx, 4);
      const int blk64_scale_idx = blk64_idx << 2;

      // Now go through the entire structure, splitting every block size until
      // we get to one that's got a variance lower than our threshold.
      if (set_vt_partitioning(cpi, xd, tile, &vt->split[blk64_idx], BLOCK_64X64,
                              mi_row + y64_idx, mi_col + x64_idx, thresholds[1],
                              BLOCK_16X16, force_split[1 + blk64_idx]))
        continue;
      for (int lvl1_idx = 0; lvl1_idx < 4; ++lvl1_idx) {
        const int x32_idx = GET_BLK_IDX_X(lvl1_idx, 3);
        const int y32_idx = GET_BLK_IDX_Y(lvl1_idx, 3);
        const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
        if (set_vt_partitioning(
                cpi, xd, tile, &vt->split[blk64_idx].split[lvl1_idx],
                BLOCK_32X32, (mi_row + y64_idx + y32_idx),
                (mi_col + x64_idx + x32_idx), thresholds[2], BLOCK_16X16,
                force_split[5 + blk64_scale_idx + lvl1_idx]))
          continue;
        for (int lvl2_idx = 0; lvl2_idx < 4; ++lvl2_idx) {
          const int x16_idx = GET_BLK_IDX_X(lvl2_idx, 2);
          const int y16_idx = GET_BLK_IDX_Y(lvl2_idx, 2);
          const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
          VP16x16 *vtemp =
              &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
          if (set_vt_partitioning(cpi, xd, tile, vtemp, BLOCK_16X16,
                                  mi_row + y64_idx + y32_idx + y16_idx,
                                  mi_col + x64_idx + x32_idx + x16_idx,
                                  thresholds[3], BLOCK_8X8,
                                  force_split[split_index]))
            continue;
          // Nothing larger was accepted: fall back to four 8x8 blocks.
          for (int lvl3_idx = 0; lvl3_idx < 4; ++lvl3_idx) {
            const int x8_idx = GET_BLK_IDX_X(lvl3_idx, 1);
            const int y8_idx = GET_BLK_IDX_Y(lvl3_idx, 1);
            set_block_size(cpi, (mi_row + y64_idx + y32_idx + y16_idx + y8_idx),
                           (mi_col + x64_idx + x32_idx + x16_idx + x8_idx),
                           BLOCK_8X8);
          }
        }
      }
    }
  }

  if (cpi->sf.rt_sf.short_circuit_low_temp_var) {
    set_low_temp_var_flag(cpi, &x->part_search_info, xd, vt, thresholds,
                          ref_frame_partition, mi_col, mi_row, is_small_sb);
  }

  aom_free(vt);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, choose_var_based_partitioning_time);
#endif
  return 0;
}
1916