/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>
#include <stdbool.h>
#include <string.h>

#include "config/aom_dsp_rtcd.h"
#include "config/aom_scale_rtcd.h"

#include "aom/aom_integer.h"
#include "av1/common/av1_common_int.h"
#include "av1/common/reconinter.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/pickcdef.h"
#include "av1/encoder/mcomp.h"

// Get the primary and secondary filter strengths for the given strength index
// and search method.
static inline void get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,
                                             int *pri_strength,
                                             int *sec_strength,
                                             int strength_idx) {
  const int tot_sec_filter =
      (pick_method == CDEF_FAST_SEARCH_LVL5)
          ? REDUCED_SEC_STRENGTHS_LVL5
          : ((pick_method >= CDEF_FAST_SEARCH_LVL3) ? REDUCED_SEC_STRENGTHS_LVL3
                                                    : CDEF_SEC_STRENGTHS);
  const int pri_idx = strength_idx / tot_sec_filter;
  const int sec_idx = strength_idx % tot_sec_filter;
  *pri_strength = pri_idx;
  *sec_strength = sec_idx;
  if (pick_method == CDEF_FULL_SEARCH) return;

  switch (pick_method) {
    case CDEF_FAST_SEARCH_LVL1:
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL1);
      *pri_strength = priconv_lvl1[pri_idx];
      break;
    case CDEF_FAST_SEARCH_LVL2:
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2);
      *pri_strength = priconv_lvl2[pri_idx];
      break;
    case CDEF_FAST_SEARCH_LVL3:
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2);
      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3);
      *pri_strength = priconv_lvl2[pri_idx];
      *sec_strength = secconv_lvl3[sec_idx];
      break;
    case CDEF_FAST_SEARCH_LVL4:
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4);
      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3);
      *pri_strength = priconv_lvl4[pri_idx];
      *sec_strength = secconv_lvl3[sec_idx];
      break;
    case CDEF_FAST_SEARCH_LVL5:
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4);
      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL5);
      *pri_strength = priconv_lvl5[pri_idx];
      *sec_strength = secconv_lvl5[sec_idx];
      break;
    default: assert(0 && "Invalid CDEF search method");
  }
}

// Store the CDEF filter strength calculated from the strength index for the
// given search method.
#define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx) \
  do {                                                                       \
    get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength,   \
                              (strength_idx));                               \
    cdef_strength = pri_strength * CDEF_SEC_STRENGTHS + sec_strength;        \
  } while (0)
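
// Editor's note: a minimal illustrative sketch (hypothetical helper, not part
// of the encoder). For CDEF_FULL_SEARCH no remapping tables apply, so a linear
// strength index splits into a (primary, secondary) pair and the macro above
// re-packs it losslessly.
static inline int cdef_full_search_strength_roundtrip(int strength_idx) {
  const int pri = strength_idx / CDEF_SEC_STRENGTHS;  // primary strength
  const int sec = strength_idx % CDEF_SEC_STRENGTHS;  // secondary strength
  // Same packing as STORE_CDEF_FILTER_STRENGTH; returns strength_idx itself.
  return pri * CDEF_SEC_STRENGTHS + sec;
}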

/* Search for the best strength to add as an option, knowing we
   already selected nb_strengths options. */
static uint64_t search_one(int *lev, int nb_strengths,
                           uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
                           CDEF_PICK_METHOD pick_method) {
  uint64_t tot_mse[TOTAL_STRENGTHS];
  const int total_strengths = nb_cdef_strengths[pick_method];
  int i, j;
  uint64_t best_tot_mse = (uint64_t)1 << 63;
  int best_id = 0;
  memset(tot_mse, 0, sizeof(tot_mse));
  for (i = 0; i < sb_count; i++) {
    int gi;
    uint64_t best_mse = (uint64_t)1 << 63;
    /* Find best mse among already selected options. */
    for (gi = 0; gi < nb_strengths; gi++) {
      if (mse[i][lev[gi]] < best_mse) {
        best_mse = mse[i][lev[gi]];
      }
    }
    /* Find best mse when adding each possible new option. */
    for (j = 0; j < total_strengths; j++) {
      uint64_t best = best_mse;
      if (mse[i][j] < best) best = mse[i][j];
      tot_mse[j] += best;
    }
  }
  for (j = 0; j < total_strengths; j++) {
    if (tot_mse[j] < best_tot_mse) {
      best_tot_mse = tot_mse[j];
      best_id = j;
    }
  }
  lev[nb_strengths] = best_id;
  return best_tot_mse;
}

/* Search for the best luma+chroma strength to add as an option, knowing we
   already selected nb_strengths options. */
static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
                                uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
                                CDEF_PICK_METHOD pick_method) {
  uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
  int i, j;
  uint64_t best_tot_mse = (uint64_t)1 << 63;
  int best_id0 = 0;
  int best_id1 = 0;
  const int total_strengths = nb_cdef_strengths[pick_method];
  memset(tot_mse, 0, sizeof(tot_mse));
  for (i = 0; i < sb_count; i++) {
    int gi;
    uint64_t best_mse = (uint64_t)1 << 63;
    /* Find best mse among already selected options. */
    for (gi = 0; gi < nb_strengths; gi++) {
      uint64_t curr = mse[0][i][lev0[gi]];
      curr += mse[1][i][lev1[gi]];
      if (curr < best_mse) {
        best_mse = curr;
      }
    }
    /* Find best mse when adding each possible new option. */
    for (j = 0; j < total_strengths; j++) {
      int k;
      for (k = 0; k < total_strengths; k++) {
        uint64_t best = best_mse;
        uint64_t curr = mse[0][i][j];
        curr += mse[1][i][k];
        if (curr < best) best = curr;
        tot_mse[j][k] += best;
      }
    }
  }
  for (j = 0; j < total_strengths; j++) {
    int k;
    for (k = 0; k < total_strengths; k++) {
      if (tot_mse[j][k] < best_tot_mse) {
        best_tot_mse = tot_mse[j][k];
        best_id0 = j;
        best_id1 = k;
      }
    }
  }
  lev0[nb_strengths] = best_id0;
  lev1[nb_strengths] = best_id1;
  return best_tot_mse;
}

/* Search for the set of strengths that minimizes mse. */
static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
                                      uint64_t mse[][TOTAL_STRENGTHS],
                                      int sb_count,
                                      CDEF_PICK_METHOD pick_method) {
  uint64_t best_tot_mse;
  int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
              pick_method <= CDEF_FAST_SEARCH_LVL5);
  int i;
  best_tot_mse = (uint64_t)1 << 63;
  /* Greedy search: add one strength option at a time. */
  for (i = 0; i < nb_strengths; i++) {
    best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method);
  }
  /* Try to refine the greedy search by reconsidering each
     already-selected option. */
  if (!fast) {
    for (i = 0; i < 4 * nb_strengths; i++) {
      int j;
      for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
      best_tot_mse =
          search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method);
    }
  }
  return best_tot_mse;
}
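
// Editor's note: hedged sketch (hypothetical helper, not used by the encoder).
// It only counts work: the greedy pass above calls search_one() once per
// selected strength, and the non-fast refinement pass revisits
// 4 * nb_strengths candidates, dropping the oldest choice each time.
static inline int count_search_one_calls(int nb_strengths, int fast) {
  return nb_strengths + (fast ? 0 : 4 * nb_strengths);
}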

/* Search for the set of luma+chroma strengths that minimizes mse. */
static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
                                           int nb_strengths,
                                           uint64_t (**mse)[TOTAL_STRENGTHS],
                                           int sb_count,
                                           CDEF_PICK_METHOD pick_method) {
  uint64_t best_tot_mse;
  int i;
  best_tot_mse = (uint64_t)1 << 63;
  /* Greedy search: add one strength option at a time. */
  for (i = 0; i < nb_strengths; i++) {
    best_tot_mse =
        search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method);
  }
  /* Try to refine the greedy search by reconsidering each
     already-selected option. */
  for (i = 0; i < 4 * nb_strengths; i++) {
    int j;
    for (j = 0; j < nb_strengths - 1; j++) {
      best_lev0[j] = best_lev0[j + 1];
      best_lev1[j] = best_lev1[j + 1];
    }
    best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
                                   sb_count, pick_method);
  }
  return best_tot_mse;
}

static inline void init_src_params(int *src_stride, int *width, int *height,
                                   int *width_log2, int *height_log2,
                                   BLOCK_SIZE bsize) {
  *src_stride = block_size_wide[bsize];
  *width = block_size_wide[bsize];
  *height = block_size_high[bsize];
  *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
  *height_log2 = MI_SIZE_LOG2 + mi_size_high_log2[bsize];
}
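
// Editor's note: for reference, the four block sizes handled by
// init_src_params() and the distortion helpers below map to
// (width, height, width_log2, height_log2) as:
//   BLOCK_8X8 -> (8, 8, 3, 3)    BLOCK_8X4 -> (8, 4, 3, 2)
//   BLOCK_4X8 -> (4, 8, 2, 3)    BLOCK_4X4 -> (4, 4, 2, 2)
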
#if CONFIG_AV1_HIGHBITDEPTH
/* Compute MSE only on the blocks we filtered. */
static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src,
                                         cdef_list *dlist, int cdef_count,
                                         BLOCK_SIZE bsize, int coeff_shift,
                                         int row, int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);
  uint64_t sum = 0;
  int bi, bx, by;
  uint16_t *dst16 = CONVERT_TO_SHORTPTR((uint8_t *)dst);
  uint16_t *dst_buff = &dst16[row * dstride + col];
  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);
  for (bi = 0; bi < cdef_count; bi++) {
    by = dlist[bi].by;
    bx = dlist[bi].bx;
    sum += aom_mse_wxh_16bit_highbd(
        &dst_buff[(by << height_log2) * dstride + (bx << width_log2)], dstride,
        &src[bi << (height_log2 + width_log2)], src_stride, width, height);
  }
  return sum >> 2 * coeff_shift;
}
#endif

// Checks whether dual or quad block processing is applicable, for block widths
// 8 and 4 respectively.
static inline int is_dual_or_quad_applicable(cdef_list *dlist, int width,
                                             int cdef_count, int bi,
                                             int iter) {
  assert(width == 8 || width == 4);
  const int blk_offset = (width == 8) ? 1 : 3;
  if ((iter + blk_offset) >= cdef_count) return 0;

  if (dlist[bi].by == dlist[bi + blk_offset].by &&
      dlist[bi].bx + blk_offset == dlist[bi + blk_offset].bx)
    return 1;

  return 0;
}
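
// Editor's note: worked example of the check above. With width == 4 the quad
// case uses blk_offset == 3: if dlist[bi] is at (by, bx) and dlist[bi + 3] is
// at (by, bx + 3), the four 4-wide units are horizontally contiguous and their
// error can be computed with a single 16-wide aom_mse_16xh_16bit() call in
// compute_cdef_dist() below.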

static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src,
                                  cdef_list *dlist, int cdef_count,
                                  BLOCK_SIZE bsize, int coeff_shift, int row,
                                  int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);
  uint64_t sum = 0;
  int bi, bx, by;
  int iter = 0;
  int inc = 1;
  uint8_t *dst8 = (uint8_t *)dst;
  uint8_t *dst_buff = &dst8[row * dstride + col];
  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);

  const int num_blks = 16 / width;
  for (bi = 0; bi < cdef_count; bi += inc) {
    by = dlist[bi].by;
    bx = dlist[bi].bx;
    uint16_t *src_tmp = &src[bi << (height_log2 + width_log2)];
    uint8_t *dst_tmp =
        &dst_buff[(by << height_log2) * dstride + (bx << width_log2)];

    if (is_dual_or_quad_applicable(dlist, width, cdef_count, bi, iter)) {
      sum += aom_mse_16xh_16bit(dst_tmp, dstride, src_tmp, width, height);
      iter += num_blks;
      inc = num_blks;
    } else {
      sum += aom_mse_wxh_16bit(dst_tmp, dstride, src_tmp, src_stride, width,
                               height);
      iter += 1;
      inc = 1;
    }
  }

  return sum >> 2 * coeff_shift;
}
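
// Editor's note: both distortion helpers above return
//   sum((dst - src)^2) >> (2 * coeff_shift),
// i.e. the squared error of coeff_shift-scaled samples normalized back to an
// 8-bit-equivalent scale, so MSE values are comparable across bit depths.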

// Fill the boundary regions of the block with CDEF_VERY_LARGE, only if the
// region is outside the frame boundary.
static inline void fill_borders_for_fbs_on_frame_boundary(
    uint16_t *inbuf, int hfilt_size, int vfilt_size,
    bool is_fb_on_frm_left_boundary, bool is_fb_on_frm_right_boundary,
    bool is_fb_on_frm_top_boundary, bool is_fb_on_frm_bottom_boundary) {
  if (!is_fb_on_frm_left_boundary && !is_fb_on_frm_right_boundary &&
      !is_fb_on_frm_top_boundary && !is_fb_on_frm_bottom_boundary)
    return;
  if (is_fb_on_frm_bottom_boundary) {
    // Fill bottom region of the block
    const int buf_offset =
        (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + CDEF_HBORDER;
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_left_boundary) {
    const int buf_offset = (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE;
    // Fill bottom-left region of the block
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_right_boundary) {
    const int buf_offset =
        (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + hfilt_size + CDEF_HBORDER;
    // Fill bottom-right region of the block
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_top_boundary) {
    // Fill top region of the block
    fill_rect(&inbuf[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_top_boundary || is_fb_on_frm_left_boundary) {
    // Fill top-left region of the block
    fill_rect(inbuf, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_top_boundary || is_fb_on_frm_right_boundary) {
    const int buf_offset = hfilt_size + CDEF_HBORDER;
    // Fill top-right region of the block
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_left_boundary) {
    const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
    // Fill left region of the block
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, vfilt_size, CDEF_HBORDER,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_right_boundary) {
    const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
    // Fill right region of the block
    fill_rect(&inbuf[buf_offset + hfilt_size + CDEF_HBORDER], CDEF_BSTRIDE,
              vfilt_size, CDEF_HBORDER, CDEF_VERY_LARGE);
  }
}
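
// Editor's note: rough layout of the CDEF_BSTRIDE-wide line buffer filled
// above, where the interior is the vfilt_size x hfilt_size region to be
// filtered and each border band is CDEF_VBORDER rows or CDEF_HBORDER columns:
//
//   top-left    | top    | top-right
//   ------------+--------+-----------
//   left        | pixels | right
//   ------------+--------+-----------
//   bottom-left | bottom | bottom-right
//
// Only the bands that fall outside the frame are filled with CDEF_VERY_LARGE.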

// Calculate the number of 8x8/4x4 filter units for which SSE can be calculated
// after CDEF filtering in a single function call.
static AOM_FORCE_INLINE int get_error_calc_width_in_filt_units(
    cdef_list *dlist, int cdef_count, int bi, int subsampling_x,
    int subsampling_y) {
  // TODO(Ranjit): Extend the optimization for 422
  if (subsampling_x != subsampling_y) return 1;

  // Combining more blocks seems to increase encode time due to the increase in
  // control code.
  if (bi + 3 < cdef_count && dlist[bi].by == dlist[bi + 3].by &&
      dlist[bi].bx + 3 == dlist[bi + 3].bx) {
    /* Calculate error for four 8x8/4x4 blocks using 32x8/16x4 block specific
     * logic if y co-ordinates match and x co-ordinates are
     * separated by 3 for first and fourth 8x8/4x4 blocks in dlist[]. */
    return 4;
  }
  if (bi + 1 < cdef_count && dlist[bi].by == dlist[bi + 1].by &&
      dlist[bi].bx + 1 == dlist[bi + 1].bx) {
    /* Calculate error for two 8x8/4x4 blocks using 16x8/8x4 block specific
     * logic if their y co-ordinates match and x co-ordinates are
     * separated by 1 for first and second 8x8/4x4 blocks in dlist[]. */
    return 2;
  }
  return 1;
}
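
// Editor's note: the return value is in 8x8/4x4 filter units; the callers in
// get_filt_error() below multiply it by (1 << bw_log2) to get the aom_sse()
// width, e.g. 8, 16 or 32 pixels for luma and 4, 8 or 16 pixels for 4:2:0
// chroma.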

// Returns the block error after CDEF filtering for a given strength.
static inline uint64_t get_filt_error(
    const CdefSearchCtx *cdef_search_ctx, const struct macroblockd_plane *pd,
    cdef_list *dlist, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS], int *dirinit,
    int var[CDEF_NBLOCKS][CDEF_NBLOCKS], uint16_t *in, uint8_t *ref_buffer,
    int ref_stride, int row, int col, int pri_strength, int sec_strength,
    int cdef_count, int pli, int coeff_shift, BLOCK_SIZE bs) {
  uint64_t curr_sse = 0;
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bs, pd->subsampling_x, pd->subsampling_y);
  const int bw_log2 = 3 - pd->subsampling_x;
  const int bh_log2 = 3 - pd->subsampling_y;

  // TODO(Ranjit): Extend this optimization for HBD
  if (!cdef_search_ctx->use_highbitdepth) {
    // If all 8x8/4x4 blocks in the CDEF block need to be filtered, calculate
    // the error at the CDEF block level.
    const int tot_blk_count =
        (block_size_wide[plane_bsize] * block_size_high[plane_bsize]) >>
        (bw_log2 + bh_log2);
    if (cdef_count == tot_blk_count) {
      // Calculate the offset in the buffer based on block position
      const FULLPEL_MV this_mv = { row, col };
      const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
      if (pri_strength == 0 && sec_strength == 0) {
        // When the CDEF strength is zero, filtering is not applied. Hence the
        // error is calculated between the source and unfiltered pixels.
        curr_sse =
            aom_sse(&ref_buffer[buf_offset], ref_stride,
                    get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride,
                    block_size_wide[plane_bsize], block_size_high[plane_bsize]);
      } else {
        DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]);

        av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in,
                           cdef_search_ctx->xdec[pli],
                           cdef_search_ctx->ydec[pli], dir, dirinit, var, pli,
                           dlist, cdef_count, pri_strength,
                           sec_strength + (sec_strength == 3),
                           cdef_search_ctx->damping, coeff_shift);
        curr_sse =
            aom_sse(&ref_buffer[buf_offset], ref_stride, tmp_dst8,
                    (1 << MAX_SB_SIZE_LOG2), block_size_wide[plane_bsize],
                    block_size_high[plane_bsize]);
      }
    } else {
      // If only some of the 8x8/4x4 blocks in the CDEF block need to be
      // filtered, the filtering functions produce 8-bit output and the error
      // is calculated in the 8-bit domain.
      if (pri_strength == 0 && sec_strength == 0) {
        int num_error_calc_filt_units = 1;
        for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) {
          const uint8_t by = dlist[bi].by;
          const uint8_t bx = dlist[bi].bx;
          const int16_t by_pos = (by << bh_log2);
          const int16_t bx_pos = (bx << bw_log2);
          // Calculate the offset in the buffer based on block position
          const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos };
          const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
          num_error_calc_filt_units = get_error_calc_width_in_filt_units(
              dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y);
          curr_sse += aom_sse(
              &ref_buffer[buf_offset], ref_stride,
              get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride,
              num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2));
        }
      } else {
        DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]);
        av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in,
                           cdef_search_ctx->xdec[pli],
                           cdef_search_ctx->ydec[pli], dir, dirinit, var, pli,
                           dlist, cdef_count, pri_strength,
                           sec_strength + (sec_strength == 3),
                           cdef_search_ctx->damping, coeff_shift);
        int num_error_calc_filt_units = 1;
        for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) {
          const uint8_t by = dlist[bi].by;
          const uint8_t bx = dlist[bi].bx;
          const int16_t by_pos = (by << bh_log2);
          const int16_t bx_pos = (bx << bw_log2);
          // Calculate the offset in the buffer based on block position
          const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos };
          const FULLPEL_MV tmp_buf_pos = { by_pos, bx_pos };
          const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
          const int tmp_buf_offset =
              get_offset_from_fullmv(&tmp_buf_pos, (1 << MAX_SB_SIZE_LOG2));
          num_error_calc_filt_units = get_error_calc_width_in_filt_units(
              dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y);
          curr_sse += aom_sse(
              &ref_buffer[buf_offset], ref_stride, &tmp_dst8[tmp_buf_offset],
              (1 << MAX_SB_SIZE_LOG2),
              num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2));
        }
      }
    }
  } else {
    DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);

    av1_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in,
                       cdef_search_ctx->xdec[pli], cdef_search_ctx->ydec[pli],
                       dir, dirinit, var, pli, dlist, cdef_count, pri_strength,
                       sec_strength + (sec_strength == 3),
                       cdef_search_ctx->damping, coeff_shift);
    curr_sse = cdef_search_ctx->compute_cdef_dist_fn(
        ref_buffer, ref_stride, tmp_dst, dlist, cdef_count,
        cdef_search_ctx->bsize[pli], coeff_shift, row, col);
  }
  return curr_sse;
}

// Calculates MSE at block level.
// Inputs:
//   cdef_search_ctx: Pointer to the structure containing parameters related to
//   CDEF search context.
//   fbr: Row index in units of 64x64 block
//   fbc: Column index in units of 64x64 block
// Returns:
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx,
                             struct aom_internal_error_info *error_info,
                             int fbr, int fbc, int sb_count) {
  // TODO(aomedia:3276): Pass error_info to the low-level functions as required
  // in future to handle error propagation.
  (void)error_info;
  const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params;
  const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref;
  const int coeff_shift = cdef_search_ctx->coeff_shift;
  const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2;
  const int *mi_high_l2 = cdef_search_ctx->mi_high_l2;

  // Declare and initialize the temporary buffers.
  DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
  cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
  int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
  int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
  uint16_t *const in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
  int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
  int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
  int hb_step = 1, vb_step = 1;
  BLOCK_SIZE bs;

  const MB_MODE_INFO *const mbmi =
      mi_params->mi_grid_base[MI_SIZE_64X64 * fbr * mi_params->mi_stride +
                              MI_SIZE_64X64 * fbc];

  uint8_t *ref_buffer[MAX_MB_PLANE] = { ref->y_buffer, ref->u_buffer,
                                        ref->v_buffer };
  int ref_stride[MAX_MB_PLANE] = { ref->y_stride, ref->uv_stride,
                                   ref->uv_stride };

  if (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64 ||
      mbmi->bsize == BLOCK_64X128) {
    bs = mbmi->bsize;
    if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
      nhb = AOMMIN(MI_SIZE_128X128, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
      hb_step = 2;
    }
    if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
      nvb = AOMMIN(MI_SIZE_128X128, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
      vb_step = 2;
    }
  } else {
    bs = BLOCK_64X64;
  }
  // Get the number of 8x8 blocks that are not skipped. CDEF processing happens
  // only for 8x8 blocks that are not skipped.
  const int cdef_count = av1_cdef_compute_sb_list(
      mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);
  const bool is_fb_on_frm_left_boundary = (fbc == 0);
  const bool is_fb_on_frm_right_boundary =
      (fbc + hb_step == cdef_search_ctx->nhfb);
  const bool is_fb_on_frm_top_boundary = (fbr == 0);
  const bool is_fb_on_frm_bottom_boundary =
      (fbr + vb_step == cdef_search_ctx->nvfb);
  const int yoff = CDEF_VBORDER * (!is_fb_on_frm_top_boundary);
  const int xoff = CDEF_HBORDER * (!is_fb_on_frm_left_boundary);
  int dirinit = 0;
  for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) {
    /* We avoid filtering the pixels for which some of the pixels to
       average are outside the frame. We could change the filter instead,
       but it would add special cases for any future vectorization. */
    const int hfilt_size = (nhb << mi_wide_l2[pli]);
    const int vfilt_size = (nvb << mi_high_l2[pli]);
    const int ysize =
        vfilt_size + CDEF_VBORDER * (!is_fb_on_frm_bottom_boundary) + yoff;
    const int xsize =
        hfilt_size + CDEF_HBORDER * (!is_fb_on_frm_right_boundary) + xoff;
    const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli];
    const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
    struct macroblockd_plane pd = cdef_search_ctx->plane[pli];
    cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
                             pd.dst.buf, row - yoff, col - xoff, pd.dst.stride,
                             ysize, xsize);
    fill_borders_for_fbs_on_frame_boundary(
        inbuf, hfilt_size, vfilt_size, is_fb_on_frm_left_boundary,
        is_fb_on_frm_right_boundary, is_fb_on_frm_top_boundary,
        is_fb_on_frm_bottom_boundary);
    for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) {
      int pri_strength, sec_strength;
      get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength,
                                &sec_strength, gi);
      const uint64_t curr_mse = get_filt_error(
          cdef_search_ctx, &pd, dlist, dir, &dirinit, var, in, ref_buffer[pli],
          ref_stride[pli], row, col, pri_strength, sec_strength, cdef_count,
          pli, coeff_shift, bs);
      if (pli < 2)
        cdef_search_ctx->mse[pli][sb_count][gi] = curr_mse;
      else
        cdef_search_ctx->mse[1][sb_count][gi] += curr_mse;
    }
  }
  cdef_search_ctx->sb_index[sb_count] =
      MI_SIZE_64X64 * fbr * mi_params->mi_stride + MI_SIZE_64X64 * fbc;
}

// MSE calculation at frame level.
// Inputs:
//   cdef_search_ctx: Pointer to the structure containing parameters related to
//   CDEF search context.
// Returns:
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx,
                                struct aom_internal_error_info *error_info) {
  // Loop over each sb.
  for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) {
    for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) {
      // Check if CDEF processing can be skipped for this particular sb.
      if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue;
      // Calculate the mse for each sb and store the relevant sb index.
      av1_cdef_mse_calc_block(cdef_search_ctx, error_info, fbr, fbc,
                              cdef_search_ctx->sb_count);
      cdef_search_ctx->sb_count++;
    }
  }
}

// Allocates memory for members of CdefSearchCtx.
// Inputs:
//   cdef_search_ctx: Pointer to the structure containing parameters
//   related to CDEF search context.
// Returns:
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
static void cdef_alloc_data(AV1_COMMON *cm, CdefSearchCtx *cdef_search_ctx) {
  const int nvfb = cdef_search_ctx->nvfb;
  const int nhfb = cdef_search_ctx->nhfb;
  CHECK_MEM_ERROR(
      cm, cdef_search_ctx->sb_index,
      aom_malloc(nvfb * nhfb * sizeof(cdef_search_ctx->sb_index[0])));
  cdef_search_ctx->sb_count = 0;
  CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[0],
                  aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb));
  CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[1],
                  aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb));
}
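
// Editor's note: sizeof(**cdef_search_ctx->mse) is TOTAL_STRENGTHS *
// sizeof(uint64_t), so each of the two mse planes allocated above holds one
// row of TOTAL_STRENGTHS accumulators per 64x64 filter block (nvfb * nhfb
// rows).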

// Deallocates the memory allocated for members of CdefSearchCtx.
// Inputs:
//   cdef_search_ctx: Pointer to the structure containing parameters
//   related to CDEF search context.
// Returns:
//   Nothing will be returned.
void av1_cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx) {
  if (cdef_search_ctx) {
    aom_free(cdef_search_ctx->mse[0]);
    cdef_search_ctx->mse[0] = NULL;
    aom_free(cdef_search_ctx->mse[1]);
    cdef_search_ctx->mse[1] = NULL;
    aom_free(cdef_search_ctx->sb_index);
    cdef_search_ctx->sb_index = NULL;
  }
}

// Initialize the parameters related to CDEF search context.
// Inputs:
//   frame: Pointer to compressed frame buffer
//   ref: Pointer to the frame buffer holding the source frame
//   cm: Pointer to top level common structure
//   xd: Pointer to common current coding block structure
//   cdef_search_ctx: Pointer to the structure containing parameters related to
//   CDEF search context.
//   pick_method: Search method used to select CDEF parameters
// Returns:
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
static inline void cdef_params_init(const YV12_BUFFER_CONFIG *frame,
                                    const YV12_BUFFER_CONFIG *ref,
                                    AV1_COMMON *cm, MACROBLOCKD *xd,
                                    CdefSearchCtx *cdef_search_ctx,
                                    CDEF_PICK_METHOD pick_method) {
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int num_planes = av1_num_planes(cm);
  cdef_search_ctx->mi_params = &cm->mi_params;
  cdef_search_ctx->ref = ref;
  cdef_search_ctx->nvfb =
      (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  cdef_search_ctx->nhfb =
      (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0);
  cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6);
  cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method];
  cdef_search_ctx->num_planes = num_planes;
  cdef_search_ctx->pick_method = pick_method;
  cdef_search_ctx->sb_count = 0;
  cdef_search_ctx->use_highbitdepth = cm->seq_params->use_highbitdepth;
  av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0,
                       num_planes);
  // Initialize plane wise information.
  for (int pli = 0; pli < num_planes; pli++) {
    cdef_search_ctx->xdec[pli] = xd->plane[pli].subsampling_x;
    cdef_search_ctx->ydec[pli] = xd->plane[pli].subsampling_y;
    cdef_search_ctx->bsize[pli] =
        cdef_search_ctx->ydec[pli]
            ? (cdef_search_ctx->xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
            : (cdef_search_ctx->xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
    cdef_search_ctx->mi_wide_l2[pli] =
        MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
    cdef_search_ctx->mi_high_l2[pli] =
        MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
    cdef_search_ctx->plane[pli] = xd->plane[pli];
  }
  // Function pointer initialization.
#if CONFIG_AV1_HIGHBITDEPTH
  if (cm->seq_params->use_highbitdepth) {
    cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_highbd;
    cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd;
  } else {
    cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd;
    cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
  }
#else
  cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd;
  cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
#endif
}
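
// Editor's note: hedged sketch (hypothetical helper, not used by the encoder).
// It restates the damping rule applied above and in av1_pick_cdef_from_qp():
// 3 for base_qindex 0..63, 4 for 64..127, 5 for 128..191, 6 for 192..255.
static inline int cdef_damping_from_qindex(int base_qindex) {
  return 3 + (base_qindex >> 6);
}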

void av1_pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef,
                           int is_screen_content) {
  const int bd = cm->seq_params->bit_depth;
  const int q =
      av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8);
  CdefInfo *const cdef_info = &cm->cdef_info;
  // Check the speed feature to avoid extra signaling.
  if (skip_cdef) {
    cdef_info->cdef_bits = 1;
    cdef_info->nb_cdef_strengths = 2;
  } else {
    cdef_info->cdef_bits = 0;
    cdef_info->nb_cdef_strengths = 1;
  }
  cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6);

  int predicted_y_f1 = 0;
  int predicted_y_f2 = 0;
  int predicted_uv_f1 = 0;
  int predicted_uv_f2 = 0;
  if (is_screen_content) {
    predicted_y_f1 =
        (int)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02);
    predicted_y_f2 =
        (int)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01);
    predicted_uv_f1 =
        (int)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01);
    predicted_uv_f2 =
        (int)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+0);
    predicted_y_f1 = clamp(predicted_y_f1, 0, 15);
    predicted_y_f2 = clamp(predicted_y_f2, 0, 3);
    predicted_uv_f1 = clamp(predicted_uv_f1, 0, 15);
    predicted_uv_f2 = clamp(predicted_uv_f2, 0, 3);
  } else {
    if (!frame_is_intra_only(cm)) {
      predicted_y_f1 = clamp((int)roundf(q * q * -0.0000023593946f +
                                         q * 0.0068615186f + 0.02709886f),
                             0, 15);
      predicted_y_f2 = clamp((int)roundf(q * q * -0.00000057629734f +
                                         q * 0.0013993345f + 0.03831067f),
                             0, 3);
      predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000007095069f +
                                          q * 0.0034628846f + 0.00887099f),
                              0, 15);
      predicted_uv_f2 = clamp((int)roundf(q * q * 0.00000023874085f +
                                          q * 0.00028223585f + 0.05576307f),
                              0, 3);
    } else {
      predicted_y_f1 = clamp(
          (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f),
          0, 15);
      predicted_y_f2 = clamp((int)roundf(q * q * 0.0000029167343f +
                                         q * 0.0027798624f + 0.0079405f),
                             0, 3);
      predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000130790995f +
                                          q * 0.012892405f - 0.00748388f),
                              0, 15);
      predicted_uv_f2 = clamp((int)roundf(q * q * 0.0000032651783f +
                                          q * 0.00035520183f + 0.00228092f),
                              0, 3);
    }
  }
  cdef_info->cdef_strengths[0] =
      predicted_y_f1 * CDEF_SEC_STRENGTHS + predicted_y_f2;
  cdef_info->cdef_uv_strengths[0] =
      predicted_uv_f1 * CDEF_SEC_STRENGTHS + predicted_uv_f2;

  // mbmi->cdef_strength is already set in the encoding stage. We don't need to
  // set it again here.
  if (skip_cdef) {
    cdef_info->cdef_strengths[1] = 0;
    cdef_info->cdef_uv_strengths[1] = 0;
    return;
  }

  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  MB_MODE_INFO **mbmi = mi_params->mi_grid_base;
  // mbmi is NULL when the real-time rate control library is used.
  if (!mbmi) return;
  for (int r = 0; r < nvfb; ++r) {
    for (int c = 0; c < nhfb; ++c) {
      MB_MODE_INFO *current_mbmi = mbmi[MI_SIZE_64X64 * c];
      current_mbmi->cdef_strength = 0;
    }
    mbmi += MI_SIZE_64X64 * mi_params->mi_stride;
  }
}
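
// Editor's note: hedged sketch (hypothetical helper, not used by the encoder).
// Shows how av1_pick_cdef_from_qp() combines a predicted primary/secondary
// pair into the packed strength written to cdef_strengths[0] above.
static inline int pack_cdef_strength_from_prediction(int pri, int sec) {
  return clamp(pri, 0, 15) * CDEF_SEC_STRENGTHS + clamp(sec, 0, 3);
}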

void av1_cdef_search(AV1_COMP *cpi) {
  AV1_COMMON *cm = &cpi->common;
  CDEF_CONTROL cdef_control = cpi->oxcf.tool_cfg.cdef_control;

  assert(cdef_control != CDEF_NONE);
  if (cdef_control == CDEF_REFERENCE && cpi->ppi->rtc_ref.non_reference_frame) {
    CdefInfo *const cdef_info = &cm->cdef_info;
    cdef_info->nb_cdef_strengths = 1;
    cdef_info->cdef_bits = 0;
    cdef_info->cdef_strengths[0] = 0;
    cdef_info->cdef_uv_strengths[0] = 0;
    return;
  }

  // Indicate if external RC is used for testing.
  const int rtc_ext_rc = cpi->rc.rtc_external_ratectrl;
  if (rtc_ext_rc) {
    av1_pick_cdef_from_qp(cm, 0, 0);
    return;
  }
  CDEF_PICK_METHOD pick_method = cpi->sf.lpf_sf.cdef_pick_method;
  if (pick_method == CDEF_PICK_FROM_Q) {
    const int use_screen_content_model =
        cm->quant_params.base_qindex >
            AOMMAX(cpi->sf.rt_sf.screen_content_cdef_filter_qindex_thresh,
                   cpi->rc.best_quality + 5) &&
        cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
    av1_pick_cdef_from_qp(cm, cpi->sf.rt_sf.skip_cdef_sb,
                          use_screen_content_model);
    return;
  }
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int damping = 3 + (cm->quant_params.base_qindex >> 6);
  const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
                    pick_method <= CDEF_FAST_SEARCH_LVL5);
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &cpi->td.mb.e_mbd;

  if (!cpi->cdef_search_ctx)
    CHECK_MEM_ERROR(cm, cpi->cdef_search_ctx,
                    aom_malloc(sizeof(*cpi->cdef_search_ctx)));
  CdefSearchCtx *cdef_search_ctx = cpi->cdef_search_ctx;

  // Initialize parameters related to CDEF search context.
  cdef_params_init(&cm->cur_frame->buf, cpi->source, cm, xd, cdef_search_ctx,
                   pick_method);
  // Allocate CDEF search context buffers.
  cdef_alloc_data(cm, cdef_search_ctx);
  // Frame level mse calculation.
  if (cpi->mt_info.num_workers > 1) {
    av1_cdef_mse_calc_frame_mt(cpi);
  } else {
    cdef_mse_calc_frame(cdef_search_ctx, cm->error);
  }

  /* Search over different numbers of signaling bits. */
  int nb_strength_bits = 0;
  uint64_t best_rd = UINT64_MAX;
  CdefInfo *const cdef_info = &cm->cdef_info;
  int sb_count = cdef_search_ctx->sb_count;
  uint64_t(*mse[2])[TOTAL_STRENGTHS];
  mse[0] = cdef_search_ctx->mse[0];
  mse[1] = cdef_search_ctx->mse[1];
  /* Calculate the maximum number of bits required to signal CDEF strengths at
   * block level */
  const int total_strengths = nb_cdef_strengths[pick_method];
  const int joint_strengths =
      num_planes > 1 ? total_strengths * total_strengths : total_strengths;
  const int max_signaling_bits =
      joint_strengths == 1 ? 0 : get_msb(joint_strengths - 1) + 1;
  int rdmult = cpi->td.mb.rdmult;
  for (int i = 0; i <= 3; i++) {
    if (i > max_signaling_bits) break;
    int best_lev0[CDEF_MAX_STRENGTHS] = { 0 };
    int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
    const int nb_strengths = 1 << i;
    uint64_t tot_mse;
    if (num_planes > 1) {
      tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
                                           mse, sb_count, pick_method);
    } else {
      tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
                                      pick_method);
    }

    const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS *
                                              (num_planes > 1 ? 2 : 1);
    const int rate_cost = av1_cost_literal(total_bits);
    const uint64_t dist = tot_mse * 16;
    const uint64_t rd = RDCOST(rdmult, rate_cost, dist);
    if (rd < best_rd) {
      best_rd = rd;
      nb_strength_bits = i;
      memcpy(cdef_info->cdef_strengths, best_lev0,
             nb_strengths * sizeof(best_lev0[0]));
      if (num_planes > 1) {
        memcpy(cdef_info->cdef_uv_strengths, best_lev1,
               nb_strengths * sizeof(best_lev1[0]));
      }
    }
  }
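
  // Editor's note: worked example of the cost above (values assumed purely for
  // illustration). With i = 2 signaling bits, sb_count = 100 and two planes,
  // total_bits = 100 * 2 + 4 * CDEF_STRENGTH_BITS * 2, and the distortion is
  // the summed MSE scaled by 16 before entering RDCOST().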

  cdef_info->cdef_bits = nb_strength_bits;
  cdef_info->nb_cdef_strengths = 1 << nb_strength_bits;
  for (int i = 0; i < sb_count; i++) {
    uint64_t best_mse = UINT64_MAX;
    int best_gi = 0;
    for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) {
      uint64_t curr = mse[0][i][cdef_info->cdef_strengths[gi]];
      if (num_planes > 1) curr += mse[1][i][cdef_info->cdef_uv_strengths[gi]];
      if (curr < best_mse) {
        best_gi = gi;
        best_mse = curr;
      }
    }
    mi_params->mi_grid_base[cdef_search_ctx->sb_index[i]]->cdef_strength =
        best_gi;
  }
  if (fast) {
    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
      const int luma_strength = cdef_info->cdef_strengths[j];
      const int chroma_strength = cdef_info->cdef_uv_strengths[j];
      int pri_strength, sec_strength;

      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_strengths[j], pick_method,
                                 luma_strength);
      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_uv_strengths[j], pick_method,
                                 chroma_strength);
    }
  }

  cdef_info->cdef_damping = damping;
  // Deallocate CDEF search context buffers.
  av1_cdef_dealloc_data(cdef_search_ctx);
}