xref: /aosp_15_r20/external/libaom/av1/encoder/pickcdef.c (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 #include <stdbool.h>
14 #include <string.h>
15 
16 #include "config/aom_dsp_rtcd.h"
17 #include "config/aom_scale_rtcd.h"
18 
19 #include "aom/aom_integer.h"
20 #include "av1/common/av1_common_int.h"
21 #include "av1/common/reconinter.h"
22 #include "av1/encoder/encoder.h"
23 #include "av1/encoder/ethread.h"
24 #include "av1/encoder/pickcdef.h"
25 #include "av1/encoder/mcomp.h"
26 
27 // Get primary and secondary filter strength for the given strength index and
28 // search method
get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,int * pri_strength,int * sec_strength,int strength_idx)29 static inline void get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,
30                                              int *pri_strength,
31                                              int *sec_strength,
32                                              int strength_idx) {
33   const int tot_sec_filter =
34       (pick_method == CDEF_FAST_SEARCH_LVL5)
35           ? REDUCED_SEC_STRENGTHS_LVL5
36           : ((pick_method >= CDEF_FAST_SEARCH_LVL3) ? REDUCED_SEC_STRENGTHS_LVL3
37                                                     : CDEF_SEC_STRENGTHS);
38   const int pri_idx = strength_idx / tot_sec_filter;
39   const int sec_idx = strength_idx % tot_sec_filter;
40   *pri_strength = pri_idx;
41   *sec_strength = sec_idx;
42   if (pick_method == CDEF_FULL_SEARCH) return;
43 
44   switch (pick_method) {
45     case CDEF_FAST_SEARCH_LVL1:
46       assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL1);
47       *pri_strength = priconv_lvl1[pri_idx];
48       break;
49     case CDEF_FAST_SEARCH_LVL2:
50       assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2);
51       *pri_strength = priconv_lvl2[pri_idx];
52       break;
53     case CDEF_FAST_SEARCH_LVL3:
54       assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2);
55       assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3);
56       *pri_strength = priconv_lvl2[pri_idx];
57       *sec_strength = secconv_lvl3[sec_idx];
58       break;
59     case CDEF_FAST_SEARCH_LVL4:
60       assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4);
61       assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3);
62       *pri_strength = priconv_lvl4[pri_idx];
63       *sec_strength = secconv_lvl3[sec_idx];
64       break;
65     case CDEF_FAST_SEARCH_LVL5:
66       assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4);
67       assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL5);
68       *pri_strength = priconv_lvl5[pri_idx];
69       *sec_strength = secconv_lvl5[sec_idx];
70       break;
71     default: assert(0 && "Invalid CDEF search method");
72   }
73 }
74 
// Store CDEF filter strength calculated from strength index for given search
// method. The result is packed as pri * CDEF_SEC_STRENGTHS + sec.
// NOTE: the expansion writes to variables named pri_strength and sec_strength,
// which must be declared as int in the scope where the macro is used.
#define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx) \
  do {                                                                       \
    get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength,   \
                              (strength_idx));                               \
    (cdef_strength) = pri_strength * CDEF_SEC_STRENGTHS + sec_strength;      \
  } while (0)
83 
84 /* Search for the best strength to add as an option, knowing we
85    already selected nb_strengths options. */
search_one(int * lev,int nb_strengths,uint64_t mse[][TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)86 static uint64_t search_one(int *lev, int nb_strengths,
87                            uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
88                            CDEF_PICK_METHOD pick_method) {
89   uint64_t tot_mse[TOTAL_STRENGTHS];
90   const int total_strengths = nb_cdef_strengths[pick_method];
91   int i, j;
92   uint64_t best_tot_mse = (uint64_t)1 << 63;
93   int best_id = 0;
94   memset(tot_mse, 0, sizeof(tot_mse));
95   for (i = 0; i < sb_count; i++) {
96     int gi;
97     uint64_t best_mse = (uint64_t)1 << 63;
98     /* Find best mse among already selected options. */
99     for (gi = 0; gi < nb_strengths; gi++) {
100       if (mse[i][lev[gi]] < best_mse) {
101         best_mse = mse[i][lev[gi]];
102       }
103     }
104     /* Find best mse when adding each possible new option. */
105     for (j = 0; j < total_strengths; j++) {
106       uint64_t best = best_mse;
107       if (mse[i][j] < best) best = mse[i][j];
108       tot_mse[j] += best;
109     }
110   }
111   for (j = 0; j < total_strengths; j++) {
112     if (tot_mse[j] < best_tot_mse) {
113       best_tot_mse = tot_mse[j];
114       best_id = j;
115     }
116   }
117   lev[nb_strengths] = best_id;
118   return best_tot_mse;
119 }
120 
121 /* Search for the best luma+chroma strength to add as an option, knowing we
122    already selected nb_strengths options. */
search_one_dual(int * lev0,int * lev1,int nb_strengths,uint64_t (** mse)[TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)123 static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
124                                 uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
125                                 CDEF_PICK_METHOD pick_method) {
126   uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
127   int i, j;
128   uint64_t best_tot_mse = (uint64_t)1 << 63;
129   int best_id0 = 0;
130   int best_id1 = 0;
131   const int total_strengths = nb_cdef_strengths[pick_method];
132   memset(tot_mse, 0, sizeof(tot_mse));
133   for (i = 0; i < sb_count; i++) {
134     int gi;
135     uint64_t best_mse = (uint64_t)1 << 63;
136     /* Find best mse among already selected options. */
137     for (gi = 0; gi < nb_strengths; gi++) {
138       uint64_t curr = mse[0][i][lev0[gi]];
139       curr += mse[1][i][lev1[gi]];
140       if (curr < best_mse) {
141         best_mse = curr;
142       }
143     }
144     /* Find best mse when adding each possible new option. */
145     for (j = 0; j < total_strengths; j++) {
146       int k;
147       for (k = 0; k < total_strengths; k++) {
148         uint64_t best = best_mse;
149         uint64_t curr = mse[0][i][j];
150         curr += mse[1][i][k];
151         if (curr < best) best = curr;
152         tot_mse[j][k] += best;
153       }
154     }
155   }
156   for (j = 0; j < total_strengths; j++) {
157     int k;
158     for (k = 0; k < total_strengths; k++) {
159       if (tot_mse[j][k] < best_tot_mse) {
160         best_tot_mse = tot_mse[j][k];
161         best_id0 = j;
162         best_id1 = k;
163       }
164     }
165   }
166   lev0[nb_strengths] = best_id0;
167   lev1[nb_strengths] = best_id1;
168   return best_tot_mse;
169 }
170 
171 /* Search for the set of strengths that minimizes mse. */
joint_strength_search(int * best_lev,int nb_strengths,uint64_t mse[][TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)172 static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
173                                       uint64_t mse[][TOTAL_STRENGTHS],
174                                       int sb_count,
175                                       CDEF_PICK_METHOD pick_method) {
176   uint64_t best_tot_mse;
177   int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
178               pick_method <= CDEF_FAST_SEARCH_LVL5);
179   int i;
180   best_tot_mse = (uint64_t)1 << 63;
181   /* Greedy search: add one strength options at a time. */
182   for (i = 0; i < nb_strengths; i++) {
183     best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method);
184   }
185   /* Trying to refine the greedy search by reconsidering each
186      already-selected option. */
187   if (!fast) {
188     for (i = 0; i < 4 * nb_strengths; i++) {
189       int j;
190       for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
191       best_tot_mse =
192           search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method);
193     }
194   }
195   return best_tot_mse;
196 }
197 
198 /* Search for the set of luma+chroma strengths that minimizes mse. */
joint_strength_search_dual(int * best_lev0,int * best_lev1,int nb_strengths,uint64_t (** mse)[TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)199 static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
200                                            int nb_strengths,
201                                            uint64_t (**mse)[TOTAL_STRENGTHS],
202                                            int sb_count,
203                                            CDEF_PICK_METHOD pick_method) {
204   uint64_t best_tot_mse;
205   int i;
206   best_tot_mse = (uint64_t)1 << 63;
207   /* Greedy search: add one strength options at a time. */
208   for (i = 0; i < nb_strengths; i++) {
209     best_tot_mse =
210         search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method);
211   }
212   /* Trying to refine the greedy search by reconsidering each
213      already-selected option. */
214   for (i = 0; i < 4 * nb_strengths; i++) {
215     int j;
216     for (j = 0; j < nb_strengths - 1; j++) {
217       best_lev0[j] = best_lev0[j + 1];
218       best_lev1[j] = best_lev1[j + 1];
219     }
220     best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
221                                    sb_count, pick_method);
222   }
223   return best_tot_mse;
224 }
225 
// Derives the geometry of one filter unit of size bsize.
//   src_stride:  output, stride of the packed source buffer (equals the block
//                width).
//   width/height: outputs, block dimensions in pixels.
//   width_log2/height_log2: outputs, log2 of the block dimensions in pixels.
static inline void init_src_params(int *src_stride, int *width, int *height,
                                   int *width_log2, int *height_log2,
                                   BLOCK_SIZE bsize) {
  *src_stride = block_size_wide[bsize];
  *width = block_size_wide[bsize];
  *height = block_size_high[bsize];
  *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
  // The height must come from the "high" table: for non-square units
  // (BLOCK_4X8, BLOCK_8X4) the wide table would give the wrong log2 and the
  // callers' row and packed-block offsets would be miscomputed.
  *height_log2 = MI_SIZE_LOG2 + mi_size_high_log2[bsize];
}
235 #if CONFIG_AV1_HIGHBITDEPTH
236 /* Compute MSE only on the blocks we filtered. */
compute_cdef_dist_highbd(void * dst,int dstride,uint16_t * src,cdef_list * dlist,int cdef_count,BLOCK_SIZE bsize,int coeff_shift,int row,int col)237 static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src,
238                                          cdef_list *dlist, int cdef_count,
239                                          BLOCK_SIZE bsize, int coeff_shift,
240                                          int row, int col) {
241   assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
242          bsize == BLOCK_8X8);
243   uint64_t sum = 0;
244   int bi, bx, by;
245   uint16_t *dst16 = CONVERT_TO_SHORTPTR((uint8_t *)dst);
246   uint16_t *dst_buff = &dst16[row * dstride + col];
247   int src_stride, width, height, width_log2, height_log2;
248   init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
249                   bsize);
250   for (bi = 0; bi < cdef_count; bi++) {
251     by = dlist[bi].by;
252     bx = dlist[bi].bx;
253     sum += aom_mse_wxh_16bit_highbd(
254         &dst_buff[(by << height_log2) * dstride + (bx << width_log2)], dstride,
255         &src[bi << (height_log2 + width_log2)], src_stride, width, height);
256   }
257   return sum >> 2 * coeff_shift;
258 }
259 #endif
260 
261 // Checks dual and quad block processing is applicable for block widths 8 and 4
262 // respectively.
is_dual_or_quad_applicable(cdef_list * dlist,int width,int cdef_count,int bi,int iter)263 static inline int is_dual_or_quad_applicable(cdef_list *dlist, int width,
264                                              int cdef_count, int bi, int iter) {
265   assert(width == 8 || width == 4);
266   const int blk_offset = (width == 8) ? 1 : 3;
267   if ((iter + blk_offset) >= cdef_count) return 0;
268 
269   if (dlist[bi].by == dlist[bi + blk_offset].by &&
270       dlist[bi].bx + blk_offset == dlist[bi + blk_offset].bx)
271     return 1;
272 
273   return 0;
274 }
275 
// Compute MSE only on the blocks we filtered (8-bit destination variant).
//   dst:     8-bit frame buffer.
//   src:     16-bit blocks packed back to back, one per dlist entry.
//   dlist:   positions (by, bx) of the cdef_count filtered blocks.
//   row/col: pixel position of the filter block inside dst.
// Horizontally adjacent blocks are handled together in one 16-pixel-wide call
// when is_dual_or_quad_applicable() allows it.
// Returns the accumulated error shifted right by 2 * coeff_shift.
static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src,
                                  cdef_list *dlist, int cdef_count,
                                  BLOCK_SIZE bsize, int coeff_shift, int row,
                                  int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);
  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);

  uint8_t *const fb_origin = (uint8_t *)dst + (row * dstride + col);
  // log2 of the number of samples in one packed source block.
  const int blk_shift = height_log2 + width_log2;
  // Number of blocks covered by one 16-pixel-wide call.
  const int blks_per_group = 16 / width;

  uint64_t sum = 0;
  int bi = 0;
  while (bi < cdef_count) {
    const int by = dlist[bi].by;
    const int bx = dlist[bi].bx;
    uint16_t *const src_blk = &src[bi << blk_shift];
    uint8_t *const dst_blk =
        &fb_origin[(by << height_log2) * dstride + (bx << width_log2)];

    // The running count of processed blocks always equals bi, so bi is
    // passed for both the list index and the iteration count.
    int step;
    if (is_dual_or_quad_applicable(dlist, width, cdef_count, bi, bi)) {
      sum += aom_mse_16xh_16bit(dst_blk, dstride, src_blk, width, height);
      step = blks_per_group;
    } else {
      sum += aom_mse_wxh_16bit(dst_blk, dstride, src_blk, src_stride, width,
                               height);
      step = 1;
    }
    bi += step;
  }

  return sum >> (2 * coeff_shift);
}
314 
315 // Fill the boundary regions of the block with CDEF_VERY_LARGE, only if the
316 // region is outside frame boundary
fill_borders_for_fbs_on_frame_boundary(uint16_t * inbuf,int hfilt_size,int vfilt_size,bool is_fb_on_frm_left_boundary,bool is_fb_on_frm_right_boundary,bool is_fb_on_frm_top_boundary,bool is_fb_on_frm_bottom_boundary)317 static inline void fill_borders_for_fbs_on_frame_boundary(
318     uint16_t *inbuf, int hfilt_size, int vfilt_size,
319     bool is_fb_on_frm_left_boundary, bool is_fb_on_frm_right_boundary,
320     bool is_fb_on_frm_top_boundary, bool is_fb_on_frm_bottom_boundary) {
321   if (!is_fb_on_frm_left_boundary && !is_fb_on_frm_right_boundary &&
322       !is_fb_on_frm_top_boundary && !is_fb_on_frm_bottom_boundary)
323     return;
324   if (is_fb_on_frm_bottom_boundary) {
325     // Fill bottom region of the block
326     const int buf_offset =
327         (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + CDEF_HBORDER;
328     fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
329               CDEF_VERY_LARGE);
330   }
331   if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_left_boundary) {
332     const int buf_offset = (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE;
333     // Fill bottom-left region of the block
334     fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
335               CDEF_VERY_LARGE);
336   }
337   if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_right_boundary) {
338     const int buf_offset =
339         (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + hfilt_size + CDEF_HBORDER;
340     // Fill bottom-right region of the block
341     fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
342               CDEF_VERY_LARGE);
343   }
344   if (is_fb_on_frm_top_boundary) {
345     // Fill top region of the block
346     fill_rect(&inbuf[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
347               CDEF_VERY_LARGE);
348   }
349   if (is_fb_on_frm_top_boundary || is_fb_on_frm_left_boundary) {
350     // Fill top-left region of the block
351     fill_rect(inbuf, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
352   }
353   if (is_fb_on_frm_top_boundary || is_fb_on_frm_right_boundary) {
354     const int buf_offset = hfilt_size + CDEF_HBORDER;
355     // Fill top-right region of the block
356     fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
357               CDEF_VERY_LARGE);
358   }
359   if (is_fb_on_frm_left_boundary) {
360     const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
361     // Fill left region of the block
362     fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, vfilt_size, CDEF_HBORDER,
363               CDEF_VERY_LARGE);
364   }
365   if (is_fb_on_frm_right_boundary) {
366     const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
367     // Fill right region of the block
368     fill_rect(&inbuf[buf_offset + hfilt_size + CDEF_HBORDER], CDEF_BSTRIDE,
369               vfilt_size, CDEF_HBORDER, CDEF_VERY_LARGE);
370   }
371 }
372 
373 // Calculate the number of 8x8/4x4 filter units for which SSE can be calculated
374 // after CDEF filtering in single function call
get_error_calc_width_in_filt_units(cdef_list * dlist,int cdef_count,int bi,int subsampling_x,int subsampling_y)375 static AOM_FORCE_INLINE int get_error_calc_width_in_filt_units(
376     cdef_list *dlist, int cdef_count, int bi, int subsampling_x,
377     int subsampling_y) {
378   // TODO(Ranjit): Extend the optimization for 422
379   if (subsampling_x != subsampling_y) return 1;
380 
381   // Combining more blocks seems to increase encode time due to increase in
382   // control code
383   if (bi + 3 < cdef_count && dlist[bi].by == dlist[bi + 3].by &&
384       dlist[bi].bx + 3 == dlist[bi + 3].bx) {
385     /* Calculate error for four 8x8/4x4 blocks using 32x8/16x4 block specific
386      * logic if y co-ordinates match and x co-ordinates are
387      * separated by 3 for first and fourth 8x8/4x4 blocks in dlist[]. */
388     return 4;
389   }
390   if (bi + 1 < cdef_count && dlist[bi].by == dlist[bi + 1].by &&
391       dlist[bi].bx + 1 == dlist[bi + 1].bx) {
392     /* Calculate error for two 8x8/4x4 blocks using 16x8/8x4 block specific
393      * logic if their y co-ordinates match and x co-ordinates are
394      * separated by 1 for first and second 8x8/4x4 blocks in dlist[]. */
395     return 2;
396   }
397   return 1;
398 }
399 
400 // Returns the block error after CDEF filtering for a given strength
get_filt_error(const CdefSearchCtx * cdef_search_ctx,const struct macroblockd_plane * pd,cdef_list * dlist,int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],int * dirinit,int var[CDEF_NBLOCKS][CDEF_NBLOCKS],uint16_t * in,uint8_t * ref_buffer,int ref_stride,int row,int col,int pri_strength,int sec_strength,int cdef_count,int pli,int coeff_shift,BLOCK_SIZE bs)401 static inline uint64_t get_filt_error(
402     const CdefSearchCtx *cdef_search_ctx, const struct macroblockd_plane *pd,
403     cdef_list *dlist, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS], int *dirinit,
404     int var[CDEF_NBLOCKS][CDEF_NBLOCKS], uint16_t *in, uint8_t *ref_buffer,
405     int ref_stride, int row, int col, int pri_strength, int sec_strength,
406     int cdef_count, int pli, int coeff_shift, BLOCK_SIZE bs) {
407   uint64_t curr_sse = 0;
408   const BLOCK_SIZE plane_bsize =
409       get_plane_block_size(bs, pd->subsampling_x, pd->subsampling_y);
410   const int bw_log2 = 3 - pd->subsampling_x;
411   const int bh_log2 = 3 - pd->subsampling_y;
412 
413   // TODO(Ranjit): Extend this optimization for HBD
414   if (!cdef_search_ctx->use_highbitdepth) {
415     // If all 8x8/4x4 blocks in CDEF block need to be filtered, calculate the
416     // error at CDEF block level
417     const int tot_blk_count =
418         (block_size_wide[plane_bsize] * block_size_high[plane_bsize]) >>
419         (bw_log2 + bh_log2);
420     if (cdef_count == tot_blk_count) {
421       // Calculate the offset in the buffer based on block position
422       const FULLPEL_MV this_mv = { row, col };
423       const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
424       if (pri_strength == 0 && sec_strength == 0) {
425         // When CDEF strength is zero, filtering is not applied. Hence
426         // error is calculated between source and unfiltered pixels
427         curr_sse =
428             aom_sse(&ref_buffer[buf_offset], ref_stride,
429                     get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride,
430                     block_size_wide[plane_bsize], block_size_high[plane_bsize]);
431       } else {
432         DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]);
433 
434         av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in,
435                            cdef_search_ctx->xdec[pli],
436                            cdef_search_ctx->ydec[pli], dir, dirinit, var, pli,
437                            dlist, cdef_count, pri_strength,
438                            sec_strength + (sec_strength == 3),
439                            cdef_search_ctx->damping, coeff_shift);
440         curr_sse =
441             aom_sse(&ref_buffer[buf_offset], ref_stride, tmp_dst8,
442                     (1 << MAX_SB_SIZE_LOG2), block_size_wide[plane_bsize],
443                     block_size_high[plane_bsize]);
444       }
445     } else {
446       // If few 8x8/4x4 blocks in CDEF block need to be filtered, filtering
447       // functions produce 8-bit output and the error is calculated in 8-bit
448       // domain
449       if (pri_strength == 0 && sec_strength == 0) {
450         int num_error_calc_filt_units = 1;
451         for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) {
452           const uint8_t by = dlist[bi].by;
453           const uint8_t bx = dlist[bi].bx;
454           const int16_t by_pos = (by << bh_log2);
455           const int16_t bx_pos = (bx << bw_log2);
456           // Calculate the offset in the buffer based on block position
457           const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos };
458           const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
459           num_error_calc_filt_units = get_error_calc_width_in_filt_units(
460               dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y);
461           curr_sse += aom_sse(
462               &ref_buffer[buf_offset], ref_stride,
463               get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride,
464               num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2));
465         }
466       } else {
467         DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]);
468         av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in,
469                            cdef_search_ctx->xdec[pli],
470                            cdef_search_ctx->ydec[pli], dir, dirinit, var, pli,
471                            dlist, cdef_count, pri_strength,
472                            sec_strength + (sec_strength == 3),
473                            cdef_search_ctx->damping, coeff_shift);
474         int num_error_calc_filt_units = 1;
475         for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) {
476           const uint8_t by = dlist[bi].by;
477           const uint8_t bx = dlist[bi].bx;
478           const int16_t by_pos = (by << bh_log2);
479           const int16_t bx_pos = (bx << bw_log2);
480           // Calculate the offset in the buffer based on block position
481           const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos };
482           const FULLPEL_MV tmp_buf_pos = { by_pos, bx_pos };
483           const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
484           const int tmp_buf_offset =
485               get_offset_from_fullmv(&tmp_buf_pos, (1 << MAX_SB_SIZE_LOG2));
486           num_error_calc_filt_units = get_error_calc_width_in_filt_units(
487               dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y);
488           curr_sse += aom_sse(
489               &ref_buffer[buf_offset], ref_stride, &tmp_dst8[tmp_buf_offset],
490               (1 << MAX_SB_SIZE_LOG2),
491               num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2));
492         }
493       }
494     }
495   } else {
496     DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
497 
498     av1_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in,
499                        cdef_search_ctx->xdec[pli], cdef_search_ctx->ydec[pli],
500                        dir, dirinit, var, pli, dlist, cdef_count, pri_strength,
501                        sec_strength + (sec_strength == 3),
502                        cdef_search_ctx->damping, coeff_shift);
503     curr_sse = cdef_search_ctx->compute_cdef_dist_fn(
504         ref_buffer, ref_stride, tmp_dst, dlist, cdef_count,
505         cdef_search_ctx->bsize[pli], coeff_shift, row, col);
506   }
507   return curr_sse;
508 }
509 
510 // Calculates MSE at block level.
511 // Inputs:
512 //   cdef_search_ctx: Pointer to the structure containing parameters related to
513 //   CDEF search context.
514 //   fbr: Row index in units of 64x64 block
515 //   fbc: Column index in units of 64x64 block
516 // Returns:
517 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
av1_cdef_mse_calc_block(CdefSearchCtx * cdef_search_ctx,struct aom_internal_error_info * error_info,int fbr,int fbc,int sb_count)518 void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx,
519                              struct aom_internal_error_info *error_info,
520                              int fbr, int fbc, int sb_count) {
521   // TODO(aomedia:3276): Pass error_info to the low-level functions as required
522   // in future to handle error propagation.
523   (void)error_info;
524   const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params;
525   const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref;
526   const int coeff_shift = cdef_search_ctx->coeff_shift;
527   const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2;
528   const int *mi_high_l2 = cdef_search_ctx->mi_high_l2;
529 
530   // Declare and initialize the temporary buffers.
531   DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
532   cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
533   int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
534   int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
535   uint16_t *const in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
536   int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
537   int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
538   int hb_step = 1, vb_step = 1;
539   BLOCK_SIZE bs;
540 
541   const MB_MODE_INFO *const mbmi =
542       mi_params->mi_grid_base[MI_SIZE_64X64 * fbr * mi_params->mi_stride +
543                               MI_SIZE_64X64 * fbc];
544 
545   uint8_t *ref_buffer[MAX_MB_PLANE] = { ref->y_buffer, ref->u_buffer,
546                                         ref->v_buffer };
547   int ref_stride[MAX_MB_PLANE] = { ref->y_stride, ref->uv_stride,
548                                    ref->uv_stride };
549 
550   if (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64 ||
551       mbmi->bsize == BLOCK_64X128) {
552     bs = mbmi->bsize;
553     if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
554       nhb = AOMMIN(MI_SIZE_128X128, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
555       hb_step = 2;
556     }
557     if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
558       nvb = AOMMIN(MI_SIZE_128X128, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
559       vb_step = 2;
560     }
561   } else {
562     bs = BLOCK_64X64;
563   }
564   // Get number of 8x8 blocks which are not skip. Cdef processing happens for
565   // 8x8 blocks which are not skip.
566   const int cdef_count = av1_cdef_compute_sb_list(
567       mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);
568   const bool is_fb_on_frm_left_boundary = (fbc == 0);
569   const bool is_fb_on_frm_right_boundary =
570       (fbc + hb_step == cdef_search_ctx->nhfb);
571   const bool is_fb_on_frm_top_boundary = (fbr == 0);
572   const bool is_fb_on_frm_bottom_boundary =
573       (fbr + vb_step == cdef_search_ctx->nvfb);
574   const int yoff = CDEF_VBORDER * (!is_fb_on_frm_top_boundary);
575   const int xoff = CDEF_HBORDER * (!is_fb_on_frm_left_boundary);
576   int dirinit = 0;
577   for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) {
578     /* We avoid filtering the pixels for which some of the pixels to
579     average are outside the frame. We could change the filter instead,
580     but it would add special cases for any future vectorization. */
581     const int hfilt_size = (nhb << mi_wide_l2[pli]);
582     const int vfilt_size = (nvb << mi_high_l2[pli]);
583     const int ysize =
584         vfilt_size + CDEF_VBORDER * (!is_fb_on_frm_bottom_boundary) + yoff;
585     const int xsize =
586         hfilt_size + CDEF_HBORDER * (!is_fb_on_frm_right_boundary) + xoff;
587     const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli];
588     const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
589     struct macroblockd_plane pd = cdef_search_ctx->plane[pli];
590     cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
591                              pd.dst.buf, row - yoff, col - xoff, pd.dst.stride,
592                              ysize, xsize);
593     fill_borders_for_fbs_on_frame_boundary(
594         inbuf, hfilt_size, vfilt_size, is_fb_on_frm_left_boundary,
595         is_fb_on_frm_right_boundary, is_fb_on_frm_top_boundary,
596         is_fb_on_frm_bottom_boundary);
597     for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) {
598       int pri_strength, sec_strength;
599       get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength,
600                                 &sec_strength, gi);
601       const uint64_t curr_mse = get_filt_error(
602           cdef_search_ctx, &pd, dlist, dir, &dirinit, var, in, ref_buffer[pli],
603           ref_stride[pli], row, col, pri_strength, sec_strength, cdef_count,
604           pli, coeff_shift, bs);
605       if (pli < 2)
606         cdef_search_ctx->mse[pli][sb_count][gi] = curr_mse;
607       else
608         cdef_search_ctx->mse[1][sb_count][gi] += curr_mse;
609     }
610   }
611   cdef_search_ctx->sb_index[sb_count] =
612       MI_SIZE_64X64 * fbr * mi_params->mi_stride + MI_SIZE_64X64 * fbc;
613 }
614 
615 // MSE calculation at frame level.
616 // Inputs:
617 //   cdef_search_ctx: Pointer to the structure containing parameters related to
618 //   CDEF search context.
619 // Returns:
620 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_mse_calc_frame(CdefSearchCtx * cdef_search_ctx,struct aom_internal_error_info * error_info)621 static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx,
622                                 struct aom_internal_error_info *error_info) {
623   // Loop over each sb.
624   for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) {
625     for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) {
626       // Checks if cdef processing can be skipped for particular sb.
627       if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue;
628       // Calculate mse for each sb and store the relevant sb index.
629       av1_cdef_mse_calc_block(cdef_search_ctx, error_info, fbr, fbc,
630                               cdef_search_ctx->sb_count);
631       cdef_search_ctx->sb_count++;
632     }
633   }
634 }
635 
636 // Allocates memory for members of CdefSearchCtx.
637 // Inputs:
638 //   cdef_search_ctx: Pointer to the structure containing parameters
639 //   related to CDEF search context.
640 // Returns:
641 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_alloc_data(AV1_COMMON * cm,CdefSearchCtx * cdef_search_ctx)642 static void cdef_alloc_data(AV1_COMMON *cm, CdefSearchCtx *cdef_search_ctx) {
643   const int nvfb = cdef_search_ctx->nvfb;
644   const int nhfb = cdef_search_ctx->nhfb;
645   CHECK_MEM_ERROR(
646       cm, cdef_search_ctx->sb_index,
647       aom_malloc(nvfb * nhfb * sizeof(cdef_search_ctx->sb_index[0])));
648   cdef_search_ctx->sb_count = 0;
649   CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[0],
650                   aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb));
651   CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[1],
652                   aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb));
653 }
654 
655 // Deallocates the memory allocated for members of CdefSearchCtx.
656 // Inputs:
657 //   cdef_search_ctx: Pointer to the structure containing parameters
658 //   related to CDEF search context.
659 // Returns:
660 //   Nothing will be returned.
av1_cdef_dealloc_data(CdefSearchCtx * cdef_search_ctx)661 void av1_cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx) {
662   if (cdef_search_ctx) {
663     aom_free(cdef_search_ctx->mse[0]);
664     cdef_search_ctx->mse[0] = NULL;
665     aom_free(cdef_search_ctx->mse[1]);
666     cdef_search_ctx->mse[1] = NULL;
667     aom_free(cdef_search_ctx->sb_index);
668     cdef_search_ctx->sb_index = NULL;
669   }
670 }
671 
672 // Initialize the parameters related to CDEF search context.
673 // Inputs:
674 //   frame: Pointer to compressed frame buffer
675 //   ref: Pointer to the frame buffer holding the source frame
676 //   cm: Pointer to top level common structure
677 //   xd: Pointer to common current coding block structure
678 //   cdef_search_ctx: Pointer to the structure containing parameters related to
679 //   CDEF search context.
680 //   pick_method: Search method used to select CDEF parameters
681 // Returns:
682 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_params_init(const YV12_BUFFER_CONFIG * frame,const YV12_BUFFER_CONFIG * ref,AV1_COMMON * cm,MACROBLOCKD * xd,CdefSearchCtx * cdef_search_ctx,CDEF_PICK_METHOD pick_method)683 static inline void cdef_params_init(const YV12_BUFFER_CONFIG *frame,
684                                     const YV12_BUFFER_CONFIG *ref,
685                                     AV1_COMMON *cm, MACROBLOCKD *xd,
686                                     CdefSearchCtx *cdef_search_ctx,
687                                     CDEF_PICK_METHOD pick_method) {
688   const CommonModeInfoParams *const mi_params = &cm->mi_params;
689   const int num_planes = av1_num_planes(cm);
690   cdef_search_ctx->mi_params = &cm->mi_params;
691   cdef_search_ctx->ref = ref;
692   cdef_search_ctx->nvfb =
693       (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
694   cdef_search_ctx->nhfb =
695       (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
696   cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0);
697   cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6);
698   cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method];
699   cdef_search_ctx->num_planes = num_planes;
700   cdef_search_ctx->pick_method = pick_method;
701   cdef_search_ctx->sb_count = 0;
702   cdef_search_ctx->use_highbitdepth = cm->seq_params->use_highbitdepth;
703   av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0,
704                        num_planes);
705   // Initialize plane wise information.
706   for (int pli = 0; pli < num_planes; pli++) {
707     cdef_search_ctx->xdec[pli] = xd->plane[pli].subsampling_x;
708     cdef_search_ctx->ydec[pli] = xd->plane[pli].subsampling_y;
709     cdef_search_ctx->bsize[pli] =
710         cdef_search_ctx->ydec[pli]
711             ? (cdef_search_ctx->xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
712             : (cdef_search_ctx->xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
713     cdef_search_ctx->mi_wide_l2[pli] =
714         MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
715     cdef_search_ctx->mi_high_l2[pli] =
716         MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
717     cdef_search_ctx->plane[pli] = xd->plane[pli];
718   }
719   // Function pointer initialization.
720 #if CONFIG_AV1_HIGHBITDEPTH
721   if (cm->seq_params->use_highbitdepth) {
722     cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_highbd;
723     cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd;
724   } else {
725     cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd;
726     cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
727   }
728 #else
729   cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd;
730   cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
731 #endif
732 }
733 
// Derives the frame CDEF strengths directly from the quantizer instead of
// searching for them.
// Inputs:
//   cm: Pointer to top level common structure; cm->cdef_info is written.
//   skip_cdef: When non-zero, two strengths are signaled (the predicted one
//   and zero) and the per-block cdef_strength values are left untouched.
//   is_screen_content: When non-zero, the screen content model is used to
//   predict the strengths.
// Returns:
//   Nothing will be returned. Contents of cm->cdef_info will be modified, and
//   unless skip_cdef is set, cdef_strength of the first mode info of every
//   64x64 block is reset to 0.
void av1_pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef,
                           int is_screen_content) {
  CdefInfo *const cdef_info = &cm->cdef_info;
  const int bd = cm->seq_params->bit_depth;
  // AC quantizer step, scaled down by the bit depth in excess of 8.
  const int q =
      av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8);

  // Check the speed feature to avoid extra signaling.
  cdef_info->cdef_bits = skip_cdef ? 1 : 0;
  cdef_info->nb_cdef_strengths = skip_cdef ? 2 : 1;
  cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6);

  // Predicted primary (f1) and secondary (f2) strengths for luma and chroma,
  // each a quadratic in q. One set of coefficients per content / frame type.
  int luma_pri = 0;
  int luma_sec = 0;
  int chroma_pri = 0;
  int chroma_sec = 0;
  if (is_screen_content) {
    luma_pri = clamp(
        (int)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02), 0,
        15);
    luma_sec = clamp(
        (int)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01),
        0, 3);
    chroma_pri = clamp(
        (int)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01),
        0, 15);
    chroma_sec = clamp(
        (int)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+0), 0,
        3);
  } else if (!frame_is_intra_only(cm)) {
    luma_pri = clamp((int)roundf(q * q * -0.0000023593946f +
                                 q * 0.0068615186f + 0.02709886f),
                     0, 15);
    luma_sec = clamp((int)roundf(q * q * -0.00000057629734f +
                                 q * 0.0013993345f + 0.03831067f),
                     0, 3);
    chroma_pri = clamp((int)roundf(q * q * -0.0000007095069f +
                                   q * 0.0034628846f + 0.00887099f),
                       0, 15);
    chroma_sec = clamp((int)roundf(q * q * 0.00000023874085f +
                                   q * 0.00028223585f + 0.05576307f),
                       0, 3);
  } else {
    luma_pri = clamp(
        (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f),
        0, 15);
    luma_sec = clamp((int)roundf(q * q * 0.0000029167343f +
                                 q * 0.0027798624f + 0.0079405f),
                     0, 3);
    chroma_pri = clamp((int)roundf(q * q * -0.0000130790995f +
                                   q * 0.012892405f - 0.00748388f),
                       0, 15);
    chroma_sec = clamp((int)roundf(q * q * 0.0000032651783f +
                                   q * 0.00035520183f + 0.00228092f),
                       0, 3);
  }

  // Pack primary and secondary strength into a single strength value.
  cdef_info->cdef_strengths[0] = luma_pri * CDEF_SEC_STRENGTHS + luma_sec;
  cdef_info->cdef_uv_strengths[0] =
      chroma_pri * CDEF_SEC_STRENGTHS + chroma_sec;

  // mbmi->cdef_strength is already set in the encoding stage. We don't need to
  // set it again here.
  if (skip_cdef) {
    cdef_info->cdef_strengths[1] = 0;
    cdef_info->cdef_uv_strengths[1] = 0;
    return;
  }

  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  MB_MODE_INFO **sb_row_base = mi_params->mi_grid_base;
  // The grid is NULL when real-time rate control library is used.
  if (sb_row_base != NULL) {
    const int sb_row_step = MI_SIZE_64X64 * mi_params->mi_stride;
    for (int sb_row = 0; sb_row < nvfb; ++sb_row) {
      // Only one strength is signaled, so every 64x64 block selects index 0.
      for (int sb_col = 0; sb_col < nhfb; ++sb_col) {
        sb_row_base[MI_SIZE_64X64 * sb_col]->cdef_strength = 0;
      }
      sb_row_base += sb_row_step;
    }
  }
}
823 
// Selects the CDEF parameters of the current frame.
// Depending on the encoder configuration the strengths are either forced to
// zero, predicted from the quantizer (av1_pick_cdef_from_qp), or found by an
// RD search over the per-block MSE of every candidate strength.
// Inputs:
//   cpi: Pointer to the top level encoder structure.
// Returns:
//   Nothing will be returned. cm->cdef_info is filled in and, when the search
//   is run, cdef_strength of every searched 64x64 block is set to the index
//   of its best strength pair.
void av1_cdef_search(AV1_COMP *cpi) {
  AV1_COMMON *cm = &cpi->common;
  CdefInfo *const cdef_info = &cm->cdef_info;
  const CDEF_CONTROL cdef_control = cpi->oxcf.tool_cfg.cdef_control;

  assert(cdef_control != CDEF_NONE);
  // CDEF restricted to reference frames: signal a single zero strength for
  // non-reference frames.
  if (cdef_control == CDEF_REFERENCE && cpi->ppi->rtc_ref.non_reference_frame) {
    cdef_info->cdef_bits = 0;
    cdef_info->nb_cdef_strengths = 1;
    cdef_info->cdef_strengths[0] = 0;
    cdef_info->cdef_uv_strengths[0] = 0;
    return;
  }

  // External RC is used for testing: always pick from the quantizer.
  if (cpi->rc.rtc_external_ratectrl) {
    av1_pick_cdef_from_qp(cm, 0, 0);
    return;
  }

  const CDEF_PICK_METHOD pick_method = cpi->sf.lpf_sf.cdef_pick_method;
  if (pick_method == CDEF_PICK_FROM_Q) {
    const int screen_qindex_thresh =
        AOMMAX(cpi->sf.rt_sf.screen_content_cdef_filter_qindex_thresh,
               cpi->rc.best_quality + 5);
    const int use_screen_content_model =
        cm->quant_params.base_qindex > screen_qindex_thresh &&
        cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
    av1_pick_cdef_from_qp(cm, cpi->sf.rt_sf.skip_cdef_sb,
                          use_screen_content_model);
    return;
  }

  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int damping = 3 + (cm->quant_params.base_qindex >> 6);
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &cpi->td.mb.e_mbd;

  // The search context is created on first use and kept in cpi afterwards.
  if (!cpi->cdef_search_ctx)
    CHECK_MEM_ERROR(cm, cpi->cdef_search_ctx,
                    aom_malloc(sizeof(*cpi->cdef_search_ctx)));
  CdefSearchCtx *cdef_search_ctx = cpi->cdef_search_ctx;

  // Set up the context, allocate its buffers and compute the MSE of every
  // candidate strength for every block (multi-threaded when possible).
  cdef_params_init(&cm->cur_frame->buf, cpi->source, cm, xd, cdef_search_ctx,
                   pick_method);
  cdef_alloc_data(cm, cdef_search_ctx);
  if (cpi->mt_info.num_workers > 1) {
    av1_cdef_mse_calc_frame_mt(cpi);
  } else {
    cdef_mse_calc_frame(cdef_search_ctx, cm->error);
  }

  const int sb_count = cdef_search_ctx->sb_count;
  uint64_t(*mse[2])[TOTAL_STRENGTHS];
  mse[0] = cdef_search_ctx->mse[0];
  mse[1] = cdef_search_ctx->mse[1];

  /* Calculate the maximum number of bits required to signal CDEF strengths at
   * block level */
  const int total_strengths = nb_cdef_strengths[pick_method];
  const int joint_strengths =
      num_planes > 1 ? total_strengths * total_strengths : total_strengths;
  const int max_signaling_bits =
      joint_strengths == 1 ? 0 : get_msb(joint_strengths - 1) + 1;
  const int strength_sets = num_planes > 1 ? 2 : 1;
  int rdmult = cpi->td.mb.rdmult;

  /* Search for different number of signaling bits. */
  int nb_strength_bits = 0;
  uint64_t best_rd = UINT64_MAX;
  for (int bits = 0; bits <= 3 && bits <= max_signaling_bits; bits++) {
    int best_lev0[CDEF_MAX_STRENGTHS] = { 0 };
    int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
    const int nb_strengths = 1 << bits;
    const uint64_t tot_mse =
        num_planes > 1
            ? joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
                                         mse, sb_count, pick_method)
            : joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
                                    pick_method);

    // Rate: one index of `bits` bits per block plus the strength table itself.
    const int total_bits =
        sb_count * bits + nb_strengths * CDEF_STRENGTH_BITS * strength_sets;
    const int rate_cost = av1_cost_literal(total_bits);
    const uint64_t dist = tot_mse * 16;
    const uint64_t rd = RDCOST(rdmult, rate_cost, dist);
    if (rd >= best_rd) continue;

    best_rd = rd;
    nb_strength_bits = bits;
    memcpy(cdef_info->cdef_strengths, best_lev0,
           nb_strengths * sizeof(best_lev0[0]));
    if (num_planes > 1) {
      memcpy(cdef_info->cdef_uv_strengths, best_lev1,
             nb_strengths * sizeof(best_lev1[0]));
    }
  }

  cdef_info->cdef_bits = nb_strength_bits;
  cdef_info->nb_cdef_strengths = 1 << nb_strength_bits;

  // For every searched block pick the signaled strength pair with lowest MSE.
  for (int sb = 0; sb < sb_count; sb++) {
    uint64_t lowest_mse = UINT64_MAX;
    int lowest_idx = 0;
    for (int idx = 0; idx < cdef_info->nb_cdef_strengths; idx++) {
      uint64_t candidate = mse[0][sb][cdef_info->cdef_strengths[idx]];
      if (num_planes > 1)
        candidate += mse[1][sb][cdef_info->cdef_uv_strengths[idx]];
      if (candidate < lowest_mse) {
        lowest_mse = candidate;
        lowest_idx = idx;
      }
    }
    mi_params->mi_grid_base[cdef_search_ctx->sb_index[sb]]->cdef_strength =
        lowest_idx;
  }

  // The fast search methods work on strength indices; convert the selected
  // indices into the strength values that are stored in cdef_info.
  if (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
      pick_method <= CDEF_FAST_SEARCH_LVL5) {
    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
      const int luma_strength = cdef_info->cdef_strengths[j];
      const int chroma_strength = cdef_info->cdef_uv_strengths[j];
      // Scratch variables for STORE_CDEF_FILTER_STRENGTH; keep these names.
      int pri_strength, sec_strength;

      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_strengths[j], pick_method,
                                 luma_strength);
      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_uv_strengths[j], pick_method,
                                 chroma_strength);
    }
  }

  cdef_info->cdef_damping = damping;
  // Deallocate CDEF search context buffers.
  av1_cdef_dealloc_data(cdef_search_ctx);
}
959