xref: /aosp_15_r20/external/libaom/av1/encoder/picklpf.c (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <limits.h>
14 
15 #include "config/aom_scale_rtcd.h"
16 
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_dsp/psnr.h"
19 #include "aom_mem/aom_mem.h"
20 #include "aom_ports/mem.h"
21 
22 #include "av1/common/av1_common_int.h"
23 #include "av1/common/av1_loopfilter.h"
24 #include "av1/common/quant_common.h"
25 
26 #include "av1/encoder/av1_quantize.h"
27 #include "av1/encoder/encoder.h"
28 #include "av1/encoder/picklpf.h"
29 
// The AV1 loop filter applies to the whole frame according to mi_rows and
// mi_cols, which are calculated from the aligned width and aligned height.
// In addition, if super-res is enabled, the whole frame is copied according
// to the aligned width and height (av1_superres_upscale()).
// So we need to copy the whole filtered region instead of the cropped region.
// For example, if the input image size is 160x90, then
// src->y_crop_width = 160 and src->y_crop_height = 90, while the aligned
// frame size is src->y_width = 160, src->y_height = 96.
// AV1 aligns the frame size to a multiple of 8 so that, when there is chroma
// subsampling, the chroma planes also cover an integer number of mi units:
// an mi unit is 4x4, 8 = 4 * 2, and 2 luma mi units correspond to 1 chroma
// mi unit if there is subsampling.
// See: aom_realloc_frame_buffer() in yv12config.c.
// Copies a single plane (0 = Y, 1 = U, 2 = V) from src_bc to dst_bc using the
// matching aom_yv12_copy_{y,u,v}() helper. Any other plane index trips the
// assert in debug builds and is a no-op otherwise.
static void yv12_copy_plane(const YV12_BUFFER_CONFIG *src_bc,
                            YV12_BUFFER_CONFIG *dst_bc, int plane) {
  if (plane == 0) {
    aom_yv12_copy_y(src_bc, dst_bc, 0);
  } else if (plane == 1) {
    aom_yv12_copy_u(src_bc, dst_bc, 0);
  } else if (plane == 2) {
    aom_yv12_copy_v(src_bc, dst_bc, 0);
  } else {
    assert(plane >= 0 && plane <= 2);
  }
}
52 
get_max_filter_level(const AV1_COMP * cpi)53 static int get_max_filter_level(const AV1_COMP *cpi) {
54   if (is_stat_consumption_stage_twopass(cpi)) {
55     return cpi->ppi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
56                                                       : MAX_LOOP_FILTER;
57   } else {
58     return MAX_LOOP_FILTER;
59   }
60 }
61 
// Applies the loop filter to one plane of the current frame at the trial
// level `filt_level`, measures the resulting error against `sd` with
// aom_get_sse_plane(), and then restores the plane from cpi->last_frame_uf.
//
// For the luma plane, `dir` selects which of the two luma levels is being
// tried: dir == 0 updates only cm->lf.filter_level[0], dir == 1 updates only
// cm->lf.filter_level[1], and any other value updates both. For chroma
// planes, `dir` is ignored and the U or V level is updated.
//
// Note that the trial level is left stored in cm->lf on return.
static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
                                AV1_COMP *const cpi, int filt_level,
                                int partial_frame, int plane, int dir) {
  AV1_COMMON *const cm = &cpi->common;
  MultiThreadInfo *const mt_info = &cpi->mt_info;
  const int lpf_workers = mt_info->num_mod_workers[MOD_LPF];

  assert(plane >= 0 && plane <= 2);

  // Set base filters for use of get_filter_level (av1_loopfilter.c) when in
  // DELTA_LF mode. The luma level not under test keeps its current value.
  if (plane == 0) {
    if (dir != 1) cm->lf.filter_level[0] = filt_level;
    if (dir != 0) cm->lf.filter_level[1] = filt_level;
  } else if (plane == 1) {
    cm->lf.filter_level_u = filt_level;
  } else if (plane == 2) {
    cm->lf.filter_level_v = filt_level;
  }

  // lpf_opt_level = 1 : Enables dual/quad loop-filtering.
  const int lpf_opt_level = is_inter_tx_size_search_level_one(&cpi->sf.tx_sf);

  // Filter only the requested plane: [plane, plane + 1).
  av1_loop_filter_frame_mt(&cm->cur_frame->buf, cm, &cpi->td.mb.e_mbd, plane,
                           plane + 1, partial_frame, mt_info->workers,
                           lpf_workers, &mt_info->lf_row_sync, lpf_opt_level);

  const int64_t plane_sse = aom_get_sse_plane(
      sd, &cm->cur_frame->buf, plane, cm->seq_params->use_highbitdepth);

  // Put the unfiltered reconstruction back so the next trial starts clean.
  yv12_copy_plane(&cpi->last_frame_uf, &cm->cur_frame->buf, plane);

  return plane_sse;
}
101 
// Iteratively searches for the loop filter level of one plane (and, for luma,
// one edge direction) that gives the lowest error against `sd`, as measured
// by try_filter_frame().
//
// The search starts from the previous frame's level (clamped into range),
// probes one step below and/or above the current centre, and halves the step
// whenever the centre stays the best candidate. Lower levels are favoured by
// a bias term. The search ends once the step is no larger than the minimum
// step selected by cpi->sf.lpf_sf.use_coarse_filter_level_search.
//
// last_frame_filter_level holds four entries: [0] and [1] are the two luma
// levels, [2] is U and [3] is V. For plane 0, dir 0/1 starts from the
// matching luma entry and dir 2 starts from the rounded average of both.
//
// Returns the selected level, or 0 if `plane` / `dir` is out of range.
static int search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
                               int partial_frame,
                               const int *last_frame_filter_level, int plane,
                               int dir) {
  const AV1_COMMON *const cm = &cpi->common;
  const int level_floor = 0;
  const int level_ceil = get_max_filter_level(cpi);

  // Start the search at the previous frame filter level unless it is now out
  // of range.
  int start_level;
  if (plane == 0) {
    if (dir == 2) {
      start_level =
          (last_frame_filter_level[0] + last_frame_filter_level[1] + 1) >> 1;
    } else if (dir == 0 || dir == 1) {
      start_level = last_frame_filter_level[dir];
    } else {
      assert(dir >= 0 && dir <= 2);
      return 0;
    }
  } else if (plane == 1) {
    start_level = last_frame_filter_level[2];
  } else if (plane == 2) {
    start_level = last_frame_filter_level[3];
  } else {
    assert(plane >= 0 && plane <= 2);
    return 0;
  }

  int centre = clamp(start_level, level_floor, level_ceil);
  int step = centre < 16 ? 4 : centre / 4;

  // The search is terminated when the step is no longer greater than
  // stop_step: 0 for the full search, 2 for the coarse search.
  const int coarse = cpi->sf.lpf_sf.use_coarse_filter_level_search;
  assert(coarse <= 1);
  static const int stop_step_lookup[2] = { 0, 2 };
  const int stop_step = stop_step_lookup[coarse];

  // Sum squared error at each filter level; -1 marks "not evaluated yet".
  int64_t sse_at_level[MAX_LOOP_FILTER + 1];
  for (int i = 0; i <= MAX_LOOP_FILTER; ++i) sse_at_level[i] = -1;

  // Save the unfiltered plane so that every trial can be undone.
  yv12_copy_plane(&cm->cur_frame->buf, &cpi->last_frame_uf, plane);

  int64_t best_sse =
      try_filter_frame(sd, cpi, centre, partial_frame, plane, dir);
  int best_level = centre;
  sse_at_level[centre] = best_sse;

  // -1: only probe below, +1: only probe above, 0: probe both sides.
  int direction = 0;

  while (step > stop_step) {
    const int upper = AOMMIN(centre + step, level_ceil);
    const int lower = AOMMAX(centre - step, level_floor);

    // Bias against raising loop filter in favor of lowering it.
    int64_t bias = (best_sse >> (15 - (centre / 8))) * step;

    if (is_stat_consumption_stage_twopass(cpi) &&
        cpi->ppi->twopass.section_intra_rating < 20) {
      bias = (bias * cpi->ppi->twopass.section_intra_rating) / 20;
    }

    // Bias less for large block size.
    if (cm->features.tx_mode != ONLY_4X4) bias >>= 1;

    if (direction <= 0 && lower != centre) {
      // Evaluate the lower level unless its score is already cached.
      if (sse_at_level[lower] < 0) {
        sse_at_level[lower] =
            try_filter_frame(sd, cpi, lower, partial_frame, plane, dir);
      }
      const int64_t lower_sse = sse_at_level[lower];
      // If the value is close to the best so far then bias towards a lower
      // loop filter value; only record the score if it is a real improvement.
      if (lower_sse < best_sse + bias) {
        best_sse = AOMMIN(best_sse, lower_sse);
        best_level = lower;
      }
    }

    if (direction >= 0 && upper != centre) {
      // Evaluate the higher level unless its score is already cached.
      if (sse_at_level[upper] < 0) {
        sse_at_level[upper] =
            try_filter_frame(sd, cpi, upper, partial_frame, plane, dir);
      }
      const int64_t upper_sse = sse_at_level[upper];
      // Raising the level must beat the best score by more than the bias.
      if (upper_sse < best_sse - bias) {
        best_sse = upper_sse;
        best_level = upper;
      }
    }

    if (best_level != centre) {
      // Keep moving in the direction that improved the score.
      direction = (best_level < centre) ? -1 : 1;
      centre = best_level;
    } else {
      // Halve the step distance if the best filter value did not move.
      step /= 2;
      direction = 0;
    }
  }

  return best_level;
}
208 
// Chooses the loop filter levels for the current frame and stores them in
// cm->lf (filter_level[0], filter_level[1], filter_level_u, filter_level_v).
// lf->sharpness_level is always reset to 0.
//
// sd:     source frame that the filtered reconstruction is compared against
//         (only used by the search-based methods).
// cpi:    encoder instance; cm->lf is updated in place.
// method: selection strategy:
//   - LPF_PICK_MINIMAL_LPF: both luma levels are set to 0.
//   - method >= LPF_PICK_FROM_Q: levels are derived from the AC quantizer via
//     a fixed-point linear model, optionally zeroed for low-motion frames
//     when loopfilter_control is LOOPFILTER_SELECTIVELY.
//   - otherwise: levels are found with search_filter_level(), using
//     cpi->last_frame_uf as scratch storage for the unfiltered frame.
//
// The luma levels are forced to 0 (and the function returns early) when loop
// filtering is disabled by configuration, by the real-time screen-content
// cyclic-refresh check, or for non-reference frames under
// LOOPFILTER_REFERENCE.
void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
                           LPF_PICK_METHOD method) {
  AV1_COMMON *const cm = &cpi->common;
  const SequenceHeader *const seq_params = cm->seq_params;
  const int num_planes = av1_num_planes(cm);
  struct loopfilter *const lf = &cm->lf;
  int disable_filter_rt_screen = 0;
  (void)sd;

  lf->sharpness_level = 0;

  if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
      cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
      cpi->sf.rt_sf.skip_lf_screen)
    disable_filter_rt_screen = av1_cyclic_refresh_disable_lf_cdef(cpi);

  if (disable_filter_rt_screen ||
      cpi->oxcf.algo_cfg.loopfilter_control == LOOPFILTER_NONE ||
      (cpi->oxcf.algo_cfg.loopfilter_control == LOOPFILTER_REFERENCE &&
       cpi->ppi->rtc_ref.non_reference_frame)) {
    lf->filter_level[0] = 0;
    lf->filter_level[1] = 0;
    return;
  }

  if (method == LPF_PICK_MINIMAL_LPF) {
    lf->filter_level[0] = 0;
    lf->filter_level[1] = 0;
  } else if (method >= LPF_PICK_FROM_Q) {
    const int min_filter_level = 0;
    const int max_filter_level = get_max_filter_level(cpi);
    const int q = av1_ac_quant_QTX(cm->quant_params.base_qindex, 0,
                                   seq_params->bit_depth);
    // Frame area in 64 bits so that very large frame dimensions cannot
    // overflow the int multiplication.
    const int64_t frame_area = (int64_t)cm->width * cm->height;
    const int64_t cif_area = 352 * 288;
    // Based on test results for the rtc test set:
    // 0.04590 boosted or 0.02295 non-boosted in 18-bit fixed point.
    const int strength_boost_q_threshold = 0;
    int inter_frame_multiplier =
        (q > strength_boost_q_threshold ||
         (cpi->sf.rt_sf.use_nonrd_pick_mode && frame_area > cif_area))
            ? 12034
            : 6017;
    // Increase strength on base TL0 for temporal layers, for low-resoln,
    // based on frame source_sad.
    if (cpi->svc.number_temporal_layers > 1 &&
        cpi->svc.temporal_layer_id == 0 && frame_area <= cif_area &&
        cpi->sf.rt_sf.use_nonrd_pick_mode) {
      if (cpi->rc.frame_source_sad > 100000)
        inter_frame_multiplier = inter_frame_multiplier << 1;
      else if (cpi->rc.frame_source_sad > 50000)
        inter_frame_multiplier = 3 * (inter_frame_multiplier >> 1);
    } else if (cpi->sf.rt_sf.use_fast_fixed_part) {
      inter_frame_multiplier = inter_frame_multiplier << 1;
    }
    // These values were determined by linear fitting the result of the
    // searched level for 8 bit depth:
    // Keyframes: filt_guess = q * 0.06699 - 1.60817
    // Other frames: filt_guess = q * inter_frame_multiplier + 2.48225
    //
    // And high bit depth separately:
    // filt_guess = q * 0.316206 + 3.87252
    int filt_guess;
    switch (seq_params->bit_depth) {
      case AOM_BITS_8:
        filt_guess =
            (cm->current_frame.frame_type == KEY_FRAME)
                ? ROUND_POWER_OF_TWO(q * 17563 - 421574, 18)
                : ROUND_POWER_OF_TWO(q * inter_frame_multiplier + 650707, 18);
        break;
      case AOM_BITS_10:
        filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20);
        break;
      case AOM_BITS_12:
        filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22);
        break;
      default:
        assert(0 &&
               "bit_depth should be AOM_BITS_8, AOM_BITS_10 "
               "or AOM_BITS_12");
        return;
    }
    if (seq_params->bit_depth != AOM_BITS_8 &&
        cm->current_frame.frame_type == KEY_FRAME)
      filt_guess -= 4;
    // TODO(chengchen): retrain the model for Y, U, V filter levels
    // The same clamped guess is used for both luma levels and both chroma
    // planes.
    const int guessed_level =
        clamp(filt_guess, min_filter_level, max_filter_level);
    lf->filter_level[0] = guessed_level;
    lf->filter_level[1] = guessed_level;
    lf->filter_level_u = guessed_level;
    lf->filter_level_v = guessed_level;
    if (cpi->oxcf.algo_cfg.loopfilter_control == LOOPFILTER_SELECTIVELY &&
        !frame_is_intra_only(cm) && !cpi->rc.high_source_sad) {
      if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
        lf->filter_level[0] = 0;
        lf->filter_level[1] = 0;
      } else {
        const int num4x4 = (cm->width >> 2) * (cm->height >> 2);
        const int newmv_thresh = 7;
        const int distance_since_key_thresh = 5;
        // Disable luma filtering when fewer than newmv_thresh percent of the
        // 4x4 units are new-mv or intra blocks. The percentage test is
        // written as a cross-multiplication: for num4x4 > 0 it is equivalent
        // to (blocks * 100 / num4x4) < newmv_thresh, and for frames smaller
        // than 4 pixels in either dimension (num4x4 == 0) it is simply false
        // instead of dividing by zero.
        const int64_t scaled_blocks =
            (int64_t)cpi->td.rd_counts.newmv_or_intra_blocks * 100;
        if (scaled_blocks < (int64_t)newmv_thresh * num4x4 &&
            cpi->rc.frames_since_key > distance_since_key_thresh) {
          lf->filter_level[0] = 0;
          lf->filter_level[1] = 0;
        }
      }
    }
  } else {
    // Seed the search with the previous frame's levels, or with 0 for
    // intra-only frames.
    int last_frame_filter_level[4] = { 0 };
    if (!frame_is_intra_only(cm)) {
      last_frame_filter_level[0] = cpi->ppi->filter_level[0];
      last_frame_filter_level[1] = cpi->ppi->filter_level[1];
      last_frame_filter_level[2] = cpi->ppi->filter_level_u;
      last_frame_filter_level[3] = cpi->ppi->filter_level_v;
    }
    // The frame buffer last_frame_uf is used to store the non-loop filtered
    // reconstructed frame in search_filter_level().
    if (aom_realloc_frame_buffer(
            &cpi->last_frame_uf, cm->width, cm->height,
            seq_params->subsampling_x, seq_params->subsampling_y,
            seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
            cm->features.byte_alignment, NULL, NULL, NULL, false, 0))
      aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate last frame buffer");

    const int partial_frame = (method == LPF_PICK_FROM_SUBIMAGE);

    // First find a single level shared by both luma directions ...
    lf->filter_level[0] = lf->filter_level[1] = search_filter_level(
        sd, cpi, partial_frame, last_frame_filter_level, 0, 2);
    // ... then, unless the non-dual method was requested, refine each luma
    // direction separately.
    if (method != LPF_PICK_FROM_FULL_IMAGE_NON_DUAL) {
      lf->filter_level[0] = search_filter_level(
          sd, cpi, partial_frame, last_frame_filter_level, 0, 0);
      lf->filter_level[1] = search_filter_level(
          sd, cpi, partial_frame, last_frame_filter_level, 0, 1);
    }

    if (num_planes > 1) {
      lf->filter_level_u = search_filter_level(
          sd, cpi, partial_frame, last_frame_filter_level, 1, 0);
      lf->filter_level_v = search_filter_level(
          sd, cpi, partial_frame, last_frame_filter_level, 2, 0);
    }
  }
}
356