xref: /aosp_15_r20/external/libavc/encoder/svc/isvce_mc.c (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1 /******************************************************************************
2  *
3  * Copyright (C) 2022 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19  */
20 
21 /**
22  *******************************************************************************
23  * @file
24  *  isvce_mc.c
25  *
26  * @brief
27  *  Contains definition of functions for motion compensation
28  *
29  * @author
30  *  ittiam
31  *
32  * @par List of Functions:
33  *  - isvce_motion_comp_luma()
34  *  - isvce_motion_comp_chroma()
35  *
36  * @remarks
37  *  None
38  *
39  *******************************************************************************
40  */
41 
42 /*****************************************************************************/
43 /* File Includes                                                             */
44 /*****************************************************************************/
45 
46 /* System include files */
47 #include <stdio.h>
48 
49 /* User include files */
50 #include "ih264_typedefs.h"
51 #include "ih264_debug.h"
52 #include "isvc_defs.h"
53 #include "iv2.h"
54 #include "ive2.h"
55 #include "ime_distortion_metrics.h"
56 #include "ime_defs.h"
57 #include "ime_structs.h"
58 #include "isvc_structs.h"
59 #include "isvc_inter_pred_filters.h"
60 #include "isvc_mem_fns.h"
61 #include "ih264_padding.h"
62 #include "ih264_intra_pred_filters.h"
63 #include "ih264_deblk_edge_filters.h"
64 #include "isvc_trans_quant_itrans_iquant.h"
65 #include "isvc_cabac_tables.h"
66 #include "isvce_defs.h"
67 #include "ih264e_error.h"
68 #include "ih264e_bitstream.h"
69 #include "irc_cntrl_param.h"
70 #include "irc_frame_info_collector.h"
71 #include "isvce_rate_control.h"
72 #include "isvce_cabac_structs.h"
73 #include "isvce_structs.h"
74 #include "isvce_mc.h"
75 #include "ih264e_half_pel.h"
76 #include "isvce_ibl_eval.h"
77 
78 /*****************************************************************************/
79 /* Function Definitions                                                      */
80 /*****************************************************************************/
81 
82 /**
83  ******************************************************************************
84  *
85  * @brief
86  *  performs motion compensation for a luma mb for the given mv.
87  *
88  * @par Description
89  *  This routine performs motion compensation of an inter mb. When the inter
90  *  mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer
91  *  to pred buffer. In this case the function returns pointer and stride of the
92  *  ref. buffer and this info is used in place of pred buffer else where.
93  *  In other cases, the pred buffer is populated via copy / filtering + copy
94  *  (q pel cases) and returned.
95  *
96  * @param[in] ps_proc
97  *  pointer to current proc ctxt
98  *
99  * @return  none
100  *
101  * @remarks Assumes half pel buffers for the entire frame are populated.
102  *
103  ******************************************************************************
104  */
isvce_motion_comp_luma(isvce_process_ctxt_t * ps_proc,buffer_container_t * ps_pred)105 void isvce_motion_comp_luma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred)
106 {
107     /* codec context */
108     isvce_codec_t *ps_codec = ps_proc->ps_codec;
109 
110     /* me ctxt */
111     isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
112 
113     isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
114     inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
115 
116     /* Pointer to the structure having motion vectors, size and position of curr
117      * partitions */
118     isvce_enc_pu_t *ps_curr_pu;
119 
120     /* pointers to full pel, half pel x, half pel y, half pel xy reference buffer
121      */
122     UWORD8 *pu1_ref[4];
123 
124     /* pred buffer ptr */
125     UWORD8 *pu1_pred;
126 
127     /* strides of full pel, half pel x, half pel y, half pel xy reference buffer
128      */
129     WORD32 i4_ref_strd[4];
130 
131     /* pred buffer stride */
132     WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
133 
134     /* full pel motion vectors */
135     WORD32 u4_mv_x_full, u4_mv_y_full;
136 
137     /* half pel motion vectors */
138     WORD32 u4_mv_x_hpel, u4_mv_y_hpel;
139 
140     /* quarter pel motion vectors */
141     WORD32 u4_mv_x_qpel, u4_mv_y_qpel;
142 
143     /* width & height of the partition */
144     UWORD32 wd, ht;
145 
146     /* partition idx */
147     UWORD32 u4_num_prtn;
148 
149     /* half / qpel coefficient */
150     UWORD32 u4_subpel_factor;
151 
152     /* BIPRED Flag */
153     WORD32 i4_bipred_flag;
154 
155     /* temp var */
156     UWORD32 u4_lkup_idx1;
157 
158     if((ps_proc->ps_mb_info->u2_mb_type == BASE_MODE) && ps_proc->ps_mb_info->u1_is_intra)
159     {
160         svc_intra_pred_ctxt_t *ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt;
161 
162         ps_pred->pv_data =
163             (UWORD8 *) (ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[Y]
164                             .pv_data);
165         ps_pred->i4_data_stride =
166             ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[Y].i4_data_stride;
167 
168         return;
169     }
170 
171     /* Init */
172     i4_ref_strd[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].i4_data_stride;
173 
174     i4_ref_strd[1] = i4_ref_strd[2] = i4_ref_strd[3] = ps_me_ctxt->u4_subpel_buf_strd;
175 
176     for(u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; u4_num_prtn++)
177     {
178         mv_t *ps_curr_mv;
179 
180         /* update ptr to curr partition */
181         ps_curr_pu = ps_proc->ps_mb_info->as_pu + u4_num_prtn;
182 
183         /* Set no no bipred */
184         i4_bipred_flag = 0;
185 
186         switch(ps_curr_pu->u1_pred_mode)
187         {
188             case PRED_L0:
189                 ps_curr_mv = &ps_curr_pu->as_me_info[0].s_mv;
190                 pu1_ref[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data;
191                 break;
192 
193             case PRED_L1:
194                 ps_curr_mv = &ps_curr_pu->as_me_info[1].s_mv;
195                 pu1_ref[0] = ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data;
196                 break;
197 
198             case PRED_BI:
199                 /*
200                  * In case of PRED_BI, we only need to ensure that
201                  * the reference buffer that gets selected is
202                  * ps_proc->pu1_best_subpel_buf
203                  */
204 
205                 /* Dummy */
206                 ps_curr_mv = &ps_curr_pu->as_me_info[0].s_mv;
207                 pu1_ref[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data;
208 
209                 i4_bipred_flag = 1;
210                 break;
211 
212             default:
213                 ps_curr_mv = &ps_curr_pu->as_me_info[0].s_mv;
214                 pu1_ref[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data;
215                 break;
216         }
217 
218         /* get full pel mv's (full pel units) */
219         u4_mv_x_full = ps_curr_mv->i2_mvx >> 2;
220         u4_mv_y_full = ps_curr_mv->i2_mvy >> 2;
221 
222         /* get half pel mv's */
223         u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
224         u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
225 
226         /* get quarter pel mv's */
227         u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
228         u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
229 
230         /* width and height of partition */
231         wd = (ps_curr_pu->u1_wd_in_4x4_m1 + 1) << 2;
232         ht = (ps_curr_pu->u1_ht_in_4x4_m1 + 1) << 2;
233 
234         /* decision ? qpel/hpel, fpel */
235         u4_subpel_factor =
236             (u4_mv_y_hpel << 3) + (u4_mv_x_hpel << 2) + (u4_mv_y_qpel << 1) + (u4_mv_x_qpel);
237 
238         /* Move ref to position given by MV */
239         pu1_ref[0] += ((u4_mv_y_full * i4_ref_strd[0]) + u4_mv_x_full);
240 
241         /* Sub pel ptrs/ Biperd pointers init */
242         pu1_ref[1] = ps_proc->pu1_best_subpel_buf;
243         i4_ref_strd[1] = ps_proc->u4_bst_spel_buf_strd;
244 
245         /* update pred buff ptr */
246         pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->u1_pos_y_in_4x4 * i4_pred_strd +
247                    4 * ps_curr_pu->u1_pos_x_in_4x4;
248 
249         /* u4_lkup_idx1 will be non zero for half pel and bipred */
250         u4_lkup_idx1 = ((u4_subpel_factor >> 2) != 0) || i4_bipred_flag;
251 
252         {
253             /********************************************************************/
254             /* if the block is P16x16 MB and mv are not quarter pel motion      */
255             /* vectors, there is no need to copy 16x16 unit from reference frame*/
256             /* to pred buffer. We might as well send the reference frame buffer */
257             /* pointer as pred buffer (ofc with updated stride) to fwd transform*/
258             /* and inverse transform unit.                                      */
259             /********************************************************************/
260             if(ps_proc->u4_num_sub_partitions == 1)
261             {
262                 ps_pred->pv_data = pu1_ref[u4_lkup_idx1];
263                 ps_pred->i4_data_stride = i4_ref_strd[u4_lkup_idx1];
264             }
265             /*
266              * Copying half pel or full pel to prediction buffer
267              * Currently ps_proc->u4_num_sub_partitions will always be 1 as we only
268              * support 16x16 in P mbs
269              */
270             else
271             {
272                 ps_inter_pred_fxns->pf_inter_pred_luma_copy(pu1_ref[u4_lkup_idx1], pu1_pred,
273                                                             i4_ref_strd[u4_lkup_idx1], i4_pred_strd,
274                                                             ht, wd, NULL, 0);
275             }
276         }
277     }
278 }
279 
280 /**
281  ******************************************************************************
282  *
283  * @brief
284  *  performs motion compensation for chroma mb
285  *
286  * @par   Description
287  *  Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
288  *  according to the motion vectors given
289  *
290  * @param[in] ps_proc
291  *  pointer to current proc ctxt
292  *
293  * @return  none
294  *
295  * @remarks Assumes half pel and quarter pel buffers for the entire frame are
296  *  populated.
297  ******************************************************************************
298  */
isvce_motion_comp_chroma(isvce_process_ctxt_t * ps_proc,buffer_container_t * ps_pred)299 void isvce_motion_comp_chroma(isvce_process_ctxt_t *ps_proc, buffer_container_t *ps_pred)
300 {
301     /* codec context */
302     isvce_codec_t *ps_codec = ps_proc->ps_codec;
303     isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
304     inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
305 
306     /* Pointer to the structure having motion vectors, size and position of curr
307      * partitions */
308     isvce_enc_pu_t *ps_curr_pu;
309 
310     /* pointers to full pel, half pel x, half pel y, half pel xy reference buffer
311      */
312     UWORD8 *pu1_ref;
313 
314     /* pred buffer ptr */
315     UWORD8 *pu1_pred;
316 
317     /* strides of full pel reference buffer */
318     WORD32 i4_ref_strd;
319 
320     /* pred buffer stride */
321     WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
322 
323     /* full pel motion vectors */
324     WORD32 u4_mv_x_full, u4_mv_y_full;
325 
326     /* half pel motion vectors */
327     WORD32 u4_mv_x_hpel, u4_mv_y_hpel;
328 
329     /* quarter pel motion vectors */
330     WORD32 u4_mv_x_qpel, u4_mv_y_qpel;
331 
332     /* width & height of the partition */
333     UWORD32 wd, ht;
334 
335     /* partition idx */
336     UWORD32 u4_num_prtn;
337 
338     WORD32 u4_mv_x;
339     WORD32 u4_mv_y;
340     UWORD8 u1_dx, u1_dy;
341 
342     ASSERT(ps_proc->u4_num_sub_partitions <= ENC_MAX_PU_IN_MB);
343 
344     if((ps_proc->ps_mb_info->u2_mb_type == BASE_MODE) && ps_proc->ps_mb_info->u1_is_intra)
345     {
346         svc_intra_pred_ctxt_t *ps_intra_pred_ctxt = ps_proc->ps_intra_pred_ctxt;
347 
348         ps_pred->pv_data =
349             (UWORD8 *) (ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[UV]
350                             .pv_data);
351         ps_pred->i4_data_stride =
352             ps_intra_pred_ctxt->s_intra_pred_outputs.s_pred_buf.as_component_bufs[UV]
353                 .i4_data_stride;
354 
355         return;
356     }
357     else
358     {
359         ps_pred->pv_data = ps_proc->pu1_pred_mb;
360         ps_pred->i4_data_stride = ps_proc->i4_pred_strd;
361     }
362 
363     for(u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; u4_num_prtn++)
364     {
365         mv_t *ps_curr_mv;
366 
367         ps_curr_pu = ps_proc->ps_mb_info->as_pu + u4_num_prtn;
368 
369         if(ps_curr_pu->u1_pred_mode != BI)
370         {
371             ps_curr_mv = &ps_curr_pu->as_me_info[ps_curr_pu->u1_pred_mode].s_mv;
372             pu1_ref =
373                 ps_proc->as_ref_buf_props[ps_curr_pu->u1_pred_mode].as_component_bufs[1].pv_data;
374             i4_ref_strd = ps_proc->as_ref_buf_props[ps_curr_pu->u1_pred_mode]
375                               .as_component_bufs[1]
376                               .i4_data_stride;
377 
378             u4_mv_x = ps_curr_mv->i2_mvx >> 3;
379             u4_mv_y = ps_curr_mv->i2_mvy >> 3;
380 
381             /*  corresponds to full pel motion vector in luma, but in chroma
382              * corresponds to pel formed wiith dx, dy =4 */
383             u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2;
384             u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2;
385 
386             /* get half pel mv's */
387             u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
388             u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
389 
390             /* get quarter pel mv's */
391             u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
392             u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
393 
394             /* width and height of sub macro block */
395             wd = (ps_curr_pu->u1_wd_in_4x4_m1 + 1) << 1;
396             ht = (ps_curr_pu->u1_ht_in_4x4_m1 + 1) << 1;
397 
398             /* move the pointers so that they point to the motion compensated
399              * locations */
400             pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1));
401 
402             pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->u1_pos_y_in_4x4 * i4_pred_strd +
403                        2 * ps_curr_pu->u1_pos_x_in_4x4;
404 
405             u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel);
406             u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel);
407 
408             /* cases where u1_dx = 0 or u1_dy = 0 are dealt separately in neon with
409              * separate functions for better performance
410              *
411              * isvc_inter_pred_chroma_dx_zero_a9q
412              * and
413              * isvc_inter_pred_chroma_dy_zero_a9q
414              */
415 
416             ps_inter_pred_fxns->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd, i4_pred_strd,
417                                                      u1_dx, u1_dy, ht, wd);
418         }
419         else
420         {
421             /*
422              * We need to interpolate the L0 and L1 ref pics with the chorma MV
423              * then use them to average for bilinrar interpred
424              */
425             WORD32 i4_predmode;
426             UWORD8 *pu1_ref_buf[2];
427 
428             /* Temporary buffers to store the interpolated value from L0 and L1 */
429             pu1_ref_buf[L0] = ps_proc->apu1_subpel_buffs[0];
430             pu1_ref_buf[L1] = ps_proc->apu1_subpel_buffs[1];
431 
432             for(i4_predmode = 0; i4_predmode < BI; i4_predmode++)
433             {
434                 ps_curr_mv = &ps_curr_pu->as_me_info[i4_predmode].s_mv;
435                 pu1_ref = ps_proc->as_ref_buf_props[i4_predmode].as_component_bufs[1].pv_data;
436                 i4_ref_strd =
437                     ps_proc->as_ref_buf_props[i4_predmode].as_component_bufs[1].i4_data_stride;
438 
439                 u4_mv_x = ps_curr_mv->i2_mvx >> 3;
440                 u4_mv_y = ps_curr_mv->i2_mvy >> 3;
441 
442                 /*
443                  * corresponds to full pel motion vector in luma, but in chroma
444                  * corresponds to pel formed wiith dx, dy =4
445                  */
446                 u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2;
447                 u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2;
448 
449                 /* get half pel mv's */
450                 u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
451                 u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
452 
453                 /* get quarter pel mv's */
454                 u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
455                 u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
456 
457                 /* width and height of sub macro block */
458                 wd = (ps_curr_pu->u1_wd_in_4x4_m1 + 1) << 1;
459                 ht = (ps_curr_pu->u1_ht_in_4x4_m1 + 1) << 1;
460 
461                 /* move the pointers so that they point to the motion compensated
462                  * locations */
463                 pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1));
464 
465                 pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->u1_pos_y_in_4x4 * i4_pred_strd +
466                            2 * ps_curr_pu->u1_pos_x_in_4x4;
467 
468                 u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel);
469                 u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel);
470 
471                 ps_inter_pred_fxns->pf_inter_pred_chroma(
472                     pu1_ref, pu1_ref_buf[i4_predmode], i4_ref_strd, MB_SIZE, u1_dx, u1_dy, ht, wd);
473             }
474 
475             ps_inter_pred_fxns->pf_inter_pred_luma_bilinear(pu1_ref_buf[L0], pu1_ref_buf[L1],
476                                                             pu1_pred, MB_SIZE, MB_SIZE,
477                                                             i4_pred_strd, MB_SIZE >> 1, MB_SIZE);
478         }
479     }
480 }
481