xref: /aosp_15_r20/external/libhevc/encoder/ihevce_enc_loop_inter_mode_sifter.c (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /*!
22 ******************************************************************************
23 * \file ihevce_enc_loop_inter_mode_sifter.c
24 *
25 * \brief
26 *    This file contains functions for selecting best inter candidates for RDOPT evaluation
27 *
28 * \date
29 *    10/09/2014
30 *
31 ******************************************************************************
32 */
33 
34 /*****************************************************************************/
35 /* File Includes                                                             */
36 /*****************************************************************************/
37 /* System include files */
38 #include <stdio.h>
39 #include <string.h>
40 #include <stdlib.h>
41 #include <assert.h>
42 #include <stdarg.h>
43 #include <math.h>
44 #include <limits.h>
45 
46 /* User include files */
47 #include "ihevc_typedefs.h"
48 #include "itt_video_api.h"
49 #include "ihevce_api.h"
50 
51 #include "rc_cntrl_param.h"
52 #include "rc_frame_info_collector.h"
53 #include "rc_look_ahead_params.h"
54 
55 #include "ihevc_defs.h"
56 #include "ihevc_macros.h"
57 #include "ihevc_debug.h"
58 #include "ihevc_structs.h"
59 #include "ihevc_platform_macros.h"
60 #include "ihevc_deblk.h"
61 #include "ihevc_itrans_recon.h"
62 #include "ihevc_chroma_itrans_recon.h"
63 #include "ihevc_chroma_intra_pred.h"
64 #include "ihevc_intra_pred.h"
65 #include "ihevc_inter_pred.h"
66 #include "ihevc_mem_fns.h"
67 #include "ihevc_padding.h"
68 #include "ihevc_weighted_pred.h"
69 #include "ihevc_sao.h"
70 #include "ihevc_resi_trans.h"
71 #include "ihevc_quant_iquant_ssd.h"
72 #include "ihevc_cabac_tables.h"
73 
74 #include "ihevce_defs.h"
75 #include "ihevce_hle_interface.h"
76 #include "ihevce_lap_enc_structs.h"
77 #include "ihevce_multi_thrd_structs.h"
78 #include "ihevce_multi_thrd_funcs.h"
79 #include "ihevce_me_common_defs.h"
80 #include "ihevce_had_satd.h"
81 #include "ihevce_error_codes.h"
82 #include "ihevce_bitstream.h"
83 #include "ihevce_cabac.h"
84 #include "ihevce_rdoq_macros.h"
85 #include "ihevce_function_selector.h"
86 #include "ihevce_enc_structs.h"
87 #include "ihevce_entropy_structs.h"
88 #include "ihevce_cmn_utils_instr_set_router.h"
89 #include "ihevce_ipe_instr_set_router.h"
90 #include "ihevce_decomp_pre_intra_structs.h"
91 #include "ihevce_decomp_pre_intra_pass.h"
92 #include "ihevce_enc_loop_structs.h"
93 #include "ihevce_global_tables.h"
94 #include "ihevce_nbr_avail.h"
95 #include "ihevce_enc_loop_utils.h"
96 #include "ihevce_bs_compute_ctb.h"
97 #include "ihevce_cabac_rdo.h"
98 #include "ihevce_dep_mngr_interface.h"
99 #include "ihevce_enc_loop_pass.h"
100 #include "ihevce_rc_enc_structs.h"
101 #include "ihevce_common_utils.h"
102 #include "ihevce_stasino_helpers.h"
103 
104 #include "hme_datatype.h"
105 #include "hme_common_defs.h"
106 #include "hme_common_utils.h"
107 #include "hme_interface.h"
108 #include "hme_defs.h"
109 #include "ihevce_me_instr_set_router.h"
110 #include "hme_err_compute.h"
111 #include "hme_globals.h"
112 #include "ihevce_mv_pred.h"
113 #include "ihevce_mv_pred_merge.h"
114 #include "ihevce_inter_pred.h"
115 #include "ihevce_enc_loop_inter_mode_sifter.h"
116 
117 /*****************************************************************************/
118 /* Function Definitions                                                      */
119 /*****************************************************************************/
ihevce_get_num_part_types_in_me_cand_list(cu_inter_cand_t * ps_me_cand_list,UWORD8 * pu1_part_type_ref_cand,UWORD8 * pu1_idx_ref_cand,UWORD8 * pu1_diff_skip_cand_flag,WORD8 * pi1_skip_cand_from_merge_idx,WORD8 * pi1_final_skip_cand_merge_idx,UWORD8 u1_max_num_part_types_to_select,UWORD8 u1_num_me_cands)120 static WORD32 ihevce_get_num_part_types_in_me_cand_list(
121     cu_inter_cand_t *ps_me_cand_list,
122     UWORD8 *pu1_part_type_ref_cand,
123     UWORD8 *pu1_idx_ref_cand,
124     UWORD8 *pu1_diff_skip_cand_flag,
125     WORD8 *pi1_skip_cand_from_merge_idx,
126     WORD8 *pi1_final_skip_cand_merge_idx,
127     UWORD8 u1_max_num_part_types_to_select,
128     UWORD8 u1_num_me_cands)
129 {
130     UWORD8 i, j;
131     UWORD8 u1_num_unique_parts = 0;
132 
133     for(i = 0; i < u1_num_me_cands; i++)
134     {
135         UWORD8 u1_cur_part_type = ps_me_cand_list[i].b3_part_size;
136         UWORD8 u1_is_unique = 1;
137 
138         if(u1_num_unique_parts >= u1_max_num_part_types_to_select)
139         {
140             return u1_num_unique_parts;
141         }
142 
143         /* loop to check if the current cand is already present in the list */
144         for(j = 0; j < u1_num_unique_parts; j++)
145         {
146             if(u1_cur_part_type == pu1_part_type_ref_cand[j])
147             {
148                 u1_is_unique = 0;
149                 break;
150             }
151         }
152 
153         if(u1_is_unique)
154         {
155             if(SIZE_2Nx2N == u1_cur_part_type)
156             {
157                 *pu1_diff_skip_cand_flag = 0;
158                 *pi1_skip_cand_from_merge_idx = u1_num_unique_parts;
159                 *pi1_final_skip_cand_merge_idx = u1_num_unique_parts;
160             }
161 
162             pu1_part_type_ref_cand[u1_num_unique_parts] = u1_cur_part_type;
163             pu1_idx_ref_cand[u1_num_unique_parts] = i;
164             u1_num_unique_parts++;
165         }
166     }
167 
168     return u1_num_unique_parts;
169 }
170 
/*!
******************************************************************************
* \brief
*    Generates the luma inter prediction of a PU and, on request, measures
*    its distortion against the source block.
*
* \param[in]  ps_mc_ctxt : context handed to pf_luma_inter_pred_pu
* \param[in]  pf_luma_inter_pred_pu : prediction function; always called
* \param[in]  pf_sad_func : distortion function; called only when
*                           u1_compute_error is set and prediction succeeded
* \param[in]  ps_pu : PU to predict; b4_wd / b4_ht give the block size as
*                     (value + 1) * 4
* \param[in]  pv_src : source block
* \param[out] pv_pred : buffer receiving the prediction
* \param[in]  i4_src_stride : source stride
* \param[in]  i4_pred_stride : prediction stride
* \param[in]  u1_compute_error : non-zero to evaluate the distortion
* \param[in]  ps_cmn_utils_optimised_function_list : forwarded to pf_sad_func
*
* \return
*    Value written by pf_sad_func, or INT_MAX when the distortion was not
*    requested or pf_luma_inter_pred_pu did not return IV_SUCCESS
******************************************************************************
*/
static WORD32 ihevce_compute_inter_pred_and_cost(
    inter_pred_ctxt_t *ps_mc_ctxt,
    PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu,
    PF_SAD_FXN_T pf_sad_func,
    pu_t *ps_pu,
    void *pv_src,
    void *pv_pred,
    WORD32 i4_src_stride,
    WORD32 i4_pred_stride,
    UWORD8 u1_compute_error,
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
{
    err_prms_t s_sad_prms;
    WORD32 i4_distortion;

    /* The prediction is generated irrespective of u1_compute_error */
    IV_API_CALL_STATUS_T e_pred_status =
        pf_luma_inter_pred_pu(ps_mc_ctxt, ps_pu, pv_pred, i4_pred_stride, 0);

    /* No distortion wanted, or no usable prediction => worst possible cost */
    if((!u1_compute_error) || (IV_SUCCESS != e_pred_status))
    {
        return INT_MAX;
    }

    /* Same buffers are exposed through both the 8 bit and 16 bit pointers */
    s_sad_prms.pu1_inp = (UWORD8 *)pv_src;
    s_sad_prms.pu2_inp = (UWORD16 *)pv_src;
    s_sad_prms.i4_inp_stride = i4_src_stride;

    s_sad_prms.pu1_ref = (UWORD8 *)pv_pred;
    s_sad_prms.pu2_ref = (UWORD16 *)pv_pred;
    s_sad_prms.i4_ref_stride = i4_pred_stride;

    s_sad_prms.i4_blk_wd = (ps_pu->b4_wd + 1) << 2;
    s_sad_prms.i4_blk_ht = (ps_pu->b4_ht + 1) << 2;

    /* The distortion function reports its result through this pointer */
    s_sad_prms.pi4_sad_grid = &i4_distortion;

    s_sad_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;

    pf_sad_func(&s_sad_prms);

    return i4_distortion;
}
219 
/*!
******************************************************************************
* \brief
*    Evaluates every merge candidate of one partition and leaves the cheapest
*    one in ps_pu_merge.
*
* \description
*    For each candidate the prediction is either reused from the ME candidate
*    (when evaluation is not forced and ihevce_compare_pu_mv_t reports a match
*    with ps_pu_me) or generated into a scratch buffer. When the CU is noisy
*    and the alpha stim multiplier is non-zero, the distortion of a freshly
*    generated prediction is scaled by a noise term. The merge index cost is
*    added to every valid cost. Candidates flagged in pu1_is_top_used are
*    skipped unless u1_use_merge_cand_from_top_row is set, and bi-predicted
*    candidates are skipped for partitions whose width + height is below 16.
*
* \param[in,out] ps_prms : merge evaluation parameters; the MERGE_DERIVED rows
*                          of the buffer id, noise term and variance tables
*                          are updated for u1_part_id
* \param[in,out] ps_pu_merge : PU under evaluation; on exit carries the mv,
*                              pred mode and merge idx of the best candidate
*                              (if one was found)
* \param[in]  ps_pu_me : ME derived PU used for the reuse comparison
* \param[in]  pv_src : source block of the partition
* \param[in]  i4_me_cand_cost : cost taken over when the ME prediction is
*                               reused
* \param[in]  i4_pred_buf_offset : byte offset of the partition inside the
*                                  prediction buffers
* \param[in]  u1_num_cands : number of merge candidates in the list
* \param[in]  u1_part_id : partition index within the CU
* \param[in]  u1_force_pred_evaluation : non-zero to never reuse the ME
*                                        prediction
*
* \return
*    Cost of the best candidate, INT_MAX when no candidate was usable
******************************************************************************
*/
static WORD32 ihevce_determine_best_merge_pu(
    merge_prms_t *ps_prms,
    pu_t *ps_pu_merge,
    pu_t *ps_pu_me,
    void *pv_src,
    WORD32 i4_me_cand_cost,
    WORD32 i4_pred_buf_offset,
    UWORD8 u1_num_cands,
    UWORD8 u1_part_id,
    UWORD8 u1_force_pred_evaluation)
{
    pu_t *ps_pu = ps_pu_merge;

    UWORD8 u1_cand;
    UWORD8 u1_winner_pred_mode;
    WORD32 i4_pred_mean;
    UWORD32 u4_cand_variance, u4_winner_variance;

    /* Candidate list and the merge index every entry maps to */
    merge_cand_list_t *ps_merge_cands = ps_prms->ps_list;
    UWORD8 *pu1_merge_idx_map = ps_prms->au1_valid_merge_indices;
    UWORD8 u1_merge_cand_limit = ps_prms->u1_max_cands;

    /* Prediction and distortion machinery */
    inter_pred_ctxt_t *ps_inter_pred_ctxt = ps_prms->ps_mc_ctxt;
    PF_LUMA_INTER_PRED_PU pf_pred_pu = ps_prms->pf_luma_inter_pred_pu;
    PF_SAD_FXN_T pf_distortion = ps_prms->pf_sad_fxn;
    ihevce_cmn_opt_func_t *ps_opt_funcs = ps_prms->ps_cmn_utils_optimised_function_list;
    WORD32 i4_source_stride = ps_prms->i4_src_stride;
    WORD32 i4_prediction_stride = ps_prms->i4_pred_stride;
    UWORD8 u1_hbd_flag = ps_prms->u1_is_hbd;

    /* Rate estimation of the merge index */
    UWORD8 u1_ctxt_model = ps_prms->u1_merge_idx_cabac_model;
    WORD32 i4_rd_lambda = ps_prms->i4_lambda;

    /* Noise (stim) related tables and controls */
    WORD32(*pai4_noise_tbl)[MAX_NUM_INTER_PARTS] = ps_prms->pai4_noise_term;
    UWORD32(*pau4_variance_tbl)[MAX_NUM_INTER_PARTS] = ps_prms->pau4_pred_variance;
    UWORD32 *pu4_source_variance = ps_prms->pu4_src_variance;
    WORD32 i4_stim_alpha = ps_prms->i4_alpha_stim_multiplier;
    UWORD8 u1_noisy_cu = ps_prms->u1_is_cu_noisy;
    UWORD8 u1_apply_stim = (u1_noisy_cu && i4_stim_alpha);

    /* Prediction buffers : one holds the best so far, the other is scratch */
    void **ppv_pred_bufs = ps_prms->ppv_pred_buf_list;
    UWORD8(*pau1_buf_id_tbl)[MAX_NUM_INTER_PARTS] = ps_prms->pau1_best_pred_buf_id;
    UWORD8 *pu1_scratch_buf_ids = ps_prms->pu1_merge_pred_buf_array;
    UWORD8 u1_keep_buf_id = pu1_scratch_buf_ids[0];
    UWORD8 u1_work_buf_id = pu1_scratch_buf_ids[1];

    /* Running best */
    UWORD8 u1_winner_cand = UCHAR_MAX;
    WORD32 i4_winner_cost = INT_MAX;
    WORD32 i4_winner_noise = 0;
    WORD32 i4_cand_noise = 0;

    ASSERT(ps_pu->b1_merge_flag);

    for(u1_cand = 0; u1_cand < u1_num_cands; u1_cand++)
    {
        merge_cand_list_t *ps_cand = &ps_merge_cands[u1_cand];
        void *pv_work_pred = (UWORD8 *)ppv_pred_bufs[u1_work_buf_id] + i4_pred_buf_offset;
        WORD32 i4_cand_cost;
        UWORD8 u1_reuse_me_pred;

        /* Top row candidates are evaluated only when explicitly allowed */
        if(!ps_prms->u1_use_merge_cand_from_top_row && ps_prms->pu1_is_top_used[u1_cand])
        {
            continue;
        }

        /* Load the candidate into the PU */
        ps_pu->mv = ps_cand->mv;
        ps_pu->b3_merge_idx = pu1_merge_idx_map[u1_cand];

        if(!ps_cand->u1_pred_flag_l0)
        {
            ps_pu->b2_pred_mode = PRED_L1;
        }
        else if(ps_cand->u1_pred_flag_l1)
        {
            ps_pu->b2_pred_mode = PRED_BI;
        }
        else
        {
            ps_pu->b2_pred_mode = PRED_L0;
        }

        /* 8x8 SMPs should not have bipred mode as per std */
        if(PRED_BI == ps_pu->b2_pred_mode)
        {
            WORD32 i4_wd_plus_ht = ((ps_pu->b4_wd + 1) << 2) + ((ps_pu->b4_ht + 1) << 2);

            if(i4_wd_plus_ht < 16)
            {
                continue;
            }
        }

        /* The ME prediction is reusable when evaluation is not forced */
        /* and the motion data of the candidate matches the ME result  */
        u1_reuse_me_pred =
            (!u1_force_pred_evaluation) &&
            (ihevce_compare_pu_mv_t(
                &ps_pu->mv, &ps_pu_me->mv, ps_pu->b2_pred_mode, ps_pu_me->b2_pred_mode));

        if(!u1_reuse_me_pred)
        {
            /* Predict into the scratch buffer and measure the distortion */
            i4_cand_cost = ihevce_compute_inter_pred_and_cost(
                ps_inter_pred_ctxt,
                pf_pred_pu,
                pf_distortion,
                ps_pu,
                pv_src,
                pv_work_pred,
                i4_source_stride,
                i4_prediction_stride,
                1,
                ps_opt_funcs);

            if((i4_cand_cost < INT_MAX) && u1_apply_stim)
            {
                ihevce_calc_variance(
                    pv_work_pred,
                    i4_prediction_stride,
                    &i4_pred_mean,
                    &u4_cand_variance,
                    (ps_pu->b4_ht + 1) << 2,
                    (ps_pu->b4_wd + 1) << 2,
                    u1_hbd_flag,
                    0);

                i4_cand_noise = ihevce_compute_noise_term(
                    i4_stim_alpha, pu4_source_variance[u1_part_id], u4_cand_variance);

                MULTIPLY_STIM_WITH_DISTORTION(
                    i4_cand_cost, i4_cand_noise, STIM_Q_FORMAT, ALPHA_Q_FORMAT);
            }
        }
        else
        {
            /* Take over cost and noise statistics of the ME candidate */
            i4_cand_cost = i4_me_cand_cost;

            if((i4_cand_cost < INT_MAX) && u1_apply_stim)
            {
                i4_cand_noise = pai4_noise_tbl[ME_OR_SKIP_DERIVED][u1_part_id];
                u4_cand_variance = pau4_variance_tbl[ME_OR_SKIP_DERIVED][u1_part_id];
            }
        }

        /* Signalling cost of the merge index is added to valid costs only */
        if(i4_cand_cost < INT_MAX)
        {
            WORD32 i4_idx_rate_cost = 0;

            COMPUTE_MERGE_IDX_COST(
                u1_ctxt_model, u1_cand, u1_merge_cand_limit, i4_rd_lambda, i4_idx_rate_cost);

            i4_cand_cost += i4_idx_rate_cost;
        }

        /* Not an improvement => move on to the next candidate */
        if(i4_cand_cost >= i4_winner_cost)
        {
            continue;
        }

        i4_winner_cost = i4_cand_cost;
        u1_winner_cand = u1_cand;
        u1_winner_pred_mode = ps_pu->b2_pred_mode;

        if(u1_apply_stim)
        {
            i4_winner_noise = i4_cand_noise;
            u4_winner_variance = u4_cand_variance;
        }

        if(!u1_reuse_me_pred)
        {
            /* Scratch buffer now holds the best prediction; the former */
            /* best buffer becomes the scratch for later candidates     */
            SWAP(u1_keep_buf_id, u1_work_buf_id);
            pau1_buf_id_tbl[MERGE_DERIVED][u1_part_id] = u1_keep_buf_id;
        }
        else
        {
            /* Best prediction lives in the buffer of the ME candidate */
            pau1_buf_id_tbl[MERGE_DERIVED][u1_part_id] =
                pau1_buf_id_tbl[ME_OR_SKIP_DERIVED][u1_part_id];
        }
    }

    /* The PU still carries the last candidate tried; restore the winner */
    if(UCHAR_MAX != u1_winner_cand)
    {
        ps_pu->mv = ps_merge_cands[u1_winner_cand].mv;
        ps_pu->b2_pred_mode = u1_winner_pred_mode;
        ps_pu->b3_merge_idx = pu1_merge_idx_map[u1_winner_cand];

        if(u1_apply_stim)
        {
            pai4_noise_tbl[MERGE_DERIVED][u1_part_id] = i4_winner_noise;
            pau4_variance_tbl[MERGE_DERIVED][u1_part_id] = u4_winner_variance;
        }
    }

    return i4_winner_cost;
}
413 
ihevce_merge_cand_pred_buffer_preparation(void ** ppv_pred_buf_list,cu_inter_cand_t * ps_cand,UWORD8 (* pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],WORD32 i4_pred_stride,UWORD8 u1_cu_size,UWORD8 u1_part_type,UWORD8 u1_num_bytes_per_pel,FT_COPY_2D * pf_copy_2d)414 static WORD8 ihevce_merge_cand_pred_buffer_preparation(
415     void **ppv_pred_buf_list,
416     cu_inter_cand_t *ps_cand,
417     UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
418     WORD32 i4_pred_stride,
419     UWORD8 u1_cu_size,
420     UWORD8 u1_part_type,
421     UWORD8 u1_num_bytes_per_pel,
422     FT_COPY_2D *pf_copy_2d)
423 {
424     WORD32 i4_part_wd;
425     WORD32 i4_part_ht;
426     WORD32 i4_part_wd_pu2;
427     WORD32 i4_part_ht_pu2;
428     WORD32 i4_buf_offset;
429     UWORD8 *pu1_pred_src = NULL;
430     UWORD8 *pu1_pred_dst = NULL;
431     WORD8 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
432 
433     WORD32 i4_stride = i4_pred_stride * u1_num_bytes_per_pel;
434 
435     if((0 == u1_part_type) ||
436        (pau1_final_pred_buf_id[MERGE_DERIVED][0] == pau1_final_pred_buf_id[MERGE_DERIVED][1]))
437     {
438         ps_cand->pu1_pred_data =
439             (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
440         ps_cand->pu2_pred_data =
441             (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
442         ps_cand->i4_pred_data_stride = i4_pred_stride;
443 
444         i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
445     }
446     else if(pau1_final_pred_buf_id[MERGE_DERIVED][0] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0])
447     {
448         i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
449         i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
450 
451         i4_buf_offset = 0;
452 
453         pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]] +
454                        i4_buf_offset;
455         pu1_pred_dst =
456             (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] + i4_buf_offset;
457 
458         pf_copy_2d(
459             pu1_pred_dst,
460             i4_stride,
461             pu1_pred_src,
462             i4_stride,
463             i4_part_wd * u1_num_bytes_per_pel,
464             i4_part_ht);
465 
466         ps_cand->pu1_pred_data =
467             (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
468         ps_cand->pu2_pred_data =
469             (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
470         ps_cand->i4_pred_data_stride = i4_pred_stride;
471 
472         i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
473     }
474     else if(pau1_final_pred_buf_id[MERGE_DERIVED][1] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1])
475     {
476         i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
477         i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
478 
479         i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
480                         (i4_part_wd < u1_cu_size) * i4_part_wd;
481 
482         i4_buf_offset *= u1_num_bytes_per_pel;
483 
484         i4_part_wd = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
485         i4_part_ht = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
486 
487         pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
488                        i4_buf_offset;
489         pu1_pred_dst =
490             (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]] + i4_buf_offset;
491 
492         pf_copy_2d(
493             pu1_pred_dst,
494             i4_stride,
495             pu1_pred_src,
496             i4_stride,
497             i4_part_wd * u1_num_bytes_per_pel,
498             i4_part_ht);
499 
500         ps_cand->pu1_pred_data =
501             (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
502         ps_cand->pu2_pred_data =
503             (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
504         ps_cand->i4_pred_data_stride = i4_pred_stride;
505 
506         i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
507     }
508     else
509     {
510         i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
511         i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
512 
513         i4_part_wd_pu2 = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
514         i4_part_ht_pu2 = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
515 
516         switch((PART_TYPE_T)u1_part_type)
517         {
518         case PRT_2NxN:
519         case PRT_Nx2N:
520         case PRT_2NxnU:
521         case PRT_nLx2N:
522         {
523             pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
524             pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
525 
526             ps_cand->pu1_pred_data =
527                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
528             ps_cand->pu2_pred_data =
529                 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
530 
531             i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
532 
533             break;
534         }
535         case PRT_nRx2N:
536         case PRT_2NxnD:
537         {
538             i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
539                             (i4_part_wd < u1_cu_size) * i4_part_wd;
540 
541             i4_buf_offset *= u1_num_bytes_per_pel;
542 
543             pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
544                            i4_buf_offset;
545             pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]] +
546                            i4_buf_offset;
547 
548             i4_part_wd = i4_part_wd_pu2;
549             i4_part_ht = i4_part_ht_pu2;
550 
551             ps_cand->pu1_pred_data =
552                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
553             ps_cand->pu2_pred_data =
554                 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
555 
556             i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
557 
558             break;
559         }
560         default:
561         {
562             DBG_PRINTF("Invalid partition type %d\n", u1_part_type);
563             break;
564         }
565         }
566 
567         pf_copy_2d(
568             pu1_pred_dst,
569             i4_stride,
570             pu1_pred_src,
571             i4_stride,
572             i4_part_wd * u1_num_bytes_per_pel,
573             i4_part_ht);
574 
575         ps_cand->i4_pred_data_stride = i4_pred_stride;
576     }
577 
578     return i1_retval;
579 }
580 
ihevce_mixed_mode_cand_type1_pred_buffer_preparation(void ** ppv_pred_buf_list,cu_inter_cand_t * ps_cand,UWORD8 (* pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],UWORD8 * pu1_merge_pred_buf_idx_array,WORD32 i4_pred_stride,UWORD8 u1_me_pred_buf_id,UWORD8 u1_merge_pred_buf_id,UWORD8 u1_type0_cand_is_valid,UWORD8 u1_cu_size,UWORD8 u1_part_type,UWORD8 u1_num_bytes_per_pel,FT_COPY_2D * pf_copy_2d)581 static WORD8 ihevce_mixed_mode_cand_type1_pred_buffer_preparation(
582     void **ppv_pred_buf_list,
583     cu_inter_cand_t *ps_cand,
584     UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
585     UWORD8 *pu1_merge_pred_buf_idx_array,
586     WORD32 i4_pred_stride,
587     UWORD8 u1_me_pred_buf_id,
588     UWORD8 u1_merge_pred_buf_id,
589     UWORD8 u1_type0_cand_is_valid,
590     UWORD8 u1_cu_size,
591     UWORD8 u1_part_type,
592     UWORD8 u1_num_bytes_per_pel,
593     FT_COPY_2D *pf_copy_2d)
594 {
595     WORD32 i4_part_wd;
596     WORD32 i4_part_ht;
597     WORD32 i4_part_wd_pu2;
598     WORD32 i4_part_ht_pu2;
599     UWORD8 *pu1_pred_src = NULL;
600     UWORD8 *pu1_pred_dst = NULL;
601     WORD8 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
602 
603     WORD32 i4_stride = i4_pred_stride * u1_num_bytes_per_pel;
604 
605     ASSERT(0 != u1_part_type);
606 
607     i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
608     i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
609 
610     i4_part_wd_pu2 = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
611     i4_part_ht_pu2 = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
612 
613     if(pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1])
614     {
615         ps_cand->pu1_pred_data =
616             (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
617         ps_cand->pu2_pred_data =
618             (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
619         ps_cand->i4_pred_data_stride = i4_pred_stride;
620 
621         i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
622 
623         return i1_retval;
624     }
625     else
626     {
627         UWORD8 u1_bitfield = ((u1_merge_pred_buf_id == UCHAR_MAX) << 3) |
628                              ((u1_me_pred_buf_id == UCHAR_MAX) << 2) |
629                              ((!u1_type0_cand_is_valid) << 1) |
630                              (pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1] ==
631                               pau1_final_pred_buf_id[MERGE_DERIVED][1]);
632 
633         WORD32 i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
634                                (i4_part_wd < u1_cu_size) * i4_part_wd;
635 
636         i4_buf_offset *= u1_num_bytes_per_pel;
637 
638         switch(u1_bitfield)
639         {
640         case 15:
641         case 14:
642         case 6:
643         {
644             switch((PART_TYPE_T)u1_part_type)
645             {
646             case PRT_2NxN:
647             case PRT_Nx2N:
648             case PRT_2NxnU:
649             case PRT_nLx2N:
650             {
651                 pu1_pred_src =
652                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
653                 pu1_pred_dst =
654                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1]];
655 
656                 i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1];
657 
658                 break;
659             }
660             case PRT_nRx2N:
661             case PRT_2NxnD:
662             {
663                 pu1_pred_src =
664                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1]] +
665                     i4_buf_offset;
666                 pu1_pred_dst =
667                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]] +
668                     i4_buf_offset;
669 
670                 i4_part_wd = i4_part_wd_pu2;
671                 i4_part_ht = i4_part_ht_pu2;
672 
673                 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
674 
675                 break;
676             }
677             default:
678             {
679                 DBG_PRINTF("Invalid partition type %d\n", u1_part_type);
680                 break;
681             }
682             }
683 
684             ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
685             ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
686             ps_cand->i4_pred_data_stride = i4_pred_stride;
687 
688             pf_copy_2d(
689                 pu1_pred_dst,
690                 i4_stride,
691                 pu1_pred_src,
692                 i4_stride,
693                 i4_part_wd * u1_num_bytes_per_pel,
694                 i4_part_ht);
695 
696             break;
697         }
698         case 13:
699         case 9:
700         case 5:
701         {
702             UWORD8 i;
703 
704             for(i = 0; i < 3; i++)
705             {
706                 if((pu1_merge_pred_buf_idx_array[i] != pau1_final_pred_buf_id[MERGE_DERIVED][1]) &&
707                    (pu1_merge_pred_buf_idx_array[i] != pau1_final_pred_buf_id[MERGE_DERIVED][0]))
708                 {
709                     pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pu1_merge_pred_buf_idx_array[i]] +
710                                    i4_buf_offset;
711 
712                     i1_retval = pu1_merge_pred_buf_idx_array[i];
713 
714                     break;
715                 }
716             }
717 
718             pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
719                            i4_buf_offset;
720 
721             pf_copy_2d(
722                 pu1_pred_dst,
723                 i4_stride,
724                 pu1_pred_src,
725                 i4_stride,
726                 i4_part_wd_pu2 * u1_num_bytes_per_pel,
727                 i4_part_ht_pu2);
728             /* Copy PU1 */
729             pu1_pred_src =
730                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
731             pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[i1_retval];
732 
733             pf_copy_2d(
734                 pu1_pred_dst,
735                 i4_stride,
736                 pu1_pred_src,
737                 i4_stride,
738                 i4_part_wd * u1_num_bytes_per_pel,
739                 i4_part_ht);
740 
741             ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
742             ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
743             ps_cand->i4_pred_data_stride = i4_pred_stride;
744 
745             break;
746         }
747         case 12:
748         case 10:
749         case 8:
750         case 4:
751         case 2:
752         case 0:
753         {
754             pu1_pred_src =
755                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
756             pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1]];
757 
758             i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1];
759 
760             ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
761             ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
762             ps_cand->i4_pred_data_stride = i4_pred_stride;
763 
764             pf_copy_2d(
765                 pu1_pred_dst,
766                 i4_stride,
767                 pu1_pred_src,
768                 i4_stride,
769                 i4_part_wd * u1_num_bytes_per_pel,
770                 i4_part_ht);
771 
772             break;
773         }
774         case 11:
775         {
776             pu1_pred_src =
777                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
778             pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
779 
780             i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
781 
782             ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
783             ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
784             ps_cand->i4_pred_data_stride = i4_pred_stride;
785 
786             pf_copy_2d(
787                 pu1_pred_dst,
788                 i4_stride,
789                 pu1_pred_src,
790                 i4_stride,
791                 i4_part_wd * u1_num_bytes_per_pel,
792                 i4_part_ht);
793 
794             break;
795         }
796         case 7:
797         {
798             pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
799                            i4_buf_offset;
800             pu1_pred_dst =
801                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
802                 i4_buf_offset;
803 
804             i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1];
805 
806             ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
807             ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
808             ps_cand->i4_pred_data_stride = i4_pred_stride;
809 
810             pf_copy_2d(
811                 pu1_pred_dst,
812                 i4_stride,
813                 pu1_pred_src,
814                 i4_stride,
815                 i4_part_wd_pu2 * u1_num_bytes_per_pel,
816                 i4_part_ht_pu2);
817 
818             break;
819         }
820         case 3:
821         case 1:
822         {
823             if((u1_merge_pred_buf_id == pau1_final_pred_buf_id[MERGE_DERIVED][0]) &&
824                (u1_merge_pred_buf_id != pau1_final_pred_buf_id[MERGE_DERIVED][1]))
825             {
826                 pu1_pred_src =
827                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
828                 pu1_pred_dst =
829                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
830 
831                 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
832 
833                 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
834                 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
835                 ps_cand->i4_pred_data_stride = i4_pred_stride;
836 
837                 pf_copy_2d(
838                     pu1_pred_dst,
839                     i4_stride,
840                     pu1_pred_src,
841                     i4_stride,
842                     i4_part_wd * u1_num_bytes_per_pel,
843                     i4_part_ht);
844             }
845             else
846             {
847                 UWORD8 i;
848 
849                 for(i = 0; i < 3; i++)
850                 {
851                     if((pu1_merge_pred_buf_idx_array[i] !=
852                         pau1_final_pred_buf_id[MERGE_DERIVED][1]) &&
853                        (pu1_merge_pred_buf_idx_array[i] !=
854                         pau1_final_pred_buf_id[MERGE_DERIVED][0]))
855                     {
856                         pu1_pred_dst =
857                             (UWORD8 *)ppv_pred_buf_list[pu1_merge_pred_buf_idx_array[i]] +
858                             i4_buf_offset;
859 
860                         i1_retval = pu1_merge_pred_buf_idx_array[i];
861 
862                         break;
863                     }
864                 }
865 
866                 pu1_pred_src =
867                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
868                     i4_buf_offset;
869 
870                 pf_copy_2d(
871                     pu1_pred_dst,
872                     i4_stride,
873                     pu1_pred_src,
874                     i4_stride,
875                     i4_part_wd_pu2 * u1_num_bytes_per_pel,
876                     i4_part_ht_pu2);
877 
878                 /* Copy PU1 */
879                 pu1_pred_src =
880                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
881                 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[i1_retval];
882 
883                 pf_copy_2d(
884                     pu1_pred_dst,
885                     i4_stride,
886                     pu1_pred_src,
887                     i4_stride,
888                     i4_part_wd * u1_num_bytes_per_pel,
889                     i4_part_ht);
890 
891                 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
892                 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
893                 ps_cand->i4_pred_data_stride = i4_pred_stride;
894 
895                 break;
896             }
897         }
898         }
899     }
900 
901     return i1_retval;
902 }
903 
ihevce_mixed_mode_cand_type0_pred_buffer_preparation(void ** ppv_pred_buf_list,cu_inter_cand_t * ps_cand,UWORD8 (* pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],UWORD8 * pu1_merge_pred_buf_idx_array,UWORD8 u1_me_pred_buf_id,UWORD8 u1_merge_pred_buf_id,UWORD8 u1_mixed_tyep1_pred_buf_id,WORD32 i4_pred_stride,UWORD8 u1_cu_size,UWORD8 u1_part_type,UWORD8 u1_num_bytes_per_pel,FT_COPY_2D * pf_copy_2d)904 static WORD8 ihevce_mixed_mode_cand_type0_pred_buffer_preparation(
905     void **ppv_pred_buf_list,
906     cu_inter_cand_t *ps_cand,
907     UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
908     UWORD8 *pu1_merge_pred_buf_idx_array,
909     UWORD8 u1_me_pred_buf_id,
910     UWORD8 u1_merge_pred_buf_id,
911     UWORD8 u1_mixed_tyep1_pred_buf_id,
912     WORD32 i4_pred_stride,
913     UWORD8 u1_cu_size,
914     UWORD8 u1_part_type,
915     UWORD8 u1_num_bytes_per_pel,
916     FT_COPY_2D *pf_copy_2d)
917 {
918     WORD32 i4_part_wd;
919     WORD32 i4_part_ht;
920     WORD32 i4_part_wd_pu2;
921     WORD32 i4_part_ht_pu2;
922     WORD32 i4_buf_offset;
923     UWORD8 *pu1_pred_src = NULL;
924     UWORD8 *pu1_pred_dst = NULL;
925     WORD8 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
926 
927     WORD32 i4_stride = i4_pred_stride * u1_num_bytes_per_pel;
928 
929     ASSERT(0 != u1_part_type);
930 
931     i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
932     i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
933     i4_part_wd_pu2 = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
934     i4_part_ht_pu2 = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
935 
936     i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
937                     (i4_part_wd < u1_cu_size) * i4_part_wd;
938 
939     i4_buf_offset *= u1_num_bytes_per_pel;
940 
941     if(pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0])
942     {
943         ps_cand->pu1_pred_data =
944             (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
945         ps_cand->pu2_pred_data =
946             (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
947         ps_cand->i4_pred_data_stride = i4_pred_stride;
948 
949         i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
950     }
951     else
952     {
953         UWORD8 u1_bitfield =
954             ((u1_merge_pred_buf_id == UCHAR_MAX) << 2) | ((u1_me_pred_buf_id == UCHAR_MAX) << 1) |
955             (u1_mixed_tyep1_pred_buf_id != pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]);
956 
957         switch(u1_bitfield)
958         {
959         case 7:
960         {
961             switch((PART_TYPE_T)u1_part_type)
962             {
963             case PRT_2NxN:
964             case PRT_Nx2N:
965             case PRT_2NxnU:
966             case PRT_nLx2N:
967             {
968                 pu1_pred_src =
969                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]];
970                 pu1_pred_dst =
971                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]];
972 
973                 i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE0][1];
974 
975                 break;
976             }
977             case PRT_nRx2N:
978             case PRT_2NxnD:
979             {
980                 pu1_pred_src =
981                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
982                     i4_buf_offset;
983                 pu1_pred_dst =
984                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]] +
985                     i4_buf_offset;
986 
987                 i4_part_wd = i4_part_wd_pu2;
988                 i4_part_ht = i4_part_ht_pu2;
989 
990                 i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0];
991 
992                 break;
993             }
994             default:
995             {
996                 DBG_PRINTF("Invalid partition type %d\n", u1_part_type);
997                 break;
998             }
999             }
1000 
1001             ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1002             ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
1003             ps_cand->i4_pred_data_stride = i4_pred_stride;
1004 
1005             pf_copy_2d(
1006                 pu1_pred_dst,
1007                 i4_stride,
1008                 pu1_pred_src,
1009                 i4_stride,
1010                 i4_part_wd * u1_num_bytes_per_pel,
1011                 i4_part_ht);
1012 
1013             break;
1014         }
1015         case 6:
1016         case 5:
1017         case 4:
1018         {
1019             pu1_pred_src =
1020                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
1021                 i4_buf_offset;
1022             pu1_pred_dst =
1023                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]] +
1024                 i4_buf_offset;
1025 
1026             i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0];
1027 
1028             ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1029             ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
1030             ps_cand->i4_pred_data_stride = i4_pred_stride;
1031 
1032             pf_copy_2d(
1033                 pu1_pred_dst,
1034                 i4_stride,
1035                 pu1_pred_src,
1036                 i4_stride,
1037                 i4_part_wd_pu2 * u1_num_bytes_per_pel,
1038                 i4_part_ht_pu2);
1039             break;
1040         }
1041         case 3:
1042         {
1043             pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]];
1044             pu1_pred_dst =
1045                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]];
1046 
1047             i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1];
1048 
1049             ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1050             ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
1051             ps_cand->i4_pred_data_stride = i4_pred_stride;
1052 
1053             pf_copy_2d(
1054                 pu1_pred_dst,
1055                 i4_stride,
1056                 pu1_pred_src,
1057                 i4_stride,
1058                 i4_part_wd * u1_num_bytes_per_pel,
1059                 i4_part_ht);
1060 
1061             break;
1062         }
1063         case 2:
1064         case 1:
1065         case 0:
1066         {
1067             if((u1_merge_pred_buf_id == pau1_final_pred_buf_id[MERGE_DERIVED][1]) &&
1068                (u1_merge_pred_buf_id != pau1_final_pred_buf_id[MERGE_DERIVED][0]))
1069             {
1070                 pu1_pred_src =
1071                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
1072                     i4_buf_offset;
1073                 pu1_pred_dst =
1074                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]] +
1075                     i4_buf_offset;
1076 
1077                 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
1078 
1079                 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1080                 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
1081                 ps_cand->i4_pred_data_stride = i4_pred_stride;
1082 
1083                 pf_copy_2d(
1084                     pu1_pred_dst,
1085                     i4_stride,
1086                     pu1_pred_src,
1087                     i4_stride,
1088                     i4_part_wd_pu2 * u1_num_bytes_per_pel,
1089                     i4_part_ht_pu2);
1090             }
1091             else
1092             {
1093                 UWORD8 i;
1094 
1095                 for(i = 0; i < 3; i++)
1096                 {
1097                     if((pu1_merge_pred_buf_idx_array[i] != u1_merge_pred_buf_id) &&
1098                        (pu1_merge_pred_buf_idx_array[i] != u1_mixed_tyep1_pred_buf_id))
1099                     {
1100                         pu1_pred_dst =
1101                             (UWORD8 *)ppv_pred_buf_list[pu1_merge_pred_buf_idx_array[i]] +
1102                             i4_buf_offset;
1103 
1104                         i1_retval = pu1_merge_pred_buf_idx_array[i];
1105 
1106                         break;
1107                     }
1108                 }
1109 
1110                 pu1_pred_src =
1111                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
1112                     i4_buf_offset;
1113 
1114                 pf_copy_2d(
1115                     pu1_pred_dst,
1116                     i4_stride,
1117                     pu1_pred_src,
1118                     i4_stride,
1119                     i4_part_wd_pu2 * u1_num_bytes_per_pel,
1120                     i4_part_ht_pu2);
1121 
1122                 /* Copy PU1 */
1123                 pu1_pred_src =
1124                     (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
1125                 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1126 
1127                 pf_copy_2d(
1128                     pu1_pred_dst,
1129                     i4_stride,
1130                     pu1_pred_src,
1131                     i4_stride,
1132                     i4_part_wd * u1_num_bytes_per_pel,
1133                     i4_part_ht);
1134 
1135                 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1136                 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
1137                 ps_cand->i4_pred_data_stride = i4_pred_stride;
1138 
1139                 break;
1140             }
1141         }
1142         }
1143     }
1144 
1145     return i1_retval;
1146 }
1147 
/*!
******************************************************************************
* \brief
*    Returns the index of the largest entry of a cost array.
*
* \param[in] pu4_cost_array  Costs to scan
* \param[in] u1_array_size   Number of entries in pu4_cost_array
*
* \return
*    Index of the first occurrence of the maximum; 0 when the array holds
*    fewer than two entries
******************************************************************************
*/
static UWORD8 ihevce_find_idx_of_worst_cost(UWORD32 *pu4_cost_array, UWORD8 u1_array_size)
{
    WORD32 i4_worst = 0;
    WORD32 i4_cur = 1;

    while(i4_cur < u1_array_size)
    {
        /* Strict comparison keeps the earliest index on ties */
        if(pu4_cost_array[i4_worst] < pu4_cost_array[i4_cur])
        {
            i4_worst = i4_cur;
        }

        i4_cur++;
    }

    return (UWORD8)i4_worst;
}
1164 
/*!
******************************************************************************
* \brief
*    Marks prediction buffers that are no longer needed as free.
*
* \details
*    Three mutually exclusive situations are handled:
*      - skip evaluation: of the first two merge buffers, every one that is
*        not the buffer in use by the ME_OR_SKIP_DERIVED candidate is freed;
*      - neither merge nor skip evaluation: the ME buffer is freed when the
*        ME_OR_SKIP_DERIVED candidate holds no buffer (id == UCHAR_MAX);
*      - merge evaluation: COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO is
*        run on the ME buffer and on merge buffers 0 and 1, plus merge
*        buffer 2 when the partition type is not 0 (judging by its name the
*        macro frees a buffer nobody references - TODO confirm).
*    Afterwards every id in pu1_buf_id_to_free is freed, except the ME buffer
*    and, on the merge path, merge buffers 0 and 1.
*
* \param[in,out] pu4_pred_buf_usage_indicator Buffer usage bookkeeping that is
*                                             updated
* \param[in]     pu1_merge_pred_buf_idx_array Ids of the merge buffers
* \param[in]     pu1_buf_id_in_use            Buffer id per candidate type
* \param[in]     pu1_buf_id_to_free           Ids queued for release
* \param[in]     u1_me_buf_id                 Id of the ME buffer
* \param[in]     u1_num_available_cands       Not used by this function
* \param[in]     u1_num_bufs_to_free          Entries in pu1_buf_id_to_free
* \param[in]     u1_eval_merge                Non-zero for merge evaluation
* \param[in]     u1_eval_skip                 Non-zero for skip evaluation
* \param[in]     u1_part_type                 Partition type of the CU
*
* \return
*    None
******************************************************************************
*/
static void ihevce_free_unused_buf_indices(
    UWORD32 *pu4_pred_buf_usage_indicator,
    UWORD8 *pu1_merge_pred_buf_idx_array,
    UWORD8 *pu1_buf_id_in_use,
    UWORD8 *pu1_buf_id_to_free,
    UWORD8 u1_me_buf_id,
    UWORD8 u1_num_available_cands,
    UWORD8 u1_num_bufs_to_free,
    UWORD8 u1_eval_merge,
    UWORD8 u1_eval_skip,
    UWORD8 u1_part_type)
{
    UWORD8 i;

    /* Set on the merge path only: merge buffers 0 and 1 are then left to   */
    /* the reference-counting macro and skipped by the final loop           */
    UWORD8 u1_spare_merge_bufs = 0;

    if(u1_eval_skip)
    {
        UWORD8 u1_holds_buf0 =
            (pu1_buf_id_in_use[ME_OR_SKIP_DERIVED] == pu1_merge_pred_buf_idx_array[0]);
        UWORD8 u1_holds_buf1 =
            (!u1_holds_buf0) &&
            (pu1_buf_id_in_use[ME_OR_SKIP_DERIVED] == pu1_merge_pred_buf_idx_array[1]);

        if(!u1_holds_buf0)
        {
            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_merge_pred_buf_idx_array[0]);
        }

        if(!u1_holds_buf1)
        {
            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_merge_pred_buf_idx_array[1]);
        }
    }
    else if(!u1_eval_merge)
    {
        if(UCHAR_MAX == pu1_buf_id_in_use[ME_OR_SKIP_DERIVED])
        {
            ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, u1_me_buf_id);
        }
    }
    else
    {
        u1_spare_merge_bufs = 1;

        /* ME pred buf */
        COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
            u1_me_buf_id,
            pu1_buf_id_in_use,
            pu1_buf_id_to_free,
            4,
            u1_num_bufs_to_free,
            pu4_pred_buf_usage_indicator);

        /* Merge pred buf 0 */
        COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
            pu1_merge_pred_buf_idx_array[0],
            pu1_buf_id_in_use,
            pu1_buf_id_to_free,
            4,
            u1_num_bufs_to_free,
            pu4_pred_buf_usage_indicator);

        /* Merge pred buf 1 */
        COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
            pu1_merge_pred_buf_idx_array[1],
            pu1_buf_id_in_use,
            pu1_buf_id_to_free,
            4,
            u1_num_bufs_to_free,
            pu4_pred_buf_usage_indicator);

        if(0 != u1_part_type)
        {
            /* Merge pred buf 2 */
            COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
                pu1_merge_pred_buf_idx_array[2],
                pu1_buf_id_in_use,
                pu1_buf_id_to_free,
                4,
                u1_num_bufs_to_free,
                pu4_pred_buf_usage_indicator);
        }
    }

    /* Release the queued buffers */
    for(i = 0; i < u1_num_bufs_to_free; i++)
    {
        if(pu1_buf_id_to_free[i] == u1_me_buf_id)
        {
            continue;
        }

        if(u1_spare_merge_bufs && ((pu1_merge_pred_buf_idx_array[0] == pu1_buf_id_to_free[i]) ||
                                   (pu1_merge_pred_buf_idx_array[1] == pu1_buf_id_to_free[i])))
        {
            continue;
        }

        ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, pu1_buf_id_to_free[i]);
    }
}
1318 
/*!
******************************************************************************
* \brief
*    Tells whether a prediction buffer is referenced by exactly one of the
*    previously added candidates.
*
* \param[in] pu1_pred_id_of_winners                 Prediction buffer id of
*                                                   each previously added
*                                                   candidate
* \param[in] u1_idx_of_worst_cost_in_pred_buf_array Buffer id to look for
* \param[in] u1_num_cands_previously_added          Entries in
*                                                   pu1_pred_id_of_winners
*
* \return
*    0 when two or more candidates reference the buffer, 1 otherwise. At
*    least one reference is expected (asserted).
******************************************************************************
*/
static UWORD8 ihevce_check_if_buf_can_be_freed(
    UWORD8 *pu1_pred_id_of_winners,
    UWORD8 u1_idx_of_worst_cost_in_pred_buf_array,
    UWORD8 u1_num_cands_previously_added)
{
    UWORD8 u1_num_matches = 0;
    UWORD8 u1_winner;

    for(u1_winner = 0; u1_winner < u1_num_cands_previously_added; u1_winner++)
    {
        if(pu1_pred_id_of_winners[u1_winner] != u1_idx_of_worst_cost_in_pred_buf_array)
        {
            continue;
        }

        /* A second reference means the buffer is shared and must stay */
        if(++u1_num_matches > 1)
        {
            return 0;
        }
    }

    ASSERT(u1_num_matches > 0);

    return 1;
}
1345 
/*!
******************************************************************************
* \brief
*    Extracts the u1_num_cands_to_pick largest costs of pu4_cost_src together
*    with their indices, worst first.
*
* \details
*    The first pick is the caller supplied worst index. Every further pick is
*    found by temporarily zeroing the costs already picked and searching for
*    the maximum again. The zeroed entries are put back before returning, so
*    pu4_cost_src is unchanged on exit.
*
* \param[in]  pu4_cost_src                   Costs to pick from; modified
*                                            temporarily, restored on return
* \param[out] pu4_cost_dst                   Picked costs, worst first
* \param[out] pu1_worst_dst_cand_idx         Index in pu4_cost_src of each
*                                            picked cost
* \param[in]  u1_src_array_length            Entries in pu4_cost_src
* \param[in]  u1_num_cands_to_pick           Number of costs to pick
* \param[in]  u1_worst_cost_idx_in_dst_array Index of the largest entry of
*                                            pu4_cost_src, used as first pick
*
* \return
*    None
******************************************************************************
*/
static void ihevce_get_worst_costs_and_indices(
    UWORD32 *pu4_cost_src,
    UWORD32 *pu4_cost_dst,
    UWORD8 *pu1_worst_dst_cand_idx,
    UWORD8 u1_src_array_length,
    UWORD8 u1_num_cands_to_pick,
    UWORD8 u1_worst_cost_idx_in_dst_array)
{
    WORD32 i;

    /* Nothing requested: leave before the source array is touched, else   */
    /* the seed entry would be zeroed and never restored                   */
    if(0 == u1_num_cands_to_pick)
    {
        return;
    }

    pu4_cost_dst[0] = pu4_cost_src[u1_worst_cost_idx_in_dst_array];
    pu4_cost_src[u1_worst_cost_idx_in_dst_array] = 0;
    pu1_worst_dst_cand_idx[0] = u1_worst_cost_idx_in_dst_array;

    for(i = 1; i < u1_num_cands_to_pick; i++)
    {
        pu1_worst_dst_cand_idx[i] =
            ihevce_find_idx_of_worst_cost(pu4_cost_src, u1_src_array_length);

        pu4_cost_dst[i] = pu4_cost_src[pu1_worst_dst_cand_idx[i]];
        pu4_cost_src[pu1_worst_dst_cand_idx[i]] = 0;
    }

    /* Restore in reverse pick order. When more picks are requested than   */
    /* the source has entries, an index is picked a second time with the   */
    /* zeroed cost; writing the earliest pick last puts the real cost back */
    for(i = u1_num_cands_to_pick - 1; i >= 0; i--)
    {
        pu4_cost_src[pu1_worst_dst_cand_idx[i]] = pu4_cost_dst[i];
    }
}
1374 
/*!
******************************************************************************
* \brief
*    Decides which of the newly available candidates enter the list of best
*    candidates when that list cannot take all of them, and into which slots.
*
* \details
*    - The new costs are sorted together with their candidate ids (the loops
*      below rely on best-first order - presumably ascending, TODO confirm
*      against the sort macro).
*    - The leading candidates are placed in the slots of the destination list
*      that are still empty, without any comparison.
*    - The remaining candidates compete with the worst costs already in the
*      destination list. When a new candidate beats enough of them, the worst
*      destination slots are handed over to the new candidates.
*    - For every slot handed over, the prediction buffer of the evicted
*      candidate is queued for release if no other winner references it;
*      otherwise the slot's buffer id is reset to UCHAR_MAX.
*    - Finally pe_cand_id is rewritten to hold the ids of the selected
*      candidates in the default order ME_OR_SKIP_DERIVED, MERGE_DERIVED,
*      MIXED_MODE_TYPE1, MIXED_MODE_TYPE0.
*
* \param[in,out] pu4_cost_src                   Costs of the new candidates;
*                                               sorted in place
* \param[in,out] pu4_cost_dst                   Costs of the previously added
*                                               candidates
* \param[in,out] pe_cand_id                     Ids of the new candidates on
*                                               entry, ids of the selected
*                                               candidates on exit
* \param[out]    pu1_cand_idx_in_dst_array      Destination slot of each
*                                               selected candidate
* \param[out]    pu1_buf_id_to_free             Buffer ids queued for release
* \param[in,out] pu1_pred_id_of_winners         Prediction buffer id per
*                                               destination slot
* \param[in,out] pu1_num_bufs_to_free           Entries in pu1_buf_id_to_free
* \param[in]     i4_max_num_inter_rdopt_cands   Capacity of the destination
*                                               list
* \param[in]     u1_num_cands_previously_added  Slots already occupied
* \param[in]     u1_num_available_cands         Number of new candidates
* \param[in]     u1_worst_cost_idx_in_dst_array Slot of the worst cost in
*                                               pu4_cost_dst
*
* \return
*    Number of candidates selected
******************************************************************************
*/
static UWORD8 ihevce_select_cands_to_replace_previous_worst(
    UWORD32 *pu4_cost_src,
    UWORD32 *pu4_cost_dst,
    INTER_CANDIDATE_ID_T *pe_cand_id,
    UWORD8 *pu1_cand_idx_in_dst_array,
    UWORD8 *pu1_buf_id_to_free,
    UWORD8 *pu1_pred_id_of_winners,
    UWORD8 *pu1_num_bufs_to_free,
    WORD32 i4_max_num_inter_rdopt_cands,
    UWORD8 u1_num_cands_previously_added,
    UWORD8 u1_num_available_cands,
    UWORD8 u1_worst_cost_idx_in_dst_array)
{
    WORD32 i, j, k;
    UWORD32 au4_worst_dst_costs[4];
    UWORD8 au1_worst_dst_cand_idx[4];

    INTER_CANDIDATE_ID_T ae_default_cand_id[4] = {
        ME_OR_SKIP_DERIVED, MERGE_DERIVED, MIXED_MODE_TYPE1, MIXED_MODE_TYPE0
    };

    /* Empty slots left in the destination list */
    UWORD8 u1_num_cands_to_add_wo_comparisons =
        i4_max_num_inter_rdopt_cands - u1_num_cands_previously_added;
    /* New candidates that have to displace an existing entry */
    UWORD8 u1_num_cands_to_add_after_comparisons =
        u1_num_available_cands - u1_num_cands_to_add_wo_comparisons;
    UWORD8 u1_num_cands_to_add = 0;
    /* Indexed by candidate id: 1 when that candidate has been selected */
    UWORD8 au1_valid_src_cands[4] = { 0, 0, 0, 0 };

    /* The difference above is unsigned, so a ">= 0" test on it can never  */
    /* fail; compare the operands instead                                  */
    ASSERT(u1_num_available_cands >= u1_num_cands_to_add_wo_comparisons);

    /* Sorting src costs */
    SORT_PRIMARY_INTTYPE_ARRAY_AND_REORDER_GENERIC_COMPANION_ARRAY(
        pu4_cost_src, pe_cand_id, u1_num_available_cands, INTER_CANDIDATE_ID_T);

    /* Fill the empty slots first */
    for(i = 0; i < u1_num_cands_to_add_wo_comparisons; i++)
    {
        pu1_cand_idx_in_dst_array[u1_num_cands_to_add++] = u1_num_cands_previously_added + i;
        au1_valid_src_cands[pe_cand_id[i]] = 1;
    }

    if(u1_num_cands_previously_added)
    {
        WORD8 i1_last_index = 0;

        /* Worst destination costs, worst first, with their slot indices */
        ihevce_get_worst_costs_and_indices(
            pu4_cost_dst,
            au4_worst_dst_costs,
            au1_worst_dst_cand_idx,
            u1_num_cands_previously_added,
            u1_num_cands_to_add_after_comparisons,
            u1_worst_cost_idx_in_dst_array);

        /* Walk the competing candidates from the last sorted entry down   */
        for(i = u1_num_available_cands - 1; i >= u1_num_cands_to_add_wo_comparisons; i--)
        {
            for(j = u1_num_cands_to_add_after_comparisons - 1; j >= i1_last_index; j--)
            {
                if((pu4_cost_src[i] < au4_worst_dst_costs[j]))
                {
                    if((i - u1_num_cands_to_add_wo_comparisons) <= j)
                    {
                        /* Candidate i beats worst entry j, and at least   */
                        /* as many worst entries (0..j) are available as   */
                        /* competing candidates up to i: hand over the     */
                        /* worst slots to all of them                      */
                        for(k = 0; k <= (i - u1_num_cands_to_add_wo_comparisons); k++)
                        {
                            pu1_cand_idx_in_dst_array[u1_num_cands_to_add++] =
                                au1_worst_dst_cand_idx[k];
                            au1_valid_src_cands[pe_cand_id[u1_num_cands_to_add_wo_comparisons + k]] =
                                1;

                            if(1 == ihevce_check_if_buf_can_be_freed(
                                        pu1_pred_id_of_winners,
                                        pu1_pred_id_of_winners[au1_worst_dst_cand_idx[k]],
                                        u1_num_cands_previously_added))
                            {
                                pu1_buf_id_to_free[(*pu1_num_bufs_to_free)++] =
                                    pu1_pred_id_of_winners[au1_worst_dst_cand_idx[k]];
                            }
                            else
                            {
                                /* Buffer is shared with another winner:   */
                                /* only drop this slot's reference to it   */
                                pu1_pred_id_of_winners[au1_worst_dst_cand_idx[k]] = UCHAR_MAX;
                            }
                        }

                        /* Selection is complete */
                        i1_last_index = -1;
                    }
                    else
                    {
                        /* Not enough worst entries for candidate i; the   */
                        /* next candidate need not look below entry j      */
                        i1_last_index = j;
                    }

                    break;
                }
            }

            if(-1 == i1_last_index)
            {
                break;
            }
        }
    }

    /* Emit the selected ids in default order. All default ids are visited: */
    /* the caller may have dropped an invalid candidate from the middle of  */
    /* its id list, so a selected id can sit beyond position                */
    /* u1_num_available_cands - 1 in the default order                      */
    for(i = 0, j = 0; i < (WORD32)(sizeof(ae_default_cand_id) / sizeof(ae_default_cand_id[0]));
        i++)
    {
        if(au1_valid_src_cands[ae_default_cand_id[i]])
        {
            pe_cand_id[j++] = ae_default_cand_id[i];
        }
    }

    return u1_num_cands_to_add;
}
1484 
ihevce_merge_cands_with_existing_best(inter_cu_mode_info_t * ps_mode_info,cu_inter_cand_t ** pps_cand_src,pu_mv_t (* pas_mvp_winner)[NUM_INTER_PU_PARTS],UWORD32 (* pau4_cost)[MAX_NUM_INTER_PARTS],void ** ppv_pred_buf_list,UWORD8 (* pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],UWORD32 * pu4_pred_buf_usage_indicator,UWORD8 * pu1_num_merge_cands,UWORD8 * pu1_num_skip_cands,UWORD8 * pu1_num_mixed_mode_type0_cands,UWORD8 * pu1_num_mixed_mode_type1_cands,UWORD8 * pu1_merge_pred_buf_idx_array,FT_COPY_2D * pf_copy_2d,WORD32 i4_pred_stride,WORD32 i4_max_num_inter_rdopt_cands,UWORD8 u1_cu_size,UWORD8 u1_part_type,UWORD8 u1_eval_merge,UWORD8 u1_eval_skip,UWORD8 u1_num_bytes_per_pel)1485 static UWORD8 ihevce_merge_cands_with_existing_best(
1486     inter_cu_mode_info_t *ps_mode_info,
1487     cu_inter_cand_t **pps_cand_src,
1488     pu_mv_t (*pas_mvp_winner)[NUM_INTER_PU_PARTS],
1489     UWORD32 (*pau4_cost)[MAX_NUM_INTER_PARTS],
1490     void **ppv_pred_buf_list,
1491     UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
1492     UWORD32 *pu4_pred_buf_usage_indicator,
1493     UWORD8 *pu1_num_merge_cands,
1494     UWORD8 *pu1_num_skip_cands,
1495     UWORD8 *pu1_num_mixed_mode_type0_cands,
1496     UWORD8 *pu1_num_mixed_mode_type1_cands,
1497     UWORD8 *pu1_merge_pred_buf_idx_array,
1498 
1499     FT_COPY_2D *pf_copy_2d,
1500 
1501     WORD32 i4_pred_stride,
1502     WORD32 i4_max_num_inter_rdopt_cands,
1503     UWORD8 u1_cu_size,
1504     UWORD8 u1_part_type,
1505     UWORD8 u1_eval_merge,
1506     UWORD8 u1_eval_skip,
1507     UWORD8 u1_num_bytes_per_pel)
1508 {
1509     UWORD32 au4_cost_src[4];
1510     WORD32 i;
1511     WORD32 u1_num_available_cands;
1512     UWORD8 au1_buf_id_in_use[4];
1513     UWORD8 au1_buf_id_to_free[4];
1514     UWORD8 au1_cand_idx_in_dst_array[4];
1515 
1516     INTER_CANDIDATE_ID_T ae_cand_id[4] = {
1517         ME_OR_SKIP_DERIVED, MERGE_DERIVED, MIXED_MODE_TYPE1, MIXED_MODE_TYPE0
1518     };
1519 
1520     cu_inter_cand_t **pps_cand_dst = ps_mode_info->aps_cu_data;
1521 
1522     UWORD8 u1_num_cands_previously_added = ps_mode_info->u1_num_inter_cands;
1523     UWORD8 u1_worst_cost_idx = ps_mode_info->u1_idx_of_worst_cost_in_cost_array;
1524     UWORD8 u1_idx_of_worst_cost_in_pred_buf_array =
1525         ps_mode_info->u1_idx_of_worst_cost_in_pred_buf_array;
1526     UWORD32 *pu4_cost_dst = ps_mode_info->au4_cost;
1527     UWORD8 *pu1_pred_id_of_winners = ps_mode_info->au1_pred_buf_idx;
1528     UWORD8 u1_num_bufs_to_free = 0;
1529     UWORD8 u1_skip_or_merge_cand_is_valid = 0;
1530     UWORD8 u1_num_invalid_cands = 0;
1531 
1532     memset(au1_buf_id_in_use, UCHAR_MAX, sizeof(au1_buf_id_in_use));
1533 
1534     u1_num_available_cands = (u1_eval_merge) ? 2 + ((u1_part_type != 0) + 1) : 1;
1535 
1536     for(i = 0; i < u1_num_available_cands; i++)
1537     {
1538         WORD32 i4_idx = i - u1_num_invalid_cands;
1539 
1540         if(u1_part_type == 0)
1541         {
1542             au4_cost_src[i4_idx] = pau4_cost[ae_cand_id[i4_idx]][0];
1543         }
1544         else
1545         {
1546             au4_cost_src[i4_idx] =
1547                 pau4_cost[ae_cand_id[i4_idx]][0] + pau4_cost[ae_cand_id[i4_idx]][1];
1548         }
1549 
1550         if(au4_cost_src[i4_idx] >= INT_MAX)
1551         {
1552             memmove(
1553                 &ae_cand_id[i4_idx],
1554                 &ae_cand_id[i4_idx + 1],
1555                 sizeof(INTER_CANDIDATE_ID_T) * (u1_num_available_cands - i - 1));
1556 
1557             u1_num_invalid_cands++;
1558         }
1559     }
1560 
1561     u1_num_available_cands -= u1_num_invalid_cands;
1562 
1563     if((u1_num_cands_previously_added + u1_num_available_cands) > i4_max_num_inter_rdopt_cands)
1564     {
1565         u1_num_available_cands = ihevce_select_cands_to_replace_previous_worst(
1566             au4_cost_src,
1567             pu4_cost_dst,
1568             ae_cand_id,
1569             au1_cand_idx_in_dst_array,
1570             au1_buf_id_to_free,
1571             pu1_pred_id_of_winners,
1572             &u1_num_bufs_to_free,
1573             i4_max_num_inter_rdopt_cands,
1574             u1_num_cands_previously_added,
1575             u1_num_available_cands,
1576             u1_worst_cost_idx);
1577     }
1578     else
1579     {
1580         for(i = 0; i < u1_num_available_cands; i++)
1581         {
1582             au1_cand_idx_in_dst_array[i] = u1_num_cands_previously_added + i;
1583         }
1584     }
1585 
1586     for(i = 0; i < u1_num_available_cands; i++)
1587     {
1588         UWORD8 u1_dst_array_idx = au1_cand_idx_in_dst_array[i];
1589 
1590         if(u1_part_type == 0)
1591         {
1592             au4_cost_src[i] = pau4_cost[ae_cand_id[i]][0];
1593         }
1594         else
1595         {
1596             au4_cost_src[i] = pau4_cost[ae_cand_id[i]][0] + pau4_cost[ae_cand_id[i]][1];
1597         }
1598 
1599         pps_cand_dst[u1_dst_array_idx] = pps_cand_src[ae_cand_id[i]];
1600 
1601         /* Adding a skip candidate identical to the merge winner */
1602         if((u1_eval_merge) && (0 == u1_part_type) && (MIXED_MODE_TYPE1 == ae_cand_id[i]))
1603         {
1604             (*pu1_num_skip_cands)++;
1605 
1606             pu4_cost_dst[u1_dst_array_idx] = au4_cost_src[i];
1607 
1608             if(u1_num_cands_previously_added >= i4_max_num_inter_rdopt_cands)
1609             {
1610                 u1_worst_cost_idx =
1611                     ihevce_find_idx_of_worst_cost(pu4_cost_dst, u1_num_cands_previously_added);
1612 
1613                 u1_idx_of_worst_cost_in_pred_buf_array = pu1_pred_id_of_winners[u1_worst_cost_idx];
1614             }
1615             else
1616             {
1617                 u1_num_cands_previously_added++;
1618             }
1619 
1620             if(u1_skip_or_merge_cand_is_valid)
1621             {
1622                 pps_cand_dst[u1_dst_array_idx]->pu1_pred_data =
1623                     (UWORD8 *)ppv_pred_buf_list[au1_buf_id_in_use[MERGE_DERIVED]];
1624                 pps_cand_dst[u1_dst_array_idx]->pu2_pred_data =
1625                     (UWORD16 *)ppv_pred_buf_list[au1_buf_id_in_use[MERGE_DERIVED]];
1626                 pps_cand_dst[u1_dst_array_idx]->i4_pred_data_stride = i4_pred_stride;
1627 
1628                 au1_buf_id_in_use[MIXED_MODE_TYPE1] = au1_buf_id_in_use[MERGE_DERIVED];
1629                 pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MERGE_DERIVED];
1630             }
1631             else
1632             {
1633                 u1_skip_or_merge_cand_is_valid = 1;
1634 
1635                 au1_buf_id_in_use[MIXED_MODE_TYPE1] = ihevce_merge_cand_pred_buffer_preparation(
1636                     ppv_pred_buf_list,
1637                     pps_cand_dst[u1_dst_array_idx],
1638                     pau1_final_pred_buf_id,
1639                     i4_pred_stride,
1640                     u1_cu_size,
1641                     u1_part_type,
1642                     u1_num_bytes_per_pel,
1643                     pf_copy_2d);
1644 
1645                 pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MIXED_MODE_TYPE1];
1646             }
1647 
1648             continue;
1649         }
1650 
1651         if(u1_num_cands_previously_added < i4_max_num_inter_rdopt_cands)
1652         {
1653             if(u1_num_cands_previously_added)
1654             {
1655                 if(au4_cost_src[i] > pu4_cost_dst[u1_worst_cost_idx])
1656                 {
1657                     u1_worst_cost_idx = u1_num_cands_previously_added;
1658                 }
1659             }
1660 
1661             pu4_cost_dst[u1_dst_array_idx] = au4_cost_src[i];
1662 
1663             u1_num_cands_previously_added++;
1664         }
1665         else
1666         {
1667             pu4_cost_dst[u1_dst_array_idx] = au4_cost_src[i];
1668 
1669             u1_worst_cost_idx = ihevce_find_idx_of_worst_cost(
1670                 ps_mode_info->au4_cost, u1_num_cands_previously_added);
1671 
1672             u1_idx_of_worst_cost_in_pred_buf_array = pu1_pred_id_of_winners[u1_worst_cost_idx];
1673         }
1674 
1675         switch(ae_cand_id[i])
1676         {
1677         case ME_OR_SKIP_DERIVED:
1678         {
1679             (*pu1_num_skip_cands) += u1_eval_skip;
1680 
1681             pps_cand_dst[u1_dst_array_idx]->pu1_pred_data =
1682                 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
1683             pps_cand_dst[u1_dst_array_idx]->pu2_pred_data =
1684                 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
1685             pps_cand_dst[u1_dst_array_idx]->i4_pred_data_stride = i4_pred_stride;
1686 
1687             if(u1_worst_cost_idx == u1_dst_array_idx)
1688             {
1689                 u1_idx_of_worst_cost_in_pred_buf_array =
1690                     pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
1691             }
1692 
1693             u1_skip_or_merge_cand_is_valid = u1_eval_skip;
1694 
1695             au1_buf_id_in_use[ME_OR_SKIP_DERIVED] = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
1696             pu1_pred_id_of_winners[u1_dst_array_idx] =
1697                 pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
1698 
1699             break;
1700         }
1701         case MERGE_DERIVED:
1702         {
1703             (*pu1_num_merge_cands)++;
1704 
1705             au1_buf_id_in_use[MERGE_DERIVED] = ihevce_merge_cand_pred_buffer_preparation(
1706                 ppv_pred_buf_list,
1707                 pps_cand_dst[u1_dst_array_idx],
1708                 pau1_final_pred_buf_id,
1709                 i4_pred_stride,
1710                 u1_cu_size,
1711                 u1_part_type,
1712                 u1_num_bytes_per_pel,
1713                 pf_copy_2d
1714 
1715             );
1716 
1717             pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MERGE_DERIVED];
1718 
1719             if(u1_worst_cost_idx == u1_dst_array_idx)
1720             {
1721                 u1_idx_of_worst_cost_in_pred_buf_array = au1_buf_id_in_use[MERGE_DERIVED];
1722             }
1723 
1724             u1_skip_or_merge_cand_is_valid = 1;
1725 
1726             break;
1727         }
1728         case MIXED_MODE_TYPE1:
1729         {
1730             (*pu1_num_mixed_mode_type1_cands)++;
1731 
1732             au1_buf_id_in_use[MIXED_MODE_TYPE1] =
1733                 ihevce_mixed_mode_cand_type1_pred_buffer_preparation(
1734                     ppv_pred_buf_list,
1735                     pps_cand_dst[u1_dst_array_idx],
1736                     pau1_final_pred_buf_id,
1737                     pu1_merge_pred_buf_idx_array,
1738                     i4_pred_stride,
1739                     au1_buf_id_in_use[ME_OR_SKIP_DERIVED],
1740                     au1_buf_id_in_use[MERGE_DERIVED],
1741                     (u1_num_available_cands - i) > 1,
1742                     u1_cu_size,
1743                     u1_part_type,
1744                     u1_num_bytes_per_pel,
1745                     pf_copy_2d
1746 
1747                 );
1748 
1749             pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MIXED_MODE_TYPE1];
1750 
1751             if(u1_worst_cost_idx == u1_dst_array_idx)
1752             {
1753                 u1_idx_of_worst_cost_in_pred_buf_array = au1_buf_id_in_use[MIXED_MODE_TYPE1];
1754             }
1755 
1756             break;
1757         }
1758         case MIXED_MODE_TYPE0:
1759         {
1760             (*pu1_num_mixed_mode_type0_cands)++;
1761 
1762             au1_buf_id_in_use[MIXED_MODE_TYPE0] =
1763                 ihevce_mixed_mode_cand_type0_pred_buffer_preparation(
1764                     ppv_pred_buf_list,
1765                     pps_cand_dst[u1_dst_array_idx],
1766                     pau1_final_pred_buf_id,
1767                     pu1_merge_pred_buf_idx_array,
1768                     au1_buf_id_in_use[ME_OR_SKIP_DERIVED],
1769                     au1_buf_id_in_use[MERGE_DERIVED],
1770                     au1_buf_id_in_use[MIXED_MODE_TYPE1],
1771                     i4_pred_stride,
1772                     u1_cu_size,
1773                     u1_part_type,
1774                     u1_num_bytes_per_pel,
1775                     pf_copy_2d);
1776 
1777             pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MIXED_MODE_TYPE0];
1778 
1779             if(u1_worst_cost_idx == u1_dst_array_idx)
1780             {
1781                 u1_idx_of_worst_cost_in_pred_buf_array = au1_buf_id_in_use[MIXED_MODE_TYPE0];
1782             }
1783 
1784             break;
1785         }
1786         }
1787     }
1788 
1789     ihevce_free_unused_buf_indices(
1790         pu4_pred_buf_usage_indicator,
1791         pu1_merge_pred_buf_idx_array,
1792         au1_buf_id_in_use,
1793         au1_buf_id_to_free,
1794         pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0],
1795         u1_num_available_cands,
1796         u1_num_bufs_to_free,
1797         u1_eval_merge,
1798         u1_eval_skip,
1799         u1_part_type);
1800 
1801     ps_mode_info->u1_idx_of_worst_cost_in_cost_array = u1_worst_cost_idx;
1802     ps_mode_info->u1_num_inter_cands = u1_num_cands_previously_added;
1803     ps_mode_info->u1_idx_of_worst_cost_in_pred_buf_array = u1_idx_of_worst_cost_in_pred_buf_array;
1804 
1805     return u1_skip_or_merge_cand_is_valid;
1806 }
1807 
/*!
******************************************************************************
* \brief
*    Points the entries of pps_cands at the candidate containers required for
*    the requested evaluation and reserves prediction buffer indices through
*    ihevce_get_free_pred_buf_indices.
*
*    - u1_eval_merge != 0 : one index is requested for the ME candidate, then
*      two (u1_part_type == 0) or three (u1_part_type > 0) indices for the
*      merge candidates. The ME candidate is copied into the merge container
*      (and into both mixed-mode containers when u1_part_type > 0) and the
*      merge flag of both PUs of the merge container is set.
*    - u1_eval_skip != 0 (merge off) : two indices are requested and a skip
*      container is initialised; ME_OR_SKIP_DERIVED and MERGE_DERIVED both
*      refer to it.
*    - neither : one index is requested and the ME candidate is bound to the
*      corresponding prediction buffer.
*
* \return
*    0 if any buffer request returned fewer indices than asked for; otherwise
*    the value returned by the last buffer request made.
*
* NOTE(review): when the second request on the merge path fails, the index
* already obtained for the ME candidate is not released in this function;
* whether the helper or the caller takes care of that is not visible here.
******************************************************************************
*/
static UWORD8 ihevce_prepare_cand_containers(
    ihevce_inter_cand_sifter_prms_t *ps_ctxt,
    cu_inter_cand_t **pps_cands,
    UWORD8 *pu1_merge_pred_buf_idx_array,
    UWORD8 *pu1_me_pred_buf_idx,
    UWORD8 u1_part_type,
    UWORD8 u1_me_cand_list_idx,
    UWORD8 u1_eval_merge,
    UWORD8 u1_eval_skip)
{
    ihevce_inter_pred_buf_data_t *ps_pred_bufs = ps_ctxt->ps_pred_buf_data;
    WORD32 i4_stride = ps_pred_bufs->i4_pred_stride;
    UWORD8 u1_bufs_granted;

    if(u1_eval_merge)
    {
        cu_inter_cand_t *ps_me_cand;
        cu_inter_cand_t *ps_merge_cand;
        UWORD8 u1_has_two_parts = (u1_part_type > 0);
        UWORD8 u1_merge_bufs_needed = u1_has_two_parts ? 3 : 2;

        /* Buffer for the ME candidate comes first */
        u1_bufs_granted = ihevce_get_free_pred_buf_indices(
            pu1_me_pred_buf_idx, &ps_pred_bufs->u4_is_buf_in_use, 1);

        if(u1_bufs_granted < 1)
        {
            return 0;
        }

        ps_me_cand = &ps_ctxt->ps_me_cands[u1_me_cand_list_idx];
        pps_cands[ME_OR_SKIP_DERIVED] = ps_me_cand;

        /* Then the buffers for the merge evaluation */
        u1_bufs_granted = ihevce_get_free_pred_buf_indices(
            pu1_merge_pred_buf_idx_array,
            &ps_pred_bufs->u4_is_buf_in_use,
            u1_merge_bufs_needed);

        if(u1_bufs_granted < u1_merge_bufs_needed)
        {
            return 0;
        }

        /* Merge candidates are filled from the front of the merge/skip array */
        ps_merge_cand = &ps_ctxt->ps_cu_inter_merge_skip->as_cu_inter_merge_skip_cand
                             [ps_ctxt->ps_cu_inter_merge_skip->u1_num_merge_cands];
        pps_cands[MERGE_DERIVED] = ps_merge_cand;

        if(u1_has_two_parts)
        {
            /* Type0 grows from the front of the datastore, type1 from the back */
            pps_cands[MIXED_MODE_TYPE0] =
                &ps_ctxt->ps_mixed_modes_datastore
                     ->as_cu_data[ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type0_cands];

            pps_cands[MIXED_MODE_TYPE1] =
                &ps_ctxt->ps_mixed_modes_datastore->as_cu_data
                     [MAX_NUM_MIXED_MODE_INTER_RDO_CANDS - 1 -
                      ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type1_cands];
        }

        /* Every derived container starts out as a copy of the ME candidate */
        *ps_merge_cand = *ps_me_cand;

        if(u1_has_two_parts)
        {
            *pps_cands[MIXED_MODE_TYPE0] = *ps_me_cand;
            *pps_cands[MIXED_MODE_TYPE1] = *ps_me_cand;
        }

        ps_merge_cand->as_inter_pu[0].b1_merge_flag = 1;
        ps_merge_cand->as_inter_pu[1].b1_merge_flag = 1;

        return u1_bufs_granted;
    }

    if(u1_eval_skip)
    {
        cu_inter_cand_t *ps_skip_cand;
        pu_t *ps_skip_pu;
        UWORD8 u1_cu_size = ps_ctxt->u1_cu_size;
        UWORD8 u1_cu_pos_x = ps_ctxt->u1_cu_pos_x;
        UWORD8 u1_cu_pos_y = ps_ctxt->u1_cu_pos_y;

        u1_bufs_granted = ihevce_get_free_pred_buf_indices(
            pu1_merge_pred_buf_idx_array, &ps_pred_bufs->u4_is_buf_in_use, 2);

        if(u1_bufs_granted < 2)
        {
            return 0;
        }

        /* Skip candidates are filled from the back of the merge/skip array */
        ps_skip_cand = &ps_ctxt->ps_cu_inter_merge_skip->as_cu_inter_merge_skip_cand
                            [MAX_NUM_CU_MERGE_SKIP_CAND - 1 -
                             ps_ctxt->ps_cu_inter_merge_skip->u1_num_skip_cands];
        ps_skip_pu = &ps_skip_cand->as_inter_pu[0];

        ps_skip_cand->b1_skip_flag = 1;
        ps_skip_cand->b1_eval_mark = 1;
        ps_skip_cand->b1_eval_tx_cusize = 1;
        ps_skip_cand->b1_eval_tx_cusize_by2 = 1;
        ps_skip_cand->b1_intra_has_won = 0;
        ps_skip_cand->b3_part_size = 0;
        ps_skip_cand->i4_pred_data_stride = i4_stride;

        /* Single PU spanning the whole CU, expressed in 4x4 units */
        ps_skip_pu->b1_intra_flag = 0;
        ps_skip_pu->b1_merge_flag = 1;
        ps_skip_pu->b4_pos_x = u1_cu_pos_x >> 2;
        ps_skip_pu->b4_pos_y = u1_cu_pos_y >> 2;
        ps_skip_pu->b4_wd = (u1_cu_size >> 2) - 1;
        ps_skip_pu->b4_ht = (u1_cu_size >> 2) - 1;

        pps_cands[ME_OR_SKIP_DERIVED] = ps_skip_cand;
        pps_cands[MERGE_DERIVED] = ps_skip_cand;

        return u1_bufs_granted;
    }

    /* Plain ME candidate : neither merge nor skip evaluation requested */
    {
        cu_inter_cand_t *ps_me_cand;
        void *pv_me_pred;

        u1_bufs_granted = ihevce_get_free_pred_buf_indices(
            pu1_me_pred_buf_idx, &ps_pred_bufs->u4_is_buf_in_use, 1);

        if(u1_bufs_granted < 1)
        {
            return 0;
        }

        ps_me_cand = &ps_ctxt->ps_me_cands[u1_me_cand_list_idx];
        pv_me_pred = ps_pred_bufs->apv_inter_pred_data[*pu1_me_pred_buf_idx];

        ps_me_cand->i4_pred_data_stride = i4_stride;
        ps_me_cand->pu1_pred_data = (UWORD8 *)pv_me_pred;
        ps_me_cand->pu2_pred_data = (UWORD16 *)pv_me_pred;

        pps_cands[ME_OR_SKIP_DERIVED] = ps_me_cand;
    }

    return u1_bufs_granted;
}
1937 
ihevce_merge_prms_init(merge_prms_t * ps_prms,merge_cand_list_t * ps_list,inter_pred_ctxt_t * ps_mc_ctxt,mv_pred_ctxt_t * ps_mv_pred_ctxt,PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu,PF_SAD_FXN_T pf_sad_fxn,void ** ppv_pred_buf_list,ihevce_cmn_opt_func_t * ps_cmn_utils_optimised_function_list,UWORD8 * pu1_merge_pred_buf_array,UWORD8 (* pau1_best_pred_buf_id)[MAX_NUM_INTER_PARTS],UWORD8 * pu1_is_top_used,WORD32 (* pai4_noise_term)[MAX_NUM_INTER_PARTS],UWORD32 (* pau4_pred_variance)[MAX_NUM_INTER_PARTS],UWORD32 * pu4_src_variance,WORD32 i4_alpha_stim_multiplier,WORD32 i4_src_stride,WORD32 i4_pred_stride,WORD32 i4_lambda,UWORD8 u1_is_cu_noisy,UWORD8 u1_is_hbd,UWORD8 u1_max_cands,UWORD8 u1_merge_idx_cabac_model,UWORD8 u1_use_merge_cand_from_top_row)1938 static __inline void ihevce_merge_prms_init(
1939     merge_prms_t *ps_prms,
1940     merge_cand_list_t *ps_list,
1941     inter_pred_ctxt_t *ps_mc_ctxt,
1942     mv_pred_ctxt_t *ps_mv_pred_ctxt,
1943     PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu,
1944     PF_SAD_FXN_T pf_sad_fxn,
1945     void **ppv_pred_buf_list,
1946     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
1947     UWORD8 *pu1_merge_pred_buf_array,
1948     UWORD8 (*pau1_best_pred_buf_id)[MAX_NUM_INTER_PARTS],
1949     UWORD8 *pu1_is_top_used,
1950     WORD32 (*pai4_noise_term)[MAX_NUM_INTER_PARTS],
1951     UWORD32 (*pau4_pred_variance)[MAX_NUM_INTER_PARTS],
1952     UWORD32 *pu4_src_variance,
1953     WORD32 i4_alpha_stim_multiplier,
1954     WORD32 i4_src_stride,
1955     WORD32 i4_pred_stride,
1956     WORD32 i4_lambda,
1957     UWORD8 u1_is_cu_noisy,
1958     UWORD8 u1_is_hbd,
1959     UWORD8 u1_max_cands,
1960     UWORD8 u1_merge_idx_cabac_model,
1961     UWORD8 u1_use_merge_cand_from_top_row)
1962 {
1963     ps_prms->ps_list = ps_list;
1964     ps_prms->ps_mc_ctxt = ps_mc_ctxt;
1965     ps_prms->ps_mv_pred_ctxt = ps_mv_pred_ctxt;
1966     ps_prms->pf_luma_inter_pred_pu = pf_luma_inter_pred_pu;
1967     ps_prms->pf_sad_fxn = pf_sad_fxn;
1968     ps_prms->ppv_pred_buf_list = ppv_pred_buf_list;
1969     ps_prms->ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
1970 
1971     ps_prms->pu1_merge_pred_buf_array = pu1_merge_pred_buf_array;
1972     ps_prms->pau1_best_pred_buf_id = pau1_best_pred_buf_id;
1973     ps_prms->pu1_is_top_used = pu1_is_top_used;
1974     ps_prms->pai4_noise_term = pai4_noise_term;
1975     ps_prms->pau4_pred_variance = pau4_pred_variance;
1976     ps_prms->pu4_src_variance = pu4_src_variance;
1977     ps_prms->i4_alpha_stim_multiplier = i4_alpha_stim_multiplier;
1978     ps_prms->i4_src_stride = i4_src_stride;
1979     ps_prms->i4_pred_stride = i4_pred_stride;
1980     ps_prms->i4_lambda = i4_lambda;
1981     ps_prms->u1_is_cu_noisy = u1_is_cu_noisy;
1982     ps_prms->u1_is_hbd = u1_is_hbd;
1983     ps_prms->u1_max_cands = u1_max_cands;
1984     ps_prms->u1_merge_idx_cabac_model = u1_merge_idx_cabac_model;
1985     ps_prms->u1_use_merge_cand_from_top_row = u1_use_merge_cand_from_top_row;
1986 }
1987 
/*!
******************************************************************************
* \brief
*    Decides how many merge candidates are to be evaluated.
*
*    - When u1_use_merge_cand_from_top_row is non-zero, the incoming count
*      u1_num_merge_cands is returned as is.
*    - Otherwise at most one candidate is kept: 1 is returned only when the
*      left or bottom-left neighbour is available and pu1_is_top_used[0] is
*      zero; in every other case 0 is returned.
*
*    ps_merge_cand is not read by this function.
******************************************************************************
*/
static UWORD8 ihevce_merge_candidate_seive(
    nbr_avail_flags_t *ps_nbr,
    merge_cand_list_t *ps_merge_cand,
    UWORD8 *pu1_is_top_used,
    UWORD8 u1_num_merge_cands,
    UWORD8 u1_use_merge_cand_from_top_row)
{
    (void)ps_merge_cand;

    if(u1_use_merge_cand_from_top_row)
    {
        return u1_num_merge_cands;
    }

    /* pu1_is_top_used[0] is consulted only when a left-side neighbour exists */
    return (ps_nbr->u1_bot_lt_avail || ps_nbr->u1_left_avail) ? !pu1_is_top_used[0] : 0;
}
2011 
ihevce_compute_pred_and_populate_modes(ihevce_inter_cand_sifter_prms_t * ps_ctxt,PF_SAD_FXN_T pf_sad_func,UWORD32 * pu4_src_variance,UWORD8 u1_part_type,UWORD8 u1_me_cand_list_idx,UWORD8 u1_eval_merge,UWORD8 u1_eval_skip)2012 static UWORD8 ihevce_compute_pred_and_populate_modes(
2013     ihevce_inter_cand_sifter_prms_t *ps_ctxt,
2014     PF_SAD_FXN_T pf_sad_func,
2015     UWORD32 *pu4_src_variance,
2016     UWORD8 u1_part_type,
2017     UWORD8 u1_me_cand_list_idx,
2018     UWORD8 u1_eval_merge,
2019     UWORD8 u1_eval_skip)
2020 {
2021     cu_inter_cand_t *aps_cands[4];
2022     pu_mv_t as_mvp_winner[4][NUM_INTER_PU_PARTS];
2023     merge_prms_t s_merge_prms;
2024     merge_cand_list_t as_merge_cand[MAX_NUM_MERGE_CAND];
2025 
2026     UWORD8 i, j;
2027     UWORD32 au4_cost[4][NUM_INTER_PU_PARTS];
2028     UWORD8 au1_final_pred_buf_id[4][NUM_INTER_PU_PARTS];
2029     UWORD8 au1_merge_pred_buf_idx_array[3];
2030     UWORD8 au1_is_top_used[MAX_NUM_MERGE_CAND];
2031     UWORD8 u1_me_pred_buf_idx;
2032     UWORD8 u1_num_bufs_currently_allocated;
2033     WORD32 i4_mean;
2034     UWORD32 au4_pred_variance[4][NUM_INTER_PU_PARTS];
2035     WORD32 ai4_noise_term[4][NUM_INTER_PU_PARTS];
2036 
2037     UWORD8 u1_cu_pos_x = ps_ctxt->u1_cu_pos_x;
2038     UWORD8 u1_cu_pos_y = ps_ctxt->u1_cu_pos_y;
2039 
2040     inter_cu_mode_info_t *ps_cu_mode_info = ps_ctxt->ps_inter_cu_mode_info;
2041     inter_pred_ctxt_t *ps_mc_ctxt = ps_ctxt->ps_mc_ctxt;
2042     nbr_4x4_t *ps_cu_nbr_buf = ps_ctxt->aps_cu_nbr_buf[0];
2043     nbr_4x4_t *ps_pu_left_nbr = ps_ctxt->ps_left_nbr_4x4;
2044     nbr_4x4_t *ps_pu_top_nbr = ps_ctxt->ps_top_nbr_4x4;
2045     nbr_4x4_t *ps_pu_topleft_nbr = ps_ctxt->ps_topleft_nbr_4x4;
2046 
2047     ihevce_inter_pred_buf_data_t *ps_pred_buf_info = ps_ctxt->ps_pred_buf_data;
2048     mv_pred_ctxt_t *ps_mv_pred_ctxt = ps_ctxt->ps_mv_pred_ctxt;
2049 
2050     PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu = ps_ctxt->pf_luma_inter_pred_pu;
2051 
2052     void *pv_src = ps_ctxt->pv_src;
2053     WORD32 i4_src_stride = ps_ctxt->i4_src_strd;
2054     WORD32 i4_pred_stride = ps_ctxt->ps_pred_buf_data->i4_pred_stride;
2055     UWORD8 u1_num_parts = (u1_part_type != PRT_2Nx2N) + 1;
2056     UWORD8 u1_num_bytes_per_pel = ps_ctxt->u1_is_hbd + 1;
2057     void **ppv_pred_buf_list = ps_ctxt->ps_pred_buf_data->apv_inter_pred_data;
2058     UWORD8 u1_cu_size = ps_ctxt->u1_cu_size;
2059     WORD32 i4_nbr_4x4_left_stride = ps_ctxt->i4_nbr_4x4_left_strd;
2060     UWORD8 *pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map;
2061     WORD32 i4_nbr_map_stride = ps_ctxt->i4_ctb_nbr_map_stride;
2062     UWORD8 u1_max_merge_candidates = ps_ctxt->u1_max_merge_candidates;
2063     WORD32 i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands;
2064     WORD32 i4_pred_buf_offset = 0;
2065     WORD32 i4_src_buf_offset = 0;
2066     UWORD8 u1_single_mcl_flag =
2067         ((8 == u1_cu_size) && (ps_mv_pred_ctxt->i4_log2_parallel_merge_level_minus2 > 0));
2068     UWORD8 u1_skip_or_merge_cand_is_valid = 0;
2069     WORD32 i4_lambda_qf = ps_ctxt->i4_lambda_qf;
2070     UWORD8 u1_is_cu_noisy = ps_ctxt->u1_is_cu_noisy;
2071 
2072     ASSERT(0 == (u1_eval_skip && u1_eval_merge));
2073     ASSERT(u1_me_cand_list_idx < ps_ctxt->u1_num_me_cands);
2074 
2075     /*
2076     Algorithm -
2077     1. Determine pred and satd for ME cand.
2078     2. Determine merge winner for PU1.
2079     3. Determine pred and satd for mixed_type0 cand.
2080     4. Determine merge winner for PU2 and hence derive pred and satd for merge cand.
2081     5. Determine merge winner for PU2 assuming ME cand as PU1 winner and hence derive
2082     pred and satd for mixed_type1 cand.
2083     6. Sort the 4 preceding costs and hence, the cand list.
2084     7. Merge the sorted lists with the final cand list.
2085 
2086     PS : 2 - 7 will be relevant only if u1_eval_merge = 1 and u1_eval_skip = 0
2087     PPS : 1 will not be relevant if u1_eval_skip = 1
2088     */
2089 
2090     /*
2091     Explanatory notes -
2092     1. Motion Vector Merge candidates and nbr's in all merge mode (RealD)
2093     2. Motion Vector Merge candidates and nbr's in mixed mode (AltD)
2094     */
2095 
2096     u1_num_bufs_currently_allocated = ihevce_prepare_cand_containers(
2097         ps_ctxt,
2098         aps_cands,
2099         au1_merge_pred_buf_idx_array,
2100         &u1_me_pred_buf_idx,
2101         u1_part_type,
2102         u1_me_cand_list_idx,
2103         u1_eval_merge,
2104         u1_eval_skip);
2105 
2106     if(0 == u1_num_bufs_currently_allocated)
2107     {
2108         return 0;
2109     }
2110 
2111     if((u1_eval_merge) || (u1_eval_skip))
2112     {
2113         ihevce_merge_prms_init(
2114             &s_merge_prms,
2115             as_merge_cand,
2116             ps_mc_ctxt,
2117             ps_mv_pred_ctxt,
2118             pf_luma_inter_pred_pu,
2119             pf_sad_func,
2120             ppv_pred_buf_list,
2121             ps_ctxt->ps_cmn_utils_optimised_function_list,
2122             au1_merge_pred_buf_idx_array,
2123             au1_final_pred_buf_id,
2124             au1_is_top_used,
2125             ai4_noise_term,
2126             au4_pred_variance,
2127             pu4_src_variance,
2128             ps_ctxt->i4_alpha_stim_multiplier,
2129             i4_src_stride,
2130             i4_pred_stride,
2131             i4_lambda_qf,
2132             u1_is_cu_noisy,
2133             ps_ctxt->u1_is_hbd,
2134             u1_max_merge_candidates,
2135             ps_ctxt->u1_merge_idx_cabac_model,
2136             ps_ctxt->u1_use_merge_cand_from_top_row);
2137     }
2138 
2139     for(i = 0; i < u1_num_parts; i++)
2140     {
2141         nbr_avail_flags_t s_nbr;
2142 
2143         UWORD8 u1_part_wd;
2144         UWORD8 u1_part_ht;
2145         UWORD8 u1_pu_pos_x_4x4;
2146         UWORD8 u1_pu_pos_y_4x4;
2147 
2148         pu_t *ps_pu = &aps_cands[MERGE_DERIVED]->as_inter_pu[i];
2149 
2150         PART_SIZE_E e_part_size = (PART_SIZE_E)aps_cands[ME_OR_SKIP_DERIVED]->b3_part_size;
2151 
2152         void *pv_pu_src = (UWORD8 *)pv_src + i4_src_buf_offset;
2153         UWORD8 u1_num_merge_cands = 0;
2154 
2155         u1_part_wd = (aps_cands[0]->as_inter_pu[i].b4_wd + 1) << 2;
2156         u1_part_ht = (aps_cands[0]->as_inter_pu[i].b4_ht + 1) << 2;
2157         u1_pu_pos_x_4x4 = aps_cands[0]->as_inter_pu[i].b4_pos_x;
2158         u1_pu_pos_y_4x4 = aps_cands[0]->as_inter_pu[i].b4_pos_y;
2159 
2160         /* Inter cand pred and satd */
2161         if(!u1_eval_skip)
2162         {
2163             void *pv_pu_pred = (UWORD8 *)ppv_pred_buf_list[u1_me_pred_buf_idx] + i4_pred_buf_offset;
2164 
2165             if(ps_ctxt->u1_reuse_me_sad)
2166             {
2167                 ihevce_compute_inter_pred_and_cost(
2168                     ps_mc_ctxt,
2169                     pf_luma_inter_pred_pu,
2170                     pf_sad_func,
2171                     &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[i],
2172                     pv_pu_src,
2173                     pv_pu_pred,
2174                     i4_src_stride,
2175                     i4_pred_stride,
2176                     0,
2177                     ps_ctxt->ps_cmn_utils_optimised_function_list);
2178 
2179                 au4_cost[ME_OR_SKIP_DERIVED][i] =
2180                     ps_ctxt->pai4_me_err_metric[u1_me_cand_list_idx][i];
2181             }
2182             else
2183             {
2184                 au4_cost[ME_OR_SKIP_DERIVED][i] = ihevce_compute_inter_pred_and_cost(
2185                     ps_mc_ctxt,
2186                     pf_luma_inter_pred_pu,
2187                     pf_sad_func,
2188                     &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[i],
2189                     pv_pu_src,
2190                     pv_pu_pred,
2191                     i4_src_stride,
2192                     i4_pred_stride,
2193                     1,
2194                     ps_ctxt->ps_cmn_utils_optimised_function_list);
2195             }
2196 
2197             au1_final_pred_buf_id[ME_OR_SKIP_DERIVED][i] = u1_me_pred_buf_idx;
2198 
2199             if(u1_is_cu_noisy && ps_ctxt->i4_alpha_stim_multiplier)
2200             {
2201                 ihevce_calc_variance(
2202                     pv_pu_pred,
2203                     i4_pred_stride,
2204                     &i4_mean,
2205                     &au4_pred_variance[ME_OR_SKIP_DERIVED][i],
2206                     u1_part_ht,
2207                     u1_part_wd,
2208                     ps_ctxt->u1_is_hbd,
2209                     0);
2210 
2211                 ai4_noise_term[ME_OR_SKIP_DERIVED][i] = ihevce_compute_noise_term(
2212                     ps_ctxt->i4_alpha_stim_multiplier,
2213                     pu4_src_variance[i],
2214                     au4_pred_variance[ME_OR_SKIP_DERIVED][i]);
2215 
2216                 MULTIPLY_STIM_WITH_DISTORTION(
2217                     au4_cost[ME_OR_SKIP_DERIVED][i],
2218                     ai4_noise_term[ME_OR_SKIP_DERIVED][i],
2219                     STIM_Q_FORMAT,
2220                     ALPHA_Q_FORMAT);
2221             }
2222         }
2223 
2224         if(u1_eval_skip || u1_eval_merge)
2225         {
2226             pu_t s_pu, *ps_pu_merge;
2227 
2228             UWORD8 u1_is_any_top_available = 1;
2229             UWORD8 u1_are_valid_merge_cands_available = 1;
2230 
2231             /* get the neighbour availability flags */
2232             if((u1_num_parts > 1) && u1_single_mcl_flag)
2233             { /* 8x8 SMPs take the 2Nx2N neighbours */
2234                 ihevce_get_only_nbr_flag(
2235                     &s_nbr,
2236                     pu1_ctb_nbr_map,
2237                     i4_nbr_map_stride,
2238                     aps_cands[0]->as_inter_pu[0].b4_pos_x,
2239                     aps_cands[0]->as_inter_pu[0].b4_pos_y,
2240                     u1_cu_size >> 2,
2241                     u1_cu_size >> 2);
2242 
2243                 /* Make the PU width and height as 8 */
2244                 memcpy(&s_pu, ps_pu, sizeof(pu_t));
2245                 s_pu.b4_pos_x = u1_cu_pos_x >> 2;
2246                 s_pu.b4_pos_y = u1_cu_pos_y >> 2;
2247                 s_pu.b4_wd = (u1_cu_size >> 2) - 1;
2248                 s_pu.b4_ht = (u1_cu_size >> 2) - 1;
2249 
2250                 /* Give the local PU structure to MV merge */
2251                 ps_pu_merge = &s_pu;
2252             }
2253             else
2254             {
2255                 ihevce_get_only_nbr_flag(
2256                     &s_nbr,
2257                     pu1_ctb_nbr_map,
2258                     i4_nbr_map_stride,
2259                     u1_pu_pos_x_4x4,
2260                     u1_pu_pos_y_4x4,
2261                     u1_part_wd >> 2,
2262                     u1_part_ht >> 2);
2263 
2264                 u1_is_any_top_available = s_nbr.u1_top_avail || s_nbr.u1_top_rt_avail ||
2265                                           s_nbr.u1_top_lt_avail;
2266 
2267                 if(!ps_ctxt->u1_use_merge_cand_from_top_row)
2268                 {
2269                     if(u1_is_any_top_available)
2270                     {
2271                         if(s_nbr.u1_left_avail || s_nbr.u1_bot_lt_avail)
2272                         {
2273                             s_nbr.u1_top_avail = 0;
2274                             s_nbr.u1_top_rt_avail = 0;
2275                             s_nbr.u1_top_lt_avail = 0;
2276                         }
2277                         else
2278                         {
2279                             u1_are_valid_merge_cands_available = 0;
2280                         }
2281                     }
2282                 }
2283 
2284                 /* Actual PU passed to MV merge */
2285                 ps_pu_merge = ps_pu;
2286             }
2287             if(u1_are_valid_merge_cands_available)
2288             {
2289                 u1_num_merge_cands = ihevce_mv_pred_merge(
2290                     ps_mv_pred_ctxt,
2291                     ps_pu_top_nbr,
2292                     ps_pu_left_nbr,
2293                     ps_pu_topleft_nbr,
2294                     i4_nbr_4x4_left_stride,
2295                     &s_nbr,
2296                     NULL,
2297                     ps_pu_merge,
2298                     e_part_size,
2299                     i,
2300                     u1_single_mcl_flag,
2301                     as_merge_cand,
2302                     au1_is_top_used);
2303 
2304                 if(u1_num_merge_cands > u1_max_merge_candidates)
2305                 {
2306                     u1_num_merge_cands = u1_max_merge_candidates;
2307                 }
2308 
2309                 u1_num_merge_cands = ihevce_merge_candidate_seive(
2310                     &s_nbr,
2311                     as_merge_cand,
2312                     au1_is_top_used,
2313                     u1_num_merge_cands,
2314                     ps_ctxt->u1_use_merge_cand_from_top_row || !u1_is_any_top_available);
2315 
2316                 for(j = 0; j < u1_num_merge_cands; j++)
2317                 {
2318                     s_merge_prms.au1_valid_merge_indices[j] = j;
2319                 }
2320 
2321                 au4_cost[MERGE_DERIVED][i] = ihevce_determine_best_merge_pu(
2322                     &s_merge_prms,
2323                     &aps_cands[MERGE_DERIVED]->as_inter_pu[i],
2324                     &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[i],
2325                     pv_pu_src,
2326                     au4_cost[ME_OR_SKIP_DERIVED][i],
2327                     i4_pred_buf_offset,
2328                     u1_num_merge_cands,
2329                     i,
2330                     u1_eval_skip);
2331             }
2332             else
2333             {
2334                 au4_cost[MERGE_DERIVED][i] = INT_MAX;
2335             }
2336 
2337             au4_cost[(i) ? MIXED_MODE_TYPE1 : MIXED_MODE_TYPE0][i] = au4_cost[MERGE_DERIVED][i];
2338 
2339             if(u1_eval_skip)
2340             {
2341                 /* This statement ensures that the skip candidate is always added */
2342                 au4_cost[ME_OR_SKIP_DERIVED][i] =
2343                     (au4_cost[MERGE_DERIVED][0] < INT_MAX) ? SKIP_MODE_COST : INT_MAX;
2344                 au1_final_pred_buf_id[ME_OR_SKIP_DERIVED][i] =
2345                     au1_final_pred_buf_id[MERGE_DERIVED][i];
2346             }
2347             else
2348             {
2349                 au4_cost[ME_OR_SKIP_DERIVED][i] += ps_ctxt->pai4_mv_cost[u1_me_cand_list_idx][i];
2350                 au4_cost[(i) ? MIXED_MODE_TYPE0 : MIXED_MODE_TYPE1][i] =
2351                     au4_cost[ME_OR_SKIP_DERIVED][i];
2352             }
2353 
2354             au1_final_pred_buf_id[(i) ? MIXED_MODE_TYPE1 : MIXED_MODE_TYPE0][i] =
2355                 au1_final_pred_buf_id[MERGE_DERIVED][i];
2356             au1_final_pred_buf_id[(i) ? MIXED_MODE_TYPE0 : MIXED_MODE_TYPE1][i] =
2357                 au1_final_pred_buf_id[ME_OR_SKIP_DERIVED][i];
2358         }
2359         else
2360         {
2361             au4_cost[ME_OR_SKIP_DERIVED][i] += ps_ctxt->pai4_mv_cost[u1_me_cand_list_idx][i];
2362         }
2363 
2364         if(!i && (u1_num_parts > 1) && u1_eval_merge)
2365         {
2366             ihevce_set_inter_nbr_map(
2367                 pu1_ctb_nbr_map,
2368                 i4_nbr_map_stride,
2369                 u1_pu_pos_x_4x4,
2370                 u1_pu_pos_y_4x4,
2371                 (u1_part_wd >> 2),
2372                 (u1_part_ht >> 2),
2373                 1);
2374             ihevce_populate_nbr_4x4_with_pu_data(
2375                 ps_cu_nbr_buf, &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[i], u1_cu_size >> 2);
2376 
2377             if(u1_part_wd < u1_cu_size)
2378             {
2379                 i4_pred_buf_offset = i4_src_buf_offset = u1_part_wd;
2380 
2381                 if(!u1_single_mcl_flag) /* 8x8 SMPs take the 2Nx2N neighbours */
2382                 {
2383                     ps_cu_nbr_buf += (u1_part_wd >> 2);
2384                     ps_pu_left_nbr = ps_cu_nbr_buf - 1;
2385                     ps_pu_top_nbr += (u1_part_wd >> 2);
2386                     ps_pu_topleft_nbr = ps_pu_top_nbr - 1;
2387 
2388                     i4_nbr_4x4_left_stride = (u1_cu_size >> 2);
2389                 }
2390             }
2391             else if(u1_part_ht < u1_cu_size)
2392             {
2393                 i4_pred_buf_offset = u1_part_ht * i4_pred_stride;
2394                 i4_src_buf_offset = u1_part_ht * i4_src_stride;
2395 
2396                 if(!u1_single_mcl_flag) /* 8x8 SMPs take the 2Nx2N neighbours */
2397                 {
2398                     ps_cu_nbr_buf += (u1_part_ht >> 2) * (u1_cu_size >> 2);
2399                     ps_pu_left_nbr += (u1_part_ht >> 2) * i4_nbr_4x4_left_stride;
2400                     ps_pu_top_nbr = ps_cu_nbr_buf - (u1_cu_size >> 2);
2401                     ps_pu_topleft_nbr = ps_pu_left_nbr - i4_nbr_4x4_left_stride;
2402                 }
2403             }
2404 
2405             i4_pred_buf_offset *= u1_num_bytes_per_pel;
2406             i4_src_buf_offset *= u1_num_bytes_per_pel;
2407 
2408             aps_cands[MIXED_MODE_TYPE0]->as_inter_pu[0] = aps_cands[MERGE_DERIVED]->as_inter_pu[0];
2409         }
2410         else if(!i && (u1_num_parts > 1) && (!u1_eval_merge))
2411         {
2412             if(u1_part_wd < u1_cu_size)
2413             {
2414                 i4_pred_buf_offset = i4_src_buf_offset = u1_part_wd;
2415             }
2416             else if(u1_part_ht < u1_cu_size)
2417             {
2418                 i4_pred_buf_offset = u1_part_ht * i4_pred_stride;
2419                 i4_src_buf_offset = u1_part_ht * i4_src_stride;
2420             }
2421 
2422             i4_pred_buf_offset *= u1_num_bytes_per_pel;
2423             i4_src_buf_offset *= u1_num_bytes_per_pel;
2424         }
2425         else if(i && (u1_num_parts > 1) && u1_eval_merge)
2426         {
2427             aps_cands[MIXED_MODE_TYPE1]->as_inter_pu[1] = aps_cands[MERGE_DERIVED]->as_inter_pu[1];
2428         }
2429     }
2430 
2431     /* Adding a skip candidate */
2432     if((u1_eval_merge) && (0 == u1_part_type))
2433     {
2434         cu_inter_cand_t *ps_cand = &ps_ctxt->ps_cu_inter_merge_skip->as_cu_inter_merge_skip_cand
2435                                         [MAX_NUM_CU_MERGE_SKIP_CAND - 1 -
2436                                          ps_ctxt->ps_cu_inter_merge_skip->u1_num_skip_cands];
2437 
2438         (*ps_cand) = (*aps_cands[MERGE_DERIVED]);
2439 
2440         ps_cand->b1_skip_flag = 1;
2441 
2442         aps_cands[MIXED_MODE_TYPE1] = ps_cand;
2443         au4_cost[MIXED_MODE_TYPE1][0] = (au4_cost[MERGE_DERIVED][0] < INT_MAX) ? SKIP_MODE_COST
2444                                                                                : INT_MAX;
2445     }
2446 
2447     /* Sort and populate */
2448     u1_skip_or_merge_cand_is_valid = ihevce_merge_cands_with_existing_best(
2449         ps_cu_mode_info,
2450         aps_cands,
2451         as_mvp_winner,
2452         au4_cost,
2453         ppv_pred_buf_list,
2454         au1_final_pred_buf_id,
2455         &ps_pred_buf_info->u4_is_buf_in_use,
2456         &ps_ctxt->ps_cu_inter_merge_skip->u1_num_merge_cands,
2457         &ps_ctxt->ps_cu_inter_merge_skip->u1_num_skip_cands,
2458         &ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type0_cands,
2459         &ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type1_cands,
2460         au1_merge_pred_buf_idx_array,
2461         ps_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d,
2462 
2463         i4_pred_stride,
2464         i4_max_num_inter_rdopt_cands,
2465         u1_cu_size,
2466         u1_part_type,
2467         u1_eval_merge,
2468         u1_eval_skip,
2469         u1_num_bytes_per_pel);
2470 
2471     return u1_skip_or_merge_cand_is_valid;
2472 }
2473 
/*!
******************************************************************************
* \if Function name : ihevce_redundant_candidate_pruner \endif
*
* \brief
*    Removes, in place, inter candidates that duplicate another candidate's
*    motion but use merge signalling on fewer PUs
*
* \param[in,out] ps_inter_cu_mode_info : candidate list. aps_cu_data, au4_cost,
*    au1_pred_buf_idx are compacted and u1_num_inter_cands is decremented for
*    every candidate removed
*
* \note
*    - Candidates with b1_skip_flag set are neither compared nor removed.
*    - Only candidates with equal b3_part_size are compared.
*    - A PU pair contributes when ihevce_compare_pu_mv_t() returns non-zero
*      (presumably meaning identical motion - confirm against its definition).
*    - PU1 is examined only if PU0 contributed and b3_part_size is non-zero.
*      Hence a pair whose PU0 matches with differing merge flags loses one
*      member even when PU1 does not match.
*      NOTE(review): confirm this is the intended pruning rule.
*
*****************************************************************************
*/
static __inline void ihevce_redundant_candidate_pruner(inter_cu_mode_info_t *ps_inter_cu_mode_info)
{
    UWORD8 au1_drop_cand[MAX_NUM_INTER_RDO_CANDS] = { 0 };
    WORD32 i4_ref, i4_cmp, i4_pu, i4_idx;

    /* Pass 1 : flag the candidates that are to be dropped */
    for(i4_ref = 0; i4_ref < (ps_inter_cu_mode_info->u1_num_inter_cands - 1); i4_ref++)
    {
        cu_inter_cand_t *ps_ref_cand;

        if(au1_drop_cand[i4_ref])
        {
            continue;
        }

        ps_ref_cand = ps_inter_cu_mode_info->aps_cu_data[i4_ref];

        if(ps_ref_cand->b1_skip_flag)
        {
            continue;
        }

        for(i4_cmp = i4_ref + 1; i4_cmp < ps_inter_cu_mode_info->u1_num_inter_cands; i4_cmp++)
        {
            cu_inter_cand_t *ps_cmp_cand;
            WORD32 i4_num_pus;
            /* (merge PUs in ref) - (merge PUs in cmp), over the PUs that matched */
            WORD32 i4_merge_balance = 0;

            if(au1_drop_cand[i4_cmp])
            {
                continue;
            }

            ps_cmp_cand = ps_inter_cu_mode_info->aps_cu_data[i4_cmp];

            if(ps_cmp_cand->b1_skip_flag)
            {
                continue;
            }

            if(ps_cmp_cand->b3_part_size != ps_ref_cand->b3_part_size)
            {
                continue;
            }

            /* A non-zero part size means a second PU is present */
            i4_num_pus = (ps_ref_cand->b3_part_size) ? 2 : 1;

            for(i4_pu = 0; i4_pu < i4_num_pus; i4_pu++)
            {
                if(!ihevce_compare_pu_mv_t(
                       &ps_ref_cand->as_inter_pu[i4_pu].mv,
                       &ps_cmp_cand->as_inter_pu[i4_pu].mv,
                       ps_ref_cand->as_inter_pu[i4_pu].b2_pred_mode,
                       ps_cmp_cand->as_inter_pu[i4_pu].b2_pred_mode))
                {
                    /* Later PUs are not examined once one fails to match */
                    break;
                }

                i4_merge_balance += ps_ref_cand->as_inter_pu[i4_pu].b1_merge_flag -
                                    ps_cmp_cand->as_inter_pu[i4_pu].b1_merge_flag;
            }

            /* The candidate with fewer merge PUs is the one dropped */
            if(i4_merge_balance > 0)
            {
                au1_drop_cand[i4_cmp] = 1;
            }
            else if(i4_merge_balance < 0)
            {
                au1_drop_cand[i4_ref] = 1;
            }
        }
    }

    /* Pass 2 : close the gaps left by the flagged candidates */
    i4_idx = 0;

    while(i4_idx < ps_inter_cu_mode_info->u1_num_inter_cands)
    {
        WORD32 i4_num_after;

        if(!au1_drop_cand[i4_idx])
        {
            i4_idx++;
            continue;
        }

        i4_num_after = ps_inter_cu_mode_info->u1_num_inter_cands - i4_idx - 1;

        memmove(
            &ps_inter_cu_mode_info->aps_cu_data[i4_idx],
            &ps_inter_cu_mode_info->aps_cu_data[i4_idx + 1],
            i4_num_after * sizeof(ps_inter_cu_mode_info->aps_cu_data[0]));

        memmove(
            &ps_inter_cu_mode_info->au4_cost[i4_idx],
            &ps_inter_cu_mode_info->au4_cost[i4_idx + 1],
            i4_num_after * sizeof(ps_inter_cu_mode_info->au4_cost[0]));

        memmove(
            &ps_inter_cu_mode_info->au1_pred_buf_idx[i4_idx],
            &ps_inter_cu_mode_info->au1_pred_buf_idx[i4_idx + 1],
            i4_num_after * sizeof(ps_inter_cu_mode_info->au1_pred_buf_idx[0]));

        /* The flags shift along with the data, so the same index is re-examined */
        memmove(
            &au1_drop_cand[i4_idx],
            &au1_drop_cand[i4_idx + 1],
            i4_num_after * sizeof(au1_drop_cand[0]));

        ps_inter_cu_mode_info->u1_num_inter_cands--;
    }
}
2566 
2567 /*!
2568 ******************************************************************************
2569 * \if Function name : ihevce_inter_cand_sifter \endif
2570 *
2571 * \brief
2572 *    Selects the best inter candidate modes amongst ME, merge,
2573 *    skip and mixed modes. Also computes corresponding preds
2574 *
2575 * \author
2576 *  Ittiam
2577 *
2578 *****************************************************************************
2579 */
ihevce_inter_cand_sifter(ihevce_inter_cand_sifter_prms_t * ps_ctxt)2580 void ihevce_inter_cand_sifter(ihevce_inter_cand_sifter_prms_t *ps_ctxt)
2581 {
2582     PF_SAD_FXN_T pf_sad_func;
2583 
2584     UWORD8 au1_final_cand_idx[MAX_INTER_CU_CANDIDATES];
2585     UWORD8 au1_part_types_evaluated[MAX_INTER_CU_CANDIDATES];
2586     UWORD8 u1_num_unique_parts;
2587     UWORD8 i, j;
2588     UWORD32 au4_src_variance[NUM_INTER_PU_PARTS];
2589     WORD32 i4_mean;
2590 
2591     cu_inter_cand_t *ps_me_cands = ps_ctxt->ps_me_cands;
2592     inter_cu_mode_info_t *ps_cu_mode_info = ps_ctxt->ps_inter_cu_mode_info;
2593 
2594     UWORD8 u1_diff_skip_cand_flag = 1;
2595     WORD8 i1_skip_cand_from_merge_idx = -1;
2596     WORD8 i1_final_skip_cand_merge_idx = -1;
2597     UWORD8 u1_max_num_part_types_to_select = MAX_INTER_CU_CANDIDATES;
2598     UWORD8 u1_num_me_cands = ps_ctxt->u1_num_me_cands;
2599     UWORD8 u1_num_parts_evaluated_for_merge = 0;
2600     UWORD8 u1_is_cu_noisy = ps_ctxt->u1_is_cu_noisy;
2601 
2602     if((ps_ctxt->u1_quality_preset >= IHEVCE_QUALITY_P3) && (ps_ctxt->i1_slice_type == BSLICE))
2603     {
2604         u1_max_num_part_types_to_select = 1;
2605     }
2606 
2607     {
2608         pf_sad_func = (ps_ctxt->u1_use_satd_for_merge_eval) ? compute_satd_8bit
2609                                                             : ps_ctxt->pf_evalsad_pt_npu_mxn_8bit;
2610     }
2611 
2612     u1_num_unique_parts = ihevce_get_num_part_types_in_me_cand_list(
2613         ps_me_cands,
2614         au1_part_types_evaluated,
2615         au1_final_cand_idx,
2616         &u1_diff_skip_cand_flag,
2617         &i1_skip_cand_from_merge_idx,
2618         &i1_final_skip_cand_merge_idx,
2619         u1_max_num_part_types_to_select,
2620         u1_num_me_cands);
2621 
2622     if((u1_num_me_cands + u1_diff_skip_cand_flag) && u1_is_cu_noisy &&
2623        ps_ctxt->i4_alpha_stim_multiplier)
2624     {
2625         ihevce_calc_variance(
2626             ps_ctxt->pv_src,
2627             ps_ctxt->i4_src_strd,
2628             &i4_mean,
2629             &ps_cu_mode_info->u4_src_variance,
2630             ps_ctxt->u1_cu_size,
2631             ps_ctxt->u1_cu_size,
2632             ps_ctxt->u1_is_hbd,
2633             0);
2634     }
2635 
2636     if(DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
2637     {
2638         u1_diff_skip_cand_flag = 0;
2639     }
2640     else if(!DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
2641     {
2642         if(ps_ctxt->u1_cu_size > MAX_CU_SIZE_WHERE_MERGE_AND_SKIPS_ENABLED_AND_WHEN_NOISY)
2643         {
2644             u1_diff_skip_cand_flag = 0;
2645         }
2646     }
2647 
2648     for(i = 0; i < u1_num_me_cands + u1_diff_skip_cand_flag; i++)
2649     {
2650         UWORD8 u1_part_type;
2651         UWORD8 u1_eval_skip;
2652         UWORD8 u1_eval_merge;
2653         UWORD8 u1_valid_cand;
2654 
2655         if(i == u1_num_me_cands)
2656         {
2657             u1_eval_skip = 1;
2658             u1_eval_merge = 0;
2659             u1_part_type = 0;
2660         }
2661         else
2662         {
2663             u1_eval_skip = 0;
2664             u1_part_type = ps_me_cands[i].b3_part_size;
2665 
2666             if(u1_num_parts_evaluated_for_merge >= u1_num_unique_parts)
2667             {
2668                 u1_eval_merge = 0;
2669                 u1_num_parts_evaluated_for_merge = u1_num_unique_parts;
2670             }
2671             else
2672             {
2673                 u1_eval_merge = (i == au1_final_cand_idx[u1_num_parts_evaluated_for_merge]);
2674             }
2675 
2676             for(j = 0; (j < u1_num_parts_evaluated_for_merge) && (u1_eval_merge); j++)
2677             {
2678                 if(u1_part_type == au1_part_types_evaluated[j])
2679                 {
2680                     u1_eval_merge = 0;
2681                     break;
2682                 }
2683             }
2684         }
2685 
2686         if(u1_is_cu_noisy && u1_part_type && ps_ctxt->i4_alpha_stim_multiplier)
2687         {
2688             void *pv_src = ps_ctxt->pv_src;
2689             UWORD8 u1_pu_wd = (ps_me_cands[i].as_inter_pu[0].b4_wd + 1) << 2;
2690             UWORD8 u1_pu_ht = (ps_me_cands[i].as_inter_pu[0].b4_ht + 1) << 2;
2691 
2692             ihevce_calc_variance(
2693                 pv_src,
2694                 ps_ctxt->i4_src_strd,
2695                 &i4_mean,
2696                 &au4_src_variance[0],
2697                 u1_pu_ht,
2698                 u1_pu_wd,
2699                 ps_ctxt->u1_is_hbd,
2700                 0);
2701 
2702             pv_src = (void *) (((UWORD8 *) pv_src) +
2703                 ((ps_ctxt->u1_cu_size == u1_pu_wd) ? ps_ctxt->i4_src_strd * u1_pu_ht : u1_pu_wd)
2704                 * (ps_ctxt->u1_is_hbd + 1));
2705             u1_pu_wd = (ps_me_cands[i].as_inter_pu[1].b4_wd + 1) << 2;
2706             u1_pu_ht = (ps_me_cands[i].as_inter_pu[1].b4_ht + 1) << 2;
2707 
2708             ihevce_calc_variance(
2709                 pv_src,
2710                 ps_ctxt->i4_src_strd,
2711                 &i4_mean,
2712                 &au4_src_variance[1],
2713                 u1_pu_ht,
2714                 u1_pu_wd,
2715                 ps_ctxt->u1_is_hbd,
2716                 0);
2717         }
2718         else if(u1_is_cu_noisy && !u1_part_type && ps_ctxt->i4_alpha_stim_multiplier)
2719         {
2720             au4_src_variance[0] = ps_cu_mode_info->u4_src_variance;
2721         }
2722 
2723         if(DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
2724         {
2725             u1_eval_merge = 0;
2726         }
2727         else if(!DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
2728         {
2729             if(ps_ctxt->u1_cu_size > MAX_CU_SIZE_WHERE_MERGE_AND_SKIPS_ENABLED_AND_WHEN_NOISY)
2730             {
2731                 u1_eval_merge = 0;
2732             }
2733         }
2734 
2735         u1_valid_cand = ihevce_compute_pred_and_populate_modes(
2736             ps_ctxt,
2737             pf_sad_func,
2738             au4_src_variance,
2739             u1_part_type,
2740             MIN(i, (u1_num_me_cands - 1)),
2741             u1_eval_merge,
2742             u1_eval_skip);
2743 
2744         u1_num_parts_evaluated_for_merge += u1_eval_merge;
2745 
2746         /* set the neighbour map to 0 */
2747         if(u1_part_type)
2748         {
2749             ihevce_set_nbr_map(
2750                 ps_ctxt->pu1_ctb_nbr_map,
2751                 ps_ctxt->i4_ctb_nbr_map_stride,
2752                 (ps_ctxt->u1_cu_pos_x >> 2),
2753                 (ps_ctxt->u1_cu_pos_y >> 2),
2754                 (ps_ctxt->u1_cu_size >> 2),
2755                 0);
2756         }
2757     }
2758 
2759     ihevce_redundant_candidate_pruner(ps_ctxt->ps_inter_cu_mode_info);
2760 }
2761