1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /*!
22 ******************************************************************************
23 * \file ihevce_enc_loop_inter_mode_sifter.c
24 *
25 * \brief
26 * This file contains functions for selecting best inter candidates for RDOPT evaluation
27 *
28 * \date
29 * 10/09/2014
30 *
31 ******************************************************************************
32 */
33
34 /*****************************************************************************/
35 /* File Includes */
36 /*****************************************************************************/
37 /* System include files */
38 #include <stdio.h>
39 #include <string.h>
40 #include <stdlib.h>
41 #include <assert.h>
42 #include <stdarg.h>
43 #include <math.h>
44 #include <limits.h>
45
46 /* User include files */
47 #include "ihevc_typedefs.h"
48 #include "itt_video_api.h"
49 #include "ihevce_api.h"
50
51 #include "rc_cntrl_param.h"
52 #include "rc_frame_info_collector.h"
53 #include "rc_look_ahead_params.h"
54
55 #include "ihevc_defs.h"
56 #include "ihevc_macros.h"
57 #include "ihevc_debug.h"
58 #include "ihevc_structs.h"
59 #include "ihevc_platform_macros.h"
60 #include "ihevc_deblk.h"
61 #include "ihevc_itrans_recon.h"
62 #include "ihevc_chroma_itrans_recon.h"
63 #include "ihevc_chroma_intra_pred.h"
64 #include "ihevc_intra_pred.h"
65 #include "ihevc_inter_pred.h"
66 #include "ihevc_mem_fns.h"
67 #include "ihevc_padding.h"
68 #include "ihevc_weighted_pred.h"
69 #include "ihevc_sao.h"
70 #include "ihevc_resi_trans.h"
71 #include "ihevc_quant_iquant_ssd.h"
72 #include "ihevc_cabac_tables.h"
73
74 #include "ihevce_defs.h"
75 #include "ihevce_hle_interface.h"
76 #include "ihevce_lap_enc_structs.h"
77 #include "ihevce_multi_thrd_structs.h"
78 #include "ihevce_multi_thrd_funcs.h"
79 #include "ihevce_me_common_defs.h"
80 #include "ihevce_had_satd.h"
81 #include "ihevce_error_codes.h"
82 #include "ihevce_bitstream.h"
83 #include "ihevce_cabac.h"
84 #include "ihevce_rdoq_macros.h"
85 #include "ihevce_function_selector.h"
86 #include "ihevce_enc_structs.h"
87 #include "ihevce_entropy_structs.h"
88 #include "ihevce_cmn_utils_instr_set_router.h"
89 #include "ihevce_ipe_instr_set_router.h"
90 #include "ihevce_decomp_pre_intra_structs.h"
91 #include "ihevce_decomp_pre_intra_pass.h"
92 #include "ihevce_enc_loop_structs.h"
93 #include "ihevce_global_tables.h"
94 #include "ihevce_nbr_avail.h"
95 #include "ihevce_enc_loop_utils.h"
96 #include "ihevce_bs_compute_ctb.h"
97 #include "ihevce_cabac_rdo.h"
98 #include "ihevce_dep_mngr_interface.h"
99 #include "ihevce_enc_loop_pass.h"
100 #include "ihevce_rc_enc_structs.h"
101 #include "ihevce_common_utils.h"
102 #include "ihevce_stasino_helpers.h"
103
104 #include "hme_datatype.h"
105 #include "hme_common_defs.h"
106 #include "hme_common_utils.h"
107 #include "hme_interface.h"
108 #include "hme_defs.h"
109 #include "ihevce_me_instr_set_router.h"
110 #include "hme_err_compute.h"
111 #include "hme_globals.h"
112 #include "ihevce_mv_pred.h"
113 #include "ihevce_mv_pred_merge.h"
114 #include "ihevce_inter_pred.h"
115 #include "ihevce_enc_loop_inter_mode_sifter.h"
116
117 /*****************************************************************************/
118 /* Function Definitions */
119 /*****************************************************************************/
ihevce_get_num_part_types_in_me_cand_list(cu_inter_cand_t * ps_me_cand_list,UWORD8 * pu1_part_type_ref_cand,UWORD8 * pu1_idx_ref_cand,UWORD8 * pu1_diff_skip_cand_flag,WORD8 * pi1_skip_cand_from_merge_idx,WORD8 * pi1_final_skip_cand_merge_idx,UWORD8 u1_max_num_part_types_to_select,UWORD8 u1_num_me_cands)120 static WORD32 ihevce_get_num_part_types_in_me_cand_list(
121 cu_inter_cand_t *ps_me_cand_list,
122 UWORD8 *pu1_part_type_ref_cand,
123 UWORD8 *pu1_idx_ref_cand,
124 UWORD8 *pu1_diff_skip_cand_flag,
125 WORD8 *pi1_skip_cand_from_merge_idx,
126 WORD8 *pi1_final_skip_cand_merge_idx,
127 UWORD8 u1_max_num_part_types_to_select,
128 UWORD8 u1_num_me_cands)
129 {
130 UWORD8 i, j;
131 UWORD8 u1_num_unique_parts = 0;
132
133 for(i = 0; i < u1_num_me_cands; i++)
134 {
135 UWORD8 u1_cur_part_type = ps_me_cand_list[i].b3_part_size;
136 UWORD8 u1_is_unique = 1;
137
138 if(u1_num_unique_parts >= u1_max_num_part_types_to_select)
139 {
140 return u1_num_unique_parts;
141 }
142
143 /* loop to check if the current cand is already present in the list */
144 for(j = 0; j < u1_num_unique_parts; j++)
145 {
146 if(u1_cur_part_type == pu1_part_type_ref_cand[j])
147 {
148 u1_is_unique = 0;
149 break;
150 }
151 }
152
153 if(u1_is_unique)
154 {
155 if(SIZE_2Nx2N == u1_cur_part_type)
156 {
157 *pu1_diff_skip_cand_flag = 0;
158 *pi1_skip_cand_from_merge_idx = u1_num_unique_parts;
159 *pi1_final_skip_cand_merge_idx = u1_num_unique_parts;
160 }
161
162 pu1_part_type_ref_cand[u1_num_unique_parts] = u1_cur_part_type;
163 pu1_idx_ref_cand[u1_num_unique_parts] = i;
164 u1_num_unique_parts++;
165 }
166 }
167
168 return u1_num_unique_parts;
169 }
170
/*!
******************************************************************************
* \brief
*  Generates the luma inter prediction for a PU and optionally measures its
*  distortion against the source.
*
*  The prediction is always generated through pf_luma_inter_pred_pu. The
*  distortion is computed through pf_sad_func only when u1_compute_error is
*  set and the prediction call returned IV_SUCCESS.
*
* \param[in]  ps_mc_ctxt             Context forwarded to pf_luma_inter_pred_pu
* \param[in]  pf_luma_inter_pred_pu  Luma inter prediction function
* \param[in]  pf_sad_func            Distortion function; it reports its
*                                    result through err_prms_t::pi4_sad_grid
* \param[in]  ps_pu                  PU to predict; b4_wd / b4_ht give the
*                                    block size as ((field + 1) << 2)
* \param[in]  pv_src                 Source block
* \param[out] pv_pred                Destination of the prediction
* \param[in]  i4_src_stride          Source stride
* \param[in]  i4_pred_stride         Prediction stride
* \param[in]  u1_compute_error       Non-zero to compute the distortion
* \param[in]  ps_cmn_utils_optimised_function_list
*  Function table handed to pf_sad_func through the error parameters
*
* \return
*  The value written by pf_sad_func, or INT_MAX when no distortion was
*  requested or the prediction call did not return IV_SUCCESS
******************************************************************************
*/
static WORD32 ihevce_compute_inter_pred_and_cost(
    inter_pred_ctxt_t *ps_mc_ctxt,
    PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu,
    PF_SAD_FXN_T pf_sad_func,
    pu_t *ps_pu,
    void *pv_src,
    void *pv_pred,
    WORD32 i4_src_stride,
    WORD32 i4_pred_stride,
    UWORD8 u1_compute_error,
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
{
    err_prms_t s_sad_prms;
    WORD32 i4_sad;
    IV_API_CALL_STATUS_T e_pred_status;

    /* The prediction is produced even when no cost is requested */
    e_pred_status = pf_luma_inter_pred_pu(ps_mc_ctxt, ps_pu, pv_pred, i4_pred_stride, 0);

    if((!u1_compute_error) || (IV_SUCCESS != e_pred_status))
    {
        /* max 32 bit satd */
        return INT_MAX;
    }

    /* The same buffers are exposed through both the 8 bit and 16 bit views */
    s_sad_prms.pu1_inp = (UWORD8 *)pv_src;
    s_sad_prms.pu2_inp = (UWORD16 *)pv_src;
    s_sad_prms.i4_inp_stride = i4_src_stride;

    s_sad_prms.pu1_ref = (UWORD8 *)pv_pred;
    s_sad_prms.pu2_ref = (UWORD16 *)pv_pred;
    s_sad_prms.i4_ref_stride = i4_pred_stride;

    s_sad_prms.i4_blk_wd = (ps_pu->b4_wd + 1) << 2;
    s_sad_prms.i4_blk_ht = (ps_pu->b4_ht + 1) << 2;

    s_sad_prms.pi4_sad_grid = &i4_sad;
    s_sad_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;

    pf_sad_func(&s_sad_prms);

    return i4_sad;
}
219
/*!
******************************************************************************
* \brief
*  Evaluates the merge candidates of one PU and keeps the cheapest one.
*
*  For every candidate the PU is loaded with the candidate's motion data. The
*  cost is either taken over from the ME result (when re-evaluation is not
*  forced and ihevce_compare_pu_mv_t reports matching motion) or obtained by
*  generating the prediction and measuring it against the source. When the
*  CU is flagged noisy and the alpha multiplier is non-zero, the measured
*  cost is additionally scaled by a noise term. The merge index cost is then
*  added, and the candidate with the lowest total cost is written back into
*  ps_pu_merge.
*
* \param[in,out] ps_prms
*  Merge evaluation parameters: candidate list, prediction buffers, function
*  pointers, strides, lambda and the per-partition noise / variance /
*  buffer-id tables that are updated for MERGE_DERIVED
* \param[in,out] ps_pu_merge
*  PU under evaluation (b1_merge_flag must be set). Overwritten per candidate;
*  holds the best candidate on return if one was found
* \param[in]  ps_pu_me               PU produced by ME, used for the motion
*                                    comparison
* \param[in]  pv_src                 Source block of this PU
* \param[in]  i4_me_cand_cost        Cost reused when the motion matches ME
* \param[in]  i4_pred_buf_offset     Byte offset of this PU inside a
*                                    prediction buffer
* \param[in]  u1_num_cands           Number of merge candidates to scan
* \param[in]  u1_part_id             Partition index into the per-partition
*                                    tables
* \param[in]  u1_force_pred_evaluation
*  Non-zero to always generate the prediction instead of reusing ME data
*
* \return
*  Cost of the best candidate, or INT_MAX when no candidate qualified
******************************************************************************
*/
static WORD32 ihevce_determine_best_merge_pu(
    merge_prms_t *ps_prms,
    pu_t *ps_pu_merge,
    pu_t *ps_pu_me,
    void *pv_src,
    WORD32 i4_me_cand_cost,
    WORD32 i4_pred_buf_offset,
    UWORD8 u1_num_cands,
    UWORD8 u1_part_id,
    UWORD8 u1_force_pred_evaluation)
{
    /* Views into the parameter structure */
    merge_cand_list_t *ps_merge_cands = ps_prms->ps_list;
    UWORD8 *pu1_merge_idx_lut = ps_prms->au1_valid_merge_indices;
    void **ppv_pred_bufs = ps_prms->ppv_pred_buf_list;
    WORD32(*pai4_noise_tbl)[MAX_NUM_INTER_PARTS] = ps_prms->pai4_noise_term;
    UWORD32(*pau4_variance_tbl)[MAX_NUM_INTER_PARTS] = ps_prms->pau4_pred_variance;
    UWORD8(*pau1_buf_id_tbl)[MAX_NUM_INTER_PARTS] = ps_prms->pau1_best_pred_buf_id;

    WORD32 i4_stim_alpha = ps_prms->i4_alpha_stim_multiplier;
    WORD32 i4_src_strd = ps_prms->i4_src_stride;
    WORD32 i4_pred_strd = ps_prms->i4_pred_stride;

    /* The noise weighting applies only to noisy CUs with a non-zero alpha */
    UWORD8 u1_apply_stim = (ps_prms->u1_is_cu_noisy && i4_stim_alpha);

    /* These names are handed to project macros and are kept as they were */
    UWORD8 u1_merge_idx_cabac_model = ps_prms->u1_merge_idx_cabac_model;
    UWORD8 u1_max_cands = ps_prms->u1_max_cands;
    WORD32 i4_lambda = ps_prms->i4_lambda;
    UWORD8 u1_best_buf_id = ps_prms->pu1_merge_pred_buf_array[0];
    UWORD8 u1_cur_buf_id = ps_prms->pu1_merge_pred_buf_array[1];
    WORD32 i4_cur_noise_term = 0;
    UWORD8 i;

    /* Running best */
    UWORD8 u1_best_cand_id = UCHAR_MAX;
    UWORD8 u1_best_pred_mode = 0;
    WORD32 i4_best_cost = INT_MAX;
    WORD32 i4_best_noise_term = 0;
    UWORD32 u4_best_variance = 0;

    UWORD32 u4_cur_variance = 0;
    WORD32 i4_mean;

    ASSERT(ps_pu_merge->b1_merge_flag);

    for(i = 0; i < u1_num_cands; i++)
    {
        WORD32 i4_cur_cost;
        UWORD8 u1_reuse_me_result;
        merge_cand_list_t *ps_merge_cand = ps_merge_cands + i;

        /* Prediction for a freshly evaluated candidate goes to the spare buffer */
        void *pv_cand_pred = (UWORD8 *)ppv_pred_bufs[u1_cur_buf_id] + i4_pred_buf_offset;

        if(!ps_prms->u1_use_merge_cand_from_top_row && ps_prms->pu1_is_top_used[i])
        {
            continue;
        }

        /* Load the candidate's motion data into the PU */
        ps_pu_merge->mv = ps_merge_cand->mv;
        ps_pu_merge->b3_merge_idx = pu1_merge_idx_lut[i];

        if(!ps_merge_cand->u1_pred_flag_l0)
        {
            ps_pu_merge->b2_pred_mode = PRED_L1;
        }
        else if(ps_merge_cand->u1_pred_flag_l1)
        {
            ps_pu_merge->b2_pred_mode = PRED_BI;
        }
        else
        {
            ps_pu_merge->b2_pred_mode = PRED_L0;
        }

        /* 8x8 SMPs should not have bipred mode as per std */
        if((PRED_BI == ps_pu_merge->b2_pred_mode) &&
           ((((ps_pu_merge->b4_wd + 1) << 2) + ((ps_pu_merge->b4_ht + 1) << 2)) < 16))
        {
            continue;
        }

        u1_reuse_me_result =
            (!u1_force_pred_evaluation) && ihevce_compare_pu_mv_t(
                                               &ps_pu_merge->mv,
                                               &ps_pu_me->mv,
                                               ps_pu_merge->b2_pred_mode,
                                               ps_pu_me->b2_pred_mode);

        if(u1_reuse_me_result)
        {
            /* Same motion as the ME candidate: take over its cost and stats */
            i4_cur_cost = i4_me_cand_cost;

            if(u1_apply_stim && (i4_cur_cost < INT_MAX))
            {
                i4_cur_noise_term = pai4_noise_tbl[ME_OR_SKIP_DERIVED][u1_part_id];
                u4_cur_variance = pau4_variance_tbl[ME_OR_SKIP_DERIVED][u1_part_id];
            }
        }
        else
        {
            i4_cur_cost = ihevce_compute_inter_pred_and_cost(
                ps_prms->ps_mc_ctxt,
                ps_prms->pf_luma_inter_pred_pu,
                ps_prms->pf_sad_fxn,
                ps_pu_merge,
                pv_src,
                pv_cand_pred,
                i4_src_strd,
                i4_pred_strd,
                1,
                ps_prms->ps_cmn_utils_optimised_function_list);

            if(u1_apply_stim && (i4_cur_cost < INT_MAX))
            {
                ihevce_calc_variance(
                    pv_cand_pred,
                    i4_pred_strd,
                    &i4_mean,
                    &u4_cur_variance,
                    (ps_pu_merge->b4_ht + 1) << 2,
                    (ps_pu_merge->b4_wd + 1) << 2,
                    ps_prms->u1_is_hbd,
                    0);

                i4_cur_noise_term = ihevce_compute_noise_term(
                    i4_stim_alpha, ps_prms->pu4_src_variance[u1_part_id], u4_cur_variance);

                MULTIPLY_STIM_WITH_DISTORTION(
                    i4_cur_cost, i4_cur_noise_term, STIM_Q_FORMAT, ALPHA_Q_FORMAT);
            }
        }

        /* Only usable candidates are charged the merge index cost */
        if(i4_cur_cost < INT_MAX)
        {
            WORD32 i4_merge_idx_cost = 0;

            COMPUTE_MERGE_IDX_COST(
                u1_merge_idx_cabac_model, i, u1_max_cands, i4_lambda, i4_merge_idx_cost);

            i4_cur_cost += i4_merge_idx_cost;
        }

        if(i4_cur_cost >= i4_best_cost)
        {
            continue;
        }

        /* New best candidate */
        i4_best_cost = i4_cur_cost;
        u1_best_cand_id = i;
        u1_best_pred_mode = ps_pu_merge->b2_pred_mode;

        if(u1_apply_stim)
        {
            i4_best_noise_term = i4_cur_noise_term;
            u4_best_variance = u4_cur_variance;
        }

        if(u1_reuse_me_result)
        {
            /* The ME prediction buffer already holds this prediction */
            pau1_buf_id_tbl[MERGE_DERIVED][u1_part_id] =
                pau1_buf_id_tbl[ME_OR_SKIP_DERIVED][u1_part_id];
        }
        else
        {
            /* Keep the buffer just written; the old best becomes the spare */
            SWAP(u1_best_buf_id, u1_cur_buf_id);
            pau1_buf_id_tbl[MERGE_DERIVED][u1_part_id] = u1_best_buf_id;
        }
    }

    if(UCHAR_MAX != u1_best_cand_id)
    {
        /* Restore the winning candidate into the PU */
        ps_pu_merge->mv = ps_merge_cands[u1_best_cand_id].mv;
        ps_pu_merge->b2_pred_mode = u1_best_pred_mode;
        ps_pu_merge->b3_merge_idx = pu1_merge_idx_lut[u1_best_cand_id];

        if(u1_apply_stim)
        {
            pai4_noise_tbl[MERGE_DERIVED][u1_part_id] = i4_best_noise_term;
            pau4_variance_tbl[MERGE_DERIVED][u1_part_id] = u4_best_variance;
        }
    }

    return i4_best_cost;
}
413
ihevce_merge_cand_pred_buffer_preparation(void ** ppv_pred_buf_list,cu_inter_cand_t * ps_cand,UWORD8 (* pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],WORD32 i4_pred_stride,UWORD8 u1_cu_size,UWORD8 u1_part_type,UWORD8 u1_num_bytes_per_pel,FT_COPY_2D * pf_copy_2d)414 static WORD8 ihevce_merge_cand_pred_buffer_preparation(
415 void **ppv_pred_buf_list,
416 cu_inter_cand_t *ps_cand,
417 UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
418 WORD32 i4_pred_stride,
419 UWORD8 u1_cu_size,
420 UWORD8 u1_part_type,
421 UWORD8 u1_num_bytes_per_pel,
422 FT_COPY_2D *pf_copy_2d)
423 {
424 WORD32 i4_part_wd;
425 WORD32 i4_part_ht;
426 WORD32 i4_part_wd_pu2;
427 WORD32 i4_part_ht_pu2;
428 WORD32 i4_buf_offset;
429 UWORD8 *pu1_pred_src = NULL;
430 UWORD8 *pu1_pred_dst = NULL;
431 WORD8 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
432
433 WORD32 i4_stride = i4_pred_stride * u1_num_bytes_per_pel;
434
435 if((0 == u1_part_type) ||
436 (pau1_final_pred_buf_id[MERGE_DERIVED][0] == pau1_final_pred_buf_id[MERGE_DERIVED][1]))
437 {
438 ps_cand->pu1_pred_data =
439 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
440 ps_cand->pu2_pred_data =
441 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
442 ps_cand->i4_pred_data_stride = i4_pred_stride;
443
444 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
445 }
446 else if(pau1_final_pred_buf_id[MERGE_DERIVED][0] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0])
447 {
448 i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
449 i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
450
451 i4_buf_offset = 0;
452
453 pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]] +
454 i4_buf_offset;
455 pu1_pred_dst =
456 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] + i4_buf_offset;
457
458 pf_copy_2d(
459 pu1_pred_dst,
460 i4_stride,
461 pu1_pred_src,
462 i4_stride,
463 i4_part_wd * u1_num_bytes_per_pel,
464 i4_part_ht);
465
466 ps_cand->pu1_pred_data =
467 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
468 ps_cand->pu2_pred_data =
469 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
470 ps_cand->i4_pred_data_stride = i4_pred_stride;
471
472 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
473 }
474 else if(pau1_final_pred_buf_id[MERGE_DERIVED][1] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1])
475 {
476 i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
477 i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
478
479 i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
480 (i4_part_wd < u1_cu_size) * i4_part_wd;
481
482 i4_buf_offset *= u1_num_bytes_per_pel;
483
484 i4_part_wd = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
485 i4_part_ht = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
486
487 pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
488 i4_buf_offset;
489 pu1_pred_dst =
490 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]] + i4_buf_offset;
491
492 pf_copy_2d(
493 pu1_pred_dst,
494 i4_stride,
495 pu1_pred_src,
496 i4_stride,
497 i4_part_wd * u1_num_bytes_per_pel,
498 i4_part_ht);
499
500 ps_cand->pu1_pred_data =
501 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
502 ps_cand->pu2_pred_data =
503 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
504 ps_cand->i4_pred_data_stride = i4_pred_stride;
505
506 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
507 }
508 else
509 {
510 i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
511 i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
512
513 i4_part_wd_pu2 = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
514 i4_part_ht_pu2 = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
515
516 switch((PART_TYPE_T)u1_part_type)
517 {
518 case PRT_2NxN:
519 case PRT_Nx2N:
520 case PRT_2NxnU:
521 case PRT_nLx2N:
522 {
523 pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
524 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
525
526 ps_cand->pu1_pred_data =
527 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
528 ps_cand->pu2_pred_data =
529 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
530
531 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
532
533 break;
534 }
535 case PRT_nRx2N:
536 case PRT_2NxnD:
537 {
538 i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
539 (i4_part_wd < u1_cu_size) * i4_part_wd;
540
541 i4_buf_offset *= u1_num_bytes_per_pel;
542
543 pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
544 i4_buf_offset;
545 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]] +
546 i4_buf_offset;
547
548 i4_part_wd = i4_part_wd_pu2;
549 i4_part_ht = i4_part_ht_pu2;
550
551 ps_cand->pu1_pred_data =
552 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
553 ps_cand->pu2_pred_data =
554 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
555
556 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
557
558 break;
559 }
560 default:
561 {
562 DBG_PRINTF("Invalid partition type %d\n", u1_part_type);
563 break;
564 }
565 }
566
567 pf_copy_2d(
568 pu1_pred_dst,
569 i4_stride,
570 pu1_pred_src,
571 i4_stride,
572 i4_part_wd * u1_num_bytes_per_pel,
573 i4_part_ht);
574
575 ps_cand->i4_pred_data_stride = i4_pred_stride;
576 }
577
578 return i1_retval;
579 }
580
ihevce_mixed_mode_cand_type1_pred_buffer_preparation(void ** ppv_pred_buf_list,cu_inter_cand_t * ps_cand,UWORD8 (* pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],UWORD8 * pu1_merge_pred_buf_idx_array,WORD32 i4_pred_stride,UWORD8 u1_me_pred_buf_id,UWORD8 u1_merge_pred_buf_id,UWORD8 u1_type0_cand_is_valid,UWORD8 u1_cu_size,UWORD8 u1_part_type,UWORD8 u1_num_bytes_per_pel,FT_COPY_2D * pf_copy_2d)581 static WORD8 ihevce_mixed_mode_cand_type1_pred_buffer_preparation(
582 void **ppv_pred_buf_list,
583 cu_inter_cand_t *ps_cand,
584 UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
585 UWORD8 *pu1_merge_pred_buf_idx_array,
586 WORD32 i4_pred_stride,
587 UWORD8 u1_me_pred_buf_id,
588 UWORD8 u1_merge_pred_buf_id,
589 UWORD8 u1_type0_cand_is_valid,
590 UWORD8 u1_cu_size,
591 UWORD8 u1_part_type,
592 UWORD8 u1_num_bytes_per_pel,
593 FT_COPY_2D *pf_copy_2d)
594 {
595 WORD32 i4_part_wd;
596 WORD32 i4_part_ht;
597 WORD32 i4_part_wd_pu2;
598 WORD32 i4_part_ht_pu2;
599 UWORD8 *pu1_pred_src = NULL;
600 UWORD8 *pu1_pred_dst = NULL;
601 WORD8 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
602
603 WORD32 i4_stride = i4_pred_stride * u1_num_bytes_per_pel;
604
605 ASSERT(0 != u1_part_type);
606
607 i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
608 i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
609
610 i4_part_wd_pu2 = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
611 i4_part_ht_pu2 = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
612
613 if(pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1])
614 {
615 ps_cand->pu1_pred_data =
616 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
617 ps_cand->pu2_pred_data =
618 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
619 ps_cand->i4_pred_data_stride = i4_pred_stride;
620
621 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
622
623 return i1_retval;
624 }
625 else
626 {
627 UWORD8 u1_bitfield = ((u1_merge_pred_buf_id == UCHAR_MAX) << 3) |
628 ((u1_me_pred_buf_id == UCHAR_MAX) << 2) |
629 ((!u1_type0_cand_is_valid) << 1) |
630 (pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1] ==
631 pau1_final_pred_buf_id[MERGE_DERIVED][1]);
632
633 WORD32 i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
634 (i4_part_wd < u1_cu_size) * i4_part_wd;
635
636 i4_buf_offset *= u1_num_bytes_per_pel;
637
638 switch(u1_bitfield)
639 {
640 case 15:
641 case 14:
642 case 6:
643 {
644 switch((PART_TYPE_T)u1_part_type)
645 {
646 case PRT_2NxN:
647 case PRT_Nx2N:
648 case PRT_2NxnU:
649 case PRT_nLx2N:
650 {
651 pu1_pred_src =
652 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
653 pu1_pred_dst =
654 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1]];
655
656 i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1];
657
658 break;
659 }
660 case PRT_nRx2N:
661 case PRT_2NxnD:
662 {
663 pu1_pred_src =
664 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1]] +
665 i4_buf_offset;
666 pu1_pred_dst =
667 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]] +
668 i4_buf_offset;
669
670 i4_part_wd = i4_part_wd_pu2;
671 i4_part_ht = i4_part_ht_pu2;
672
673 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
674
675 break;
676 }
677 default:
678 {
679 DBG_PRINTF("Invalid partition type %d\n", u1_part_type);
680 break;
681 }
682 }
683
684 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
685 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
686 ps_cand->i4_pred_data_stride = i4_pred_stride;
687
688 pf_copy_2d(
689 pu1_pred_dst,
690 i4_stride,
691 pu1_pred_src,
692 i4_stride,
693 i4_part_wd * u1_num_bytes_per_pel,
694 i4_part_ht);
695
696 break;
697 }
698 case 13:
699 case 9:
700 case 5:
701 {
702 UWORD8 i;
703
704 for(i = 0; i < 3; i++)
705 {
706 if((pu1_merge_pred_buf_idx_array[i] != pau1_final_pred_buf_id[MERGE_DERIVED][1]) &&
707 (pu1_merge_pred_buf_idx_array[i] != pau1_final_pred_buf_id[MERGE_DERIVED][0]))
708 {
709 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pu1_merge_pred_buf_idx_array[i]] +
710 i4_buf_offset;
711
712 i1_retval = pu1_merge_pred_buf_idx_array[i];
713
714 break;
715 }
716 }
717
718 pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
719 i4_buf_offset;
720
721 pf_copy_2d(
722 pu1_pred_dst,
723 i4_stride,
724 pu1_pred_src,
725 i4_stride,
726 i4_part_wd_pu2 * u1_num_bytes_per_pel,
727 i4_part_ht_pu2);
728 /* Copy PU1 */
729 pu1_pred_src =
730 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
731 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[i1_retval];
732
733 pf_copy_2d(
734 pu1_pred_dst,
735 i4_stride,
736 pu1_pred_src,
737 i4_stride,
738 i4_part_wd * u1_num_bytes_per_pel,
739 i4_part_ht);
740
741 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
742 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
743 ps_cand->i4_pred_data_stride = i4_pred_stride;
744
745 break;
746 }
747 case 12:
748 case 10:
749 case 8:
750 case 4:
751 case 2:
752 case 0:
753 {
754 pu1_pred_src =
755 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
756 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1]];
757
758 i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1];
759
760 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
761 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
762 ps_cand->i4_pred_data_stride = i4_pred_stride;
763
764 pf_copy_2d(
765 pu1_pred_dst,
766 i4_stride,
767 pu1_pred_src,
768 i4_stride,
769 i4_part_wd * u1_num_bytes_per_pel,
770 i4_part_ht);
771
772 break;
773 }
774 case 11:
775 {
776 pu1_pred_src =
777 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
778 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
779
780 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
781
782 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
783 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
784 ps_cand->i4_pred_data_stride = i4_pred_stride;
785
786 pf_copy_2d(
787 pu1_pred_dst,
788 i4_stride,
789 pu1_pred_src,
790 i4_stride,
791 i4_part_wd * u1_num_bytes_per_pel,
792 i4_part_ht);
793
794 break;
795 }
796 case 7:
797 {
798 pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
799 i4_buf_offset;
800 pu1_pred_dst =
801 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
802 i4_buf_offset;
803
804 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1];
805
806 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
807 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
808 ps_cand->i4_pred_data_stride = i4_pred_stride;
809
810 pf_copy_2d(
811 pu1_pred_dst,
812 i4_stride,
813 pu1_pred_src,
814 i4_stride,
815 i4_part_wd_pu2 * u1_num_bytes_per_pel,
816 i4_part_ht_pu2);
817
818 break;
819 }
820 case 3:
821 case 1:
822 {
823 if((u1_merge_pred_buf_id == pau1_final_pred_buf_id[MERGE_DERIVED][0]) &&
824 (u1_merge_pred_buf_id != pau1_final_pred_buf_id[MERGE_DERIVED][1]))
825 {
826 pu1_pred_src =
827 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
828 pu1_pred_dst =
829 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
830
831 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
832
833 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
834 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
835 ps_cand->i4_pred_data_stride = i4_pred_stride;
836
837 pf_copy_2d(
838 pu1_pred_dst,
839 i4_stride,
840 pu1_pred_src,
841 i4_stride,
842 i4_part_wd * u1_num_bytes_per_pel,
843 i4_part_ht);
844 }
845 else
846 {
847 UWORD8 i;
848
849 for(i = 0; i < 3; i++)
850 {
851 if((pu1_merge_pred_buf_idx_array[i] !=
852 pau1_final_pred_buf_id[MERGE_DERIVED][1]) &&
853 (pu1_merge_pred_buf_idx_array[i] !=
854 pau1_final_pred_buf_id[MERGE_DERIVED][0]))
855 {
856 pu1_pred_dst =
857 (UWORD8 *)ppv_pred_buf_list[pu1_merge_pred_buf_idx_array[i]] +
858 i4_buf_offset;
859
860 i1_retval = pu1_merge_pred_buf_idx_array[i];
861
862 break;
863 }
864 }
865
866 pu1_pred_src =
867 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
868 i4_buf_offset;
869
870 pf_copy_2d(
871 pu1_pred_dst,
872 i4_stride,
873 pu1_pred_src,
874 i4_stride,
875 i4_part_wd_pu2 * u1_num_bytes_per_pel,
876 i4_part_ht_pu2);
877
878 /* Copy PU1 */
879 pu1_pred_src =
880 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
881 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[i1_retval];
882
883 pf_copy_2d(
884 pu1_pred_dst,
885 i4_stride,
886 pu1_pred_src,
887 i4_stride,
888 i4_part_wd * u1_num_bytes_per_pel,
889 i4_part_ht);
890
891 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
892 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
893 ps_cand->i4_pred_data_stride = i4_pred_stride;
894
895 break;
896 }
897 }
898 }
899 }
900
901 return i1_retval;
902 }
903
ihevce_mixed_mode_cand_type0_pred_buffer_preparation(void ** ppv_pred_buf_list,cu_inter_cand_t * ps_cand,UWORD8 (* pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],UWORD8 * pu1_merge_pred_buf_idx_array,UWORD8 u1_me_pred_buf_id,UWORD8 u1_merge_pred_buf_id,UWORD8 u1_mixed_tyep1_pred_buf_id,WORD32 i4_pred_stride,UWORD8 u1_cu_size,UWORD8 u1_part_type,UWORD8 u1_num_bytes_per_pel,FT_COPY_2D * pf_copy_2d)904 static WORD8 ihevce_mixed_mode_cand_type0_pred_buffer_preparation(
905 void **ppv_pred_buf_list,
906 cu_inter_cand_t *ps_cand,
907 UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
908 UWORD8 *pu1_merge_pred_buf_idx_array,
909 UWORD8 u1_me_pred_buf_id,
910 UWORD8 u1_merge_pred_buf_id,
911 UWORD8 u1_mixed_tyep1_pred_buf_id,
912 WORD32 i4_pred_stride,
913 UWORD8 u1_cu_size,
914 UWORD8 u1_part_type,
915 UWORD8 u1_num_bytes_per_pel,
916 FT_COPY_2D *pf_copy_2d)
917 {
918 WORD32 i4_part_wd;
919 WORD32 i4_part_ht;
920 WORD32 i4_part_wd_pu2;
921 WORD32 i4_part_ht_pu2;
922 WORD32 i4_buf_offset;
923 UWORD8 *pu1_pred_src = NULL;
924 UWORD8 *pu1_pred_dst = NULL;
925 WORD8 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
926
927 WORD32 i4_stride = i4_pred_stride * u1_num_bytes_per_pel;
928
929 ASSERT(0 != u1_part_type);
930
931 i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
932 i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
933 i4_part_wd_pu2 = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
934 i4_part_ht_pu2 = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
935
936 i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
937 (i4_part_wd < u1_cu_size) * i4_part_wd;
938
939 i4_buf_offset *= u1_num_bytes_per_pel;
940
941 if(pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0])
942 {
943 ps_cand->pu1_pred_data =
944 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
945 ps_cand->pu2_pred_data =
946 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
947 ps_cand->i4_pred_data_stride = i4_pred_stride;
948
949 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
950 }
951 else
952 {
953 UWORD8 u1_bitfield =
954 ((u1_merge_pred_buf_id == UCHAR_MAX) << 2) | ((u1_me_pred_buf_id == UCHAR_MAX) << 1) |
955 (u1_mixed_tyep1_pred_buf_id != pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]);
956
957 switch(u1_bitfield)
958 {
959 case 7:
960 {
961 switch((PART_TYPE_T)u1_part_type)
962 {
963 case PRT_2NxN:
964 case PRT_Nx2N:
965 case PRT_2NxnU:
966 case PRT_nLx2N:
967 {
968 pu1_pred_src =
969 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]];
970 pu1_pred_dst =
971 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]];
972
973 i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE0][1];
974
975 break;
976 }
977 case PRT_nRx2N:
978 case PRT_2NxnD:
979 {
980 pu1_pred_src =
981 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
982 i4_buf_offset;
983 pu1_pred_dst =
984 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]] +
985 i4_buf_offset;
986
987 i4_part_wd = i4_part_wd_pu2;
988 i4_part_ht = i4_part_ht_pu2;
989
990 i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0];
991
992 break;
993 }
994 default:
995 {
996 DBG_PRINTF("Invalid partition type %d\n", u1_part_type);
997 break;
998 }
999 }
1000
1001 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1002 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
1003 ps_cand->i4_pred_data_stride = i4_pred_stride;
1004
1005 pf_copy_2d(
1006 pu1_pred_dst,
1007 i4_stride,
1008 pu1_pred_src,
1009 i4_stride,
1010 i4_part_wd * u1_num_bytes_per_pel,
1011 i4_part_ht);
1012
1013 break;
1014 }
1015 case 6:
1016 case 5:
1017 case 4:
1018 {
1019 pu1_pred_src =
1020 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
1021 i4_buf_offset;
1022 pu1_pred_dst =
1023 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]] +
1024 i4_buf_offset;
1025
1026 i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0];
1027
1028 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1029 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
1030 ps_cand->i4_pred_data_stride = i4_pred_stride;
1031
1032 pf_copy_2d(
1033 pu1_pred_dst,
1034 i4_stride,
1035 pu1_pred_src,
1036 i4_stride,
1037 i4_part_wd_pu2 * u1_num_bytes_per_pel,
1038 i4_part_ht_pu2);
1039 break;
1040 }
1041 case 3:
1042 {
1043 pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]];
1044 pu1_pred_dst =
1045 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]];
1046
1047 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1];
1048
1049 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1050 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
1051 ps_cand->i4_pred_data_stride = i4_pred_stride;
1052
1053 pf_copy_2d(
1054 pu1_pred_dst,
1055 i4_stride,
1056 pu1_pred_src,
1057 i4_stride,
1058 i4_part_wd * u1_num_bytes_per_pel,
1059 i4_part_ht);
1060
1061 break;
1062 }
1063 case 2:
1064 case 1:
1065 case 0:
1066 {
1067 if((u1_merge_pred_buf_id == pau1_final_pred_buf_id[MERGE_DERIVED][1]) &&
1068 (u1_merge_pred_buf_id != pau1_final_pred_buf_id[MERGE_DERIVED][0]))
1069 {
1070 pu1_pred_src =
1071 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
1072 i4_buf_offset;
1073 pu1_pred_dst =
1074 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]] +
1075 i4_buf_offset;
1076
1077 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
1078
1079 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1080 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
1081 ps_cand->i4_pred_data_stride = i4_pred_stride;
1082
1083 pf_copy_2d(
1084 pu1_pred_dst,
1085 i4_stride,
1086 pu1_pred_src,
1087 i4_stride,
1088 i4_part_wd_pu2 * u1_num_bytes_per_pel,
1089 i4_part_ht_pu2);
1090 }
1091 else
1092 {
1093 UWORD8 i;
1094
1095 for(i = 0; i < 3; i++)
1096 {
1097 if((pu1_merge_pred_buf_idx_array[i] != u1_merge_pred_buf_id) &&
1098 (pu1_merge_pred_buf_idx_array[i] != u1_mixed_tyep1_pred_buf_id))
1099 {
1100 pu1_pred_dst =
1101 (UWORD8 *)ppv_pred_buf_list[pu1_merge_pred_buf_idx_array[i]] +
1102 i4_buf_offset;
1103
1104 i1_retval = pu1_merge_pred_buf_idx_array[i];
1105
1106 break;
1107 }
1108 }
1109
1110 pu1_pred_src =
1111 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
1112 i4_buf_offset;
1113
1114 pf_copy_2d(
1115 pu1_pred_dst,
1116 i4_stride,
1117 pu1_pred_src,
1118 i4_stride,
1119 i4_part_wd_pu2 * u1_num_bytes_per_pel,
1120 i4_part_ht_pu2);
1121
1122 /* Copy PU1 */
1123 pu1_pred_src =
1124 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
1125 pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1126
1127 pf_copy_2d(
1128 pu1_pred_dst,
1129 i4_stride,
1130 pu1_pred_src,
1131 i4_stride,
1132 i4_part_wd * u1_num_bytes_per_pel,
1133 i4_part_ht);
1134
1135 ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
1136 ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
1137 ps_cand->i4_pred_data_stride = i4_pred_stride;
1138
1139 break;
1140 }
1141 }
1142 }
1143 }
1144
1145 return i1_retval;
1146 }
1147
/*!
******************************************************************************
* \brief
*  Locates the largest entry of a cost array
*
* \param[in] pu4_cost_array  Array of costs to scan
* \param[in] u1_array_size   Number of entries to scan
*
* \return
*  Index of the first occurrence of the maximum cost. 0 is returned when
*  fewer than two entries are scanned.
******************************************************************************
*/
static UWORD8 ihevce_find_idx_of_worst_cost(UWORD32 *pu4_cost_array, UWORD8 u1_array_size)
{
    UWORD8 u1_idx_of_max = 0;
    WORD32 i4_pos = 1;

    while(i4_pos < u1_array_size)
    {
        /* Strict comparison => the earliest index wins when costs tie */
        if(pu4_cost_array[u1_idx_of_max] < pu4_cost_array[i4_pos])
        {
            u1_idx_of_max = (UWORD8)i4_pos;
        }

        i4_pos++;
    }

    return u1_idx_of_max;
}
1164
/*!
******************************************************************************
* \brief
*  Marks as free the pred buffers that the current set of winners no longer
*  references
*
* \param[in,out] pu4_pred_buf_usage_indicator  Usage indicator handed to
*                                              ihevce_set_pred_buf_as_free
* \param[in] pu1_merge_pred_buf_idx_array      Buffer ids reserved for merge
* \param[in] pu1_buf_id_in_use                 Buffer id in use per candidate
*                                              type (UCHAR_MAX when unset)
* \param[in] pu1_buf_id_to_free                Buffer ids of evicted winners
* \param[in] u1_me_buf_id                      Buffer id reserved for ME
* \param[in] u1_num_available_cands            Not referenced by this function
* \param[in] u1_num_bufs_to_free               Entries in pu1_buf_id_to_free
* \param[in] u1_eval_merge                     Non-zero when merge was evaluated
* \param[in] u1_eval_skip                      Non-zero when skip was evaluated
* \param[in] u1_part_type                      Partition type (0 => the third
*                                              merge buffer is not considered)
*
* \return
*  None
******************************************************************************
*/
static void ihevce_free_unused_buf_indices(
    UWORD32 *pu4_pred_buf_usage_indicator,
    UWORD8 *pu1_merge_pred_buf_idx_array,
    UWORD8 *pu1_buf_id_in_use,
    UWORD8 *pu1_buf_id_to_free,
    UWORD8 u1_me_buf_id,
    UWORD8 u1_num_available_cands,
    UWORD8 u1_num_bufs_to_free,
    UWORD8 u1_eval_merge,
    UWORD8 u1_eval_skip,
    UWORD8 u1_part_type)
{
    UWORD8 i;

    /* Skip evaluation, or plain ME evaluation without merge */
    if(u1_eval_skip || !u1_eval_merge)
    {
        if(u1_eval_skip)
        {
            UWORD8 u1_skip_uses_buf0 =
                (pu1_buf_id_in_use[ME_OR_SKIP_DERIVED] == pu1_merge_pred_buf_idx_array[0]);
            UWORD8 u1_skip_uses_buf1 =
                (pu1_buf_id_in_use[ME_OR_SKIP_DERIVED] == pu1_merge_pred_buf_idx_array[1]);

            /* Of the two buffers reserved for skip, release whichever */
            /* one(s) the skip winner does not occupy */
            if(!u1_skip_uses_buf0)
            {
                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_merge_pred_buf_idx_array[0]);
            }

            if(u1_skip_uses_buf0 || !u1_skip_uses_buf1)
            {
                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_merge_pred_buf_idx_array[1]);
            }
        }
        else if(UCHAR_MAX == pu1_buf_id_in_use[ME_OR_SKIP_DERIVED])
        {
            /* ME candidate did not make it to the list of winners */
            ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, u1_me_buf_id);
        }

        /* Buffers of evicted winners, except the ME buffer */
        for(i = 0; i < u1_num_bufs_to_free; i++)
        {
            if(u1_me_buf_id != pu1_buf_id_to_free[i])
            {
                ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, pu1_buf_id_to_free[i]);
            }
        }

        return;
    }

    /* Merge evaluation */

    /* ME pred buf */
    COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
        u1_me_buf_id,
        pu1_buf_id_in_use,
        pu1_buf_id_to_free,
        4,
        u1_num_bufs_to_free,
        pu4_pred_buf_usage_indicator);

    /* Merge pred buf 0 */
    COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
        pu1_merge_pred_buf_idx_array[0],
        pu1_buf_id_in_use,
        pu1_buf_id_to_free,
        4,
        u1_num_bufs_to_free,
        pu4_pred_buf_usage_indicator);

    /* Merge pred buf 1 */
    COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
        pu1_merge_pred_buf_idx_array[1],
        pu1_buf_id_in_use,
        pu1_buf_id_to_free,
        4,
        u1_num_bufs_to_free,
        pu4_pred_buf_usage_indicator);

    if(0 != u1_part_type)
    {
        /* Merge pred buf 2 */
        COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
            pu1_merge_pred_buf_idx_array[2],
            pu1_buf_id_in_use,
            pu1_buf_id_to_free,
            4,
            u1_num_bufs_to_free,
            pu4_pred_buf_usage_indicator);
    }

    /* Buffers of evicted winners, except those handled above. */
    /* Merge pred buf 2 is not excluded here */
    for(i = 0; i < u1_num_bufs_to_free; i++)
    {
        UWORD8 u1_buf_id = pu1_buf_id_to_free[i];

        if((u1_buf_id != u1_me_buf_id) && (u1_buf_id != pu1_merge_pred_buf_idx_array[0]) &&
           (u1_buf_id != pu1_merge_pred_buf_idx_array[1]))
        {
            ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, pu1_buf_id_to_free[i]);
        }
    }
}
1318
/*!
******************************************************************************
* \brief
*  Checks whether a pred buffer is referenced by exactly one of the
*  previously added winners
*
* \param[in] pu1_pred_id_of_winners                  Pred buffer id of each
*                                                    previously added winner
* \param[in] u1_idx_of_worst_cost_in_pred_buf_array  Pred buffer id to look for
* \param[in] u1_num_cands_previously_added           Number of winners to scan
*
* \return
*  0 when two or more winners reference the buffer, 1 otherwise. Finding no
*  reference at all trips the ASSERT.
******************************************************************************
*/
static UWORD8 ihevce_check_if_buf_can_be_freed(
    UWORD8 *pu1_pred_id_of_winners,
    UWORD8 u1_idx_of_worst_cost_in_pred_buf_array,
    UWORD8 u1_num_cands_previously_added)
{
    UWORD8 u1_num_trysts = 0;
    UWORD8 u1_winner;

    for(u1_winner = 0; u1_winner < u1_num_cands_previously_added; u1_winner++)
    {
        if(pu1_pred_id_of_winners[u1_winner] != u1_idx_of_worst_cost_in_pred_buf_array)
        {
            continue;
        }

        /* A second reference means the buffer is shared */
        if(++u1_num_trysts > 1)
        {
            return 0;
        }
    }

    ASSERT(u1_num_trysts > 0);

    return 1;
}
1345
/*!
******************************************************************************
* \brief
*  Collects the worst (largest) costs of an array, and their indices, in
*  decreasing order of cost
*
* \param[in,out] pu4_cost_src             Costs to pick from. Picked entries
*                                         are zeroed while searching and
*                                         written back before returning
* \param[out] pu4_cost_dst                Picked costs
* \param[out] pu1_worst_dst_cand_idx      Index in pu4_cost_src of each pick
* \param[in] u1_src_array_length          Number of entries in pu4_cost_src
* \param[in] u1_num_cands_to_pick         Number of picks requested
* \param[in] u1_worst_cost_idx_in_dst_array  Index used for the first pick;
*                                         it is taken as is, not searched for
*
* \return
*  None
*
* \note
*  The first pick is recorded and zeroed in pu4_cost_src even when
*  u1_num_cands_to_pick is 0; in that case it is not written back.
*  Restoration runs in pick order, so when the same index is picked more
*  than once the later pick's cost is what ends up in pu4_cost_src.
******************************************************************************
*/
static void ihevce_get_worst_costs_and_indices(
    UWORD32 *pu4_cost_src,
    UWORD32 *pu4_cost_dst,
    UWORD8 *pu1_worst_dst_cand_idx,
    UWORD8 u1_src_array_length,
    UWORD8 u1_num_cands_to_pick,
    UWORD8 u1_worst_cost_idx_in_dst_array)
{
    WORD32 i4_pick;
    UWORD8 u1_picked_idx = u1_worst_cost_idx_in_dst_array;

    /* Pick 0 : supplied by the caller */
    pu4_cost_dst[0] = pu4_cost_src[u1_picked_idx];
    pu4_cost_src[u1_picked_idx] = 0;
    pu1_worst_dst_cand_idx[0] = u1_picked_idx;

    /* Remaining picks : zeroing the previous pick exposes the next worst */
    i4_pick = 1;

    while(i4_pick < u1_num_cands_to_pick)
    {
        u1_picked_idx = ihevce_find_idx_of_worst_cost(pu4_cost_src, u1_src_array_length);

        pu1_worst_dst_cand_idx[i4_pick] = u1_picked_idx;
        pu4_cost_dst[i4_pick] = pu4_cost_src[u1_picked_idx];
        pu4_cost_src[u1_picked_idx] = 0;

        i4_pick++;
    }

    /* Put the zeroed costs back, in pick order */
    for(i4_pick = 0; i4_pick < u1_num_cands_to_pick; i4_pick++)
    {
        pu4_cost_src[pu1_worst_dst_cand_idx[i4_pick]] = pu4_cost_dst[i4_pick];
    }
}
1374
/*!
******************************************************************************
* \brief
*  Decides which of the new candidates enter the list of winners when the
*  list cannot hold all of them, and which slots of the list they occupy
*
* \param[in,out] pu4_cost_src           Costs of the new candidates; sorted
*                                       in place together with pe_cand_id
* \param[in,out] pu4_cost_dst           Costs of the previously added winners
*                                       (modified temporarily while the
*                                       worst ones are being collected)
* \param[in,out] pe_cand_id             On entry, ids of the new candidates.
*                                       On exit, ids of the selected
*                                       candidates in the order ME_OR_SKIP,
*                                       MERGE, MIXED_TYPE1, MIXED_TYPE0
* \param[out] pu1_cand_idx_in_dst_array Slot in the list of winners for each
*                                       selected candidate
* \param[out] pu1_buf_id_to_free        Pred buffer ids of evicted winners
*                                       that no other winner references
* \param[in,out] pu1_pred_id_of_winners Pred buffer id per winner; set to
*                                       UCHAR_MAX for an evicted winner whose
*                                       buffer is still shared
* \param[in,out] pu1_num_bufs_to_free   Number of ids in pu1_buf_id_to_free
* \param[in] i4_max_num_inter_rdopt_cands  Capacity of the list of winners
* \param[in] u1_num_cands_previously_added Winners already in the list
* \param[in] u1_num_available_cands     Number of new candidates
* \param[in] u1_worst_cost_idx_in_dst_array Index of the worst winner
*
* \return
*  Number of candidates selected
******************************************************************************
*/
static UWORD8 ihevce_select_cands_to_replace_previous_worst(
    UWORD32 *pu4_cost_src,
    UWORD32 *pu4_cost_dst,
    INTER_CANDIDATE_ID_T *pe_cand_id,
    UWORD8 *pu1_cand_idx_in_dst_array,
    UWORD8 *pu1_buf_id_to_free,
    UWORD8 *pu1_pred_id_of_winners,
    UWORD8 *pu1_num_bufs_to_free,
    WORD32 i4_max_num_inter_rdopt_cands,
    UWORD8 u1_num_cands_previously_added,
    UWORD8 u1_num_available_cands,
    UWORD8 u1_worst_cost_idx_in_dst_array)
{
    WORD32 i, j, k;
    UWORD32 au4_worst_dst_costs[4];
    UWORD8 au1_worst_dst_cand_idx[4];

    INTER_CANDIDATE_ID_T ae_default_cand_id[4] = {
        ME_OR_SKIP_DERIVED, MERGE_DERIVED, MIXED_MODE_TYPE1, MIXED_MODE_TYPE0
    };

    /* Free slots are filled without comparing against existing winners */
    UWORD8 u1_num_cands_to_add_wo_comparisons =
        i4_max_num_inter_rdopt_cands - u1_num_cands_previously_added;

    /* Kept signed so that the sanity check below can actually fail; the */
    /* same test on the UWORD8 value is always true */
    WORD32 i4_num_cands_to_add_after_comparisons =
        (WORD32)u1_num_available_cands - (WORD32)u1_num_cands_to_add_wo_comparisons;
    UWORD8 u1_num_cands_to_add_after_comparisons =
        (UWORD8)i4_num_cands_to_add_after_comparisons;
    UWORD8 u1_num_cands_to_add = 0;
    UWORD8 au1_valid_src_cands[4] = { 0, 0, 0, 0 };

    ASSERT(i4_num_cands_to_add_after_comparisons >= 0);

    /* Sorting src costs */
    SORT_PRIMARY_INTTYPE_ARRAY_AND_REORDER_GENERIC_COMPANION_ARRAY(
        pu4_cost_src, pe_cand_id, u1_num_available_cands, INTER_CANDIDATE_ID_T);

    for(i = 0; i < u1_num_cands_to_add_wo_comparisons; i++)
    {
        pu1_cand_idx_in_dst_array[u1_num_cands_to_add++] = u1_num_cands_previously_added + i;
        au1_valid_src_cands[pe_cand_id[i]] = 1;
    }

    if(u1_num_cands_previously_added)
    {
        WORD8 i1_last_index = 0;

        ihevce_get_worst_costs_and_indices(
            pu4_cost_dst,
            au4_worst_dst_costs,
            au1_worst_dst_cand_idx,
            u1_num_cands_previously_added,
            u1_num_cands_to_add_after_comparisons,
            u1_worst_cost_idx_in_dst_array);

        /* Walk the remaining new candidates from the costliest one and */
        /* look for the point from which they beat the worst winners */
        for(i = u1_num_available_cands - 1; i >= u1_num_cands_to_add_wo_comparisons; i--)
        {
            for(j = u1_num_cands_to_add_after_comparisons - 1; j >= i1_last_index; j--)
            {
                if((pu4_cost_src[i] < au4_worst_dst_costs[j]))
                {
                    if((i - u1_num_cands_to_add_wo_comparisons) <= j)
                    {
                        for(k = 0; k <= (i - u1_num_cands_to_add_wo_comparisons); k++)
                        {
                            pu1_cand_idx_in_dst_array[u1_num_cands_to_add++] =
                                au1_worst_dst_cand_idx[k];
                            au1_valid_src_cands[pe_cand_id[u1_num_cands_to_add_wo_comparisons + k]] =
                                1;

                            /* The evicted winner's buffer is released only */
                            /* when no other winner shares it */
                            if(1 == ihevce_check_if_buf_can_be_freed(
                                        pu1_pred_id_of_winners,
                                        pu1_pred_id_of_winners[au1_worst_dst_cand_idx[k]],
                                        u1_num_cands_previously_added))
                            {
                                pu1_buf_id_to_free[(*pu1_num_bufs_to_free)++] =
                                    pu1_pred_id_of_winners[au1_worst_dst_cand_idx[k]];
                            }
                            else
                            {
                                pu1_pred_id_of_winners[au1_worst_dst_cand_idx[k]] = UCHAR_MAX;
                            }
                        }

                        i1_last_index = -1;
                    }
                    else
                    {
                        i1_last_index = j;
                    }

                    break;
                }
            }

            if(-1 == i1_last_index)
            {
                break;
            }
        }
    }

    /* Hand the selected ids back in the default order. The complete table */
    /* of ids is scanned: au1_valid_src_cands is indexed by candidate id, */
    /* and when the caller has already dropped some candidates the */
    /* surviving ids are not necessarily the first u1_num_available_cands */
    /* entries of the table */
    for(i = 0, j = 0; i < (WORD32)(sizeof(ae_default_cand_id) / sizeof(ae_default_cand_id[0]));
        i++)
    {
        if(au1_valid_src_cands[ae_default_cand_id[i]])
        {
            pe_cand_id[j++] = ae_default_cand_id[i];
        }
    }

    return u1_num_cands_to_add;
}
1484
ihevce_merge_cands_with_existing_best(inter_cu_mode_info_t * ps_mode_info,cu_inter_cand_t ** pps_cand_src,pu_mv_t (* pas_mvp_winner)[NUM_INTER_PU_PARTS],UWORD32 (* pau4_cost)[MAX_NUM_INTER_PARTS],void ** ppv_pred_buf_list,UWORD8 (* pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],UWORD32 * pu4_pred_buf_usage_indicator,UWORD8 * pu1_num_merge_cands,UWORD8 * pu1_num_skip_cands,UWORD8 * pu1_num_mixed_mode_type0_cands,UWORD8 * pu1_num_mixed_mode_type1_cands,UWORD8 * pu1_merge_pred_buf_idx_array,FT_COPY_2D * pf_copy_2d,WORD32 i4_pred_stride,WORD32 i4_max_num_inter_rdopt_cands,UWORD8 u1_cu_size,UWORD8 u1_part_type,UWORD8 u1_eval_merge,UWORD8 u1_eval_skip,UWORD8 u1_num_bytes_per_pel)1485 static UWORD8 ihevce_merge_cands_with_existing_best(
1486 inter_cu_mode_info_t *ps_mode_info,
1487 cu_inter_cand_t **pps_cand_src,
1488 pu_mv_t (*pas_mvp_winner)[NUM_INTER_PU_PARTS],
1489 UWORD32 (*pau4_cost)[MAX_NUM_INTER_PARTS],
1490 void **ppv_pred_buf_list,
1491 UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
1492 UWORD32 *pu4_pred_buf_usage_indicator,
1493 UWORD8 *pu1_num_merge_cands,
1494 UWORD8 *pu1_num_skip_cands,
1495 UWORD8 *pu1_num_mixed_mode_type0_cands,
1496 UWORD8 *pu1_num_mixed_mode_type1_cands,
1497 UWORD8 *pu1_merge_pred_buf_idx_array,
1498
1499 FT_COPY_2D *pf_copy_2d,
1500
1501 WORD32 i4_pred_stride,
1502 WORD32 i4_max_num_inter_rdopt_cands,
1503 UWORD8 u1_cu_size,
1504 UWORD8 u1_part_type,
1505 UWORD8 u1_eval_merge,
1506 UWORD8 u1_eval_skip,
1507 UWORD8 u1_num_bytes_per_pel)
1508 {
1509 UWORD32 au4_cost_src[4];
1510 WORD32 i;
1511 WORD32 u1_num_available_cands;
1512 UWORD8 au1_buf_id_in_use[4];
1513 UWORD8 au1_buf_id_to_free[4];
1514 UWORD8 au1_cand_idx_in_dst_array[4];
1515
1516 INTER_CANDIDATE_ID_T ae_cand_id[4] = {
1517 ME_OR_SKIP_DERIVED, MERGE_DERIVED, MIXED_MODE_TYPE1, MIXED_MODE_TYPE0
1518 };
1519
1520 cu_inter_cand_t **pps_cand_dst = ps_mode_info->aps_cu_data;
1521
1522 UWORD8 u1_num_cands_previously_added = ps_mode_info->u1_num_inter_cands;
1523 UWORD8 u1_worst_cost_idx = ps_mode_info->u1_idx_of_worst_cost_in_cost_array;
1524 UWORD8 u1_idx_of_worst_cost_in_pred_buf_array =
1525 ps_mode_info->u1_idx_of_worst_cost_in_pred_buf_array;
1526 UWORD32 *pu4_cost_dst = ps_mode_info->au4_cost;
1527 UWORD8 *pu1_pred_id_of_winners = ps_mode_info->au1_pred_buf_idx;
1528 UWORD8 u1_num_bufs_to_free = 0;
1529 UWORD8 u1_skip_or_merge_cand_is_valid = 0;
1530 UWORD8 u1_num_invalid_cands = 0;
1531
1532 memset(au1_buf_id_in_use, UCHAR_MAX, sizeof(au1_buf_id_in_use));
1533
1534 u1_num_available_cands = (u1_eval_merge) ? 2 + ((u1_part_type != 0) + 1) : 1;
1535
1536 for(i = 0; i < u1_num_available_cands; i++)
1537 {
1538 WORD32 i4_idx = i - u1_num_invalid_cands;
1539
1540 if(u1_part_type == 0)
1541 {
1542 au4_cost_src[i4_idx] = pau4_cost[ae_cand_id[i4_idx]][0];
1543 }
1544 else
1545 {
1546 au4_cost_src[i4_idx] =
1547 pau4_cost[ae_cand_id[i4_idx]][0] + pau4_cost[ae_cand_id[i4_idx]][1];
1548 }
1549
1550 if(au4_cost_src[i4_idx] >= INT_MAX)
1551 {
1552 memmove(
1553 &ae_cand_id[i4_idx],
1554 &ae_cand_id[i4_idx + 1],
1555 sizeof(INTER_CANDIDATE_ID_T) * (u1_num_available_cands - i - 1));
1556
1557 u1_num_invalid_cands++;
1558 }
1559 }
1560
1561 u1_num_available_cands -= u1_num_invalid_cands;
1562
1563 if((u1_num_cands_previously_added + u1_num_available_cands) > i4_max_num_inter_rdopt_cands)
1564 {
1565 u1_num_available_cands = ihevce_select_cands_to_replace_previous_worst(
1566 au4_cost_src,
1567 pu4_cost_dst,
1568 ae_cand_id,
1569 au1_cand_idx_in_dst_array,
1570 au1_buf_id_to_free,
1571 pu1_pred_id_of_winners,
1572 &u1_num_bufs_to_free,
1573 i4_max_num_inter_rdopt_cands,
1574 u1_num_cands_previously_added,
1575 u1_num_available_cands,
1576 u1_worst_cost_idx);
1577 }
1578 else
1579 {
1580 for(i = 0; i < u1_num_available_cands; i++)
1581 {
1582 au1_cand_idx_in_dst_array[i] = u1_num_cands_previously_added + i;
1583 }
1584 }
1585
1586 for(i = 0; i < u1_num_available_cands; i++)
1587 {
1588 UWORD8 u1_dst_array_idx = au1_cand_idx_in_dst_array[i];
1589
1590 if(u1_part_type == 0)
1591 {
1592 au4_cost_src[i] = pau4_cost[ae_cand_id[i]][0];
1593 }
1594 else
1595 {
1596 au4_cost_src[i] = pau4_cost[ae_cand_id[i]][0] + pau4_cost[ae_cand_id[i]][1];
1597 }
1598
1599 pps_cand_dst[u1_dst_array_idx] = pps_cand_src[ae_cand_id[i]];
1600
1601 /* Adding a skip candidate identical to the merge winner */
1602 if((u1_eval_merge) && (0 == u1_part_type) && (MIXED_MODE_TYPE1 == ae_cand_id[i]))
1603 {
1604 (*pu1_num_skip_cands)++;
1605
1606 pu4_cost_dst[u1_dst_array_idx] = au4_cost_src[i];
1607
1608 if(u1_num_cands_previously_added >= i4_max_num_inter_rdopt_cands)
1609 {
1610 u1_worst_cost_idx =
1611 ihevce_find_idx_of_worst_cost(pu4_cost_dst, u1_num_cands_previously_added);
1612
1613 u1_idx_of_worst_cost_in_pred_buf_array = pu1_pred_id_of_winners[u1_worst_cost_idx];
1614 }
1615 else
1616 {
1617 u1_num_cands_previously_added++;
1618 }
1619
1620 if(u1_skip_or_merge_cand_is_valid)
1621 {
1622 pps_cand_dst[u1_dst_array_idx]->pu1_pred_data =
1623 (UWORD8 *)ppv_pred_buf_list[au1_buf_id_in_use[MERGE_DERIVED]];
1624 pps_cand_dst[u1_dst_array_idx]->pu2_pred_data =
1625 (UWORD16 *)ppv_pred_buf_list[au1_buf_id_in_use[MERGE_DERIVED]];
1626 pps_cand_dst[u1_dst_array_idx]->i4_pred_data_stride = i4_pred_stride;
1627
1628 au1_buf_id_in_use[MIXED_MODE_TYPE1] = au1_buf_id_in_use[MERGE_DERIVED];
1629 pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MERGE_DERIVED];
1630 }
1631 else
1632 {
1633 u1_skip_or_merge_cand_is_valid = 1;
1634
1635 au1_buf_id_in_use[MIXED_MODE_TYPE1] = ihevce_merge_cand_pred_buffer_preparation(
1636 ppv_pred_buf_list,
1637 pps_cand_dst[u1_dst_array_idx],
1638 pau1_final_pred_buf_id,
1639 i4_pred_stride,
1640 u1_cu_size,
1641 u1_part_type,
1642 u1_num_bytes_per_pel,
1643 pf_copy_2d);
1644
1645 pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MIXED_MODE_TYPE1];
1646 }
1647
1648 continue;
1649 }
1650
1651 if(u1_num_cands_previously_added < i4_max_num_inter_rdopt_cands)
1652 {
1653 if(u1_num_cands_previously_added)
1654 {
1655 if(au4_cost_src[i] > pu4_cost_dst[u1_worst_cost_idx])
1656 {
1657 u1_worst_cost_idx = u1_num_cands_previously_added;
1658 }
1659 }
1660
1661 pu4_cost_dst[u1_dst_array_idx] = au4_cost_src[i];
1662
1663 u1_num_cands_previously_added++;
1664 }
1665 else
1666 {
1667 pu4_cost_dst[u1_dst_array_idx] = au4_cost_src[i];
1668
1669 u1_worst_cost_idx = ihevce_find_idx_of_worst_cost(
1670 ps_mode_info->au4_cost, u1_num_cands_previously_added);
1671
1672 u1_idx_of_worst_cost_in_pred_buf_array = pu1_pred_id_of_winners[u1_worst_cost_idx];
1673 }
1674
1675 switch(ae_cand_id[i])
1676 {
1677 case ME_OR_SKIP_DERIVED:
1678 {
1679 (*pu1_num_skip_cands) += u1_eval_skip;
1680
1681 pps_cand_dst[u1_dst_array_idx]->pu1_pred_data =
1682 (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
1683 pps_cand_dst[u1_dst_array_idx]->pu2_pred_data =
1684 (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
1685 pps_cand_dst[u1_dst_array_idx]->i4_pred_data_stride = i4_pred_stride;
1686
1687 if(u1_worst_cost_idx == u1_dst_array_idx)
1688 {
1689 u1_idx_of_worst_cost_in_pred_buf_array =
1690 pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
1691 }
1692
1693 u1_skip_or_merge_cand_is_valid = u1_eval_skip;
1694
1695 au1_buf_id_in_use[ME_OR_SKIP_DERIVED] = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
1696 pu1_pred_id_of_winners[u1_dst_array_idx] =
1697 pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
1698
1699 break;
1700 }
1701 case MERGE_DERIVED:
1702 {
1703 (*pu1_num_merge_cands)++;
1704
1705 au1_buf_id_in_use[MERGE_DERIVED] = ihevce_merge_cand_pred_buffer_preparation(
1706 ppv_pred_buf_list,
1707 pps_cand_dst[u1_dst_array_idx],
1708 pau1_final_pred_buf_id,
1709 i4_pred_stride,
1710 u1_cu_size,
1711 u1_part_type,
1712 u1_num_bytes_per_pel,
1713 pf_copy_2d
1714
1715 );
1716
1717 pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MERGE_DERIVED];
1718
1719 if(u1_worst_cost_idx == u1_dst_array_idx)
1720 {
1721 u1_idx_of_worst_cost_in_pred_buf_array = au1_buf_id_in_use[MERGE_DERIVED];
1722 }
1723
1724 u1_skip_or_merge_cand_is_valid = 1;
1725
1726 break;
1727 }
1728 case MIXED_MODE_TYPE1:
1729 {
1730 (*pu1_num_mixed_mode_type1_cands)++;
1731
1732 au1_buf_id_in_use[MIXED_MODE_TYPE1] =
1733 ihevce_mixed_mode_cand_type1_pred_buffer_preparation(
1734 ppv_pred_buf_list,
1735 pps_cand_dst[u1_dst_array_idx],
1736 pau1_final_pred_buf_id,
1737 pu1_merge_pred_buf_idx_array,
1738 i4_pred_stride,
1739 au1_buf_id_in_use[ME_OR_SKIP_DERIVED],
1740 au1_buf_id_in_use[MERGE_DERIVED],
1741 (u1_num_available_cands - i) > 1,
1742 u1_cu_size,
1743 u1_part_type,
1744 u1_num_bytes_per_pel,
1745 pf_copy_2d
1746
1747 );
1748
1749 pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MIXED_MODE_TYPE1];
1750
1751 if(u1_worst_cost_idx == u1_dst_array_idx)
1752 {
1753 u1_idx_of_worst_cost_in_pred_buf_array = au1_buf_id_in_use[MIXED_MODE_TYPE1];
1754 }
1755
1756 break;
1757 }
1758 case MIXED_MODE_TYPE0:
1759 {
1760 (*pu1_num_mixed_mode_type0_cands)++;
1761
1762 au1_buf_id_in_use[MIXED_MODE_TYPE0] =
1763 ihevce_mixed_mode_cand_type0_pred_buffer_preparation(
1764 ppv_pred_buf_list,
1765 pps_cand_dst[u1_dst_array_idx],
1766 pau1_final_pred_buf_id,
1767 pu1_merge_pred_buf_idx_array,
1768 au1_buf_id_in_use[ME_OR_SKIP_DERIVED],
1769 au1_buf_id_in_use[MERGE_DERIVED],
1770 au1_buf_id_in_use[MIXED_MODE_TYPE1],
1771 i4_pred_stride,
1772 u1_cu_size,
1773 u1_part_type,
1774 u1_num_bytes_per_pel,
1775 pf_copy_2d);
1776
1777 pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MIXED_MODE_TYPE0];
1778
1779 if(u1_worst_cost_idx == u1_dst_array_idx)
1780 {
1781 u1_idx_of_worst_cost_in_pred_buf_array = au1_buf_id_in_use[MIXED_MODE_TYPE0];
1782 }
1783
1784 break;
1785 }
1786 }
1787 }
1788
1789 ihevce_free_unused_buf_indices(
1790 pu4_pred_buf_usage_indicator,
1791 pu1_merge_pred_buf_idx_array,
1792 au1_buf_id_in_use,
1793 au1_buf_id_to_free,
1794 pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0],
1795 u1_num_available_cands,
1796 u1_num_bufs_to_free,
1797 u1_eval_merge,
1798 u1_eval_skip,
1799 u1_part_type);
1800
1801 ps_mode_info->u1_idx_of_worst_cost_in_cost_array = u1_worst_cost_idx;
1802 ps_mode_info->u1_num_inter_cands = u1_num_cands_previously_added;
1803 ps_mode_info->u1_idx_of_worst_cost_in_pred_buf_array = u1_idx_of_worst_cost_in_pred_buf_array;
1804
1805 return u1_skip_or_merge_cand_is_valid;
1806 }
1807
/*!
******************************************************************************
* \brief
*  Reserves pred buffers and selects the candidate structures that the
*  ME/skip, merge and mixed-mode candidates of the current evaluation will
*  be written to
*
* \param[in,out] ps_ctxt            Sifter context : candidate stores, pred
*                                   buffer list and buffer usage indicator
* \param[out] pps_cands             Candidate structure per candidate id
* \param[out] pu1_merge_pred_buf_idx_array Buffer ids obtained for skip
*                                   (2 requested), merge with a single
*                                   partition (2) or merge otherwise (3)
* \param[out] pu1_me_pred_buf_idx   Buffer id obtained for the ME candidate
*                                   (not requested when only skip is
*                                   evaluated)
* \param[in] u1_part_type           Partition type (0 => single partition)
* \param[in] u1_me_cand_list_idx    Index of the ME candidate in the context
* \param[in] u1_eval_merge          Non-zero when merge is to be evaluated
* \param[in] u1_eval_skip           Non-zero when skip is to be evaluated;
*                                   looked at only when u1_eval_merge is 0
*
* \return
*  0 when a buffer request could not be met in full; otherwise the number of
*  buffers returned by the last request made
******************************************************************************
*/
static UWORD8 ihevce_prepare_cand_containers(
    ihevce_inter_cand_sifter_prms_t *ps_ctxt,
    cu_inter_cand_t **pps_cands,
    UWORD8 *pu1_merge_pred_buf_idx_array,
    UWORD8 *pu1_me_pred_buf_idx,
    UWORD8 u1_part_type,
    UWORD8 u1_me_cand_list_idx,
    UWORD8 u1_eval_merge,
    UWORD8 u1_eval_skip)
{
    UWORD8 u1_num_bufs_obtained;
    cu_inter_cand_t *ps_me_cand;

    void **ppv_pred_buf_list = ps_ctxt->ps_pred_buf_data->apv_inter_pred_data;
    WORD32 i4_pred_stride = ps_ctxt->ps_pred_buf_data->i4_pred_stride;
    UWORD8 u1_cu_pos_x = ps_ctxt->u1_cu_pos_x;
    UWORD8 u1_cu_pos_y = ps_ctxt->u1_cu_pos_y;
    UWORD8 u1_cu_size = ps_ctxt->u1_cu_size;

    /* Merge evaluation */
    if(u1_eval_merge)
    {
        cu_inter_cand_t *ps_merge_cand;
        UWORD8 u1_num_merge_bufs_reqd = (u1_part_type > 0) ? 3 : 2;

        u1_num_bufs_obtained = ihevce_get_free_pred_buf_indices(
            pu1_me_pred_buf_idx, &ps_ctxt->ps_pred_buf_data->u4_is_buf_in_use, 1);

        if(u1_num_bufs_obtained < 1)
        {
            return 0;
        }

        ps_me_cand = &ps_ctxt->ps_me_cands[u1_me_cand_list_idx];
        pps_cands[ME_OR_SKIP_DERIVED] = ps_me_cand;

        u1_num_bufs_obtained = ihevce_get_free_pred_buf_indices(
            pu1_merge_pred_buf_idx_array,
            &ps_ctxt->ps_pred_buf_data->u4_is_buf_in_use,
            u1_num_merge_bufs_reqd);

        if(u1_num_bufs_obtained < u1_num_merge_bufs_reqd)
        {
            return 0;
        }

        /* The merge candidate starts out as a copy of the ME candidate */
        ps_merge_cand = &ps_ctxt->ps_cu_inter_merge_skip->as_cu_inter_merge_skip_cand
                             [ps_ctxt->ps_cu_inter_merge_skip->u1_num_merge_cands];

        pps_cands[MERGE_DERIVED] = ps_merge_cand;
        *ps_merge_cand = *ps_me_cand;

        if(u1_part_type > 0)
        {
            /* Type0 entries are taken from the front of the mixed mode */
            /* store, type1 entries from its back */
            cu_inter_cand_t *ps_type0_cand =
                &ps_ctxt->ps_mixed_modes_datastore
                     ->as_cu_data[ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type0_cands];
            cu_inter_cand_t *ps_type1_cand =
                &ps_ctxt->ps_mixed_modes_datastore->as_cu_data
                     [MAX_NUM_MIXED_MODE_INTER_RDO_CANDS - 1 -
                      ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type1_cands];

            pps_cands[MIXED_MODE_TYPE0] = ps_type0_cand;
            pps_cands[MIXED_MODE_TYPE1] = ps_type1_cand;

            *ps_type0_cand = *ps_me_cand;
            *ps_type1_cand = *ps_me_cand;
        }

        ps_merge_cand->as_inter_pu[0].b1_merge_flag = 1;
        ps_merge_cand->as_inter_pu[1].b1_merge_flag = 1;

        return u1_num_bufs_obtained;
    }

    /* Skip evaluation */
    if(u1_eval_skip)
    {
        cu_inter_cand_t *ps_skip_cand;

        u1_num_bufs_obtained = ihevce_get_free_pred_buf_indices(
            pu1_merge_pred_buf_idx_array, &ps_ctxt->ps_pred_buf_data->u4_is_buf_in_use, 2);

        if(u1_num_bufs_obtained < 2)
        {
            return 0;
        }

        /* Skip entries are taken from the back of the merge/skip store */
        ps_skip_cand = &ps_ctxt->ps_cu_inter_merge_skip->as_cu_inter_merge_skip_cand
                            [MAX_NUM_CU_MERGE_SKIP_CAND - 1 -
                             ps_ctxt->ps_cu_inter_merge_skip->u1_num_skip_cands];

        pps_cands[ME_OR_SKIP_DERIVED] = ps_skip_cand;

        /* CU level fields */
        ps_skip_cand->b1_skip_flag = 1;
        ps_skip_cand->b1_eval_mark = 1;
        ps_skip_cand->b1_eval_tx_cusize = 1;
        ps_skip_cand->b1_eval_tx_cusize_by2 = 1;
        ps_skip_cand->b1_intra_has_won = 0;
        ps_skip_cand->b3_part_size = 0;
        ps_skip_cand->i4_pred_data_stride = i4_pred_stride;

        /* First PU : position and size are stored in units of 4 pels */
        ps_skip_cand->as_inter_pu[0].b1_intra_flag = 0;
        ps_skip_cand->as_inter_pu[0].b1_merge_flag = 1;
        ps_skip_cand->as_inter_pu[0].b4_pos_x = u1_cu_pos_x >> 2;
        ps_skip_cand->as_inter_pu[0].b4_pos_y = u1_cu_pos_y >> 2;
        ps_skip_cand->as_inter_pu[0].b4_wd = (u1_cu_size >> 2) - 1;
        ps_skip_cand->as_inter_pu[0].b4_ht = (u1_cu_size >> 2) - 1;

        /* Skip and merge share the same structure */
        pps_cands[MERGE_DERIVED] = ps_skip_cand;

        return u1_num_bufs_obtained;
    }

    /* ME candidate alone */
    u1_num_bufs_obtained = ihevce_get_free_pred_buf_indices(
        pu1_me_pred_buf_idx, &ps_ctxt->ps_pred_buf_data->u4_is_buf_in_use, 1);

    if(u1_num_bufs_obtained < 1)
    {
        return 0;
    }

    ps_me_cand = &ps_ctxt->ps_me_cands[u1_me_cand_list_idx];
    pps_cands[ME_OR_SKIP_DERIVED] = ps_me_cand;

    ps_me_cand->i4_pred_data_stride = i4_pred_stride;
    ps_me_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[*pu1_me_pred_buf_idx];
    ps_me_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[*pu1_me_pred_buf_idx];

    return u1_num_bufs_obtained;
}
1937
ihevce_merge_prms_init(merge_prms_t * ps_prms,merge_cand_list_t * ps_list,inter_pred_ctxt_t * ps_mc_ctxt,mv_pred_ctxt_t * ps_mv_pred_ctxt,PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu,PF_SAD_FXN_T pf_sad_fxn,void ** ppv_pred_buf_list,ihevce_cmn_opt_func_t * ps_cmn_utils_optimised_function_list,UWORD8 * pu1_merge_pred_buf_array,UWORD8 (* pau1_best_pred_buf_id)[MAX_NUM_INTER_PARTS],UWORD8 * pu1_is_top_used,WORD32 (* pai4_noise_term)[MAX_NUM_INTER_PARTS],UWORD32 (* pau4_pred_variance)[MAX_NUM_INTER_PARTS],UWORD32 * pu4_src_variance,WORD32 i4_alpha_stim_multiplier,WORD32 i4_src_stride,WORD32 i4_pred_stride,WORD32 i4_lambda,UWORD8 u1_is_cu_noisy,UWORD8 u1_is_hbd,UWORD8 u1_max_cands,UWORD8 u1_merge_idx_cabac_model,UWORD8 u1_use_merge_cand_from_top_row)1938 static __inline void ihevce_merge_prms_init(
1939 merge_prms_t *ps_prms,
1940 merge_cand_list_t *ps_list,
1941 inter_pred_ctxt_t *ps_mc_ctxt,
1942 mv_pred_ctxt_t *ps_mv_pred_ctxt,
1943 PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu,
1944 PF_SAD_FXN_T pf_sad_fxn,
1945 void **ppv_pred_buf_list,
1946 ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
1947 UWORD8 *pu1_merge_pred_buf_array,
1948 UWORD8 (*pau1_best_pred_buf_id)[MAX_NUM_INTER_PARTS],
1949 UWORD8 *pu1_is_top_used,
1950 WORD32 (*pai4_noise_term)[MAX_NUM_INTER_PARTS],
1951 UWORD32 (*pau4_pred_variance)[MAX_NUM_INTER_PARTS],
1952 UWORD32 *pu4_src_variance,
1953 WORD32 i4_alpha_stim_multiplier,
1954 WORD32 i4_src_stride,
1955 WORD32 i4_pred_stride,
1956 WORD32 i4_lambda,
1957 UWORD8 u1_is_cu_noisy,
1958 UWORD8 u1_is_hbd,
1959 UWORD8 u1_max_cands,
1960 UWORD8 u1_merge_idx_cabac_model,
1961 UWORD8 u1_use_merge_cand_from_top_row)
1962 {
1963 ps_prms->ps_list = ps_list;
1964 ps_prms->ps_mc_ctxt = ps_mc_ctxt;
1965 ps_prms->ps_mv_pred_ctxt = ps_mv_pred_ctxt;
1966 ps_prms->pf_luma_inter_pred_pu = pf_luma_inter_pred_pu;
1967 ps_prms->pf_sad_fxn = pf_sad_fxn;
1968 ps_prms->ppv_pred_buf_list = ppv_pred_buf_list;
1969 ps_prms->ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
1970
1971 ps_prms->pu1_merge_pred_buf_array = pu1_merge_pred_buf_array;
1972 ps_prms->pau1_best_pred_buf_id = pau1_best_pred_buf_id;
1973 ps_prms->pu1_is_top_used = pu1_is_top_used;
1974 ps_prms->pai4_noise_term = pai4_noise_term;
1975 ps_prms->pau4_pred_variance = pau4_pred_variance;
1976 ps_prms->pu4_src_variance = pu4_src_variance;
1977 ps_prms->i4_alpha_stim_multiplier = i4_alpha_stim_multiplier;
1978 ps_prms->i4_src_stride = i4_src_stride;
1979 ps_prms->i4_pred_stride = i4_pred_stride;
1980 ps_prms->i4_lambda = i4_lambda;
1981 ps_prms->u1_is_cu_noisy = u1_is_cu_noisy;
1982 ps_prms->u1_is_hbd = u1_is_hbd;
1983 ps_prms->u1_max_cands = u1_max_cands;
1984 ps_prms->u1_merge_idx_cabac_model = u1_merge_idx_cabac_model;
1985 ps_prms->u1_use_merge_cand_from_top_row = u1_use_merge_cand_from_top_row;
1986 }
1987
/*!
******************************************************************************
* \if Function name : ihevce_merge_candidate_seive \endif
*
* \brief
*    Returns the number of merge candidates that the caller should evaluate.
*
*    - u1_use_merge_cand_from_top_row != 0 : u1_num_merge_cands is returned
*      unchanged.
*    - otherwise, with neither the left nor the bottom-left neighbour
*      available : 0.
*    - otherwise : 1 when pu1_is_top_used[0] is zero, else 0.
*
*    ps_merge_cand is not read by this function.
*
*****************************************************************************
*/
static UWORD8 ihevce_merge_candidate_seive(
    nbr_avail_flags_t *ps_nbr,
    merge_cand_list_t *ps_merge_cand,
    UWORD8 *pu1_is_top_used,
    UWORD8 u1_num_merge_cands,
    UWORD8 u1_use_merge_cand_from_top_row)
{
    /* Part of the signature only; never read here */
    (void)ps_merge_cand;

    if(u1_use_merge_cand_from_top_row)
    {
        return u1_num_merge_cands;
    }

    if(!(ps_nbr->u1_bot_lt_avail || ps_nbr->u1_left_avail))
    {
        return 0;
    }

    /* Only the first candidate is considered, and only if its  */
    /* entry in pu1_is_top_used is clear                         */
    return !pu1_is_top_used[0];
}
2011
/*!
******************************************************************************
* \if Function name : ihevce_compute_pred_and_populate_modes \endif
*
* \brief
*    Evaluates one entry of the ME candidate list (or the skip candidate):
*    for every PU it generates the luma prediction and a cost, optionally
*    searches the merge candidates, fills the cost / pred-buffer tables of
*    the ME-or-skip, merge-derived and the two mixed-mode candidates, and
*    finally hands everything to ihevce_merge_cands_with_existing_best().
*
* \param[in,out] ps_ctxt            sifter context; source, neighbour, pred
*                                   buffer and candidate stores are read
*                                   through it
* \param[in] pf_sad_func            distortion function forwarded to the
*                                   pred-and-cost helper and to merge search
* \param[in] pu4_src_variance       per-PU source variance; read here only
*                                   when the CU is noisy and
*                                   i4_alpha_stim_multiplier is non-zero
* \param[in] u1_part_type           partition type; PRT_2Nx2N gives one PU,
*                                   every other value gives two
* \param[in] u1_me_cand_list_idx    index into pai4_me_err_metric and
*                                   pai4_mv_cost; asserted to be smaller
*                                   than ps_ctxt->u1_num_me_cands
* \param[in] u1_eval_merge          non-zero to search merge candidates
* \param[in] u1_eval_skip           non-zero to evaluate the skip candidate;
*                                   asserted to be mutually exclusive with
*                                   u1_eval_merge
*
* \return
*    0 when ihevce_prepare_cand_containers() returns 0; otherwise the value
*    returned by ihevce_merge_cands_with_existing_best()
*
*****************************************************************************
*/
ihevce_compute_pred_and_populate_modes(ihevce_inter_cand_sifter_prms_t * ps_ctxt,PF_SAD_FXN_T pf_sad_func,UWORD32 * pu4_src_variance,UWORD8 u1_part_type,UWORD8 u1_me_cand_list_idx,UWORD8 u1_eval_merge,UWORD8 u1_eval_skip)2012 static UWORD8 ihevce_compute_pred_and_populate_modes(
2013 ihevce_inter_cand_sifter_prms_t *ps_ctxt,
2014 PF_SAD_FXN_T pf_sad_func,
2015 UWORD32 *pu4_src_variance,
2016 UWORD8 u1_part_type,
2017 UWORD8 u1_me_cand_list_idx,
2018 UWORD8 u1_eval_merge,
2019 UWORD8 u1_eval_skip)
2020 {
/* The 4-entry tables below are indexed by the ME_OR_SKIP_DERIVED, */
/* MERGE_DERIVED, MIXED_MODE_TYPE0 and MIXED_MODE_TYPE1 constants   */
2021 cu_inter_cand_t *aps_cands[4];
2022 pu_mv_t as_mvp_winner[4][NUM_INTER_PU_PARTS];
2023 merge_prms_t s_merge_prms;
2024 merge_cand_list_t as_merge_cand[MAX_NUM_MERGE_CAND];
2025
2026 UWORD8 i, j;
2027 UWORD32 au4_cost[4][NUM_INTER_PU_PARTS];
2028 UWORD8 au1_final_pred_buf_id[4][NUM_INTER_PU_PARTS];
2029 UWORD8 au1_merge_pred_buf_idx_array[3];
2030 UWORD8 au1_is_top_used[MAX_NUM_MERGE_CAND];
2031 UWORD8 u1_me_pred_buf_idx;
2032 UWORD8 u1_num_bufs_currently_allocated;
2033 WORD32 i4_mean;
2034 UWORD32 au4_pred_variance[4][NUM_INTER_PU_PARTS];
2035 WORD32 ai4_noise_term[4][NUM_INTER_PU_PARTS];
2036
2037 UWORD8 u1_cu_pos_x = ps_ctxt->u1_cu_pos_x;
2038 UWORD8 u1_cu_pos_y = ps_ctxt->u1_cu_pos_y;
2039
2040 inter_cu_mode_info_t *ps_cu_mode_info = ps_ctxt->ps_inter_cu_mode_info;
2041 inter_pred_ctxt_t *ps_mc_ctxt = ps_ctxt->ps_mc_ctxt;
/* Local copies of the neighbour pointers: they are advanced after PU0 */
/* so that PU1's merge derivation starts from PU1's own position       */
2042 nbr_4x4_t *ps_cu_nbr_buf = ps_ctxt->aps_cu_nbr_buf[0];
2043 nbr_4x4_t *ps_pu_left_nbr = ps_ctxt->ps_left_nbr_4x4;
2044 nbr_4x4_t *ps_pu_top_nbr = ps_ctxt->ps_top_nbr_4x4;
2045 nbr_4x4_t *ps_pu_topleft_nbr = ps_ctxt->ps_topleft_nbr_4x4;
2046
2047 ihevce_inter_pred_buf_data_t *ps_pred_buf_info = ps_ctxt->ps_pred_buf_data;
2048 mv_pred_ctxt_t *ps_mv_pred_ctxt = ps_ctxt->ps_mv_pred_ctxt;
2049
2050 PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu = ps_ctxt->pf_luma_inter_pred_pu;
2051
2052 void *pv_src = ps_ctxt->pv_src;
2053 WORD32 i4_src_stride = ps_ctxt->i4_src_strd;
2054 WORD32 i4_pred_stride = ps_ctxt->ps_pred_buf_data->i4_pred_stride;
/* One PU for PRT_2Nx2N, two for every other partition type */
2055 UWORD8 u1_num_parts = (u1_part_type != PRT_2Nx2N) + 1;
/* Used below to turn the PU1 pel offsets into byte offsets */
2056 UWORD8 u1_num_bytes_per_pel = ps_ctxt->u1_is_hbd + 1;
2057 void **ppv_pred_buf_list = ps_ctxt->ps_pred_buf_data->apv_inter_pred_data;
2058 UWORD8 u1_cu_size = ps_ctxt->u1_cu_size;
2059 WORD32 i4_nbr_4x4_left_stride = ps_ctxt->i4_nbr_4x4_left_strd;
2060 UWORD8 *pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map;
2061 WORD32 i4_nbr_map_stride = ps_ctxt->i4_ctb_nbr_map_stride;
2062 UWORD8 u1_max_merge_candidates = ps_ctxt->u1_max_merge_candidates;
2063 WORD32 i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands;
/* Byte offsets of the current PU inside the pred and source buffers; */
/* zero for PU0, set after PU0 has been processed                     */
2064 WORD32 i4_pred_buf_offset = 0;
2065 WORD32 i4_src_buf_offset = 0;
/* Set for an 8x8 CU when i4_log2_parallel_merge_level_minus2 is non-zero; */
/* both PUs of the CU then use the 2Nx2N neighbours (see below)            */
2066 UWORD8 u1_single_mcl_flag =
2067 ((8 == u1_cu_size) && (ps_mv_pred_ctxt->i4_log2_parallel_merge_level_minus2 > 0));
2068 UWORD8 u1_skip_or_merge_cand_is_valid = 0;
2069 WORD32 i4_lambda_qf = ps_ctxt->i4_lambda_qf;
2070 UWORD8 u1_is_cu_noisy = ps_ctxt->u1_is_cu_noisy;
2071
/* Skip and merge evaluation are never requested together */
2072 ASSERT(0 == (u1_eval_skip && u1_eval_merge));
2073 ASSERT(u1_me_cand_list_idx < ps_ctxt->u1_num_me_cands);
2074
2075 /*
2076 Algorithm -
2077 1. Determine pred and satd for ME cand.
2078 2. Determine merge winner for PU1.
2079 3. Determine pred and satd for mixed_type0 cand.
2080 4. Determine merge winner for PU2 and hence derive pred and satd for merge cand.
2081 5. Determine merge winner for PU2 assuming ME cand as PU1 winner and hence derive
2082 pred and satd for mixed_type1 cand.
2083 6. Sort the 4 preceding costs and hence, the cand list.
2084 7. Merge the sorted lists with the final cand list.
2085
2086 PS : 2 - 7 will be relevant only if u1_eval_merge = 1 and u1_eval_skip = 0
2087 PPS : 1 will not be relevant if u1_eval_skip = 1
2088 */
2089
2090 /*
2091 Explanatory notes -
2092 1. Motion Vector Merge candidates and nbr's in all merge mode (RealD)
2093 2. Motion Vector Merge candidates and nbr's in mixed mode (AltD)
2094 */
2095
/* Fills aps_cands, the merge pred-buffer index array and the ME */
/* pred-buffer index; its return value is stored as a buffer count */
2096 u1_num_bufs_currently_allocated = ihevce_prepare_cand_containers(
2097 ps_ctxt,
2098 aps_cands,
2099 au1_merge_pred_buf_idx_array,
2100 &u1_me_pred_buf_idx,
2101 u1_part_type,
2102 u1_me_cand_list_idx,
2103 u1_eval_merge,
2104 u1_eval_skip);
2105
/* A zero count ends the evaluation: nothing is added to the lists */
2106 if(0 == u1_num_bufs_currently_allocated)
2107 {
2108 return 0;
2109 }
2110
/* s_merge_prms is only consumed by the merge search, which runs for */
/* merge and skip evaluation alone                                   */
2111 if((u1_eval_merge) || (u1_eval_skip))
2112 {
2113 ihevce_merge_prms_init(
2114 &s_merge_prms,
2115 as_merge_cand,
2116 ps_mc_ctxt,
2117 ps_mv_pred_ctxt,
2118 pf_luma_inter_pred_pu,
2119 pf_sad_func,
2120 ppv_pred_buf_list,
2121 ps_ctxt->ps_cmn_utils_optimised_function_list,
2122 au1_merge_pred_buf_idx_array,
2123 au1_final_pred_buf_id,
2124 au1_is_top_used,
2125 ai4_noise_term,
2126 au4_pred_variance,
2127 pu4_src_variance,
2128 ps_ctxt->i4_alpha_stim_multiplier,
2129 i4_src_stride,
2130 i4_pred_stride,
2131 i4_lambda_qf,
2132 u1_is_cu_noisy,
2133 ps_ctxt->u1_is_hbd,
2134 u1_max_merge_candidates,
2135 ps_ctxt->u1_merge_idx_cabac_model,
2136 ps_ctxt->u1_use_merge_cand_from_top_row);
2137 }
2138
/* Per-PU evaluation */
2139 for(i = 0; i < u1_num_parts; i++)
2140 {
2141 nbr_avail_flags_t s_nbr;
2142
2143 UWORD8 u1_part_wd;
2144 UWORD8 u1_part_ht;
2145 UWORD8 u1_pu_pos_x_4x4;
2146 UWORD8 u1_pu_pos_y_4x4;
2147
2148 pu_t *ps_pu = &aps_cands[MERGE_DERIVED]->as_inter_pu[i];
2149
2150 PART_SIZE_E e_part_size = (PART_SIZE_E)aps_cands[ME_OR_SKIP_DERIVED]->b3_part_size;
2151
2152 void *pv_pu_src = (UWORD8 *)pv_src + i4_src_buf_offset;
2153 UWORD8 u1_num_merge_cands = 0;
2154
/* PU geometry comes from candidate 0: b4_wd / b4_ht hold the size in */
/* 4-pel units minus one, b4_pos_x / b4_pos_y the position in 4x4 units */
2155 u1_part_wd = (aps_cands[0]->as_inter_pu[i].b4_wd + 1) << 2;
2156 u1_part_ht = (aps_cands[0]->as_inter_pu[i].b4_ht + 1) << 2;
2157 u1_pu_pos_x_4x4 = aps_cands[0]->as_inter_pu[i].b4_pos_x;
2158 u1_pu_pos_y_4x4 = aps_cands[0]->as_inter_pu[i].b4_pos_y;
2159
2160 /* Inter cand pred and satd */
2161 if(!u1_eval_skip)
2162 {
2163 void *pv_pu_pred = (UWORD8 *)ppv_pred_buf_list[u1_me_pred_buf_idx] + i4_pred_buf_offset;
2164
2165 if(ps_ctxt->u1_reuse_me_sad)
2166 {
/* The helper's return value is discarded and the ME error metric is */
/* used as cost. The 0 passed here (1 in the other branch) presumably */
/* tells the helper whether to compute the cost - TODO confirm         */
2167 ihevce_compute_inter_pred_and_cost(
2168 ps_mc_ctxt,
2169 pf_luma_inter_pred_pu,
2170 pf_sad_func,
2171 &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[i],
2172 pv_pu_src,
2173 pv_pu_pred,
2174 i4_src_stride,
2175 i4_pred_stride,
2176 0,
2177 ps_ctxt->ps_cmn_utils_optimised_function_list);
2178
2179 au4_cost[ME_OR_SKIP_DERIVED][i] =
2180 ps_ctxt->pai4_me_err_metric[u1_me_cand_list_idx][i];
2181 }
2182 else
2183 {
2184 au4_cost[ME_OR_SKIP_DERIVED][i] = ihevce_compute_inter_pred_and_cost(
2185 ps_mc_ctxt,
2186 pf_luma_inter_pred_pu,
2187 pf_sad_func,
2188 &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[i],
2189 pv_pu_src,
2190 pv_pu_pred,
2191 i4_src_stride,
2192 i4_pred_stride,
2193 1,
2194 ps_ctxt->ps_cmn_utils_optimised_function_list);
2195 }
2196
2197 au1_final_pred_buf_id[ME_OR_SKIP_DERIVED][i] = u1_me_pred_buf_idx;
2198
/* Noisy CU: derive a noise term from the source and prediction */
/* variances and apply it to the ME cost                          */
2199 if(u1_is_cu_noisy && ps_ctxt->i4_alpha_stim_multiplier)
2200 {
2201 ihevce_calc_variance(
2202 pv_pu_pred,
2203 i4_pred_stride,
2204 &i4_mean,
2205 &au4_pred_variance[ME_OR_SKIP_DERIVED][i],
2206 u1_part_ht,
2207 u1_part_wd,
2208 ps_ctxt->u1_is_hbd,
2209 0);
2210
2211 ai4_noise_term[ME_OR_SKIP_DERIVED][i] = ihevce_compute_noise_term(
2212 ps_ctxt->i4_alpha_stim_multiplier,
2213 pu4_src_variance[i],
2214 au4_pred_variance[ME_OR_SKIP_DERIVED][i]);
2215
2216 MULTIPLY_STIM_WITH_DISTORTION(
2217 au4_cost[ME_OR_SKIP_DERIVED][i],
2218 ai4_noise_term[ME_OR_SKIP_DERIVED][i],
2219 STIM_Q_FORMAT,
2220 ALPHA_Q_FORMAT);
2221 }
2222 }
2223
/* Merge candidate search for this PU */
2224 if(u1_eval_skip || u1_eval_merge)
2225 {
2226 pu_t s_pu, *ps_pu_merge;
2227
2228 UWORD8 u1_is_any_top_available = 1;
2229 UWORD8 u1_are_valid_merge_cands_available = 1;
2230
2231 /* get the neighbour availability flags */
2232 if((u1_num_parts > 1) && u1_single_mcl_flag)
2233 { /* 8x8 SMPs take the 2Nx2N neighbours */
2234 ihevce_get_only_nbr_flag(
2235 &s_nbr,
2236 pu1_ctb_nbr_map,
2237 i4_nbr_map_stride,
2238 aps_cands[0]->as_inter_pu[0].b4_pos_x,
2239 aps_cands[0]->as_inter_pu[0].b4_pos_y,
2240 u1_cu_size >> 2,
2241 u1_cu_size >> 2);
2242
2243 /* Make the PU width and height as 8 */
2244 memcpy(&s_pu, ps_pu, sizeof(pu_t));
2245 s_pu.b4_pos_x = u1_cu_pos_x >> 2;
2246 s_pu.b4_pos_y = u1_cu_pos_y >> 2;
2247 s_pu.b4_wd = (u1_cu_size >> 2) - 1;
2248 s_pu.b4_ht = (u1_cu_size >> 2) - 1;
2249
2250 /* Give the local PU structure to MV merge */
2251 ps_pu_merge = &s_pu;
2252 }
2253 else
2254 {
2255 ihevce_get_only_nbr_flag(
2256 &s_nbr,
2257 pu1_ctb_nbr_map,
2258 i4_nbr_map_stride,
2259 u1_pu_pos_x_4x4,
2260 u1_pu_pos_y_4x4,
2261 u1_part_wd >> 2,
2262 u1_part_ht >> 2);
2263
2264 u1_is_any_top_available = s_nbr.u1_top_avail || s_nbr.u1_top_rt_avail ||
2265 s_nbr.u1_top_lt_avail;
2266
/* Top-row candidates disallowed: hide the top neighbours when a left */
/* or bottom-left neighbour exists; if only top neighbours exist, the */
/* merge search is skipped for this PU                                 */
2267 if(!ps_ctxt->u1_use_merge_cand_from_top_row)
2268 {
2269 if(u1_is_any_top_available)
2270 {
2271 if(s_nbr.u1_left_avail || s_nbr.u1_bot_lt_avail)
2272 {
2273 s_nbr.u1_top_avail = 0;
2274 s_nbr.u1_top_rt_avail = 0;
2275 s_nbr.u1_top_lt_avail = 0;
2276 }
2277 else
2278 {
2279 u1_are_valid_merge_cands_available = 0;
2280 }
2281 }
2282 }
2283
2284 /* Actual PU passed to MV merge */
2285 ps_pu_merge = ps_pu;
2286 }
2287 if(u1_are_valid_merge_cands_available)
2288 {
2289 u1_num_merge_cands = ihevce_mv_pred_merge(
2290 ps_mv_pred_ctxt,
2291 ps_pu_top_nbr,
2292 ps_pu_left_nbr,
2293 ps_pu_topleft_nbr,
2294 i4_nbr_4x4_left_stride,
2295 &s_nbr,
2296 NULL,
2297 ps_pu_merge,
2298 e_part_size,
2299 i,
2300 u1_single_mcl_flag,
2301 as_merge_cand,
2302 au1_is_top_used);
2303
/* Clamp to the configured maximum number of merge candidates */
2304 if(u1_num_merge_cands > u1_max_merge_candidates)
2305 {
2306 u1_num_merge_cands = u1_max_merge_candidates;
2307 }
2308
/* The candidate count is passed through unchanged when top-row */
/* candidates are allowed or no top neighbour was available      */
2309 u1_num_merge_cands = ihevce_merge_candidate_seive(
2310 &s_nbr,
2311 as_merge_cand,
2312 au1_is_top_used,
2313 u1_num_merge_cands,
2314 ps_ctxt->u1_use_merge_cand_from_top_row || !u1_is_any_top_available);
2315
/* Identity mapping: the first u1_num_merge_cands indices are valid */
2316 for(j = 0; j < u1_num_merge_cands; j++)
2317 {
2318 s_merge_prms.au1_valid_merge_indices[j] = j;
2319 }
2320
/* NOTE(review): on the skip path au4_cost[ME_OR_SKIP_DERIVED][i] has */
/* not been written yet when it is passed below - presumably the      */
/* callee ignores it when u1_eval_skip is set; confirm                 */
2321 au4_cost[MERGE_DERIVED][i] = ihevce_determine_best_merge_pu(
2322 &s_merge_prms,
2323 &aps_cands[MERGE_DERIVED]->as_inter_pu[i],
2324 &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[i],
2325 pv_pu_src,
2326 au4_cost[ME_OR_SKIP_DERIVED][i],
2327 i4_pred_buf_offset,
2328 u1_num_merge_cands,
2329 i,
2330 u1_eval_skip);
2331 }
2332 else
2333 {
/* INT_MAX marks the merge result of this PU as unusable */
2334 au4_cost[MERGE_DERIVED][i] = INT_MAX;
2335 }
2336
/* Mixed type0 takes the merge result for PU0, mixed type1 takes it */
/* for PU1; the remaining PU of each takes the ME result (below)     */
2337 au4_cost[(i) ? MIXED_MODE_TYPE1 : MIXED_MODE_TYPE0][i] = au4_cost[MERGE_DERIVED][i];
2338
2339 if(u1_eval_skip)
2340 {
2341 /* This statement ensures that the skip candidate is always added */
2342 au4_cost[ME_OR_SKIP_DERIVED][i] =
2343 (au4_cost[MERGE_DERIVED][0] < INT_MAX) ? SKIP_MODE_COST : INT_MAX;
2344 au1_final_pred_buf_id[ME_OR_SKIP_DERIVED][i] =
2345 au1_final_pred_buf_id[MERGE_DERIVED][i];
2346 }
2347 else
2348 {
/* ME cost = distortion + MV cost of this ME candidate's PU */
2349 au4_cost[ME_OR_SKIP_DERIVED][i] += ps_ctxt->pai4_mv_cost[u1_me_cand_list_idx][i];
2350 au4_cost[(i) ? MIXED_MODE_TYPE0 : MIXED_MODE_TYPE1][i] =
2351 au4_cost[ME_OR_SKIP_DERIVED][i];
2352 }
2353
/* NOTE(review): when the merge search was skipped above, this function */
/* has not written au1_final_pred_buf_id[MERGE_DERIVED][i] for this PU  */
/* before copying it here - presumably harmless because the cost is     */
/* INT_MAX; confirm                                                      */
2354 au1_final_pred_buf_id[(i) ? MIXED_MODE_TYPE1 : MIXED_MODE_TYPE0][i] =
2355 au1_final_pred_buf_id[MERGE_DERIVED][i];
2356 au1_final_pred_buf_id[(i) ? MIXED_MODE_TYPE0 : MIXED_MODE_TYPE1][i] =
2357 au1_final_pred_buf_id[ME_OR_SKIP_DERIVED][i];
2358 }
2359 else
2360 {
/* ME-only evaluation: just add the MV cost to the distortion */
2361 au4_cost[ME_OR_SKIP_DERIVED][i] += ps_ctxt->pai4_mv_cost[u1_me_cand_list_idx][i];
2362 }
2363
/* After PU0 of a two-PU merge evaluation: mark PU0 in the neighbour */
/* map, write the ME candidate's PU0 into the CU neighbour buffer,   */
/* then move the offsets and neighbour pointers to PU1                */
2364 if(!i && (u1_num_parts > 1) && u1_eval_merge)
2365 {
2366 ihevce_set_inter_nbr_map(
2367 pu1_ctb_nbr_map,
2368 i4_nbr_map_stride,
2369 u1_pu_pos_x_4x4,
2370 u1_pu_pos_y_4x4,
2371 (u1_part_wd >> 2),
2372 (u1_part_ht >> 2),
2373 1);
2374 ihevce_populate_nbr_4x4_with_pu_data(
2375 ps_cu_nbr_buf, &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[i], u1_cu_size >> 2);
2376
/* PU0 narrower than the CU: PU1 lies to the right of PU0 */
2377 if(u1_part_wd < u1_cu_size)
2378 {
2379 i4_pred_buf_offset = i4_src_buf_offset = u1_part_wd;
2380
2381 if(!u1_single_mcl_flag) /* 8x8 SMPs take the 2Nx2N neighbours */
2382 {
2383 ps_cu_nbr_buf += (u1_part_wd >> 2);
2384 ps_pu_left_nbr = ps_cu_nbr_buf - 1;
2385 ps_pu_top_nbr += (u1_part_wd >> 2);
2386 ps_pu_topleft_nbr = ps_pu_top_nbr - 1;
2387
2388 i4_nbr_4x4_left_stride = (u1_cu_size >> 2);
2389 }
2390 }
/* PU0 shorter than the CU: PU1 lies below PU0 */
2391 else if(u1_part_ht < u1_cu_size)
2392 {
2393 i4_pred_buf_offset = u1_part_ht * i4_pred_stride;
2394 i4_src_buf_offset = u1_part_ht * i4_src_stride;
2395
2396 if(!u1_single_mcl_flag) /* 8x8 SMPs take the 2Nx2N neighbours */
2397 {
2398 ps_cu_nbr_buf += (u1_part_ht >> 2) * (u1_cu_size >> 2);
2399 ps_pu_left_nbr += (u1_part_ht >> 2) * i4_nbr_4x4_left_stride;
2400 ps_pu_top_nbr = ps_cu_nbr_buf - (u1_cu_size >> 2);
2401 ps_pu_topleft_nbr = ps_pu_left_nbr - i4_nbr_4x4_left_stride;
2402 }
2403 }
2404
/* Offsets were computed in pels; convert to bytes */
2405 i4_pred_buf_offset *= u1_num_bytes_per_pel;
2406 i4_src_buf_offset *= u1_num_bytes_per_pel;
2407
/* Mixed type0 = merge winner for PU0 */
2408 aps_cands[MIXED_MODE_TYPE0]->as_inter_pu[0] = aps_cands[MERGE_DERIVED]->as_inter_pu[0];
2409 }
/* After PU0 without merge evaluation: only the buffer offsets move */
2410 else if(!i && (u1_num_parts > 1) && (!u1_eval_merge))
2411 {
2412 if(u1_part_wd < u1_cu_size)
2413 {
2414 i4_pred_buf_offset = i4_src_buf_offset = u1_part_wd;
2415 }
2416 else if(u1_part_ht < u1_cu_size)
2417 {
2418 i4_pred_buf_offset = u1_part_ht * i4_pred_stride;
2419 i4_src_buf_offset = u1_part_ht * i4_src_stride;
2420 }
2421
2422 i4_pred_buf_offset *= u1_num_bytes_per_pel;
2423 i4_src_buf_offset *= u1_num_bytes_per_pel;
2424 }
/* After PU1 of a merge evaluation: mixed type1 = merge winner for PU1 */
2425 else if(i && (u1_num_parts > 1) && u1_eval_merge)
2426 {
2427 aps_cands[MIXED_MODE_TYPE1]->as_inter_pu[1] = aps_cands[MERGE_DERIVED]->as_inter_pu[1];
2428 }
2429 }
2430
2431 /* Adding a skip candidate */
/* Done for a merge evaluation with u1_part_type == 0: the merge-derived */
/* candidate is copied into the merge/skip store, counted back from its  */
/* last entry, flagged as skip, and takes over the MIXED_MODE_TYPE1 slot */
2432 if((u1_eval_merge) && (0 == u1_part_type))
2433 {
2434 cu_inter_cand_t *ps_cand = &ps_ctxt->ps_cu_inter_merge_skip->as_cu_inter_merge_skip_cand
2435 [MAX_NUM_CU_MERGE_SKIP_CAND - 1 -
2436 ps_ctxt->ps_cu_inter_merge_skip->u1_num_skip_cands];
2437
2438 (*ps_cand) = (*aps_cands[MERGE_DERIVED]);
2439
2440 ps_cand->b1_skip_flag = 1;
2441
2442 aps_cands[MIXED_MODE_TYPE1] = ps_cand;
2443 au4_cost[MIXED_MODE_TYPE1][0] = (au4_cost[MERGE_DERIVED][0] < INT_MAX) ? SKIP_MODE_COST
2444 : INT_MAX;
2445 }
2446
2447 /* Sort and populate */
2448 u1_skip_or_merge_cand_is_valid = ihevce_merge_cands_with_existing_best(
2449 ps_cu_mode_info,
2450 aps_cands,
2451 as_mvp_winner,
2452 au4_cost,
2453 ppv_pred_buf_list,
2454 au1_final_pred_buf_id,
2455 &ps_pred_buf_info->u4_is_buf_in_use,
2456 &ps_ctxt->ps_cu_inter_merge_skip->u1_num_merge_cands,
2457 &ps_ctxt->ps_cu_inter_merge_skip->u1_num_skip_cands,
2458 &ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type0_cands,
2459 &ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type1_cands,
2460 au1_merge_pred_buf_idx_array,
2461 ps_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d,
2462
2463 i4_pred_stride,
2464 i4_max_num_inter_rdopt_cands,
2465 u1_cu_size,
2466 u1_part_type,
2467 u1_eval_merge,
2468 u1_eval_skip,
2469 u1_num_bytes_per_pel);
2470
2471 return u1_skip_or_merge_cand_is_valid;
2472 }
2473
/*!
******************************************************************************
* \if Function name : ihevce_redundant_candidate_pruner \endif
*
* \brief
*    Removes candidates from the final inter candidate list that repeat the
*    motion of another candidate but signal it differently.
*
*    Two non-skip candidates with the same partition size are compared PU by
*    PU with ihevce_compare_pu_mv_t(). For every PU that compares equal, the
*    difference of the two merge flags is accumulated. A positive sum marks
*    the later candidate as redundant, a negative sum the earlier one.
*    Marked entries are then removed in place from aps_cu_data, au4_cost and
*    au1_pred_buf_idx, and u1_num_inter_cands is reduced accordingly.
*
*    NOTE(review): the second PU is examined only when the first PU matched,
*    and a mismatch on the second PU does not cancel the contribution of the
*    first - confirm that marking on a first-PU match alone is intended.
*
*****************************************************************************
*/
static __inline void ihevce_redundant_candidate_pruner(inter_cu_mode_info_t *ps_inter_cu_mode_info)
{
    UWORD8 au1_is_redundant[MAX_NUM_INTER_RDO_CANDS] = { 0 };
    WORD8 i1_ref;
    WORD8 i1_other;
    WORD8 i1_idx;

    /* Pass 1 : mark the redundant candidates */
    for(i1_ref = 0; i1_ref < (ps_inter_cu_mode_info->u1_num_inter_cands - 1); i1_ref++)
    {
        cu_inter_cand_t *ps_ref = ps_inter_cu_mode_info->aps_cu_data[i1_ref];

        if(au1_is_redundant[i1_ref] || ps_ref->b1_skip_flag)
        {
            continue;
        }

        for(i1_other = i1_ref + 1; i1_other < ps_inter_cu_mode_info->u1_num_inter_cands;
            i1_other++)
        {
            cu_inter_cand_t *ps_other = ps_inter_cu_mode_info->aps_cu_data[i1_other];
            WORD8 i1_merge_bias = 0;

            if(au1_is_redundant[i1_other] || ps_other->b1_skip_flag)
            {
                continue;
            }

            /* Only candidates of the same partition size are compared */
            if(ps_other->b3_part_size != ps_ref->b3_part_size)
            {
                continue;
            }

            /* The first PU has to match before anything is accumulated */
            if(!ihevce_compare_pu_mv_t(
                   &ps_ref->as_inter_pu[0].mv,
                   &ps_other->as_inter_pu[0].mv,
                   ps_ref->as_inter_pu[0].b2_pred_mode,
                   ps_other->as_inter_pu[0].b2_pred_mode))
            {
                continue;
            }

            i1_merge_bias +=
                ps_ref->as_inter_pu[0].b1_merge_flag - ps_other->as_inter_pu[0].b1_merge_flag;

            /* Non-zero partition size : a second PU exists */
            if(ps_ref->b3_part_size && ihevce_compare_pu_mv_t(
                                           &ps_ref->as_inter_pu[1].mv,
                                           &ps_other->as_inter_pu[1].mv,
                                           ps_ref->as_inter_pu[1].b2_pred_mode,
                                           ps_other->as_inter_pu[1].b2_pred_mode))
            {
                i1_merge_bias +=
                    ps_ref->as_inter_pu[1].b1_merge_flag - ps_other->as_inter_pu[1].b1_merge_flag;
            }

            if(i1_merge_bias > 0)
            {
                au1_is_redundant[i1_other] = 1;
            }
            else if(i1_merge_bias < 0)
            {
                au1_is_redundant[i1_ref] = 1;
            }
        }
    }

    /* Pass 2 : close the gaps left by the marked candidates. The index is */
    /* not advanced after a removal, since a new entry has moved into it   */
    i1_idx = 0;

    while(i1_idx < ps_inter_cu_mode_info->u1_num_inter_cands)
    {
        WORD32 i4_num_after;

        if(!au1_is_redundant[i1_idx])
        {
            i1_idx++;
            continue;
        }

        i4_num_after = ps_inter_cu_mode_info->u1_num_inter_cands - i1_idx - 1;

        memmove(
            &ps_inter_cu_mode_info->aps_cu_data[i1_idx],
            &ps_inter_cu_mode_info->aps_cu_data[i1_idx + 1],
            i4_num_after * sizeof(ps_inter_cu_mode_info->aps_cu_data[0]));

        memmove(
            &ps_inter_cu_mode_info->au4_cost[i1_idx],
            &ps_inter_cu_mode_info->au4_cost[i1_idx + 1],
            i4_num_after * sizeof(ps_inter_cu_mode_info->au4_cost[0]));

        memmove(
            &ps_inter_cu_mode_info->au1_pred_buf_idx[i1_idx],
            &ps_inter_cu_mode_info->au1_pred_buf_idx[i1_idx + 1],
            i4_num_after * sizeof(ps_inter_cu_mode_info->au1_pred_buf_idx[0]));

        memmove(
            &au1_is_redundant[i1_idx],
            &au1_is_redundant[i1_idx + 1],
            i4_num_after * sizeof(au1_is_redundant[0]));

        ps_inter_cu_mode_info->u1_num_inter_cands--;
    }
}
2566
2567 /*!
2568 ******************************************************************************
2569 * \if Function name : ihevce_inter_cand_sifter \endif
2570 *
2571 * \brief
2572 * Selects the best inter candidate modes amongst ME, merge,
2573 * skip and mixed modes. Also computes corresponding preds
2574 *
2575 * \author
2576 * Ittiam
2577 *
2578 *****************************************************************************
2579 */
ihevce_inter_cand_sifter(ihevce_inter_cand_sifter_prms_t * ps_ctxt)2580 void ihevce_inter_cand_sifter(ihevce_inter_cand_sifter_prms_t *ps_ctxt)
2581 {
2582 PF_SAD_FXN_T pf_sad_func;
2583
2584 UWORD8 au1_final_cand_idx[MAX_INTER_CU_CANDIDATES];
2585 UWORD8 au1_part_types_evaluated[MAX_INTER_CU_CANDIDATES];
2586 UWORD8 u1_num_unique_parts;
2587 UWORD8 i, j;
2588 UWORD32 au4_src_variance[NUM_INTER_PU_PARTS];
2589 WORD32 i4_mean;
2590
2591 cu_inter_cand_t *ps_me_cands = ps_ctxt->ps_me_cands;
2592 inter_cu_mode_info_t *ps_cu_mode_info = ps_ctxt->ps_inter_cu_mode_info;
2593
2594 UWORD8 u1_diff_skip_cand_flag = 1;
2595 WORD8 i1_skip_cand_from_merge_idx = -1;
2596 WORD8 i1_final_skip_cand_merge_idx = -1;
2597 UWORD8 u1_max_num_part_types_to_select = MAX_INTER_CU_CANDIDATES;
2598 UWORD8 u1_num_me_cands = ps_ctxt->u1_num_me_cands;
2599 UWORD8 u1_num_parts_evaluated_for_merge = 0;
2600 UWORD8 u1_is_cu_noisy = ps_ctxt->u1_is_cu_noisy;
2601
2602 if((ps_ctxt->u1_quality_preset >= IHEVCE_QUALITY_P3) && (ps_ctxt->i1_slice_type == BSLICE))
2603 {
2604 u1_max_num_part_types_to_select = 1;
2605 }
2606
2607 {
2608 pf_sad_func = (ps_ctxt->u1_use_satd_for_merge_eval) ? compute_satd_8bit
2609 : ps_ctxt->pf_evalsad_pt_npu_mxn_8bit;
2610 }
2611
2612 u1_num_unique_parts = ihevce_get_num_part_types_in_me_cand_list(
2613 ps_me_cands,
2614 au1_part_types_evaluated,
2615 au1_final_cand_idx,
2616 &u1_diff_skip_cand_flag,
2617 &i1_skip_cand_from_merge_idx,
2618 &i1_final_skip_cand_merge_idx,
2619 u1_max_num_part_types_to_select,
2620 u1_num_me_cands);
2621
2622 if((u1_num_me_cands + u1_diff_skip_cand_flag) && u1_is_cu_noisy &&
2623 ps_ctxt->i4_alpha_stim_multiplier)
2624 {
2625 ihevce_calc_variance(
2626 ps_ctxt->pv_src,
2627 ps_ctxt->i4_src_strd,
2628 &i4_mean,
2629 &ps_cu_mode_info->u4_src_variance,
2630 ps_ctxt->u1_cu_size,
2631 ps_ctxt->u1_cu_size,
2632 ps_ctxt->u1_is_hbd,
2633 0);
2634 }
2635
2636 if(DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
2637 {
2638 u1_diff_skip_cand_flag = 0;
2639 }
2640 else if(!DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
2641 {
2642 if(ps_ctxt->u1_cu_size > MAX_CU_SIZE_WHERE_MERGE_AND_SKIPS_ENABLED_AND_WHEN_NOISY)
2643 {
2644 u1_diff_skip_cand_flag = 0;
2645 }
2646 }
2647
2648 for(i = 0; i < u1_num_me_cands + u1_diff_skip_cand_flag; i++)
2649 {
2650 UWORD8 u1_part_type;
2651 UWORD8 u1_eval_skip;
2652 UWORD8 u1_eval_merge;
2653 UWORD8 u1_valid_cand;
2654
2655 if(i == u1_num_me_cands)
2656 {
2657 u1_eval_skip = 1;
2658 u1_eval_merge = 0;
2659 u1_part_type = 0;
2660 }
2661 else
2662 {
2663 u1_eval_skip = 0;
2664 u1_part_type = ps_me_cands[i].b3_part_size;
2665
2666 if(u1_num_parts_evaluated_for_merge >= u1_num_unique_parts)
2667 {
2668 u1_eval_merge = 0;
2669 u1_num_parts_evaluated_for_merge = u1_num_unique_parts;
2670 }
2671 else
2672 {
2673 u1_eval_merge = (i == au1_final_cand_idx[u1_num_parts_evaluated_for_merge]);
2674 }
2675
2676 for(j = 0; (j < u1_num_parts_evaluated_for_merge) && (u1_eval_merge); j++)
2677 {
2678 if(u1_part_type == au1_part_types_evaluated[j])
2679 {
2680 u1_eval_merge = 0;
2681 break;
2682 }
2683 }
2684 }
2685
2686 if(u1_is_cu_noisy && u1_part_type && ps_ctxt->i4_alpha_stim_multiplier)
2687 {
2688 void *pv_src = ps_ctxt->pv_src;
2689 UWORD8 u1_pu_wd = (ps_me_cands[i].as_inter_pu[0].b4_wd + 1) << 2;
2690 UWORD8 u1_pu_ht = (ps_me_cands[i].as_inter_pu[0].b4_ht + 1) << 2;
2691
2692 ihevce_calc_variance(
2693 pv_src,
2694 ps_ctxt->i4_src_strd,
2695 &i4_mean,
2696 &au4_src_variance[0],
2697 u1_pu_ht,
2698 u1_pu_wd,
2699 ps_ctxt->u1_is_hbd,
2700 0);
2701
2702 pv_src = (void *) (((UWORD8 *) pv_src) +
2703 ((ps_ctxt->u1_cu_size == u1_pu_wd) ? ps_ctxt->i4_src_strd * u1_pu_ht : u1_pu_wd)
2704 * (ps_ctxt->u1_is_hbd + 1));
2705 u1_pu_wd = (ps_me_cands[i].as_inter_pu[1].b4_wd + 1) << 2;
2706 u1_pu_ht = (ps_me_cands[i].as_inter_pu[1].b4_ht + 1) << 2;
2707
2708 ihevce_calc_variance(
2709 pv_src,
2710 ps_ctxt->i4_src_strd,
2711 &i4_mean,
2712 &au4_src_variance[1],
2713 u1_pu_ht,
2714 u1_pu_wd,
2715 ps_ctxt->u1_is_hbd,
2716 0);
2717 }
2718 else if(u1_is_cu_noisy && !u1_part_type && ps_ctxt->i4_alpha_stim_multiplier)
2719 {
2720 au4_src_variance[0] = ps_cu_mode_info->u4_src_variance;
2721 }
2722
2723 if(DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
2724 {
2725 u1_eval_merge = 0;
2726 }
2727 else if(!DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
2728 {
2729 if(ps_ctxt->u1_cu_size > MAX_CU_SIZE_WHERE_MERGE_AND_SKIPS_ENABLED_AND_WHEN_NOISY)
2730 {
2731 u1_eval_merge = 0;
2732 }
2733 }
2734
2735 u1_valid_cand = ihevce_compute_pred_and_populate_modes(
2736 ps_ctxt,
2737 pf_sad_func,
2738 au4_src_variance,
2739 u1_part_type,
2740 MIN(i, (u1_num_me_cands - 1)),
2741 u1_eval_merge,
2742 u1_eval_skip);
2743
2744 u1_num_parts_evaluated_for_merge += u1_eval_merge;
2745
2746 /* set the neighbour map to 0 */
2747 if(u1_part_type)
2748 {
2749 ihevce_set_nbr_map(
2750 ps_ctxt->pu1_ctb_nbr_map,
2751 ps_ctxt->i4_ctb_nbr_map_stride,
2752 (ps_ctxt->u1_cu_pos_x >> 2),
2753 (ps_ctxt->u1_cu_pos_y >> 2),
2754 (ps_ctxt->u1_cu_size >> 2),
2755 0);
2756 }
2757 }
2758
2759 ihevce_redundant_candidate_pruner(ps_ctxt->ps_inter_cu_mode_info);
2760 }
2761