1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar *
3*c83a76b0SSuyog Pawar * Copyright (C) 2018 The Android Open Source Project
4*c83a76b0SSuyog Pawar *
5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar *
9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar *
11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar * limitations under the License.
16*c83a76b0SSuyog Pawar *
17*c83a76b0SSuyog Pawar *****************************************************************************
18*c83a76b0SSuyog Pawar * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*c83a76b0SSuyog Pawar */
20*c83a76b0SSuyog Pawar /**
21*c83a76b0SSuyog Pawar ******************************************************************************
22*c83a76b0SSuyog Pawar * @file hme_refine.c
23*c83a76b0SSuyog Pawar *
24*c83a76b0SSuyog Pawar * @brief
25*c83a76b0SSuyog Pawar * Contains the implementation of the refinement layer searches and related
26*c83a76b0SSuyog Pawar * functionality like CU merge.
27*c83a76b0SSuyog Pawar *
28*c83a76b0SSuyog Pawar * @author
29*c83a76b0SSuyog Pawar * Ittiam
30*c83a76b0SSuyog Pawar *
31*c83a76b0SSuyog Pawar *
32*c83a76b0SSuyog Pawar * List of Functions
33*c83a76b0SSuyog Pawar *
34*c83a76b0SSuyog Pawar *
35*c83a76b0SSuyog Pawar ******************************************************************************
36*c83a76b0SSuyog Pawar */
37*c83a76b0SSuyog Pawar
38*c83a76b0SSuyog Pawar /*****************************************************************************/
39*c83a76b0SSuyog Pawar /* File Includes */
40*c83a76b0SSuyog Pawar /*****************************************************************************/
41*c83a76b0SSuyog Pawar /* System include files */
42*c83a76b0SSuyog Pawar #include <stdio.h>
43*c83a76b0SSuyog Pawar #include <string.h>
44*c83a76b0SSuyog Pawar #include <stdlib.h>
45*c83a76b0SSuyog Pawar #include <assert.h>
46*c83a76b0SSuyog Pawar #include <stdarg.h>
47*c83a76b0SSuyog Pawar #include <math.h>
48*c83a76b0SSuyog Pawar #include <limits.h>
49*c83a76b0SSuyog Pawar
50*c83a76b0SSuyog Pawar /* User include files */
51*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
52*c83a76b0SSuyog Pawar #include "itt_video_api.h"
53*c83a76b0SSuyog Pawar #include "ihevce_api.h"
54*c83a76b0SSuyog Pawar
55*c83a76b0SSuyog Pawar #include "rc_cntrl_param.h"
56*c83a76b0SSuyog Pawar #include "rc_frame_info_collector.h"
57*c83a76b0SSuyog Pawar #include "rc_look_ahead_params.h"
58*c83a76b0SSuyog Pawar
59*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
60*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
61*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
62*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
63*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
64*c83a76b0SSuyog Pawar #include "ihevc_chroma_itrans_recon.h"
65*c83a76b0SSuyog Pawar #include "ihevc_chroma_intra_pred.h"
66*c83a76b0SSuyog Pawar #include "ihevc_intra_pred.h"
67*c83a76b0SSuyog Pawar #include "ihevc_inter_pred.h"
68*c83a76b0SSuyog Pawar #include "ihevc_mem_fns.h"
69*c83a76b0SSuyog Pawar #include "ihevc_padding.h"
70*c83a76b0SSuyog Pawar #include "ihevc_weighted_pred.h"
71*c83a76b0SSuyog Pawar #include "ihevc_sao.h"
72*c83a76b0SSuyog Pawar #include "ihevc_resi_trans.h"
73*c83a76b0SSuyog Pawar #include "ihevc_quant_iquant_ssd.h"
74*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
75*c83a76b0SSuyog Pawar
76*c83a76b0SSuyog Pawar #include "ihevce_defs.h"
77*c83a76b0SSuyog Pawar #include "ihevce_lap_enc_structs.h"
78*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_structs.h"
79*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_funcs.h"
80*c83a76b0SSuyog Pawar #include "ihevce_me_common_defs.h"
81*c83a76b0SSuyog Pawar #include "ihevce_had_satd.h"
82*c83a76b0SSuyog Pawar #include "ihevce_error_codes.h"
83*c83a76b0SSuyog Pawar #include "ihevce_bitstream.h"
84*c83a76b0SSuyog Pawar #include "ihevce_cabac.h"
85*c83a76b0SSuyog Pawar #include "ihevce_rdoq_macros.h"
86*c83a76b0SSuyog Pawar #include "ihevce_function_selector.h"
87*c83a76b0SSuyog Pawar #include "ihevce_enc_structs.h"
88*c83a76b0SSuyog Pawar #include "ihevce_entropy_structs.h"
89*c83a76b0SSuyog Pawar #include "ihevce_cmn_utils_instr_set_router.h"
90*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_structs.h"
91*c83a76b0SSuyog Pawar #include "ihevce_bs_compute_ctb.h"
92*c83a76b0SSuyog Pawar #include "ihevce_global_tables.h"
93*c83a76b0SSuyog Pawar #include "ihevce_dep_mngr_interface.h"
94*c83a76b0SSuyog Pawar #include "hme_datatype.h"
95*c83a76b0SSuyog Pawar #include "hme_interface.h"
96*c83a76b0SSuyog Pawar #include "hme_common_defs.h"
97*c83a76b0SSuyog Pawar #include "hme_defs.h"
98*c83a76b0SSuyog Pawar #include "ihevce_me_instr_set_router.h"
99*c83a76b0SSuyog Pawar #include "hme_globals.h"
100*c83a76b0SSuyog Pawar #include "hme_utils.h"
101*c83a76b0SSuyog Pawar #include "hme_coarse.h"
102*c83a76b0SSuyog Pawar #include "hme_fullpel.h"
103*c83a76b0SSuyog Pawar #include "hme_subpel.h"
104*c83a76b0SSuyog Pawar #include "hme_refine.h"
105*c83a76b0SSuyog Pawar #include "hme_err_compute.h"
106*c83a76b0SSuyog Pawar #include "hme_common_utils.h"
107*c83a76b0SSuyog Pawar #include "hme_search_algo.h"
108*c83a76b0SSuyog Pawar #include "ihevce_stasino_helpers.h"
109*c83a76b0SSuyog Pawar #include "ihevce_common_utils.h"
110*c83a76b0SSuyog Pawar
111*c83a76b0SSuyog Pawar /*****************************************************************************/
112*c83a76b0SSuyog Pawar /* Globals */
113*c83a76b0SSuyog Pawar /*****************************************************************************/
114*c83a76b0SSuyog Pawar
115*c83a76b0SSuyog Pawar /* brief: mapping buffer to convert raster scan indices into z-scan oder in a ctb */
116*c83a76b0SSuyog Pawar UWORD8 gau1_raster_scan_to_ctb[4][4] = {
117*c83a76b0SSuyog Pawar { 0, 4, 16, 20 }, { 8, 12, 24, 28 }, { 32, 36, 48, 52 }, { 40, 44, 56, 60 }
118*c83a76b0SSuyog Pawar };
119*c83a76b0SSuyog Pawar
120*c83a76b0SSuyog Pawar /*****************************************************************************/
/* Extern Function declaration                                               */
122*c83a76b0SSuyog Pawar /*****************************************************************************/
123*c83a76b0SSuyog Pawar extern ctb_boundary_attrs_t *
124*c83a76b0SSuyog Pawar get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt);
125*c83a76b0SSuyog Pawar
126*c83a76b0SSuyog Pawar typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)(
127*c83a76b0SSuyog Pawar search_node_t *ps_search_node,
128*c83a76b0SSuyog Pawar layer_ctxt_t *ps_curr_layer,
129*c83a76b0SSuyog Pawar layer_ctxt_t *ps_coarse_layer,
130*c83a76b0SSuyog Pawar S32 i4_pos_x,
131*c83a76b0SSuyog Pawar S32 i4_pos_y,
132*c83a76b0SSuyog Pawar S08 i1_ref_id,
133*c83a76b0SSuyog Pawar S32 i4_result_id);
134*c83a76b0SSuyog Pawar
135*c83a76b0SSuyog Pawar typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)(
136*c83a76b0SSuyog Pawar search_node_t *ps_search_node,
137*c83a76b0SSuyog Pawar layer_ctxt_t *ps_curr_layer,
138*c83a76b0SSuyog Pawar layer_ctxt_t *ps_coarse_layer,
139*c83a76b0SSuyog Pawar S32 i4_pos_x,
140*c83a76b0SSuyog Pawar S32 i4_pos_y,
141*c83a76b0SSuyog Pawar S32 i4_num_act_ref_l0,
142*c83a76b0SSuyog Pawar U08 u1_pred_dir,
143*c83a76b0SSuyog Pawar U08 u1_default_ref_id,
144*c83a76b0SSuyog Pawar S32 i4_result_id);
145*c83a76b0SSuyog Pawar
146*c83a76b0SSuyog Pawar /*****************************************************************************/
147*c83a76b0SSuyog Pawar /* Function Definitions */
148*c83a76b0SSuyog Pawar /*****************************************************************************/
149*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     ihevce_no_wt_copy
*
*  @brief  Copies a temp_stride x temp_stride block of samples from one of the
*          layer's input planes into pu1_temp_pred. The source block is
*          located at (blk_x * 8, blk_y * 8) displaced by the PU's motion
*          vector, which is added directly to the sample position. No
*          weighting is applied to the samples.
*
*  @param[in]  ps_ctxt        : not referenced by this function
*  @param[in]  ps_curr_layer  : supplies the input plane list (ppu1_list_inp)
*                               and its stride (i4_inp_stride)
*  @param[in]  ps_pu          : PU whose pred mode selects the L0 or L1
*                               reference index and motion vector
*  @param[out] pu1_temp_pred  : destination buffer
*  @param[in]  temp_stride    : destination stride; also used as the width and
*                               height of the copied block
*  @param[in]  blk_x          : block column, scaled by 8 to a sample offset
*  @param[in]  blk_y          : block row, scaled by 8 to a sample offset
*
*  @return None
********************************************************************************
*/
void ihevce_no_wt_copy(
    coarse_me_ctxt_t *ps_ctxt,
    layer_ctxt_t *ps_curr_layer,
    pu_t *ps_pu,
    UWORD8 *pu1_temp_pred,
    WORD32 temp_stride,
    WORD32 blk_x,
    WORD32 blk_y)
{
    UWORD8 *pu1_src;
    UWORD8 *pu1_dst = pu1_temp_pred;
    WORD32 i4_src_stride;
    WORD32 i4_mv_x, i4_mv_y;
    WORD32 i4_y, i4_x, i4_sample;
    WORD8 i1_ref_idx;

    ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1));

    /* Anything that is not PRED_L0 takes the L1 reference and vector */
    if(PRED_L0 == ps_pu->b2_pred_mode)
    {
        i1_ref_idx = ps_pu->mv.i1_l0_ref_idx;
        i4_mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
        i4_mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
    }
    else
    {
        i1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
        i4_mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
        i4_mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
    }

    i4_src_stride = ps_curr_layer->i4_inp_stride;

    /* Top-left sample of the displaced source block */
    pu1_src = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
    pu1_src += ((blk_y << 3) + i4_mv_y) * i4_src_stride + ((blk_x << 3) + i4_mv_x);

    for(i4_y = 0; i4_y < temp_stride; i4_y++)
    {
        for(i4_x = 0; i4_x < temp_stride; i4_x++)
        {
            i4_sample = pu1_src[i4_x];
            pu1_dst[i4_x] = CLIP_U8(i4_sample);
        }

        pu1_src += i4_src_stride;
        pu1_dst += temp_stride;
    }
}
218*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_add_clustered_mvs_as_merge_cands
*
*  @brief  Walks the given clusters and, for every cluster selected for the
*          requested prediction direction, copies its MVs into the merge
*          candidate list. Each MV is clipped to the MV range of the cluster's
*          reference and is kept only if no identical (mvx, mvy, ref_idx)
*          candidate is already in the list.
*
*  @param[in]  ps_cluster_base : array of i4_num_clusters clusters
*
*  @param[out] ps_merge_cand : candidate list, filled from index 0. At most
*              MAX_MERGE_CANDTS entries are written.
*
*  @param[in]  pps_range_prms : MV range per reference, indexed by the
*              cluster's ref_id
*
*  @param[in]  pu1_refid_to_pred_dir_list : per-reference flag, indexed by the
*              cluster's ref_id. A cluster is used when u1_pred_dir equals the
*              logical negation of its entry.
*
*  @param[in]  i4_num_clusters : number of entries in ps_cluster_base
*
*  @param[in]  u1_pred_dir : prediction direction being populated
*
*  @return Number of candidates present in ps_merge_cand
********************************************************************************
*/
static WORD32 hme_add_clustered_mvs_as_merge_cands(
    cluster_data_t *ps_cluster_base,
    search_node_t *ps_merge_cand,
    range_prms_t **pps_range_prms,
    U08 *pu1_refid_to_pred_dir_list,
    WORD32 i4_num_clusters,
    U08 u1_pred_dir)
{
    WORD32 i, j, k;
    WORD32 i4_num_cands_added = 0;
    WORD32 i4_num_mvs_in_cluster;

    for(i = 0; i < i4_num_clusters; i++)
    {
        cluster_data_t *ps_data = &ps_cluster_base[i];

        if(u1_pred_dir != !pu1_refid_to_pred_dir_list[ps_data->ref_id])
        {
            continue;
        }

        i4_num_mvs_in_cluster = ps_data->num_mvs;

        for(j = 0; j < i4_num_mvs_in_cluster; j++)
        {
            search_node_t *ps_new_cand;

            /* The slot at index i4_num_cands_added is used as scratch before */
            /* the duplicate check, so stop before it can fall outside a list */
            /* of MAX_MERGE_CANDTS entries (the size the caller in this file  */
            /* declares for its candidate array).                             */
            if(i4_num_cands_added >= MAX_MERGE_CANDTS)
            {
                return i4_num_cands_added;
            }

            ps_new_cand = &ps_merge_cand[i4_num_cands_added];

            ps_new_cand->s_mv.i2_mvx = ps_data->as_mv[j].mvx;
            ps_new_cand->s_mv.i2_mvy = ps_data->as_mv[j].mvy;
            ps_new_cand->i1_ref_idx = ps_data->ref_id;

            CLIP_MV_WITHIN_RANGE(
                ps_new_cand->s_mv.i2_mvx,
                ps_new_cand->s_mv.i2_mvy,
                pps_range_prms[ps_data->ref_id],
                0,
                0,
                0);

            /* Compare against the clipped MV that was just stored. The list */
            /* holds clipped MVs, so comparing against the unclipped cluster */
            /* MV would let MVs that clip to the same value all be kept.     */
            for(k = 0; k < i4_num_cands_added; k++)
            {
                if((ps_merge_cand[k].s_mv.i2_mvx == ps_new_cand->s_mv.i2_mvx) &&
                   (ps_merge_cand[k].s_mv.i2_mvy == ps_new_cand->s_mv.i2_mvy) &&
                   (ps_merge_cand[k].i1_ref_idx == ps_new_cand->i1_ref_idx))
                {
                    break;
                }
            }

            /* No match found: the scratch slot becomes a real candidate */
            if(k == i4_num_cands_added)
            {
                i4_num_cands_added++;
            }
        }
    }

    return i4_num_cands_added;
}
273*c83a76b0SSuyog Pawar
hme_add_me_best_as_merge_cands(search_results_t ** pps_child_data_array,inter_cu_results_t * ps_8x8cu_results,search_node_t * ps_merge_cand,range_prms_t ** pps_range_prms,U08 * pu1_refid_to_pred_dir_list,S08 * pi1_past_list,S08 * pi1_future_list,BLK_SIZE_T e_blk_size,ME_QUALITY_PRESETS_T e_quality_preset,S32 i4_num_cands_added,U08 u1_pred_dir)274*c83a76b0SSuyog Pawar static WORD32 hme_add_me_best_as_merge_cands(
275*c83a76b0SSuyog Pawar search_results_t **pps_child_data_array,
276*c83a76b0SSuyog Pawar inter_cu_results_t *ps_8x8cu_results,
277*c83a76b0SSuyog Pawar search_node_t *ps_merge_cand,
278*c83a76b0SSuyog Pawar range_prms_t **pps_range_prms,
279*c83a76b0SSuyog Pawar U08 *pu1_refid_to_pred_dir_list,
280*c83a76b0SSuyog Pawar S08 *pi1_past_list,
281*c83a76b0SSuyog Pawar S08 *pi1_future_list,
282*c83a76b0SSuyog Pawar BLK_SIZE_T e_blk_size,
283*c83a76b0SSuyog Pawar ME_QUALITY_PRESETS_T e_quality_preset,
284*c83a76b0SSuyog Pawar S32 i4_num_cands_added,
285*c83a76b0SSuyog Pawar U08 u1_pred_dir)
286*c83a76b0SSuyog Pawar {
287*c83a76b0SSuyog Pawar WORD32 i, j, k;
288*c83a76b0SSuyog Pawar WORD32 i4_max_cands_to_add;
289*c83a76b0SSuyog Pawar
290*c83a76b0SSuyog Pawar WORD32 i4_result_id = 0;
291*c83a76b0SSuyog Pawar
292*c83a76b0SSuyog Pawar ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size));
293*c83a76b0SSuyog Pawar ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size));
294*c83a76b0SSuyog Pawar ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size));
295*c83a76b0SSuyog Pawar ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size));
296*c83a76b0SSuyog Pawar
297*c83a76b0SSuyog Pawar switch(e_quality_preset)
298*c83a76b0SSuyog Pawar {
299*c83a76b0SSuyog Pawar case ME_PRISTINE_QUALITY:
300*c83a76b0SSuyog Pawar {
301*c83a76b0SSuyog Pawar i4_max_cands_to_add = MAX_MERGE_CANDTS;
302*c83a76b0SSuyog Pawar
303*c83a76b0SSuyog Pawar break;
304*c83a76b0SSuyog Pawar }
305*c83a76b0SSuyog Pawar case ME_HIGH_QUALITY:
306*c83a76b0SSuyog Pawar {
307*c83a76b0SSuyog Pawar /* All 4 children are split and each grandchild contributes an MV */
308*c83a76b0SSuyog Pawar /* and 2 best results per grandchild */
309*c83a76b0SSuyog Pawar i4_max_cands_to_add = 4 * 4 * 2;
310*c83a76b0SSuyog Pawar
311*c83a76b0SSuyog Pawar break;
312*c83a76b0SSuyog Pawar }
313*c83a76b0SSuyog Pawar case ME_MEDIUM_SPEED:
314*c83a76b0SSuyog Pawar {
315*c83a76b0SSuyog Pawar i4_max_cands_to_add = 4 * 2 * 2;
316*c83a76b0SSuyog Pawar
317*c83a76b0SSuyog Pawar break;
318*c83a76b0SSuyog Pawar }
319*c83a76b0SSuyog Pawar case ME_HIGH_SPEED:
320*c83a76b0SSuyog Pawar case ME_XTREME_SPEED:
321*c83a76b0SSuyog Pawar case ME_XTREME_SPEED_25:
322*c83a76b0SSuyog Pawar {
323*c83a76b0SSuyog Pawar i4_max_cands_to_add = 4 * 2 * 1;
324*c83a76b0SSuyog Pawar
325*c83a76b0SSuyog Pawar break;
326*c83a76b0SSuyog Pawar }
327*c83a76b0SSuyog Pawar }
328*c83a76b0SSuyog Pawar
329*c83a76b0SSuyog Pawar while(i4_result_id < 4)
330*c83a76b0SSuyog Pawar {
331*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
332*c83a76b0SSuyog Pawar {
333*c83a76b0SSuyog Pawar inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results;
334*c83a76b0SSuyog Pawar inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2];
335*c83a76b0SSuyog Pawar
336*c83a76b0SSuyog Pawar if(!pps_child_data_array[i]->u1_split_flag)
337*c83a76b0SSuyog Pawar {
338*c83a76b0SSuyog Pawar part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id];
339*c83a76b0SSuyog Pawar
340*c83a76b0SSuyog Pawar if(ps_child_data->u1_num_best_results <= i4_result_id)
341*c83a76b0SSuyog Pawar {
342*c83a76b0SSuyog Pawar continue;
343*c83a76b0SSuyog Pawar }
344*c83a76b0SSuyog Pawar
345*c83a76b0SSuyog Pawar if(ps_data->as_pu_results->pu.b1_intra_flag)
346*c83a76b0SSuyog Pawar {
347*c83a76b0SSuyog Pawar continue;
348*c83a76b0SSuyog Pawar }
349*c83a76b0SSuyog Pawar
350*c83a76b0SSuyog Pawar for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++)
351*c83a76b0SSuyog Pawar {
352*c83a76b0SSuyog Pawar mv_t *ps_mv;
353*c83a76b0SSuyog Pawar
354*c83a76b0SSuyog Pawar S08 i1_ref_idx;
355*c83a76b0SSuyog Pawar
356*c83a76b0SSuyog Pawar pu_t *ps_pu = &ps_data->as_pu_results[j].pu;
357*c83a76b0SSuyog Pawar
358*c83a76b0SSuyog Pawar if(u1_pred_dir !=
359*c83a76b0SSuyog Pawar ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
360*c83a76b0SSuyog Pawar {
361*c83a76b0SSuyog Pawar continue;
362*c83a76b0SSuyog Pawar }
363*c83a76b0SSuyog Pawar
364*c83a76b0SSuyog Pawar if(u1_pred_dir)
365*c83a76b0SSuyog Pawar {
366*c83a76b0SSuyog Pawar ps_mv = &ps_pu->mv.s_l1_mv;
367*c83a76b0SSuyog Pawar i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
368*c83a76b0SSuyog Pawar }
369*c83a76b0SSuyog Pawar else
370*c83a76b0SSuyog Pawar {
371*c83a76b0SSuyog Pawar ps_mv = &ps_pu->mv.s_l0_mv;
372*c83a76b0SSuyog Pawar i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
373*c83a76b0SSuyog Pawar }
374*c83a76b0SSuyog Pawar
375*c83a76b0SSuyog Pawar if(-1 == i1_ref_idx)
376*c83a76b0SSuyog Pawar {
377*c83a76b0SSuyog Pawar continue;
378*c83a76b0SSuyog Pawar }
379*c83a76b0SSuyog Pawar
380*c83a76b0SSuyog Pawar ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
381*c83a76b0SSuyog Pawar ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
382*c83a76b0SSuyog Pawar ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
383*c83a76b0SSuyog Pawar
384*c83a76b0SSuyog Pawar CLIP_MV_WITHIN_RANGE(
385*c83a76b0SSuyog Pawar ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
386*c83a76b0SSuyog Pawar ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
387*c83a76b0SSuyog Pawar pps_range_prms[i1_ref_idx],
388*c83a76b0SSuyog Pawar 0,
389*c83a76b0SSuyog Pawar 0,
390*c83a76b0SSuyog Pawar 0);
391*c83a76b0SSuyog Pawar
392*c83a76b0SSuyog Pawar for(k = 0; k < i4_num_cands_added; k++)
393*c83a76b0SSuyog Pawar {
394*c83a76b0SSuyog Pawar if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
395*c83a76b0SSuyog Pawar (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
396*c83a76b0SSuyog Pawar (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
397*c83a76b0SSuyog Pawar {
398*c83a76b0SSuyog Pawar break;
399*c83a76b0SSuyog Pawar }
400*c83a76b0SSuyog Pawar }
401*c83a76b0SSuyog Pawar
402*c83a76b0SSuyog Pawar if(k == i4_num_cands_added)
403*c83a76b0SSuyog Pawar {
404*c83a76b0SSuyog Pawar i4_num_cands_added++;
405*c83a76b0SSuyog Pawar
406*c83a76b0SSuyog Pawar if(i4_max_cands_to_add <= i4_num_cands_added)
407*c83a76b0SSuyog Pawar {
408*c83a76b0SSuyog Pawar return i4_num_cands_added;
409*c83a76b0SSuyog Pawar }
410*c83a76b0SSuyog Pawar }
411*c83a76b0SSuyog Pawar }
412*c83a76b0SSuyog Pawar }
413*c83a76b0SSuyog Pawar else
414*c83a76b0SSuyog Pawar {
415*c83a76b0SSuyog Pawar for(j = 0; j < 4; j++)
416*c83a76b0SSuyog Pawar {
417*c83a76b0SSuyog Pawar mv_t *ps_mv;
418*c83a76b0SSuyog Pawar
419*c83a76b0SSuyog Pawar S08 i1_ref_idx;
420*c83a76b0SSuyog Pawar
421*c83a76b0SSuyog Pawar part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results;
422*c83a76b0SSuyog Pawar pu_t *ps_pu = &ps_data->as_pu_results[0].pu;
423*c83a76b0SSuyog Pawar
424*c83a76b0SSuyog Pawar ASSERT(ps_data->u1_part_type == PRT_2Nx2N);
425*c83a76b0SSuyog Pawar
426*c83a76b0SSuyog Pawar if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id)
427*c83a76b0SSuyog Pawar {
428*c83a76b0SSuyog Pawar continue;
429*c83a76b0SSuyog Pawar }
430*c83a76b0SSuyog Pawar
431*c83a76b0SSuyog Pawar if(ps_data->as_pu_results->pu.b1_intra_flag)
432*c83a76b0SSuyog Pawar {
433*c83a76b0SSuyog Pawar continue;
434*c83a76b0SSuyog Pawar }
435*c83a76b0SSuyog Pawar
436*c83a76b0SSuyog Pawar if(u1_pred_dir !=
437*c83a76b0SSuyog Pawar ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
438*c83a76b0SSuyog Pawar {
439*c83a76b0SSuyog Pawar continue;
440*c83a76b0SSuyog Pawar }
441*c83a76b0SSuyog Pawar
442*c83a76b0SSuyog Pawar if(u1_pred_dir)
443*c83a76b0SSuyog Pawar {
444*c83a76b0SSuyog Pawar ps_mv = &ps_pu->mv.s_l1_mv;
445*c83a76b0SSuyog Pawar i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
446*c83a76b0SSuyog Pawar }
447*c83a76b0SSuyog Pawar else
448*c83a76b0SSuyog Pawar {
449*c83a76b0SSuyog Pawar ps_mv = &ps_pu->mv.s_l0_mv;
450*c83a76b0SSuyog Pawar i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
451*c83a76b0SSuyog Pawar }
452*c83a76b0SSuyog Pawar
453*c83a76b0SSuyog Pawar ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
454*c83a76b0SSuyog Pawar ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
455*c83a76b0SSuyog Pawar ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
456*c83a76b0SSuyog Pawar
457*c83a76b0SSuyog Pawar CLIP_MV_WITHIN_RANGE(
458*c83a76b0SSuyog Pawar ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
459*c83a76b0SSuyog Pawar ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
460*c83a76b0SSuyog Pawar pps_range_prms[i1_ref_idx],
461*c83a76b0SSuyog Pawar 0,
462*c83a76b0SSuyog Pawar 0,
463*c83a76b0SSuyog Pawar 0);
464*c83a76b0SSuyog Pawar
465*c83a76b0SSuyog Pawar for(k = 0; k < i4_num_cands_added; k++)
466*c83a76b0SSuyog Pawar {
467*c83a76b0SSuyog Pawar if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
468*c83a76b0SSuyog Pawar (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
469*c83a76b0SSuyog Pawar (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
470*c83a76b0SSuyog Pawar {
471*c83a76b0SSuyog Pawar break;
472*c83a76b0SSuyog Pawar }
473*c83a76b0SSuyog Pawar }
474*c83a76b0SSuyog Pawar
475*c83a76b0SSuyog Pawar if(k == i4_num_cands_added)
476*c83a76b0SSuyog Pawar {
477*c83a76b0SSuyog Pawar i4_num_cands_added++;
478*c83a76b0SSuyog Pawar
479*c83a76b0SSuyog Pawar if(i4_max_cands_to_add <= i4_num_cands_added)
480*c83a76b0SSuyog Pawar {
481*c83a76b0SSuyog Pawar return i4_num_cands_added;
482*c83a76b0SSuyog Pawar }
483*c83a76b0SSuyog Pawar }
484*c83a76b0SSuyog Pawar }
485*c83a76b0SSuyog Pawar }
486*c83a76b0SSuyog Pawar }
487*c83a76b0SSuyog Pawar
488*c83a76b0SSuyog Pawar i4_result_id++;
489*c83a76b0SSuyog Pawar }
490*c83a76b0SSuyog Pawar
491*c83a76b0SSuyog Pawar return i4_num_cands_added;
492*c83a76b0SSuyog Pawar }
493*c83a76b0SSuyog Pawar
hme_add_cands_for_merge_eval(ctb_cluster_info_t * ps_cluster_info,search_results_t ** pps_child_data_array,inter_cu_results_t * ps_8x8cu_results,range_prms_t ** pps_range_prms,search_node_t * ps_merge_cand,U08 * pu1_refid_to_pred_dir_list,S08 * pi1_past_list,S08 * pi1_future_list,ME_QUALITY_PRESETS_T e_quality_preset,BLK_SIZE_T e_blk_size,U08 u1_pred_dir,U08 u1_blk_id)494*c83a76b0SSuyog Pawar WORD32 hme_add_cands_for_merge_eval(
495*c83a76b0SSuyog Pawar ctb_cluster_info_t *ps_cluster_info,
496*c83a76b0SSuyog Pawar search_results_t **pps_child_data_array,
497*c83a76b0SSuyog Pawar inter_cu_results_t *ps_8x8cu_results,
498*c83a76b0SSuyog Pawar range_prms_t **pps_range_prms,
499*c83a76b0SSuyog Pawar search_node_t *ps_merge_cand,
500*c83a76b0SSuyog Pawar U08 *pu1_refid_to_pred_dir_list,
501*c83a76b0SSuyog Pawar S08 *pi1_past_list,
502*c83a76b0SSuyog Pawar S08 *pi1_future_list,
503*c83a76b0SSuyog Pawar ME_QUALITY_PRESETS_T e_quality_preset,
504*c83a76b0SSuyog Pawar BLK_SIZE_T e_blk_size,
505*c83a76b0SSuyog Pawar U08 u1_pred_dir,
506*c83a76b0SSuyog Pawar U08 u1_blk_id)
507*c83a76b0SSuyog Pawar {
508*c83a76b0SSuyog Pawar WORD32 i4_num_cands_added = 0;
509*c83a76b0SSuyog Pawar
510*c83a76b0SSuyog Pawar if(ME_PRISTINE_QUALITY == e_quality_preset)
511*c83a76b0SSuyog Pawar {
512*c83a76b0SSuyog Pawar cluster_data_t *ps_cluster_primo;
513*c83a76b0SSuyog Pawar
514*c83a76b0SSuyog Pawar WORD32 i4_num_clusters;
515*c83a76b0SSuyog Pawar
516*c83a76b0SSuyog Pawar if(BLK_32x32 == e_blk_size)
517*c83a76b0SSuyog Pawar {
518*c83a76b0SSuyog Pawar ps_cluster_primo = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data;
519*c83a76b0SSuyog Pawar i4_num_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters;
520*c83a76b0SSuyog Pawar }
521*c83a76b0SSuyog Pawar else
522*c83a76b0SSuyog Pawar {
523*c83a76b0SSuyog Pawar ps_cluster_primo = ps_cluster_info->ps_64x64_blk->as_cluster_data;
524*c83a76b0SSuyog Pawar i4_num_clusters = ps_cluster_info->ps_64x64_blk->num_clusters;
525*c83a76b0SSuyog Pawar }
526*c83a76b0SSuyog Pawar
527*c83a76b0SSuyog Pawar i4_num_cands_added = hme_add_clustered_mvs_as_merge_cands(
528*c83a76b0SSuyog Pawar ps_cluster_primo,
529*c83a76b0SSuyog Pawar ps_merge_cand,
530*c83a76b0SSuyog Pawar pps_range_prms,
531*c83a76b0SSuyog Pawar pu1_refid_to_pred_dir_list,
532*c83a76b0SSuyog Pawar i4_num_clusters,
533*c83a76b0SSuyog Pawar u1_pred_dir);
534*c83a76b0SSuyog Pawar }
535*c83a76b0SSuyog Pawar
536*c83a76b0SSuyog Pawar i4_num_cands_added = hme_add_me_best_as_merge_cands(
537*c83a76b0SSuyog Pawar pps_child_data_array,
538*c83a76b0SSuyog Pawar ps_8x8cu_results,
539*c83a76b0SSuyog Pawar ps_merge_cand,
540*c83a76b0SSuyog Pawar pps_range_prms,
541*c83a76b0SSuyog Pawar pu1_refid_to_pred_dir_list,
542*c83a76b0SSuyog Pawar pi1_past_list,
543*c83a76b0SSuyog Pawar pi1_future_list,
544*c83a76b0SSuyog Pawar e_blk_size,
545*c83a76b0SSuyog Pawar e_quality_preset,
546*c83a76b0SSuyog Pawar i4_num_cands_added,
547*c83a76b0SSuyog Pawar u1_pred_dir);
548*c83a76b0SSuyog Pawar
549*c83a76b0SSuyog Pawar return i4_num_cands_added;
550*c83a76b0SSuyog Pawar }
551*c83a76b0SSuyog Pawar
552*c83a76b0SSuyog Pawar /**
553*c83a76b0SSuyog Pawar ********************************************************************************
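*  @fn     WORD32 hme_pick_eval_merge_candts(hme_merge_prms_t *ps_merge_prms,
*                               hme_subpel_prms_t *ps_subpel_prms,
*                               S32 i4_search_idx,
*                               S32 i4_best_part_type,
*                               S32 i4_is_vert,
*                               wgt_pred_ctxt_t *ps_wt_inp_prms,
*                               S32 i4_frm_qstep,
*                               ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
*                               ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)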
558*c83a76b0SSuyog Pawar *
559*c83a76b0SSuyog Pawar * @brief Given a target partition orientation in the merged CU, and the
560*c83a76b0SSuyog Pawar * partition type of most likely partition this fxn picks up
561*c83a76b0SSuyog Pawar * candidates from the 4 constituent CUs and does refinement search
562*c83a76b0SSuyog Pawar * to identify best results for the merge CU across active partitions
563*c83a76b0SSuyog Pawar *
564*c83a76b0SSuyog Pawar * @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of
565*c83a76b0SSuyog Pawar * these params, the search result structure is also derived and
566*c83a76b0SSuyog Pawar * updated during the search
567*c83a76b0SSuyog Pawar *
568*c83a76b0SSuyog Pawar * @param[in] i1_ref_idx : ID of the buffer within the search results to update.
569*c83a76b0SSuyog Pawar * Will be 0 if all refidx collapsed to one buf, else it'll be 0/1
570*c83a76b0SSuyog Pawar *
571*c83a76b0SSuyog Pawar * @param[in] i4_best_part_type : partition type of potential partition in the
572*c83a76b0SSuyog Pawar * merged CU, -1 if the merge process has not yet been able to
573*c83a76b0SSuyog Pawar * determine this.
574*c83a76b0SSuyog Pawar *
575*c83a76b0SSuyog Pawar * @param[in] i4_is_vert : Whether target partition of merged CU is vertical
576*c83a76b0SSuyog Pawar * orientation or horizontal orientation.
577*c83a76b0SSuyog Pawar *
578*c83a76b0SSuyog Pawar * @return Number of merge candidates
579*c83a76b0SSuyog Pawar ********************************************************************************
580*c83a76b0SSuyog Pawar */
hme_pick_eval_merge_candts(hme_merge_prms_t * ps_merge_prms,hme_subpel_prms_t * ps_subpel_prms,S32 i4_search_idx,S32 i4_best_part_type,S32 i4_is_vert,wgt_pred_ctxt_t * ps_wt_inp_prms,S32 i4_frm_qstep,ihevce_cmn_opt_func_t * ps_cmn_utils_optimised_function_list,ihevce_me_optimised_function_list_t * ps_me_optimised_function_list)581*c83a76b0SSuyog Pawar WORD32 hme_pick_eval_merge_candts(
582*c83a76b0SSuyog Pawar hme_merge_prms_t *ps_merge_prms,
583*c83a76b0SSuyog Pawar hme_subpel_prms_t *ps_subpel_prms,
584*c83a76b0SSuyog Pawar S32 i4_search_idx,
585*c83a76b0SSuyog Pawar S32 i4_best_part_type,
586*c83a76b0SSuyog Pawar S32 i4_is_vert,
587*c83a76b0SSuyog Pawar wgt_pred_ctxt_t *ps_wt_inp_prms,
588*c83a76b0SSuyog Pawar S32 i4_frm_qstep,
589*c83a76b0SSuyog Pawar ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
590*c83a76b0SSuyog Pawar ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
591*c83a76b0SSuyog Pawar {
592*c83a76b0SSuyog Pawar S32 x_off, y_off;
593*c83a76b0SSuyog Pawar search_node_t *ps_search_node;
594*c83a76b0SSuyog Pawar S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
595*c83a76b0SSuyog Pawar S32 i4_num_valid_parts;
596*c83a76b0SSuyog Pawar pred_ctxt_t *ps_pred_ctxt;
597*c83a76b0SSuyog Pawar
598*c83a76b0SSuyog Pawar search_node_t as_merge_unique_node[MAX_MERGE_CANDTS];
599*c83a76b0SSuyog Pawar S32 num_unique_nodes_cu_merge = 0;
600*c83a76b0SSuyog Pawar
601*c83a76b0SSuyog Pawar search_results_t *ps_search_results = ps_merge_prms->ps_results_merge;
602*c83a76b0SSuyog Pawar CU_SIZE_T e_cu_size = ps_search_results->e_cu_size;
603*c83a76b0SSuyog Pawar S32 i4_part_mask = ps_search_results->i4_part_mask;
604*c83a76b0SSuyog Pawar
605*c83a76b0SSuyog Pawar search_results_t *aps_child_results[4];
606*c83a76b0SSuyog Pawar layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt;
607*c83a76b0SSuyog Pawar
608*c83a76b0SSuyog Pawar S32 i4_ref_stride, i, j;
609*c83a76b0SSuyog Pawar result_upd_prms_t s_result_prms;
610*c83a76b0SSuyog Pawar
611*c83a76b0SSuyog Pawar BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
612*c83a76b0SSuyog Pawar S32 i4_offset;
613*c83a76b0SSuyog Pawar
614*c83a76b0SSuyog Pawar /*************************************************************************/
615*c83a76b0SSuyog Pawar /* Function pointer for SAD/SATD, array and prms structure to pass to */
616*c83a76b0SSuyog Pawar /* This function */
617*c83a76b0SSuyog Pawar /*************************************************************************/
618*c83a76b0SSuyog Pawar PF_SAD_FXN_T pf_err_compute;
619*c83a76b0SSuyog Pawar S32 ai4_sad_grid[9][17];
620*c83a76b0SSuyog Pawar err_prms_t s_err_prms;
621*c83a76b0SSuyog Pawar
622*c83a76b0SSuyog Pawar /*************************************************************************/
623*c83a76b0SSuyog Pawar /* Allowed MV RANGE */
624*c83a76b0SSuyog Pawar /*************************************************************************/
625*c83a76b0SSuyog Pawar range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range;
626*c83a76b0SSuyog Pawar PF_INTERP_FXN_T pf_qpel_interp;
627*c83a76b0SSuyog Pawar PF_MV_COST_FXN pf_mv_cost_compute;
628*c83a76b0SSuyog Pawar WORD32 pred_lx;
629*c83a76b0SSuyog Pawar U08 *apu1_hpel_ref[4];
630*c83a76b0SSuyog Pawar
631*c83a76b0SSuyog Pawar interp_prms_t s_interp_prms;
632*c83a76b0SSuyog Pawar S32 i4_interp_buf_id;
633*c83a76b0SSuyog Pawar
634*c83a76b0SSuyog Pawar S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
635*c83a76b0SSuyog Pawar S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
636*c83a76b0SSuyog Pawar
637*c83a76b0SSuyog Pawar /* Sanity checks */
638*c83a76b0SSuyog Pawar ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32));
639*c83a76b0SSuyog Pawar
640*c83a76b0SSuyog Pawar s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
641*c83a76b0SSuyog Pawar
642*c83a76b0SSuyog Pawar /* Initialize all the ptrs to child CUs for merge decision */
643*c83a76b0SSuyog Pawar aps_child_results[0] = ps_merge_prms->ps_results_tl;
644*c83a76b0SSuyog Pawar aps_child_results[1] = ps_merge_prms->ps_results_tr;
645*c83a76b0SSuyog Pawar aps_child_results[2] = ps_merge_prms->ps_results_bl;
646*c83a76b0SSuyog Pawar aps_child_results[3] = ps_merge_prms->ps_results_br;
647*c83a76b0SSuyog Pawar
648*c83a76b0SSuyog Pawar num_unique_nodes_cu_merge = 0;
649*c83a76b0SSuyog Pawar
650*c83a76b0SSuyog Pawar pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
651*c83a76b0SSuyog Pawar
652*c83a76b0SSuyog Pawar if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
653*c83a76b0SSuyog Pawar {
654*c83a76b0SSuyog Pawar num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval(
655*c83a76b0SSuyog Pawar ps_merge_prms->ps_cluster_info,
656*c83a76b0SSuyog Pawar aps_child_results,
657*c83a76b0SSuyog Pawar ps_merge_prms->ps_8x8_cu_results,
658*c83a76b0SSuyog Pawar pps_range_prms,
659*c83a76b0SSuyog Pawar as_merge_unique_node,
660*c83a76b0SSuyog Pawar ps_search_results->pu1_is_past,
661*c83a76b0SSuyog Pawar ps_merge_prms->pi1_past_list,
662*c83a76b0SSuyog Pawar ps_merge_prms->pi1_future_list,
663*c83a76b0SSuyog Pawar ps_merge_prms->e_quality_preset,
664*c83a76b0SSuyog Pawar e_blk_size,
665*c83a76b0SSuyog Pawar i4_search_idx,
666*c83a76b0SSuyog Pawar (ps_merge_prms->ps_results_merge->u1_x_off >> 5) +
667*c83a76b0SSuyog Pawar (ps_merge_prms->ps_results_merge->u1_y_off >> 4));
668*c83a76b0SSuyog Pawar }
669*c83a76b0SSuyog Pawar else
670*c83a76b0SSuyog Pawar {
671*c83a76b0SSuyog Pawar /*************************************************************************/
672*c83a76b0SSuyog Pawar /* Populate the list of unique search nodes in the child CUs for merge */
673*c83a76b0SSuyog Pawar /* evaluation */
674*c83a76b0SSuyog Pawar /*************************************************************************/
675*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
676*c83a76b0SSuyog Pawar {
677*c83a76b0SSuyog Pawar search_node_t s_search_node;
678*c83a76b0SSuyog Pawar
679*c83a76b0SSuyog Pawar PART_TYPE_T e_part_type;
680*c83a76b0SSuyog Pawar PART_ID_T e_part_id;
681*c83a76b0SSuyog Pawar
682*c83a76b0SSuyog Pawar WORD32 part_num;
683*c83a76b0SSuyog Pawar
684*c83a76b0SSuyog Pawar search_results_t *ps_child = aps_child_results[i];
685*c83a76b0SSuyog Pawar
686*c83a76b0SSuyog Pawar if(ps_child->ps_cu_results->u1_num_best_results)
687*c83a76b0SSuyog Pawar {
688*c83a76b0SSuyog Pawar if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
689*c83a76b0SSuyog Pawar (1 == ps_child->ps_cu_results->u1_num_best_results)))
690*c83a76b0SSuyog Pawar {
691*c83a76b0SSuyog Pawar e_part_type =
692*c83a76b0SSuyog Pawar (PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type;
693*c83a76b0SSuyog Pawar
694*c83a76b0SSuyog Pawar ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
695*c83a76b0SSuyog Pawar
696*c83a76b0SSuyog Pawar /* Insert mvs of NxN partitions. */
697*c83a76b0SSuyog Pawar for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
698*c83a76b0SSuyog Pawar part_num++)
699*c83a76b0SSuyog Pawar {
700*c83a76b0SSuyog Pawar e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
701*c83a76b0SSuyog Pawar
702*c83a76b0SSuyog Pawar if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1)
703*c83a76b0SSuyog Pawar {
704*c83a76b0SSuyog Pawar s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id];
705*c83a76b0SSuyog Pawar if(s_search_node.s_mv.i2_mvx != INTRA_MV)
706*c83a76b0SSuyog Pawar {
707*c83a76b0SSuyog Pawar CLIP_MV_WITHIN_RANGE(
708*c83a76b0SSuyog Pawar s_search_node.s_mv.i2_mvx,
709*c83a76b0SSuyog Pawar s_search_node.s_mv.i2_mvy,
710*c83a76b0SSuyog Pawar pps_range_prms[s_search_node.i1_ref_idx],
711*c83a76b0SSuyog Pawar 0,
712*c83a76b0SSuyog Pawar 0,
713*c83a76b0SSuyog Pawar 0);
714*c83a76b0SSuyog Pawar
715*c83a76b0SSuyog Pawar INSERT_NEW_NODE_NOMAP(
716*c83a76b0SSuyog Pawar as_merge_unique_node,
717*c83a76b0SSuyog Pawar num_unique_nodes_cu_merge,
718*c83a76b0SSuyog Pawar s_search_node,
719*c83a76b0SSuyog Pawar 1);
720*c83a76b0SSuyog Pawar }
721*c83a76b0SSuyog Pawar }
722*c83a76b0SSuyog Pawar }
723*c83a76b0SSuyog Pawar }
724*c83a76b0SSuyog Pawar }
725*c83a76b0SSuyog Pawar else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)]
726*c83a76b0SSuyog Pawar .ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
727*c83a76b0SSuyog Pawar (1 == ps_merge_prms->ps_results_grandchild[(i << 2)]
728*c83a76b0SSuyog Pawar .ps_cu_results->u1_num_best_results)))
729*c83a76b0SSuyog Pawar {
730*c83a76b0SSuyog Pawar search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)];
731*c83a76b0SSuyog Pawar
732*c83a76b0SSuyog Pawar for(j = 0; j < 4; j++)
733*c83a76b0SSuyog Pawar {
734*c83a76b0SSuyog Pawar e_part_type = (PART_TYPE_T)ps_results_root[j]
735*c83a76b0SSuyog Pawar .ps_cu_results->ps_best_results[0]
736*c83a76b0SSuyog Pawar .u1_part_type;
737*c83a76b0SSuyog Pawar
738*c83a76b0SSuyog Pawar ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
739*c83a76b0SSuyog Pawar
740*c83a76b0SSuyog Pawar /* Insert mvs of NxN partitions. */
741*c83a76b0SSuyog Pawar for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
742*c83a76b0SSuyog Pawar part_num++)
743*c83a76b0SSuyog Pawar {
744*c83a76b0SSuyog Pawar e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
745*c83a76b0SSuyog Pawar
746*c83a76b0SSuyog Pawar if((ps_results_root[j]
747*c83a76b0SSuyog Pawar .aps_part_results[i4_search_idx][e_part_id]
748*c83a76b0SSuyog Pawar ->i1_ref_idx != -1) &&
749*c83a76b0SSuyog Pawar (!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu
750*c83a76b0SSuyog Pawar .b1_intra_flag))
751*c83a76b0SSuyog Pawar {
752*c83a76b0SSuyog Pawar s_search_node =
753*c83a76b0SSuyog Pawar *ps_results_root[j].aps_part_results[i4_search_idx][e_part_id];
754*c83a76b0SSuyog Pawar if(s_search_node.s_mv.i2_mvx != INTRA_MV)
755*c83a76b0SSuyog Pawar {
756*c83a76b0SSuyog Pawar CLIP_MV_WITHIN_RANGE(
757*c83a76b0SSuyog Pawar s_search_node.s_mv.i2_mvx,
758*c83a76b0SSuyog Pawar s_search_node.s_mv.i2_mvy,
759*c83a76b0SSuyog Pawar pps_range_prms[s_search_node.i1_ref_idx],
760*c83a76b0SSuyog Pawar 0,
761*c83a76b0SSuyog Pawar 0,
762*c83a76b0SSuyog Pawar 0);
763*c83a76b0SSuyog Pawar
764*c83a76b0SSuyog Pawar INSERT_NEW_NODE_NOMAP(
765*c83a76b0SSuyog Pawar as_merge_unique_node,
766*c83a76b0SSuyog Pawar num_unique_nodes_cu_merge,
767*c83a76b0SSuyog Pawar s_search_node,
768*c83a76b0SSuyog Pawar 1);
769*c83a76b0SSuyog Pawar }
770*c83a76b0SSuyog Pawar }
771*c83a76b0SSuyog Pawar }
772*c83a76b0SSuyog Pawar }
773*c83a76b0SSuyog Pawar }
774*c83a76b0SSuyog Pawar }
775*c83a76b0SSuyog Pawar }
776*c83a76b0SSuyog Pawar
777*c83a76b0SSuyog Pawar if(0 == num_unique_nodes_cu_merge)
778*c83a76b0SSuyog Pawar {
779*c83a76b0SSuyog Pawar return 0;
780*c83a76b0SSuyog Pawar }
781*c83a76b0SSuyog Pawar
782*c83a76b0SSuyog Pawar /*************************************************************************/
783*c83a76b0SSuyog Pawar /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/
784*c83a76b0SSuyog Pawar /* fixed through this subpel refinement for this partition. */
785*c83a76b0SSuyog Pawar /* Note, we do not enable grid sads since one pt is evaluated per node */
786*c83a76b0SSuyog Pawar /* Hence, part mask is also nearly dont care and we use 2Nx2N enabled. */
787*c83a76b0SSuyog Pawar /*************************************************************************/
788*c83a76b0SSuyog Pawar i4_part_mask = ps_search_results->i4_part_mask;
789*c83a76b0SSuyog Pawar
790*c83a76b0SSuyog Pawar /* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */
791*c83a76b0SSuyog Pawar if(ps_subpel_prms->i4_use_satd)
792*c83a76b0SSuyog Pawar {
793*c83a76b0SSuyog Pawar if(BLK_32x32 == e_blk_size)
794*c83a76b0SSuyog Pawar {
795*c83a76b0SSuyog Pawar pf_err_compute = hme_evalsatd_pt_pu_32x32;
796*c83a76b0SSuyog Pawar }
797*c83a76b0SSuyog Pawar else
798*c83a76b0SSuyog Pawar {
799*c83a76b0SSuyog Pawar pf_err_compute = hme_evalsatd_pt_pu_64x64;
800*c83a76b0SSuyog Pawar }
801*c83a76b0SSuyog Pawar }
802*c83a76b0SSuyog Pawar else
803*c83a76b0SSuyog Pawar {
804*c83a76b0SSuyog Pawar pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM;
805*c83a76b0SSuyog Pawar }
806*c83a76b0SSuyog Pawar
807*c83a76b0SSuyog Pawar i4_ref_stride = ps_curr_layer->i4_rec_stride;
808*c83a76b0SSuyog Pawar
809*c83a76b0SSuyog Pawar x_off = ps_merge_prms->ps_results_tl->u1_x_off;
810*c83a76b0SSuyog Pawar y_off = ps_merge_prms->ps_results_tl->u1_y_off;
811*c83a76b0SSuyog Pawar i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride);
812*c83a76b0SSuyog Pawar
813*c83a76b0SSuyog Pawar /*************************************************************************/
814*c83a76b0SSuyog Pawar /* This array stores the ids of the partitions whose */
815*c83a76b0SSuyog Pawar /* SADs are updated. Since the partitions whose SADs are updated may not */
816*c83a76b0SSuyog Pawar /* be in contiguous order, we supply another level of indirection. */
817*c83a76b0SSuyog Pawar /*************************************************************************/
818*c83a76b0SSuyog Pawar i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
819*c83a76b0SSuyog Pawar
820*c83a76b0SSuyog Pawar /* Initialize result params used for partition update */
821*c83a76b0SSuyog Pawar s_result_prms.pf_mv_cost_compute = NULL;
822*c83a76b0SSuyog Pawar s_result_prms.ps_search_results = ps_search_results;
823*c83a76b0SSuyog Pawar s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
824*c83a76b0SSuyog Pawar s_result_prms.i1_ref_idx = i4_search_idx;
825*c83a76b0SSuyog Pawar s_result_prms.i4_part_mask = i4_part_mask;
826*c83a76b0SSuyog Pawar s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
827*c83a76b0SSuyog Pawar s_result_prms.i4_grid_mask = 1;
828*c83a76b0SSuyog Pawar
829*c83a76b0SSuyog Pawar /* One time Initialization of error params used for SAD/SATD compute */
830*c83a76b0SSuyog Pawar s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride;
831*c83a76b0SSuyog Pawar s_err_prms.i4_ref_stride = i4_ref_stride;
832*c83a76b0SSuyog Pawar s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
833*c83a76b0SSuyog Pawar s_err_prms.i4_grid_mask = 1;
834*c83a76b0SSuyog Pawar s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
835*c83a76b0SSuyog Pawar s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
836*c83a76b0SSuyog Pawar s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
837*c83a76b0SSuyog Pawar s_err_prms.i4_step = 1;
838*c83a76b0SSuyog Pawar
839*c83a76b0SSuyog Pawar /*************************************************************************/
840*c83a76b0SSuyog Pawar /* One time preparation of non changing interpolation params. */
841*c83a76b0SSuyog Pawar /*************************************************************************/
842*c83a76b0SSuyog Pawar s_interp_prms.i4_ref_stride = i4_ref_stride;
843*c83a76b0SSuyog Pawar s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
844*c83a76b0SSuyog Pawar s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
845*c83a76b0SSuyog Pawar s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem;
846*c83a76b0SSuyog Pawar s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size];
847*c83a76b0SSuyog Pawar i4_interp_buf_id = 0;
848*c83a76b0SSuyog Pawar
849*c83a76b0SSuyog Pawar pf_qpel_interp = ps_subpel_prms->pf_qpel_interp;
850*c83a76b0SSuyog Pawar
851*c83a76b0SSuyog Pawar /***************************************************************************/
852*c83a76b0SSuyog Pawar /* Compute SATD/SAD for all unique nodes of children CUs to get best merge */
853*c83a76b0SSuyog Pawar /* results */
854*c83a76b0SSuyog Pawar /***************************************************************************/
855*c83a76b0SSuyog Pawar for(i = 0; i < num_unique_nodes_cu_merge; i++)
856*c83a76b0SSuyog Pawar {
857*c83a76b0SSuyog Pawar WORD8 i1_ref_idx;
858*c83a76b0SSuyog Pawar ps_search_node = &as_merge_unique_node[i];
859*c83a76b0SSuyog Pawar
860*c83a76b0SSuyog Pawar /*********************************************************************/
861*c83a76b0SSuyog Pawar /* Compute the base pointer for input, interpolated buffers */
862*c83a76b0SSuyog Pawar /* The base pointers point as follows: */
863*c83a76b0SSuyog Pawar /* fx fy : 0, 0 :: fx, hy : 0, 0.5, hx, fy: 0.5, 0, hx, fy: 0.5, 0.5 */
864*c83a76b0SSuyog Pawar /* To these, we need to add the offset of the current node */
865*c83a76b0SSuyog Pawar /*********************************************************************/
866*c83a76b0SSuyog Pawar i1_ref_idx = ps_search_node->i1_ref_idx;
867*c83a76b0SSuyog Pawar apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
868*c83a76b0SSuyog Pawar apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
869*c83a76b0SSuyog Pawar apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
870*c83a76b0SSuyog Pawar apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;
871*c83a76b0SSuyog Pawar
872*c83a76b0SSuyog Pawar s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];
873*c83a76b0SSuyog Pawar
874*c83a76b0SSuyog Pawar pf_qpel_interp(
875*c83a76b0SSuyog Pawar &s_interp_prms,
876*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx,
877*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy,
878*c83a76b0SSuyog Pawar i4_interp_buf_id);
879*c83a76b0SSuyog Pawar
880*c83a76b0SSuyog Pawar pred_lx = i4_search_idx;
881*c83a76b0SSuyog Pawar ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
882*c83a76b0SSuyog Pawar
883*c83a76b0SSuyog Pawar s_result_prms.u1_pred_lx = pred_lx;
884*c83a76b0SSuyog Pawar s_result_prms.ps_search_node_base = ps_search_node;
885*c83a76b0SSuyog Pawar s_err_prms.pu1_inp =
886*c83a76b0SSuyog Pawar ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride;
887*c83a76b0SSuyog Pawar s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
888*c83a76b0SSuyog Pawar s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
889*c83a76b0SSuyog Pawar
890*c83a76b0SSuyog Pawar /* Carry out the SAD/SATD. This call also does the TU RECURSION.
891*c83a76b0SSuyog Pawar Here the tu recursion logic is restricted with the size of the PU*/
892*c83a76b0SSuyog Pawar pf_err_compute(&s_err_prms);
893*c83a76b0SSuyog Pawar
894*c83a76b0SSuyog Pawar if(ps_subpel_prms->u1_is_cu_noisy &&
895*c83a76b0SSuyog Pawar ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
896*c83a76b0SSuyog Pawar {
897*c83a76b0SSuyog Pawar ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts(
898*c83a76b0SSuyog Pawar s_err_prms.pu1_ref,
899*c83a76b0SSuyog Pawar s_err_prms.i4_ref_stride,
900*c83a76b0SSuyog Pawar ai4_valid_part_ids,
901*c83a76b0SSuyog Pawar ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX,
902*c83a76b0SSuyog Pawar ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
903*c83a76b0SSuyog Pawar s_err_prms.pi4_sad_grid,
904*c83a76b0SSuyog Pawar ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier,
905*c83a76b0SSuyog Pawar ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx],
906*c83a76b0SSuyog Pawar ps_wt_inp_prms->ai4_shift_val[i1_ref_idx],
907*c83a76b0SSuyog Pawar i4_num_valid_parts,
908*c83a76b0SSuyog Pawar ps_wt_inp_prms->wpred_log_wdc,
909*c83a76b0SSuyog Pawar (BLK_32x32 == e_blk_size) ? 32 : 64);
910*c83a76b0SSuyog Pawar }
911*c83a76b0SSuyog Pawar
912*c83a76b0SSuyog Pawar /* Update the mv's */
913*c83a76b0SSuyog Pawar s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx;
914*c83a76b0SSuyog Pawar s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy;
915*c83a76b0SSuyog Pawar
916*c83a76b0SSuyog Pawar /* Update best results */
917*c83a76b0SSuyog Pawar hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms);
918*c83a76b0SSuyog Pawar }
919*c83a76b0SSuyog Pawar
920*c83a76b0SSuyog Pawar /************************************************************************/
921*c83a76b0SSuyog Pawar /* Update mv cost and total cost for each valid partition in the CU */
922*c83a76b0SSuyog Pawar /************************************************************************/
923*c83a76b0SSuyog Pawar for(i = 0; i < TOT_NUM_PARTS; i++)
924*c83a76b0SSuyog Pawar {
925*c83a76b0SSuyog Pawar if(i4_part_mask & (1 << i))
926*c83a76b0SSuyog Pawar {
927*c83a76b0SSuyog Pawar WORD32 j;
928*c83a76b0SSuyog Pawar WORD32 i4_mv_cost;
929*c83a76b0SSuyog Pawar
930*c83a76b0SSuyog Pawar ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i];
931*c83a76b0SSuyog Pawar
932*c83a76b0SSuyog Pawar for(j = 0;
933*c83a76b0SSuyog Pawar j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge);
934*c83a76b0SSuyog Pawar j++)
935*c83a76b0SSuyog Pawar {
936*c83a76b0SSuyog Pawar if(ps_search_node->i1_ref_idx != -1)
937*c83a76b0SSuyog Pawar {
938*c83a76b0SSuyog Pawar pred_lx = i4_search_idx;
939*c83a76b0SSuyog Pawar ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
940*c83a76b0SSuyog Pawar
941*c83a76b0SSuyog Pawar /* Prediction context should now deal with qpel units */
942*c83a76b0SSuyog Pawar HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);
943*c83a76b0SSuyog Pawar
944*c83a76b0SSuyog Pawar ps_search_node->u1_subpel_done = 1;
945*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 1;
946*c83a76b0SSuyog Pawar
947*c83a76b0SSuyog Pawar i4_mv_cost =
948*c83a76b0SSuyog Pawar pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL);
949*c83a76b0SSuyog Pawar
950*c83a76b0SSuyog Pawar ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad;
951*c83a76b0SSuyog Pawar ps_search_node->i4_mv_cost = i4_mv_cost;
952*c83a76b0SSuyog Pawar
953*c83a76b0SSuyog Pawar ps_search_node++;
954*c83a76b0SSuyog Pawar }
955*c83a76b0SSuyog Pawar }
956*c83a76b0SSuyog Pawar }
957*c83a76b0SSuyog Pawar }
958*c83a76b0SSuyog Pawar
959*c83a76b0SSuyog Pawar return num_unique_nodes_cu_merge;
960*c83a76b0SSuyog Pawar }
961*c83a76b0SSuyog Pawar
/* NOTE(review): the name suggests an upper bound on the count of intra parts  */
/* tolerated while attempting a CU merge. Its point of use is outside this     */
/* chunk; presumably it is checked against i4_intra_parts accumulated in       */
/* hme_try_merge_high_speed - confirm before relying on this description.      */
962*c83a76b0SSuyog Pawar #define CU_MERGE_MAX_INTRA_PARTS 4
963*c83a76b0SSuyog Pawar
964*c83a76b0SSuyog Pawar /**
965*c83a76b0SSuyog Pawar ********************************************************************************
966*c83a76b0SSuyog Pawar * @fn hme_try_merge_high_speed
967*c83a76b0SSuyog Pawar *
968*c83a76b0SSuyog Pawar * @brief Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single
969*c83a76b0SSuyog Pawar entity or with partitions for high speed preset
970*c83a76b0SSuyog Pawar *
971*c83a76b0SSuyog Pawar * @param[in,out] hme_merge_prms_t: Params for CU merge
972*c83a76b0SSuyog Pawar *
973*c83a76b0SSuyog Pawar * @return MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT)
974*c83a76b0SSuyog Pawar ********************************************************************************
975*c83a76b0SSuyog Pawar */
hme_try_merge_high_speed(me_ctxt_t * ps_thrd_ctxt,me_frm_ctxt_t * ps_ctxt,ipe_l0_ctb_analyse_for_me_t * ps_cur_ipe_ctb,hme_subpel_prms_t * ps_subpel_prms,hme_merge_prms_t * ps_merge_prms,inter_pu_results_t * ps_pu_results,pu_result_t * ps_pu_result)976*c83a76b0SSuyog Pawar CU_MERGE_RESULT_T hme_try_merge_high_speed(
977*c83a76b0SSuyog Pawar me_ctxt_t *ps_thrd_ctxt,
978*c83a76b0SSuyog Pawar me_frm_ctxt_t *ps_ctxt,
979*c83a76b0SSuyog Pawar ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
980*c83a76b0SSuyog Pawar hme_subpel_prms_t *ps_subpel_prms,
981*c83a76b0SSuyog Pawar hme_merge_prms_t *ps_merge_prms,
982*c83a76b0SSuyog Pawar inter_pu_results_t *ps_pu_results,
983*c83a76b0SSuyog Pawar pu_result_t *ps_pu_result)
984*c83a76b0SSuyog Pawar {
985*c83a76b0SSuyog Pawar search_results_t *ps_results_tl, *ps_results_tr;
986*c83a76b0SSuyog Pawar search_results_t *ps_results_bl, *ps_results_br;
987*c83a76b0SSuyog Pawar
988*c83a76b0SSuyog Pawar S32 i;
989*c83a76b0SSuyog Pawar S32 i4_search_idx;
990*c83a76b0SSuyog Pawar S32 i4_cost_parent;
991*c83a76b0SSuyog Pawar S32 intra_cu_size;
992*c83a76b0SSuyog Pawar ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
993*c83a76b0SSuyog Pawar
994*c83a76b0SSuyog Pawar search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
995*c83a76b0SSuyog Pawar wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
996*c83a76b0SSuyog Pawar
997*c83a76b0SSuyog Pawar S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN;
998*c83a76b0SSuyog Pawar S32 is_vert = 0, i4_best_part_type = -1;
999*c83a76b0SSuyog Pawar S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
1000*c83a76b0SSuyog Pawar S32 i4_cost_children = 0;
1001*c83a76b0SSuyog Pawar S32 i4_frm_qstep = ps_ctxt->frm_qstep;
1002*c83a76b0SSuyog Pawar S32 i4_num_merge_cands_evaluated = 0;
1003*c83a76b0SSuyog Pawar U08 u1_x_off = ps_results_merge->u1_x_off;
1004*c83a76b0SSuyog Pawar U08 u1_y_off = ps_results_merge->u1_y_off;
1005*c83a76b0SSuyog Pawar S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5);
1006*c83a76b0SSuyog Pawar
1007*c83a76b0SSuyog Pawar ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
1008*c83a76b0SSuyog Pawar ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
1009*c83a76b0SSuyog Pawar ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
1010*c83a76b0SSuyog Pawar ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
1011*c83a76b0SSuyog Pawar ps_results_tl = ps_merge_prms->ps_results_tl;
1012*c83a76b0SSuyog Pawar ps_results_tr = ps_merge_prms->ps_results_tr;
1013*c83a76b0SSuyog Pawar ps_results_bl = ps_merge_prms->ps_results_bl;
1014*c83a76b0SSuyog Pawar ps_results_br = ps_merge_prms->ps_results_br;
1015*c83a76b0SSuyog Pawar
1016*c83a76b0SSuyog Pawar if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED)
1017*c83a76b0SSuyog Pawar {
1018*c83a76b0SSuyog Pawar i4_part_mask &= ~ENABLE_AMP;
1019*c83a76b0SSuyog Pawar }
1020*c83a76b0SSuyog Pawar
1021*c83a76b0SSuyog Pawar if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)
1022*c83a76b0SSuyog Pawar {
1023*c83a76b0SSuyog Pawar i4_part_mask &= ~ENABLE_AMP;
1024*c83a76b0SSuyog Pawar
1025*c83a76b0SSuyog Pawar i4_part_mask &= ~ENABLE_SMP;
1026*c83a76b0SSuyog Pawar }
1027*c83a76b0SSuyog Pawar
1028*c83a76b0SSuyog Pawar ps_merge_prms->i4_num_pred_dir_actual = 0;
1029*c83a76b0SSuyog Pawar
1030*c83a76b0SSuyog Pawar /*************************************************************************/
1031*c83a76b0SSuyog Pawar /* The logic for High speed CU merge goes as follows: */
1032*c83a76b0SSuyog Pawar /* */
1033*c83a76b0SSuyog Pawar /* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */
1034*c83a76b0SSuyog Pawar /* exceed 7 */
1035*c83a76b0SSuyog Pawar /* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */
1036*c83a76b0SSuyog Pawar /* are identical */
1037*c83a76b0SSuyog Pawar /* 3. Find the all unique mvs of best partitions of children CUs and */
1038*c83a76b0SSuyog Pawar /* evaluate partial SATDs (all 17 partitions) for each unique mv. If */
1039*c83a76b0SSuyog Pawar /* best parent cost is lower than sum of the best children costs */
1040*c83a76b0SSuyog Pawar /* return CU_MERGE after seeding the best results else return CU_SPLIT*/
1041*c83a76b0SSuyog Pawar /* */
1042*c83a76b0SSuyog Pawar /*************************************************************************/
1043*c83a76b0SSuyog Pawar
1044*c83a76b0SSuyog Pawar /* Count the number of best partitions in child CUs, early exit if > 7 */
1045*c83a76b0SSuyog Pawar if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1046*c83a76b0SSuyog Pawar (CU_32x32 == ps_results_merge->e_cu_size))
1047*c83a76b0SSuyog Pawar {
1048*c83a76b0SSuyog Pawar S32 num_parts_in_32x32 = 0;
1049*c83a76b0SSuyog Pawar WORD32 i4_part_type;
1050*c83a76b0SSuyog Pawar
1051*c83a76b0SSuyog Pawar if(ps_results_tl->u1_split_flag)
1052*c83a76b0SSuyog Pawar {
1053*c83a76b0SSuyog Pawar num_parts_in_32x32 += 4;
1054*c83a76b0SSuyog Pawar
1055*c83a76b0SSuyog Pawar #define COST_INTERCHANGE 0
1056*c83a76b0SSuyog Pawar i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost +
1057*c83a76b0SSuyog Pawar ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost +
1058*c83a76b0SSuyog Pawar ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost +
1059*c83a76b0SSuyog Pawar ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost;
1060*c83a76b0SSuyog Pawar }
1061*c83a76b0SSuyog Pawar else
1062*c83a76b0SSuyog Pawar {
1063*c83a76b0SSuyog Pawar i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type;
1064*c83a76b0SSuyog Pawar num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1065*c83a76b0SSuyog Pawar i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1066*c83a76b0SSuyog Pawar }
1067*c83a76b0SSuyog Pawar
1068*c83a76b0SSuyog Pawar if(ps_results_tr->u1_split_flag)
1069*c83a76b0SSuyog Pawar {
1070*c83a76b0SSuyog Pawar num_parts_in_32x32 += 4;
1071*c83a76b0SSuyog Pawar
1072*c83a76b0SSuyog Pawar i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost +
1073*c83a76b0SSuyog Pawar ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost +
1074*c83a76b0SSuyog Pawar ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost +
1075*c83a76b0SSuyog Pawar ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost;
1076*c83a76b0SSuyog Pawar }
1077*c83a76b0SSuyog Pawar else
1078*c83a76b0SSuyog Pawar {
1079*c83a76b0SSuyog Pawar i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type;
1080*c83a76b0SSuyog Pawar num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1081*c83a76b0SSuyog Pawar i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost;
1082*c83a76b0SSuyog Pawar }
1083*c83a76b0SSuyog Pawar
1084*c83a76b0SSuyog Pawar if(ps_results_bl->u1_split_flag)
1085*c83a76b0SSuyog Pawar {
1086*c83a76b0SSuyog Pawar num_parts_in_32x32 += 4;
1087*c83a76b0SSuyog Pawar
1088*c83a76b0SSuyog Pawar i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost +
1089*c83a76b0SSuyog Pawar ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost +
1090*c83a76b0SSuyog Pawar ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost +
1091*c83a76b0SSuyog Pawar ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost;
1092*c83a76b0SSuyog Pawar }
1093*c83a76b0SSuyog Pawar else
1094*c83a76b0SSuyog Pawar {
1095*c83a76b0SSuyog Pawar i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type;
1096*c83a76b0SSuyog Pawar num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1097*c83a76b0SSuyog Pawar i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1098*c83a76b0SSuyog Pawar }
1099*c83a76b0SSuyog Pawar
1100*c83a76b0SSuyog Pawar if(ps_results_br->u1_split_flag)
1101*c83a76b0SSuyog Pawar {
1102*c83a76b0SSuyog Pawar num_parts_in_32x32 += 4;
1103*c83a76b0SSuyog Pawar
1104*c83a76b0SSuyog Pawar i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost +
1105*c83a76b0SSuyog Pawar ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost +
1106*c83a76b0SSuyog Pawar ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost +
1107*c83a76b0SSuyog Pawar ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost;
1108*c83a76b0SSuyog Pawar }
1109*c83a76b0SSuyog Pawar else
1110*c83a76b0SSuyog Pawar {
1111*c83a76b0SSuyog Pawar i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type;
1112*c83a76b0SSuyog Pawar num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1113*c83a76b0SSuyog Pawar i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost;
1114*c83a76b0SSuyog Pawar }
1115*c83a76b0SSuyog Pawar
1116*c83a76b0SSuyog Pawar if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY))
1117*c83a76b0SSuyog Pawar {
1118*c83a76b0SSuyog Pawar return CU_SPLIT;
1119*c83a76b0SSuyog Pawar }
1120*c83a76b0SSuyog Pawar
1121*c83a76b0SSuyog Pawar if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) &&
1122*c83a76b0SSuyog Pawar (ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25))
1123*c83a76b0SSuyog Pawar {
1124*c83a76b0SSuyog Pawar return CU_SPLIT;
1125*c83a76b0SSuyog Pawar }
1126*c83a76b0SSuyog Pawar }
1127*c83a76b0SSuyog Pawar
1128*c83a76b0SSuyog Pawar /* Accumulate intra percentage before merge for early CU_SPLIT decision */
1129*c83a76b0SSuyog Pawar /* Note : Each intra part represent a NxN unit of the children CUs */
1130*c83a76b0SSuyog Pawar /* This is essentially 1/16th of the CUsize under consideration for merge */
1131*c83a76b0SSuyog Pawar if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
1132*c83a76b0SSuyog Pawar {
1133*c83a76b0SSuyog Pawar if(CU_64x64 == ps_results_merge->e_cu_size)
1134*c83a76b0SSuyog Pawar {
1135*c83a76b0SSuyog Pawar i4_intra_parts =
1136*c83a76b0SSuyog Pawar (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable)
1137*c83a76b0SSuyog Pawar ? 16
1138*c83a76b0SSuyog Pawar : ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable;
1139*c83a76b0SSuyog Pawar }
1140*c83a76b0SSuyog Pawar else
1141*c83a76b0SSuyog Pawar {
1142*c83a76b0SSuyog Pawar switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4)))
1143*c83a76b0SSuyog Pawar {
1144*c83a76b0SSuyog Pawar case 0:
1145*c83a76b0SSuyog Pawar {
1146*c83a76b0SSuyog Pawar i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl
1147*c83a76b0SSuyog Pawar ->u1_inter_eval_enable)
1148*c83a76b0SSuyog Pawar ? 16
1149*c83a76b0SSuyog Pawar : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1150*c83a76b0SSuyog Pawar ->ps_child_node_tl->u1_intra_eval_enable);
1151*c83a76b0SSuyog Pawar
1152*c83a76b0SSuyog Pawar break;
1153*c83a76b0SSuyog Pawar }
1154*c83a76b0SSuyog Pawar case 1:
1155*c83a76b0SSuyog Pawar {
1156*c83a76b0SSuyog Pawar i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr
1157*c83a76b0SSuyog Pawar ->u1_inter_eval_enable)
1158*c83a76b0SSuyog Pawar ? 16
1159*c83a76b0SSuyog Pawar : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1160*c83a76b0SSuyog Pawar ->ps_child_node_tr->u1_intra_eval_enable);
1161*c83a76b0SSuyog Pawar
1162*c83a76b0SSuyog Pawar break;
1163*c83a76b0SSuyog Pawar }
1164*c83a76b0SSuyog Pawar case 2:
1165*c83a76b0SSuyog Pawar {
1166*c83a76b0SSuyog Pawar i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl
1167*c83a76b0SSuyog Pawar ->u1_inter_eval_enable)
1168*c83a76b0SSuyog Pawar ? 16
1169*c83a76b0SSuyog Pawar : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1170*c83a76b0SSuyog Pawar ->ps_child_node_bl->u1_intra_eval_enable);
1171*c83a76b0SSuyog Pawar
1172*c83a76b0SSuyog Pawar break;
1173*c83a76b0SSuyog Pawar }
1174*c83a76b0SSuyog Pawar case 3:
1175*c83a76b0SSuyog Pawar {
1176*c83a76b0SSuyog Pawar i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br
1177*c83a76b0SSuyog Pawar ->u1_inter_eval_enable)
1178*c83a76b0SSuyog Pawar ? 16
1179*c83a76b0SSuyog Pawar : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1180*c83a76b0SSuyog Pawar ->ps_child_node_br->u1_intra_eval_enable);
1181*c83a76b0SSuyog Pawar
1182*c83a76b0SSuyog Pawar break;
1183*c83a76b0SSuyog Pawar }
1184*c83a76b0SSuyog Pawar }
1185*c83a76b0SSuyog Pawar }
1186*c83a76b0SSuyog Pawar }
1187*c83a76b0SSuyog Pawar else
1188*c83a76b0SSuyog Pawar {
1189*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
1190*c83a76b0SSuyog Pawar {
1191*c83a76b0SSuyog Pawar search_results_t *ps_results =
1192*c83a76b0SSuyog Pawar (i == 0) ? ps_results_tl
1193*c83a76b0SSuyog Pawar : ((i == 1) ? ps_results_tr : ((i == 2) ? ps_results_bl : ps_results_br));
1194*c83a76b0SSuyog Pawar
1195*c83a76b0SSuyog Pawar part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0];
1196*c83a76b0SSuyog Pawar
1197*c83a76b0SSuyog Pawar if(ps_results->u1_split_flag)
1198*c83a76b0SSuyog Pawar {
1199*c83a76b0SSuyog Pawar U08 u1_x_off = ps_results->u1_x_off;
1200*c83a76b0SSuyog Pawar U08 u1_y_off = ps_results->u1_y_off;
1201*c83a76b0SSuyog Pawar U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >>
1202*c83a76b0SSuyog Pawar 2;
1203*c83a76b0SSuyog Pawar
1204*c83a76b0SSuyog Pawar /* Special case to handle 8x8 CUs when 16x16 is split */
1205*c83a76b0SSuyog Pawar ASSERT(ps_results->e_cu_size == CU_16x16);
1206*c83a76b0SSuyog Pawar
1207*c83a76b0SSuyog Pawar ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0];
1208*c83a76b0SSuyog Pawar
1209*c83a76b0SSuyog Pawar if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1210*c83a76b0SSuyog Pawar i4_intra_parts += 1;
1211*c83a76b0SSuyog Pawar
1212*c83a76b0SSuyog Pawar ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0];
1213*c83a76b0SSuyog Pawar
1214*c83a76b0SSuyog Pawar if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1215*c83a76b0SSuyog Pawar i4_intra_parts += 1;
1216*c83a76b0SSuyog Pawar
1217*c83a76b0SSuyog Pawar ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0];
1218*c83a76b0SSuyog Pawar
1219*c83a76b0SSuyog Pawar if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1220*c83a76b0SSuyog Pawar i4_intra_parts += 1;
1221*c83a76b0SSuyog Pawar
1222*c83a76b0SSuyog Pawar ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0];
1223*c83a76b0SSuyog Pawar
1224*c83a76b0SSuyog Pawar if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1225*c83a76b0SSuyog Pawar i4_intra_parts += 1;
1226*c83a76b0SSuyog Pawar }
1227*c83a76b0SSuyog Pawar else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag)
1228*c83a76b0SSuyog Pawar {
1229*c83a76b0SSuyog Pawar i4_intra_parts += 4;
1230*c83a76b0SSuyog Pawar }
1231*c83a76b0SSuyog Pawar }
1232*c83a76b0SSuyog Pawar }
1233*c83a76b0SSuyog Pawar
1234*c83a76b0SSuyog Pawar /* Determine the max intra CU size indicated by IPE */
1235*c83a76b0SSuyog Pawar intra_cu_size = CU_64x64;
1236*c83a76b0SSuyog Pawar if(ps_cur_ipe_ctb->u1_split_flag)
1237*c83a76b0SSuyog Pawar {
1238*c83a76b0SSuyog Pawar intra_cu_size = CU_32x32;
1239*c83a76b0SSuyog Pawar if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
1240*c83a76b0SSuyog Pawar {
1241*c83a76b0SSuyog Pawar intra_cu_size = CU_16x16;
1242*c83a76b0SSuyog Pawar }
1243*c83a76b0SSuyog Pawar }
1244*c83a76b0SSuyog Pawar
1245*c83a76b0SSuyog Pawar if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) &&
1246*c83a76b0SSuyog Pawar (intra_cu_size < ps_results_merge->e_cu_size) &&
1247*c83a76b0SSuyog Pawar (ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) ||
1248*c83a76b0SSuyog Pawar (i4_intra_parts == 16))
1249*c83a76b0SSuyog Pawar {
1250*c83a76b0SSuyog Pawar S32 i4_merge_outcome;
1251*c83a76b0SSuyog Pawar
1252*c83a76b0SSuyog Pawar i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size)
1253*c83a76b0SSuyog Pawar ? (!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag &&
1254*c83a76b0SSuyog Pawar ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu)
1255*c83a76b0SSuyog Pawar : (!ps_cur_ipe_ctb->u1_split_flag);
1256*c83a76b0SSuyog Pawar
1257*c83a76b0SSuyog Pawar i4_merge_outcome = i4_merge_outcome ||
1258*c83a76b0SSuyog Pawar (ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset);
1259*c83a76b0SSuyog Pawar
1260*c83a76b0SSuyog Pawar i4_merge_outcome = i4_merge_outcome &&
1261*c83a76b0SSuyog Pawar !(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY);
1262*c83a76b0SSuyog Pawar
1263*c83a76b0SSuyog Pawar if(i4_merge_outcome)
1264*c83a76b0SSuyog Pawar {
1265*c83a76b0SSuyog Pawar inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1266*c83a76b0SSuyog Pawar part_type_results_t *ps_best_result = ps_cu_results->ps_best_results;
1267*c83a76b0SSuyog Pawar pu_t *ps_pu = &ps_best_result->as_pu_results->pu;
1268*c83a76b0SSuyog Pawar
1269*c83a76b0SSuyog Pawar ps_cu_results->u1_num_best_results = 1;
1270*c83a76b0SSuyog Pawar ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size;
1271*c83a76b0SSuyog Pawar ps_cu_results->u1_x_off = u1_x_off;
1272*c83a76b0SSuyog Pawar ps_cu_results->u1_y_off = u1_y_off;
1273*c83a76b0SSuyog Pawar
1274*c83a76b0SSuyog Pawar ps_best_result->u1_part_type = PRT_2Nx2N;
1275*c83a76b0SSuyog Pawar ps_best_result->ai4_tu_split_flag[0] = 0;
1276*c83a76b0SSuyog Pawar ps_best_result->ai4_tu_split_flag[1] = 0;
1277*c83a76b0SSuyog Pawar ps_best_result->ai4_tu_split_flag[2] = 0;
1278*c83a76b0SSuyog Pawar ps_best_result->ai4_tu_split_flag[3] = 0;
1279*c83a76b0SSuyog Pawar ps_best_result->i4_tot_cost =
1280*c83a76b0SSuyog Pawar (CU_64x64 == ps_results_merge->e_cu_size)
1281*c83a76b0SSuyog Pawar ? ps_cur_ipe_ctb->i4_best64x64_intra_cost
1282*c83a76b0SSuyog Pawar : ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id];
1283*c83a76b0SSuyog Pawar
1284*c83a76b0SSuyog Pawar ps_pu->b1_intra_flag = 1;
1285*c83a76b0SSuyog Pawar ps_pu->b4_pos_x = u1_x_off >> 2;
1286*c83a76b0SSuyog Pawar ps_pu->b4_pos_y = u1_y_off >> 2;
1287*c83a76b0SSuyog Pawar ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1;
1288*c83a76b0SSuyog Pawar ps_pu->b4_ht = ps_pu->b4_wd;
1289*c83a76b0SSuyog Pawar ps_pu->mv.i1_l0_ref_idx = -1;
1290*c83a76b0SSuyog Pawar ps_pu->mv.i1_l1_ref_idx = -1;
1291*c83a76b0SSuyog Pawar ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV;
1292*c83a76b0SSuyog Pawar ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV;
1293*c83a76b0SSuyog Pawar ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV;
1294*c83a76b0SSuyog Pawar ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV;
1295*c83a76b0SSuyog Pawar
1296*c83a76b0SSuyog Pawar return CU_MERGED;
1297*c83a76b0SSuyog Pawar }
1298*c83a76b0SSuyog Pawar else
1299*c83a76b0SSuyog Pawar {
1300*c83a76b0SSuyog Pawar return CU_SPLIT;
1301*c83a76b0SSuyog Pawar }
1302*c83a76b0SSuyog Pawar }
1303*c83a76b0SSuyog Pawar
1304*c83a76b0SSuyog Pawar if(i4_intra_parts)
1305*c83a76b0SSuyog Pawar {
1306*c83a76b0SSuyog Pawar i4_part_mask = ENABLE_2Nx2N;
1307*c83a76b0SSuyog Pawar }
1308*c83a76b0SSuyog Pawar
1309*c83a76b0SSuyog Pawar ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 2 : 1;
1310*c83a76b0SSuyog Pawar
1311*c83a76b0SSuyog Pawar hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL);
1312*c83a76b0SSuyog Pawar
1313*c83a76b0SSuyog Pawar ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref;
1314*c83a76b0SSuyog Pawar ps_merge_prms->i4_num_pred_dir_actual = 0;
1315*c83a76b0SSuyog Pawar
1316*c83a76b0SSuyog Pawar if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
1317*c83a76b0SSuyog Pawar {
1318*c83a76b0SSuyog Pawar S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
1319*c83a76b0SSuyog Pawar S32 i4_num_valid_parts;
1320*c83a76b0SSuyog Pawar S32 i4_sigma_array_offset;
1321*c83a76b0SSuyog Pawar
1322*c83a76b0SSuyog Pawar i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
1323*c83a76b0SSuyog Pawar
1324*c83a76b0SSuyog Pawar /*********************************************************************************************************************************************/
1325*c83a76b0SSuyog Pawar /* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values */
1326*c83a76b0SSuyog Pawar /* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */
1327*c83a76b0SSuyog Pawar /* increment as there will be 256 4x4 blocks in a CTB */
1328*c83a76b0SSuyog Pawar /*********************************************************************************************************************************************/
1329*c83a76b0SSuyog Pawar i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) +
1330*c83a76b0SSuyog Pawar (ps_merge_prms->ps_results_merge->u1_y_off * 4);
1331*c83a76b0SSuyog Pawar
1332*c83a76b0SSuyog Pawar for(i = 0; i < i4_num_valid_parts; i++)
1333*c83a76b0SSuyog Pawar {
1334*c83a76b0SSuyog Pawar S32 i4_part_id = ai4_valid_part_ids[i];
1335*c83a76b0SSuyog Pawar
1336*c83a76b0SSuyog Pawar hme_compute_final_sigma_of_pu_from_base_blocks(
1337*c83a76b0SSuyog Pawar ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
1338*c83a76b0SSuyog Pawar ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
1339*c83a76b0SSuyog Pawar au8_final_src_sigmaX,
1340*c83a76b0SSuyog Pawar au8_final_src_sigmaXSquared,
1341*c83a76b0SSuyog Pawar (CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64,
1342*c83a76b0SSuyog Pawar 4,
1343*c83a76b0SSuyog Pawar i4_part_id,
1344*c83a76b0SSuyog Pawar 16);
1345*c83a76b0SSuyog Pawar }
1346*c83a76b0SSuyog Pawar
1347*c83a76b0SSuyog Pawar ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX;
1348*c83a76b0SSuyog Pawar ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared;
1349*c83a76b0SSuyog Pawar }
1350*c83a76b0SSuyog Pawar
1351*c83a76b0SSuyog Pawar /*************************************************************************/
1352*c83a76b0SSuyog Pawar /* Loop through all ref idx and pick the merge candts and refine based */
1353*c83a76b0SSuyog Pawar /* on the active partitions. At this stage num ref will be 1 or 2 */
1354*c83a76b0SSuyog Pawar /*************************************************************************/
1355*c83a76b0SSuyog Pawar for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++)
1356*c83a76b0SSuyog Pawar {
1357*c83a76b0SSuyog Pawar S32 i4_cands;
1358*c83a76b0SSuyog Pawar U08 u1_pred_dir = 0;
1359*c83a76b0SSuyog Pawar
1360*c83a76b0SSuyog Pawar if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled))
1361*c83a76b0SSuyog Pawar {
1362*c83a76b0SSuyog Pawar u1_pred_dir = i4_search_idx;
1363*c83a76b0SSuyog Pawar }
1364*c83a76b0SSuyog Pawar else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
1365*c83a76b0SSuyog Pawar {
1366*c83a76b0SSuyog Pawar u1_pred_dir = 1;
1367*c83a76b0SSuyog Pawar }
1368*c83a76b0SSuyog Pawar else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)
1369*c83a76b0SSuyog Pawar {
1370*c83a76b0SSuyog Pawar u1_pred_dir = 0;
1371*c83a76b0SSuyog Pawar }
1372*c83a76b0SSuyog Pawar else
1373*c83a76b0SSuyog Pawar {
1374*c83a76b0SSuyog Pawar ASSERT(0);
1375*c83a76b0SSuyog Pawar }
1376*c83a76b0SSuyog Pawar
1377*c83a76b0SSuyog Pawar /* call the function to pick and evaluate the merge candts, given */
1378*c83a76b0SSuyog Pawar /* a ref id and a part mask. */
1379*c83a76b0SSuyog Pawar i4_cands = hme_pick_eval_merge_candts(
1380*c83a76b0SSuyog Pawar ps_merge_prms,
1381*c83a76b0SSuyog Pawar ps_subpel_prms,
1382*c83a76b0SSuyog Pawar u1_pred_dir,
1383*c83a76b0SSuyog Pawar i4_best_part_type,
1384*c83a76b0SSuyog Pawar is_vert,
1385*c83a76b0SSuyog Pawar ps_wt_inp_prms,
1386*c83a76b0SSuyog Pawar i4_frm_qstep,
1387*c83a76b0SSuyog Pawar ps_cmn_utils_optimised_function_list,
1388*c83a76b0SSuyog Pawar ps_me_optimised_function_list);
1389*c83a76b0SSuyog Pawar
1390*c83a76b0SSuyog Pawar if(i4_cands)
1391*c83a76b0SSuyog Pawar {
1392*c83a76b0SSuyog Pawar ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] =
1393*c83a76b0SSuyog Pawar u1_pred_dir;
1394*c83a76b0SSuyog Pawar ps_merge_prms->i4_num_pred_dir_actual++;
1395*c83a76b0SSuyog Pawar }
1396*c83a76b0SSuyog Pawar
1397*c83a76b0SSuyog Pawar i4_num_merge_cands_evaluated += i4_cands;
1398*c83a76b0SSuyog Pawar }
1399*c83a76b0SSuyog Pawar
1400*c83a76b0SSuyog Pawar /* Call the decide_part_types function here */
1401*c83a76b0SSuyog Pawar /* Populate the new PU struct with the results post subpel refinement*/
1402*c83a76b0SSuyog Pawar if(i4_num_merge_cands_evaluated)
1403*c83a76b0SSuyog Pawar {
1404*c83a76b0SSuyog Pawar inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1405*c83a76b0SSuyog Pawar
1406*c83a76b0SSuyog Pawar hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
1407*c83a76b0SSuyog Pawar
1408*c83a76b0SSuyog Pawar ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
1409*c83a76b0SSuyog Pawar ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
1410*c83a76b0SSuyog Pawar
1411*c83a76b0SSuyog Pawar hme_populate_pus(
1412*c83a76b0SSuyog Pawar ps_thrd_ctxt,
1413*c83a76b0SSuyog Pawar ps_ctxt,
1414*c83a76b0SSuyog Pawar ps_subpel_prms,
1415*c83a76b0SSuyog Pawar ps_results_merge,
1416*c83a76b0SSuyog Pawar ps_cu_results,
1417*c83a76b0SSuyog Pawar ps_pu_results,
1418*c83a76b0SSuyog Pawar ps_pu_result,
1419*c83a76b0SSuyog Pawar ps_merge_prms->ps_inter_ctb_prms,
1420*c83a76b0SSuyog Pawar &ps_ctxt->s_wt_pred,
1421*c83a76b0SSuyog Pawar ps_merge_prms->ps_layer_ctxt,
1422*c83a76b0SSuyog Pawar ps_merge_prms->au1_pred_dir_searched,
1423*c83a76b0SSuyog Pawar ps_merge_prms->i4_num_pred_dir_actual);
1424*c83a76b0SSuyog Pawar
1425*c83a76b0SSuyog Pawar ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
1426*c83a76b0SSuyog Pawar
1427*c83a76b0SSuyog Pawar hme_decide_part_types(
1428*c83a76b0SSuyog Pawar ps_cu_results,
1429*c83a76b0SSuyog Pawar ps_pu_results,
1430*c83a76b0SSuyog Pawar ps_merge_prms->ps_inter_ctb_prms,
1431*c83a76b0SSuyog Pawar ps_ctxt,
1432*c83a76b0SSuyog Pawar ps_cmn_utils_optimised_function_list,
1433*c83a76b0SSuyog Pawar ps_me_optimised_function_list
1434*c83a76b0SSuyog Pawar
1435*c83a76b0SSuyog Pawar );
1436*c83a76b0SSuyog Pawar
1437*c83a76b0SSuyog Pawar /*****************************************************************/
1438*c83a76b0SSuyog Pawar /* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL. */
1439*c83a76b0SSuyog Pawar /*****************************************************************/
1440*c83a76b0SSuyog Pawar #if DISABLE_INTRA_IN_BPICS
1441*c83a76b0SSuyog Pawar if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) &&
1442*c83a76b0SSuyog Pawar (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
1443*c83a76b0SSuyog Pawar #endif
1444*c83a76b0SSuyog Pawar {
1445*c83a76b0SSuyog Pawar if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy))
1446*c83a76b0SSuyog Pawar {
1447*c83a76b0SSuyog Pawar hme_insert_intra_nodes_post_bipred(
1448*c83a76b0SSuyog Pawar ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
1449*c83a76b0SSuyog Pawar }
1450*c83a76b0SSuyog Pawar }
1451*c83a76b0SSuyog Pawar }
1452*c83a76b0SSuyog Pawar else
1453*c83a76b0SSuyog Pawar {
1454*c83a76b0SSuyog Pawar return CU_SPLIT;
1455*c83a76b0SSuyog Pawar }
1456*c83a76b0SSuyog Pawar
1457*c83a76b0SSuyog Pawar /* We check the best result of ref idx 0 and compare for parent vs child */
1458*c83a76b0SSuyog Pawar if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1459*c83a76b0SSuyog Pawar (CU_32x32 == ps_results_merge->e_cu_size))
1460*c83a76b0SSuyog Pawar {
1461*c83a76b0SSuyog Pawar i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost;
1462*c83a76b0SSuyog Pawar /*********************************************************************/
1463*c83a76b0SSuyog Pawar /* Add the cost of signaling the CU tree bits. */
1464*c83a76b0SSuyog Pawar /* Assuming parent is not split, then we signal 1 bit for this parent*/
1465*c83a76b0SSuyog Pawar /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
1466*c83a76b0SSuyog Pawar /* So, 4*lambda is extra for children cost. :Lokesh */
1467*c83a76b0SSuyog Pawar /*********************************************************************/
1468*c83a76b0SSuyog Pawar {
1469*c83a76b0SSuyog Pawar pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0];
1470*c83a76b0SSuyog Pawar
1471*c83a76b0SSuyog Pawar i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift));
1472*c83a76b0SSuyog Pawar }
1473*c83a76b0SSuyog Pawar
1474*c83a76b0SSuyog Pawar if(i4_cost_parent < i4_cost_children)
1475*c83a76b0SSuyog Pawar {
1476*c83a76b0SSuyog Pawar return CU_MERGED;
1477*c83a76b0SSuyog Pawar }
1478*c83a76b0SSuyog Pawar
1479*c83a76b0SSuyog Pawar return CU_SPLIT;
1480*c83a76b0SSuyog Pawar }
1481*c83a76b0SSuyog Pawar else
1482*c83a76b0SSuyog Pawar {
1483*c83a76b0SSuyog Pawar return CU_MERGED;
1484*c83a76b0SSuyog Pawar }
1485*c83a76b0SSuyog Pawar }
1486*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @brief  Copies one search result into one mv bank entry.
*
*  Writes the mv of the search node, with each component right shifted by
*  'shift', into the mv bank entry and stores the node's ref idx through
*  pi1_ref_idx.
*
*  @param[out] ps_mv          : pointer to the mv bank entry to be written
*
*  @param[out] pi1_ref_idx    : pointer to the ref idx entry to be written
*
*  @param[in]  ps_search_node : pointer to the search node being copied
*
*  @param[in]  shift          : right shift applied to both mv components
*
*  @remarks Every argument is expanded more than once, so arguments must not
*           have side effects (e.g. do not pass ps_mv++).
*           The body is wrapped in do { } while(0) so that the macro followed
*           by ';' forms exactly one statement and can be used safely in an
*           unbraced if / else.
********************************************************************************
*/
#define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift)                              \
    do                                                                                             \
    {                                                                                              \
        (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift);                               \
        (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift);                               \
        *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx;                                             \
    } while(0)
1493*c83a76b0SSuyog Pawar
1494*c83a76b0SSuyog Pawar /**
1495*c83a76b0SSuyog Pawar ********************************************************************************
1496*c83a76b0SSuyog Pawar * @fn hme_update_mv_bank_noencode(search_results_t *ps_search_results,
1497*c83a76b0SSuyog Pawar * layer_mv_t *ps_layer_mv,
1498*c83a76b0SSuyog Pawar * S32 i4_search_blk_x,
1499*c83a76b0SSuyog Pawar * S32 i4_search_blk_y,
1500*c83a76b0SSuyog Pawar * mvbank_update_prms_t *ps_prms)
1501*c83a76b0SSuyog Pawar *
1502*c83a76b0SSuyog Pawar * @brief Updates the mv bank in case there is no further encodign to be done
1503*c83a76b0SSuyog Pawar *
1504*c83a76b0SSuyog Pawar * @param[in] ps_search_results: contains results for the block just searched
1505*c83a76b0SSuyog Pawar *
1506*c83a76b0SSuyog Pawar * @param[in,out] ps_layer_mv : Has pointer to mv bank amongst other things
1507*c83a76b0SSuyog Pawar *
1508*c83a76b0SSuyog Pawar * @param[in] i4_search_blk_x : col num of blk being searched
1509*c83a76b0SSuyog Pawar *
1510*c83a76b0SSuyog Pawar * @param[in] i4_search_blk_y : row num of blk being searched
1511*c83a76b0SSuyog Pawar *
1512*c83a76b0SSuyog Pawar * @param[in] ps_prms : contains certain parameters which govern how updatedone
1513*c83a76b0SSuyog Pawar *
1514*c83a76b0SSuyog Pawar * @return None
1515*c83a76b0SSuyog Pawar ********************************************************************************
1516*c83a76b0SSuyog Pawar */
1517*c83a76b0SSuyog Pawar
hme_update_mv_bank_noencode(search_results_t * ps_search_results,layer_mv_t * ps_layer_mv,S32 i4_search_blk_x,S32 i4_search_blk_y,mvbank_update_prms_t * ps_prms)1518*c83a76b0SSuyog Pawar void hme_update_mv_bank_noencode(
1519*c83a76b0SSuyog Pawar search_results_t *ps_search_results,
1520*c83a76b0SSuyog Pawar layer_mv_t *ps_layer_mv,
1521*c83a76b0SSuyog Pawar S32 i4_search_blk_x,
1522*c83a76b0SSuyog Pawar S32 i4_search_blk_y,
1523*c83a76b0SSuyog Pawar mvbank_update_prms_t *ps_prms)
1524*c83a76b0SSuyog Pawar {
1525*c83a76b0SSuyog Pawar hme_mv_t *ps_mv;
1526*c83a76b0SSuyog Pawar hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1527*c83a76b0SSuyog Pawar S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1528*c83a76b0SSuyog Pawar S32 i4_blk_x, i4_blk_y, i4_offset;
1529*c83a76b0SSuyog Pawar S32 i4_j, i4_ref_id;
1530*c83a76b0SSuyog Pawar search_node_t *ps_search_node;
1531*c83a76b0SSuyog Pawar search_node_t *ps_search_node_8x8, *ps_search_node_4x4_1;
1532*c83a76b0SSuyog Pawar search_node_t *ps_search_node_4x4_2, *ps_search_node_4x4_3;
1533*c83a76b0SSuyog Pawar search_node_t *ps_search_node_4x4_4;
1534*c83a76b0SSuyog Pawar
1535*c83a76b0SSuyog Pawar i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1536*c83a76b0SSuyog Pawar i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1537*c83a76b0SSuyog Pawar i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1538*c83a76b0SSuyog Pawar
1539*c83a76b0SSuyog Pawar i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1540*c83a76b0SSuyog Pawar
1541*c83a76b0SSuyog Pawar /* Identify the correct offset in the mvbank and the reference id buf */
1542*c83a76b0SSuyog Pawar ps_mv = ps_layer_mv->ps_mv + i4_offset;
1543*c83a76b0SSuyog Pawar pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1544*c83a76b0SSuyog Pawar
1545*c83a76b0SSuyog Pawar /*************************************************************************/
1546*c83a76b0SSuyog Pawar /* Supposing we store the mvs in the same blk size as we searched (e.g. */
1547*c83a76b0SSuyog Pawar /* we searched 8x8 blks and store results for 8x8 blks), then we can */
1548*c83a76b0SSuyog Pawar /* do a straightforward single update of results. This will have a 1-1 */
1549*c83a76b0SSuyog Pawar /* correspondence. */
1550*c83a76b0SSuyog Pawar /*************************************************************************/
1551*c83a76b0SSuyog Pawar if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1552*c83a76b0SSuyog Pawar {
1553*c83a76b0SSuyog Pawar for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1554*c83a76b0SSuyog Pawar {
1555*c83a76b0SSuyog Pawar ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1556*c83a76b0SSuyog Pawar for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1557*c83a76b0SSuyog Pawar {
1558*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, 0);
1559*c83a76b0SSuyog Pawar ps_mv++;
1560*c83a76b0SSuyog Pawar pi1_ref_idx++;
1561*c83a76b0SSuyog Pawar ps_search_node++;
1562*c83a76b0SSuyog Pawar }
1563*c83a76b0SSuyog Pawar }
1564*c83a76b0SSuyog Pawar return;
1565*c83a76b0SSuyog Pawar }
1566*c83a76b0SSuyog Pawar
1567*c83a76b0SSuyog Pawar /*************************************************************************/
1568*c83a76b0SSuyog Pawar /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1569*c83a76b0SSuyog Pawar /* case, we need to have NxN partitions enabled in search. */
1570*c83a76b0SSuyog Pawar /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1571*c83a76b0SSuyog Pawar /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1572*c83a76b0SSuyog Pawar /*************************************************************************/
1573*c83a76b0SSuyog Pawar ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1574*c83a76b0SSuyog Pawar ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1575*c83a76b0SSuyog Pawar ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1576*c83a76b0SSuyog Pawar
1577*c83a76b0SSuyog Pawar /*************************************************************************/
1578*c83a76b0SSuyog Pawar /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
1579*c83a76b0SSuyog Pawar /* hence the below check. */
1580*c83a76b0SSuyog Pawar /*************************************************************************/
1581*c83a76b0SSuyog Pawar ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
1582*c83a76b0SSuyog Pawar
1583*c83a76b0SSuyog Pawar ps_mv1 = ps_mv;
1584*c83a76b0SSuyog Pawar ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1585*c83a76b0SSuyog Pawar ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1586*c83a76b0SSuyog Pawar ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1587*c83a76b0SSuyog Pawar pi1_ref_idx1 = pi1_ref_idx;
1588*c83a76b0SSuyog Pawar pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1589*c83a76b0SSuyog Pawar pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1590*c83a76b0SSuyog Pawar pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
1591*c83a76b0SSuyog Pawar
1592*c83a76b0SSuyog Pawar for(i4_ref_id = 0; i4_ref_id < (S32)ps_search_results->u1_num_active_ref; i4_ref_id++)
1593*c83a76b0SSuyog Pawar {
1594*c83a76b0SSuyog Pawar ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1595*c83a76b0SSuyog Pawar
1596*c83a76b0SSuyog Pawar ps_search_node_4x4_1 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL];
1597*c83a76b0SSuyog Pawar
1598*c83a76b0SSuyog Pawar ps_search_node_4x4_2 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TR];
1599*c83a76b0SSuyog Pawar
1600*c83a76b0SSuyog Pawar ps_search_node_4x4_3 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BL];
1601*c83a76b0SSuyog Pawar
1602*c83a76b0SSuyog Pawar ps_search_node_4x4_4 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BR];
1603*c83a76b0SSuyog Pawar
1604*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1605*c83a76b0SSuyog Pawar ps_mv1++;
1606*c83a76b0SSuyog Pawar pi1_ref_idx1++;
1607*c83a76b0SSuyog Pawar ps_search_node_4x4_1++;
1608*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1609*c83a76b0SSuyog Pawar ps_mv2++;
1610*c83a76b0SSuyog Pawar pi1_ref_idx2++;
1611*c83a76b0SSuyog Pawar ps_search_node_4x4_2++;
1612*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1613*c83a76b0SSuyog Pawar ps_mv3++;
1614*c83a76b0SSuyog Pawar pi1_ref_idx3++;
1615*c83a76b0SSuyog Pawar ps_search_node_4x4_3++;
1616*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1617*c83a76b0SSuyog Pawar ps_mv4++;
1618*c83a76b0SSuyog Pawar pi1_ref_idx4++;
1619*c83a76b0SSuyog Pawar ps_search_node_4x4_4++;
1620*c83a76b0SSuyog Pawar
1621*c83a76b0SSuyog Pawar if(ps_layer_mv->i4_num_mvs_per_ref > 1)
1622*c83a76b0SSuyog Pawar {
1623*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_8x8, 0);
1624*c83a76b0SSuyog Pawar ps_mv1++;
1625*c83a76b0SSuyog Pawar pi1_ref_idx1++;
1626*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_8x8, 0);
1627*c83a76b0SSuyog Pawar ps_mv2++;
1628*c83a76b0SSuyog Pawar pi1_ref_idx2++;
1629*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_8x8, 0);
1630*c83a76b0SSuyog Pawar ps_mv3++;
1631*c83a76b0SSuyog Pawar pi1_ref_idx3++;
1632*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_8x8, 0);
1633*c83a76b0SSuyog Pawar ps_mv4++;
1634*c83a76b0SSuyog Pawar pi1_ref_idx4++;
1635*c83a76b0SSuyog Pawar }
1636*c83a76b0SSuyog Pawar
1637*c83a76b0SSuyog Pawar for(i4_j = 2; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1638*c83a76b0SSuyog Pawar {
1639*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1640*c83a76b0SSuyog Pawar ps_mv1++;
1641*c83a76b0SSuyog Pawar pi1_ref_idx1++;
1642*c83a76b0SSuyog Pawar ps_search_node_4x4_1++;
1643*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1644*c83a76b0SSuyog Pawar ps_mv2++;
1645*c83a76b0SSuyog Pawar pi1_ref_idx2++;
1646*c83a76b0SSuyog Pawar ps_search_node_4x4_2++;
1647*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1648*c83a76b0SSuyog Pawar ps_mv3++;
1649*c83a76b0SSuyog Pawar pi1_ref_idx3++;
1650*c83a76b0SSuyog Pawar ps_search_node_4x4_3++;
1651*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1652*c83a76b0SSuyog Pawar ps_mv4++;
1653*c83a76b0SSuyog Pawar pi1_ref_idx4++;
1654*c83a76b0SSuyog Pawar ps_search_node_4x4_4++;
1655*c83a76b0SSuyog Pawar }
1656*c83a76b0SSuyog Pawar }
1657*c83a76b0SSuyog Pawar }
1658*c83a76b0SSuyog Pawar
hme_update_mv_bank_encode(search_results_t * ps_search_results,layer_mv_t * ps_layer_mv,S32 i4_search_blk_x,S32 i4_search_blk_y,mvbank_update_prms_t * ps_prms,U08 * pu1_pred_dir_searched,S32 i4_num_act_ref_l0)1659*c83a76b0SSuyog Pawar void hme_update_mv_bank_encode(
1660*c83a76b0SSuyog Pawar search_results_t *ps_search_results,
1661*c83a76b0SSuyog Pawar layer_mv_t *ps_layer_mv,
1662*c83a76b0SSuyog Pawar S32 i4_search_blk_x,
1663*c83a76b0SSuyog Pawar S32 i4_search_blk_y,
1664*c83a76b0SSuyog Pawar mvbank_update_prms_t *ps_prms,
1665*c83a76b0SSuyog Pawar U08 *pu1_pred_dir_searched,
1666*c83a76b0SSuyog Pawar S32 i4_num_act_ref_l0)
1667*c83a76b0SSuyog Pawar {
1668*c83a76b0SSuyog Pawar hme_mv_t *ps_mv;
1669*c83a76b0SSuyog Pawar hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1670*c83a76b0SSuyog Pawar S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1671*c83a76b0SSuyog Pawar S32 i4_blk_x, i4_blk_y, i4_offset;
1672*c83a76b0SSuyog Pawar S32 j, i, num_parts;
1673*c83a76b0SSuyog Pawar search_node_t *ps_search_node_tl, *ps_search_node_tr;
1674*c83a76b0SSuyog Pawar search_node_t *ps_search_node_bl, *ps_search_node_br;
1675*c83a76b0SSuyog Pawar search_node_t s_zero_mv;
1676*c83a76b0SSuyog Pawar WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;
1677*c83a76b0SSuyog Pawar
1678*c83a76b0SSuyog Pawar i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1679*c83a76b0SSuyog Pawar i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1680*c83a76b0SSuyog Pawar i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1681*c83a76b0SSuyog Pawar
1682*c83a76b0SSuyog Pawar i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1683*c83a76b0SSuyog Pawar
1684*c83a76b0SSuyog Pawar /* Identify the correct offset in the mvbank and the reference id buf */
1685*c83a76b0SSuyog Pawar ps_mv = ps_layer_mv->ps_mv + i4_offset;
1686*c83a76b0SSuyog Pawar pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1687*c83a76b0SSuyog Pawar
1688*c83a76b0SSuyog Pawar ASSERT(ps_layer_mv->e_blk_size == BLK_8x8);
1689*c83a76b0SSuyog Pawar ASSERT(ps_prms->e_search_blk_size == BLK_16x16);
1690*c83a76b0SSuyog Pawar
1691*c83a76b0SSuyog Pawar /*************************************************************************/
1692*c83a76b0SSuyog Pawar /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
1693*c83a76b0SSuyog Pawar /* hence the below check. */
1694*c83a76b0SSuyog Pawar /*************************************************************************/
1695*c83a76b0SSuyog Pawar ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results);
1696*c83a76b0SSuyog Pawar
1697*c83a76b0SSuyog Pawar ps_mv1 = ps_mv;
1698*c83a76b0SSuyog Pawar ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1699*c83a76b0SSuyog Pawar ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1700*c83a76b0SSuyog Pawar ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1701*c83a76b0SSuyog Pawar pi1_ref_idx1 = pi1_ref_idx;
1702*c83a76b0SSuyog Pawar pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1703*c83a76b0SSuyog Pawar pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1704*c83a76b0SSuyog Pawar pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
1705*c83a76b0SSuyog Pawar
1706*c83a76b0SSuyog Pawar /* Initialize zero mv: default mv used for intra mvs */
1707*c83a76b0SSuyog Pawar s_zero_mv.s_mv.i2_mvx = 0;
1708*c83a76b0SSuyog Pawar s_zero_mv.s_mv.i2_mvy = 0;
1709*c83a76b0SSuyog Pawar s_zero_mv.i1_ref_idx = 0;
1710*c83a76b0SSuyog Pawar
1711*c83a76b0SSuyog Pawar if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
1712*c83a76b0SSuyog Pawar (ps_search_results->i4_part_mask & ENABLE_NxN))
1713*c83a76b0SSuyog Pawar {
1714*c83a76b0SSuyog Pawar i4_part_type = PRT_NxN;
1715*c83a76b0SSuyog Pawar }
1716*c83a76b0SSuyog Pawar
1717*c83a76b0SSuyog Pawar for(i = 0; i < ps_prms->i4_num_ref; i++)
1718*c83a76b0SSuyog Pawar {
1719*c83a76b0SSuyog Pawar for(j = 0; j < ps_layer_mv->i4_num_mvs_per_ref; j++)
1720*c83a76b0SSuyog Pawar {
1721*c83a76b0SSuyog Pawar WORD32 i4_part_id = ge_part_type_to_part_id[i4_part_type][0];
1722*c83a76b0SSuyog Pawar
1723*c83a76b0SSuyog Pawar num_parts = gau1_num_parts_in_part_type[i4_part_type];
1724*c83a76b0SSuyog Pawar
1725*c83a76b0SSuyog Pawar ps_search_node_tl =
1726*c83a76b0SSuyog Pawar ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];
1727*c83a76b0SSuyog Pawar
1728*c83a76b0SSuyog Pawar if(num_parts == 1)
1729*c83a76b0SSuyog Pawar {
1730*c83a76b0SSuyog Pawar ps_search_node_tr = ps_search_node_tl;
1731*c83a76b0SSuyog Pawar ps_search_node_bl = ps_search_node_tl;
1732*c83a76b0SSuyog Pawar ps_search_node_br = ps_search_node_tl;
1733*c83a76b0SSuyog Pawar }
1734*c83a76b0SSuyog Pawar else if(num_parts == 2)
1735*c83a76b0SSuyog Pawar {
1736*c83a76b0SSuyog Pawar /* For vertically oriented partitions, tl, bl pt to same result */
1737*c83a76b0SSuyog Pawar /* For horizontally oriented partition, tl, tr pt to same result */
1738*c83a76b0SSuyog Pawar /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
1739*c83a76b0SSuyog Pawar /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
1740*c83a76b0SSuyog Pawar /* and right 2 8x8 have 12x16R partition */
1741*c83a76b0SSuyog Pawar if(gau1_is_vert_part[i4_part_type])
1742*c83a76b0SSuyog Pawar {
1743*c83a76b0SSuyog Pawar ps_search_node_tr =
1744*c83a76b0SSuyog Pawar ps_search_results
1745*c83a76b0SSuyog Pawar ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1746*c83a76b0SSuyog Pawar ps_search_node_bl = ps_search_node_tl;
1747*c83a76b0SSuyog Pawar }
1748*c83a76b0SSuyog Pawar else
1749*c83a76b0SSuyog Pawar {
1750*c83a76b0SSuyog Pawar ps_search_node_tr = ps_search_node_tl;
1751*c83a76b0SSuyog Pawar ps_search_node_bl =
1752*c83a76b0SSuyog Pawar ps_search_results
1753*c83a76b0SSuyog Pawar ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1754*c83a76b0SSuyog Pawar }
1755*c83a76b0SSuyog Pawar ps_search_node_br =
1756*c83a76b0SSuyog Pawar ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1757*c83a76b0SSuyog Pawar }
1758*c83a76b0SSuyog Pawar else
1759*c83a76b0SSuyog Pawar {
1760*c83a76b0SSuyog Pawar /* 4 unique results */
1761*c83a76b0SSuyog Pawar ps_search_node_tr =
1762*c83a76b0SSuyog Pawar ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1763*c83a76b0SSuyog Pawar ps_search_node_bl =
1764*c83a76b0SSuyog Pawar ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2];
1765*c83a76b0SSuyog Pawar ps_search_node_br =
1766*c83a76b0SSuyog Pawar ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3];
1767*c83a76b0SSuyog Pawar }
1768*c83a76b0SSuyog Pawar
1769*c83a76b0SSuyog Pawar if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
1770*c83a76b0SSuyog Pawar ps_search_node_tl++;
1771*c83a76b0SSuyog Pawar if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
1772*c83a76b0SSuyog Pawar ps_search_node_tr++;
1773*c83a76b0SSuyog Pawar if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
1774*c83a76b0SSuyog Pawar ps_search_node_bl++;
1775*c83a76b0SSuyog Pawar if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
1776*c83a76b0SSuyog Pawar ps_search_node_br++;
1777*c83a76b0SSuyog Pawar
1778*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
1779*c83a76b0SSuyog Pawar ps_mv1++;
1780*c83a76b0SSuyog Pawar pi1_ref_idx1++;
1781*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
1782*c83a76b0SSuyog Pawar ps_mv2++;
1783*c83a76b0SSuyog Pawar pi1_ref_idx2++;
1784*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
1785*c83a76b0SSuyog Pawar ps_mv3++;
1786*c83a76b0SSuyog Pawar pi1_ref_idx3++;
1787*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
1788*c83a76b0SSuyog Pawar ps_mv4++;
1789*c83a76b0SSuyog Pawar pi1_ref_idx4++;
1790*c83a76b0SSuyog Pawar
1791*c83a76b0SSuyog Pawar if(ps_prms->i4_num_results_to_store > 1)
1792*c83a76b0SSuyog Pawar {
1793*c83a76b0SSuyog Pawar ps_search_node_tl =
1794*c83a76b0SSuyog Pawar &ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id][1];
1795*c83a76b0SSuyog Pawar
1796*c83a76b0SSuyog Pawar if(num_parts == 1)
1797*c83a76b0SSuyog Pawar {
1798*c83a76b0SSuyog Pawar ps_search_node_tr = ps_search_node_tl;
1799*c83a76b0SSuyog Pawar ps_search_node_bl = ps_search_node_tl;
1800*c83a76b0SSuyog Pawar ps_search_node_br = ps_search_node_tl;
1801*c83a76b0SSuyog Pawar }
1802*c83a76b0SSuyog Pawar else if(num_parts == 2)
1803*c83a76b0SSuyog Pawar {
1804*c83a76b0SSuyog Pawar /* For vertically oriented partitions, tl, bl pt to same result */
1805*c83a76b0SSuyog Pawar /* For horizontally oriented partition, tl, tr pt to same result */
1806*c83a76b0SSuyog Pawar /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
1807*c83a76b0SSuyog Pawar /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
1808*c83a76b0SSuyog Pawar /* and right 2 8x8 have 12x16R partition */
1809*c83a76b0SSuyog Pawar if(gau1_is_vert_part[i4_part_type])
1810*c83a76b0SSuyog Pawar {
1811*c83a76b0SSuyog Pawar ps_search_node_tr =
1812*c83a76b0SSuyog Pawar &ps_search_results
1813*c83a76b0SSuyog Pawar ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1814*c83a76b0SSuyog Pawar ps_search_node_bl = ps_search_node_tl;
1815*c83a76b0SSuyog Pawar }
1816*c83a76b0SSuyog Pawar else
1817*c83a76b0SSuyog Pawar {
1818*c83a76b0SSuyog Pawar ps_search_node_tr = ps_search_node_tl;
1819*c83a76b0SSuyog Pawar ps_search_node_bl =
1820*c83a76b0SSuyog Pawar &ps_search_results
1821*c83a76b0SSuyog Pawar ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1822*c83a76b0SSuyog Pawar }
1823*c83a76b0SSuyog Pawar ps_search_node_br =
1824*c83a76b0SSuyog Pawar &ps_search_results
1825*c83a76b0SSuyog Pawar ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1826*c83a76b0SSuyog Pawar }
1827*c83a76b0SSuyog Pawar else
1828*c83a76b0SSuyog Pawar {
1829*c83a76b0SSuyog Pawar /* 4 unique results */
1830*c83a76b0SSuyog Pawar ps_search_node_tr =
1831*c83a76b0SSuyog Pawar &ps_search_results
1832*c83a76b0SSuyog Pawar ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1833*c83a76b0SSuyog Pawar ps_search_node_bl =
1834*c83a76b0SSuyog Pawar &ps_search_results
1835*c83a76b0SSuyog Pawar ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2][1];
1836*c83a76b0SSuyog Pawar ps_search_node_br =
1837*c83a76b0SSuyog Pawar &ps_search_results
1838*c83a76b0SSuyog Pawar ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3][1];
1839*c83a76b0SSuyog Pawar }
1840*c83a76b0SSuyog Pawar
1841*c83a76b0SSuyog Pawar if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
1842*c83a76b0SSuyog Pawar ps_search_node_tl++;
1843*c83a76b0SSuyog Pawar if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
1844*c83a76b0SSuyog Pawar ps_search_node_tr++;
1845*c83a76b0SSuyog Pawar if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
1846*c83a76b0SSuyog Pawar ps_search_node_bl++;
1847*c83a76b0SSuyog Pawar if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
1848*c83a76b0SSuyog Pawar ps_search_node_br++;
1849*c83a76b0SSuyog Pawar
1850*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
1851*c83a76b0SSuyog Pawar ps_mv1++;
1852*c83a76b0SSuyog Pawar pi1_ref_idx1++;
1853*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
1854*c83a76b0SSuyog Pawar ps_mv2++;
1855*c83a76b0SSuyog Pawar pi1_ref_idx2++;
1856*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
1857*c83a76b0SSuyog Pawar ps_mv3++;
1858*c83a76b0SSuyog Pawar pi1_ref_idx3++;
1859*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
1860*c83a76b0SSuyog Pawar ps_mv4++;
1861*c83a76b0SSuyog Pawar pi1_ref_idx4++;
1862*c83a76b0SSuyog Pawar }
1863*c83a76b0SSuyog Pawar }
1864*c83a76b0SSuyog Pawar }
1865*c83a76b0SSuyog Pawar }
1866*c83a76b0SSuyog Pawar
1867*c83a76b0SSuyog Pawar /**
1868*c83a76b0SSuyog Pawar ********************************************************************************
1869*c83a76b0SSuyog Pawar * @fn hme_update_mv_bank_noencode(search_results_t *ps_search_results,
1870*c83a76b0SSuyog Pawar * layer_mv_t *ps_layer_mv,
1871*c83a76b0SSuyog Pawar * S32 i4_search_blk_x,
1872*c83a76b0SSuyog Pawar * S32 i4_search_blk_y,
1873*c83a76b0SSuyog Pawar * mvbank_update_prms_t *ps_prms)
1874*c83a76b0SSuyog Pawar *
1875*c83a76b0SSuyog Pawar * @brief Updates the mv bank in case there is no further encodign to be done
1876*c83a76b0SSuyog Pawar *
1877*c83a76b0SSuyog Pawar * @param[in] ps_search_results: contains results for the block just searched
1878*c83a76b0SSuyog Pawar *
1879*c83a76b0SSuyog Pawar * @param[in,out] ps_layer_mv : Has pointer to mv bank amongst other things
1880*c83a76b0SSuyog Pawar *
1881*c83a76b0SSuyog Pawar * @param[in] i4_search_blk_x : col num of blk being searched
1882*c83a76b0SSuyog Pawar *
1883*c83a76b0SSuyog Pawar * @param[in] i4_search_blk_y : row num of blk being searched
1884*c83a76b0SSuyog Pawar *
1885*c83a76b0SSuyog Pawar * @param[in] ps_prms : contains certain parameters which govern how updatedone
1886*c83a76b0SSuyog Pawar *
1887*c83a76b0SSuyog Pawar * @return None
1888*c83a76b0SSuyog Pawar ********************************************************************************
1889*c83a76b0SSuyog Pawar */
1890*c83a76b0SSuyog Pawar
hme_update_mv_bank_in_l1_me(search_results_t * ps_search_results,layer_mv_t * ps_layer_mv,S32 i4_search_blk_x,S32 i4_search_blk_y,mvbank_update_prms_t * ps_prms)1891*c83a76b0SSuyog Pawar void hme_update_mv_bank_in_l1_me(
1892*c83a76b0SSuyog Pawar search_results_t *ps_search_results,
1893*c83a76b0SSuyog Pawar layer_mv_t *ps_layer_mv,
1894*c83a76b0SSuyog Pawar S32 i4_search_blk_x,
1895*c83a76b0SSuyog Pawar S32 i4_search_blk_y,
1896*c83a76b0SSuyog Pawar mvbank_update_prms_t *ps_prms)
1897*c83a76b0SSuyog Pawar {
1898*c83a76b0SSuyog Pawar hme_mv_t *ps_mv;
1899*c83a76b0SSuyog Pawar hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1900*c83a76b0SSuyog Pawar S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1901*c83a76b0SSuyog Pawar S32 i4_blk_x, i4_blk_y, i4_offset;
1902*c83a76b0SSuyog Pawar S32 i4_j, i4_ref_id;
1903*c83a76b0SSuyog Pawar search_node_t *ps_search_node;
1904*c83a76b0SSuyog Pawar search_node_t *ps_search_node_8x8, *ps_search_node_4x4;
1905*c83a76b0SSuyog Pawar
1906*c83a76b0SSuyog Pawar i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1907*c83a76b0SSuyog Pawar i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1908*c83a76b0SSuyog Pawar i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1909*c83a76b0SSuyog Pawar
1910*c83a76b0SSuyog Pawar i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1911*c83a76b0SSuyog Pawar
1912*c83a76b0SSuyog Pawar /* Identify the correct offset in the mvbank and the reference id buf */
1913*c83a76b0SSuyog Pawar ps_mv = ps_layer_mv->ps_mv + i4_offset;
1914*c83a76b0SSuyog Pawar pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1915*c83a76b0SSuyog Pawar
1916*c83a76b0SSuyog Pawar /*************************************************************************/
1917*c83a76b0SSuyog Pawar /* Supposing we store the mvs in the same blk size as we searched (e.g. */
1918*c83a76b0SSuyog Pawar /* we searched 8x8 blks and store results for 8x8 blks), then we can */
1919*c83a76b0SSuyog Pawar /* do a straightforward single update of results. This will have a 1-1 */
1920*c83a76b0SSuyog Pawar /* correspondence. */
1921*c83a76b0SSuyog Pawar /*************************************************************************/
1922*c83a76b0SSuyog Pawar if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1923*c83a76b0SSuyog Pawar {
1924*c83a76b0SSuyog Pawar search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2];
1925*c83a76b0SSuyog Pawar
1926*c83a76b0SSuyog Pawar hme_mv_t *ps_mv_l0_root = ps_mv;
1927*c83a76b0SSuyog Pawar hme_mv_t *ps_mv_l1_root =
1928*c83a76b0SSuyog Pawar ps_mv + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1929*c83a76b0SSuyog Pawar
1930*c83a76b0SSuyog Pawar U32 u4_num_l0_results_updated = 0;
1931*c83a76b0SSuyog Pawar U32 u4_num_l1_results_updated = 0;
1932*c83a76b0SSuyog Pawar
1933*c83a76b0SSuyog Pawar S08 *pi1_ref_idx_l0_root = pi1_ref_idx;
1934*c83a76b0SSuyog Pawar S08 *pi1_ref_idx_l1_root =
1935*c83a76b0SSuyog Pawar pi1_ref_idx_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1936*c83a76b0SSuyog Pawar
1937*c83a76b0SSuyog Pawar for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1938*c83a76b0SSuyog Pawar {
1939*c83a76b0SSuyog Pawar U32 *pu4_num_results_updated;
1940*c83a76b0SSuyog Pawar search_node_t **pps_result_nodes;
1941*c83a76b0SSuyog Pawar
1942*c83a76b0SSuyog Pawar U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
1943*c83a76b0SSuyog Pawar
1944*c83a76b0SSuyog Pawar if(u1_pred_dir_of_cur_ref)
1945*c83a76b0SSuyog Pawar {
1946*c83a76b0SSuyog Pawar pu4_num_results_updated = &u4_num_l1_results_updated;
1947*c83a76b0SSuyog Pawar pps_result_nodes = &aps_result_nodes_sorted[1][0];
1948*c83a76b0SSuyog Pawar }
1949*c83a76b0SSuyog Pawar else
1950*c83a76b0SSuyog Pawar {
1951*c83a76b0SSuyog Pawar pu4_num_results_updated = &u4_num_l0_results_updated;
1952*c83a76b0SSuyog Pawar pps_result_nodes = &aps_result_nodes_sorted[0][0];
1953*c83a76b0SSuyog Pawar }
1954*c83a76b0SSuyog Pawar
1955*c83a76b0SSuyog Pawar ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1956*c83a76b0SSuyog Pawar
1957*c83a76b0SSuyog Pawar for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1958*c83a76b0SSuyog Pawar {
1959*c83a76b0SSuyog Pawar hme_add_new_node_to_a_sorted_array(
1960*c83a76b0SSuyog Pawar &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0);
1961*c83a76b0SSuyog Pawar
1962*c83a76b0SSuyog Pawar ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id);
1963*c83a76b0SSuyog Pawar (*pu4_num_results_updated)++;
1964*c83a76b0SSuyog Pawar }
1965*c83a76b0SSuyog Pawar }
1966*c83a76b0SSuyog Pawar
1967*c83a76b0SSuyog Pawar for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
1968*c83a76b0SSuyog Pawar {
1969*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(
1970*c83a76b0SSuyog Pawar &ps_mv_l0_root[i4_j],
1971*c83a76b0SSuyog Pawar &pi1_ref_idx_l0_root[i4_j],
1972*c83a76b0SSuyog Pawar aps_result_nodes_sorted[0][i4_j],
1973*c83a76b0SSuyog Pawar 0);
1974*c83a76b0SSuyog Pawar }
1975*c83a76b0SSuyog Pawar
1976*c83a76b0SSuyog Pawar for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
1977*c83a76b0SSuyog Pawar {
1978*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(
1979*c83a76b0SSuyog Pawar &ps_mv_l1_root[i4_j],
1980*c83a76b0SSuyog Pawar &pi1_ref_idx_l1_root[i4_j],
1981*c83a76b0SSuyog Pawar aps_result_nodes_sorted[1][i4_j],
1982*c83a76b0SSuyog Pawar 0);
1983*c83a76b0SSuyog Pawar }
1984*c83a76b0SSuyog Pawar
1985*c83a76b0SSuyog Pawar return;
1986*c83a76b0SSuyog Pawar }
1987*c83a76b0SSuyog Pawar
1988*c83a76b0SSuyog Pawar /*************************************************************************/
1989*c83a76b0SSuyog Pawar /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1990*c83a76b0SSuyog Pawar /* case, we need to have NxN partitions enabled in search. */
1991*c83a76b0SSuyog Pawar /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1992*c83a76b0SSuyog Pawar /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1993*c83a76b0SSuyog Pawar /*************************************************************************/
1994*c83a76b0SSuyog Pawar ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1995*c83a76b0SSuyog Pawar ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1996*c83a76b0SSuyog Pawar ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1997*c83a76b0SSuyog Pawar
1998*c83a76b0SSuyog Pawar /*************************************************************************/
1999*c83a76b0SSuyog Pawar /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
2000*c83a76b0SSuyog Pawar /* hence the below check. */
2001*c83a76b0SSuyog Pawar /*************************************************************************/
2002*c83a76b0SSuyog Pawar ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
2003*c83a76b0SSuyog Pawar
2004*c83a76b0SSuyog Pawar ps_mv1 = ps_mv;
2005*c83a76b0SSuyog Pawar ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
2006*c83a76b0SSuyog Pawar ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
2007*c83a76b0SSuyog Pawar ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
2008*c83a76b0SSuyog Pawar pi1_ref_idx1 = pi1_ref_idx;
2009*c83a76b0SSuyog Pawar pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
2010*c83a76b0SSuyog Pawar pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
2011*c83a76b0SSuyog Pawar pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
2012*c83a76b0SSuyog Pawar
2013*c83a76b0SSuyog Pawar {
2014*c83a76b0SSuyog Pawar /* max ref frames * max results per partition * number of partitions (4x4, 8x8) */
2015*c83a76b0SSuyog Pawar search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * MAX_RESULTS_PER_PART * 2];
2016*c83a76b0SSuyog Pawar U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * MAX_RESULTS_PER_PART * 2];
2017*c83a76b0SSuyog Pawar
2018*c83a76b0SSuyog Pawar S32 i;
2019*c83a76b0SSuyog Pawar
2020*c83a76b0SSuyog Pawar hme_mv_t *ps_mv1_l0_root = ps_mv1;
2021*c83a76b0SSuyog Pawar hme_mv_t *ps_mv1_l1_root =
2022*c83a76b0SSuyog Pawar ps_mv1 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2023*c83a76b0SSuyog Pawar hme_mv_t *ps_mv2_l0_root = ps_mv2;
2024*c83a76b0SSuyog Pawar hme_mv_t *ps_mv2_l1_root =
2025*c83a76b0SSuyog Pawar ps_mv2 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2026*c83a76b0SSuyog Pawar hme_mv_t *ps_mv3_l0_root = ps_mv3;
2027*c83a76b0SSuyog Pawar hme_mv_t *ps_mv3_l1_root =
2028*c83a76b0SSuyog Pawar ps_mv3 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2029*c83a76b0SSuyog Pawar hme_mv_t *ps_mv4_l0_root = ps_mv4;
2030*c83a76b0SSuyog Pawar hme_mv_t *ps_mv4_l1_root =
2031*c83a76b0SSuyog Pawar ps_mv4 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2032*c83a76b0SSuyog Pawar
2033*c83a76b0SSuyog Pawar U32 u4_num_l0_results_updated = 0;
2034*c83a76b0SSuyog Pawar U32 u4_num_l1_results_updated = 0;
2035*c83a76b0SSuyog Pawar
2036*c83a76b0SSuyog Pawar S08 *pi1_ref_idx1_l0_root = pi1_ref_idx1;
2037*c83a76b0SSuyog Pawar S08 *pi1_ref_idx1_l1_root = pi1_ref_idx1_l0_root + (ps_prms->i4_num_active_ref_l0 *
2038*c83a76b0SSuyog Pawar ps_layer_mv->i4_num_mvs_per_ref);
2039*c83a76b0SSuyog Pawar S08 *pi1_ref_idx2_l0_root = pi1_ref_idx2;
2040*c83a76b0SSuyog Pawar S08 *pi1_ref_idx2_l1_root = pi1_ref_idx2_l0_root + (ps_prms->i4_num_active_ref_l0 *
2041*c83a76b0SSuyog Pawar ps_layer_mv->i4_num_mvs_per_ref);
2042*c83a76b0SSuyog Pawar S08 *pi1_ref_idx3_l0_root = pi1_ref_idx3;
2043*c83a76b0SSuyog Pawar S08 *pi1_ref_idx3_l1_root = pi1_ref_idx3_l0_root + (ps_prms->i4_num_active_ref_l0 *
2044*c83a76b0SSuyog Pawar ps_layer_mv->i4_num_mvs_per_ref);
2045*c83a76b0SSuyog Pawar S08 *pi1_ref_idx4_l0_root = pi1_ref_idx4;
2046*c83a76b0SSuyog Pawar S08 *pi1_ref_idx4_l1_root = pi1_ref_idx4_l0_root + (ps_prms->i4_num_active_ref_l0 *
2047*c83a76b0SSuyog Pawar ps_layer_mv->i4_num_mvs_per_ref);
2048*c83a76b0SSuyog Pawar
2049*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
2050*c83a76b0SSuyog Pawar {
2051*c83a76b0SSuyog Pawar hme_mv_t *ps_mv_l0_root;
2052*c83a76b0SSuyog Pawar hme_mv_t *ps_mv_l1_root;
2053*c83a76b0SSuyog Pawar
2054*c83a76b0SSuyog Pawar S08 *pi1_ref_idx_l0_root;
2055*c83a76b0SSuyog Pawar S08 *pi1_ref_idx_l1_root;
2056*c83a76b0SSuyog Pawar
2057*c83a76b0SSuyog Pawar for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++)
2058*c83a76b0SSuyog Pawar {
2059*c83a76b0SSuyog Pawar U32 *pu4_num_results_updated;
2060*c83a76b0SSuyog Pawar search_node_t **pps_result_nodes;
2061*c83a76b0SSuyog Pawar U08 *pu1_cost_shifts_for_sorted_node;
2062*c83a76b0SSuyog Pawar
2063*c83a76b0SSuyog Pawar U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
2064*c83a76b0SSuyog Pawar
2065*c83a76b0SSuyog Pawar if(u1_pred_dir_of_cur_ref)
2066*c83a76b0SSuyog Pawar {
2067*c83a76b0SSuyog Pawar pu4_num_results_updated = &u4_num_l1_results_updated;
2068*c83a76b0SSuyog Pawar pps_result_nodes = &aps_result_nodes_sorted[1][0];
2069*c83a76b0SSuyog Pawar pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2070*c83a76b0SSuyog Pawar }
2071*c83a76b0SSuyog Pawar else
2072*c83a76b0SSuyog Pawar {
2073*c83a76b0SSuyog Pawar pu4_num_results_updated = &u4_num_l0_results_updated;
2074*c83a76b0SSuyog Pawar pps_result_nodes = &aps_result_nodes_sorted[0][0];
2075*c83a76b0SSuyog Pawar pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2076*c83a76b0SSuyog Pawar }
2077*c83a76b0SSuyog Pawar
2078*c83a76b0SSuyog Pawar ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
2079*c83a76b0SSuyog Pawar
2080*c83a76b0SSuyog Pawar ps_search_node_4x4 =
2081*c83a76b0SSuyog Pawar ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i];
2082*c83a76b0SSuyog Pawar
2083*c83a76b0SSuyog Pawar for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
2084*c83a76b0SSuyog Pawar {
2085*c83a76b0SSuyog Pawar hme_add_new_node_to_a_sorted_array(
2086*c83a76b0SSuyog Pawar &ps_search_node_4x4[i4_j],
2087*c83a76b0SSuyog Pawar pps_result_nodes,
2088*c83a76b0SSuyog Pawar pu1_cost_shifts_for_sorted_node,
2089*c83a76b0SSuyog Pawar *pu4_num_results_updated,
2090*c83a76b0SSuyog Pawar 0);
2091*c83a76b0SSuyog Pawar
2092*c83a76b0SSuyog Pawar (*pu4_num_results_updated)++;
2093*c83a76b0SSuyog Pawar
2094*c83a76b0SSuyog Pawar hme_add_new_node_to_a_sorted_array(
2095*c83a76b0SSuyog Pawar &ps_search_node_8x8[i4_j],
2096*c83a76b0SSuyog Pawar pps_result_nodes,
2097*c83a76b0SSuyog Pawar pu1_cost_shifts_for_sorted_node,
2098*c83a76b0SSuyog Pawar *pu4_num_results_updated,
2099*c83a76b0SSuyog Pawar 2);
2100*c83a76b0SSuyog Pawar
2101*c83a76b0SSuyog Pawar (*pu4_num_results_updated)++;
2102*c83a76b0SSuyog Pawar }
2103*c83a76b0SSuyog Pawar }
2104*c83a76b0SSuyog Pawar
2105*c83a76b0SSuyog Pawar switch(i)
2106*c83a76b0SSuyog Pawar {
2107*c83a76b0SSuyog Pawar case 0:
2108*c83a76b0SSuyog Pawar {
2109*c83a76b0SSuyog Pawar ps_mv_l0_root = ps_mv1_l0_root;
2110*c83a76b0SSuyog Pawar ps_mv_l1_root = ps_mv1_l1_root;
2111*c83a76b0SSuyog Pawar
2112*c83a76b0SSuyog Pawar pi1_ref_idx_l0_root = pi1_ref_idx1_l0_root;
2113*c83a76b0SSuyog Pawar pi1_ref_idx_l1_root = pi1_ref_idx1_l1_root;
2114*c83a76b0SSuyog Pawar
2115*c83a76b0SSuyog Pawar break;
2116*c83a76b0SSuyog Pawar }
2117*c83a76b0SSuyog Pawar case 1:
2118*c83a76b0SSuyog Pawar {
2119*c83a76b0SSuyog Pawar ps_mv_l0_root = ps_mv2_l0_root;
2120*c83a76b0SSuyog Pawar ps_mv_l1_root = ps_mv2_l1_root;
2121*c83a76b0SSuyog Pawar
2122*c83a76b0SSuyog Pawar pi1_ref_idx_l0_root = pi1_ref_idx2_l0_root;
2123*c83a76b0SSuyog Pawar pi1_ref_idx_l1_root = pi1_ref_idx2_l1_root;
2124*c83a76b0SSuyog Pawar
2125*c83a76b0SSuyog Pawar break;
2126*c83a76b0SSuyog Pawar }
2127*c83a76b0SSuyog Pawar case 2:
2128*c83a76b0SSuyog Pawar {
2129*c83a76b0SSuyog Pawar ps_mv_l0_root = ps_mv3_l0_root;
2130*c83a76b0SSuyog Pawar ps_mv_l1_root = ps_mv3_l1_root;
2131*c83a76b0SSuyog Pawar
2132*c83a76b0SSuyog Pawar pi1_ref_idx_l0_root = pi1_ref_idx3_l0_root;
2133*c83a76b0SSuyog Pawar pi1_ref_idx_l1_root = pi1_ref_idx3_l1_root;
2134*c83a76b0SSuyog Pawar
2135*c83a76b0SSuyog Pawar break;
2136*c83a76b0SSuyog Pawar }
2137*c83a76b0SSuyog Pawar case 3:
2138*c83a76b0SSuyog Pawar {
2139*c83a76b0SSuyog Pawar ps_mv_l0_root = ps_mv4_l0_root;
2140*c83a76b0SSuyog Pawar ps_mv_l1_root = ps_mv4_l1_root;
2141*c83a76b0SSuyog Pawar
2142*c83a76b0SSuyog Pawar pi1_ref_idx_l0_root = pi1_ref_idx4_l0_root;
2143*c83a76b0SSuyog Pawar pi1_ref_idx_l1_root = pi1_ref_idx4_l1_root;
2144*c83a76b0SSuyog Pawar
2145*c83a76b0SSuyog Pawar break;
2146*c83a76b0SSuyog Pawar }
2147*c83a76b0SSuyog Pawar }
2148*c83a76b0SSuyog Pawar
2149*c83a76b0SSuyog Pawar u4_num_l0_results_updated =
2150*c83a76b0SSuyog Pawar MIN((S32)u4_num_l0_results_updated,
2151*c83a76b0SSuyog Pawar ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2152*c83a76b0SSuyog Pawar
2153*c83a76b0SSuyog Pawar u4_num_l1_results_updated =
2154*c83a76b0SSuyog Pawar MIN((S32)u4_num_l1_results_updated,
2155*c83a76b0SSuyog Pawar ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref);
2156*c83a76b0SSuyog Pawar
2157*c83a76b0SSuyog Pawar for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
2158*c83a76b0SSuyog Pawar {
2159*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(
2160*c83a76b0SSuyog Pawar &ps_mv_l0_root[i4_j],
2161*c83a76b0SSuyog Pawar &pi1_ref_idx_l0_root[i4_j],
2162*c83a76b0SSuyog Pawar aps_result_nodes_sorted[0][i4_j],
2163*c83a76b0SSuyog Pawar 0);
2164*c83a76b0SSuyog Pawar }
2165*c83a76b0SSuyog Pawar
2166*c83a76b0SSuyog Pawar for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
2167*c83a76b0SSuyog Pawar {
2168*c83a76b0SSuyog Pawar COPY_SEARCH_RESULT(
2169*c83a76b0SSuyog Pawar &ps_mv_l1_root[i4_j],
2170*c83a76b0SSuyog Pawar &pi1_ref_idx_l1_root[i4_j],
2171*c83a76b0SSuyog Pawar aps_result_nodes_sorted[1][i4_j],
2172*c83a76b0SSuyog Pawar 0);
2173*c83a76b0SSuyog Pawar }
2174*c83a76b0SSuyog Pawar }
2175*c83a76b0SSuyog Pawar }
2176*c83a76b0SSuyog Pawar }
2177*c83a76b0SSuyog Pawar
/**
******************************************************************************
 *  @brief Rescales one motion vector component projected from a different
 *         layer of the same picture (so no ref id related delta poc scaling
 *         is required). The component is multiplied by dim_cur and divided by
 *         dim_proj; half of dim_proj, carrying the sign of the component, is
 *         added before the division. Every argument is evaluated more than
 *         once, so pass expressions without side effects.
******************************************************************************
*/

#define SCALE_MV_COMP_RES(mv_comp, dim_cur, dim_proj)                                              \
    ((((mv_comp) * (dim_cur)) + ((SIGN((mv_comp)) * (dim_proj)) >> 1)) / (dim_proj))
2187*c83a76b0SSuyog Pawar /**
2188*c83a76b0SSuyog Pawar ********************************************************************************
2189*c83a76b0SSuyog Pawar * @fn hme_project_coloc_candt(search_node_t *ps_search_node,
2190*c83a76b0SSuyog Pawar * layer_ctxt_t *ps_curr_layer,
2191*c83a76b0SSuyog Pawar * layer_ctxt_t *ps_coarse_layer,
2192*c83a76b0SSuyog Pawar * S32 i4_pos_x,
2193*c83a76b0SSuyog Pawar * S32 i4_pos_y,
2194*c83a76b0SSuyog Pawar * S08 i1_ref_id,
2195*c83a76b0SSuyog Pawar * S08 i1_result_id)
2196*c83a76b0SSuyog Pawar *
2197*c83a76b0SSuyog Pawar * @brief From a coarser layer, projects a candidated situated at "colocated"
2198*c83a76b0SSuyog Pawar * position in the picture (e.g. given x, y it will be x/2, y/2 dyadic
2199*c83a76b0SSuyog Pawar *
2200*c83a76b0SSuyog Pawar * @param[out] ps_search_node : contains the projected result
2201*c83a76b0SSuyog Pawar *
2202*c83a76b0SSuyog Pawar * @param[in] ps_curr_layer : current layer context
2203*c83a76b0SSuyog Pawar *
2204*c83a76b0SSuyog Pawar * @param[in] ps_coarse_layer : coarser layer context
2205*c83a76b0SSuyog Pawar *
2206*c83a76b0SSuyog Pawar * @param[in] i4_pos_x : x Position where mv is required (w.r.t. curr layer)
2207*c83a76b0SSuyog Pawar *
2208*c83a76b0SSuyog Pawar * @param[in] i4_pos_y : y Position where mv is required (w.r.t. curr layer)
2209*c83a76b0SSuyog Pawar *
2210*c83a76b0SSuyog Pawar * @param[in] i1_ref_id : reference id for which the candidate required
2211*c83a76b0SSuyog Pawar *
2212*c83a76b0SSuyog Pawar * @param[in] i4_result_id : result id for which the candidate required
2213*c83a76b0SSuyog Pawar * (0 : best result, 1 : next best)
2214*c83a76b0SSuyog Pawar *
2215*c83a76b0SSuyog Pawar * @return None
2216*c83a76b0SSuyog Pawar ********************************************************************************
2217*c83a76b0SSuyog Pawar */
2218*c83a76b0SSuyog Pawar
hme_project_coloc_candt(search_node_t * ps_search_node,layer_ctxt_t * ps_curr_layer,layer_ctxt_t * ps_coarse_layer,S32 i4_pos_x,S32 i4_pos_y,S08 i1_ref_id,S32 i4_result_id)2219*c83a76b0SSuyog Pawar void hme_project_coloc_candt(
2220*c83a76b0SSuyog Pawar search_node_t *ps_search_node,
2221*c83a76b0SSuyog Pawar layer_ctxt_t *ps_curr_layer,
2222*c83a76b0SSuyog Pawar layer_ctxt_t *ps_coarse_layer,
2223*c83a76b0SSuyog Pawar S32 i4_pos_x,
2224*c83a76b0SSuyog Pawar S32 i4_pos_y,
2225*c83a76b0SSuyog Pawar S08 i1_ref_id,
2226*c83a76b0SSuyog Pawar S32 i4_result_id)
2227*c83a76b0SSuyog Pawar {
2228*c83a76b0SSuyog Pawar S32 wd_c, ht_c, wd_p, ht_p;
2229*c83a76b0SSuyog Pawar S32 blksize_p, blk_x, blk_y, i4_offset;
2230*c83a76b0SSuyog Pawar layer_mv_t *ps_layer_mvbank;
2231*c83a76b0SSuyog Pawar hme_mv_t *ps_mv;
2232*c83a76b0SSuyog Pawar S08 *pi1_ref_idx;
2233*c83a76b0SSuyog Pawar
2234*c83a76b0SSuyog Pawar /* Width and ht of current and prev layers */
2235*c83a76b0SSuyog Pawar wd_c = ps_curr_layer->i4_wd;
2236*c83a76b0SSuyog Pawar ht_c = ps_curr_layer->i4_ht;
2237*c83a76b0SSuyog Pawar wd_p = ps_coarse_layer->i4_wd;
2238*c83a76b0SSuyog Pawar ht_p = ps_coarse_layer->i4_ht;
2239*c83a76b0SSuyog Pawar
2240*c83a76b0SSuyog Pawar ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2241*c83a76b0SSuyog Pawar blksize_p = (S32)gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
2242*c83a76b0SSuyog Pawar
2243*c83a76b0SSuyog Pawar /* Safety check to avoid uninitialized access across temporal layers */
2244*c83a76b0SSuyog Pawar i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2245*c83a76b0SSuyog Pawar i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2246*c83a76b0SSuyog Pawar
2247*c83a76b0SSuyog Pawar /* Project the positions to prev layer */
2248*c83a76b0SSuyog Pawar /* TODO: convert these to scale factors at pic level */
2249*c83a76b0SSuyog Pawar blk_x = (i4_pos_x * wd_p) / (wd_c * blksize_p);
2250*c83a76b0SSuyog Pawar blk_y = (i4_pos_y * ht_p) / (ht_c * blksize_p);
2251*c83a76b0SSuyog Pawar
2252*c83a76b0SSuyog Pawar /* Pick up the mvs from the location */
2253*c83a76b0SSuyog Pawar i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2254*c83a76b0SSuyog Pawar i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2255*c83a76b0SSuyog Pawar
2256*c83a76b0SSuyog Pawar ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2257*c83a76b0SSuyog Pawar pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2258*c83a76b0SSuyog Pawar
2259*c83a76b0SSuyog Pawar ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2260*c83a76b0SSuyog Pawar pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2261*c83a76b0SSuyog Pawar
2262*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_x, wd_c, wd_p);
2263*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_y, ht_c, ht_p);
2264*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2265*c83a76b0SSuyog Pawar ps_search_node->u1_subpel_done = 0;
2266*c83a76b0SSuyog Pawar if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2267*c83a76b0SSuyog Pawar {
2268*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_id;
2269*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
2270*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
2271*c83a76b0SSuyog Pawar }
2272*c83a76b0SSuyog Pawar }
2273*c83a76b0SSuyog Pawar
2274*c83a76b0SSuyog Pawar /**
2275*c83a76b0SSuyog Pawar ********************************************************************************
2276*c83a76b0SSuyog Pawar * @fn hme_project_coloc_candt_dyadic(search_node_t *ps_search_node,
2277*c83a76b0SSuyog Pawar * layer_ctxt_t *ps_curr_layer,
2278*c83a76b0SSuyog Pawar * layer_ctxt_t *ps_coarse_layer,
2279*c83a76b0SSuyog Pawar * S32 i4_pos_x,
2280*c83a76b0SSuyog Pawar * S32 i4_pos_y,
2281*c83a76b0SSuyog Pawar * S08 i1_ref_id,
2282*c83a76b0SSuyog Pawar * S32 i4_result_id)
2283*c83a76b0SSuyog Pawar *
2284*c83a76b0SSuyog Pawar * @brief From a coarser layer, projects a candidate situated at "colocated"
2285*c83a76b0SSuyog Pawar * position in the picture when the ratios are dyadic
2286*c83a76b0SSuyog Pawar *
2287*c83a76b0SSuyog Pawar * @param[out] ps_search_node : contains the projected result
2288*c83a76b0SSuyog Pawar *
2289*c83a76b0SSuyog Pawar * @param[in] ps_curr_layer : current layer context
2290*c83a76b0SSuyog Pawar *
2291*c83a76b0SSuyog Pawar * @param[in] ps_coarse_layer : coarser layer context
2292*c83a76b0SSuyog Pawar *
2293*c83a76b0SSuyog Pawar * @param[in] i4_pos_x : x Position where mv is required (w.r.t. curr layer)
2294*c83a76b0SSuyog Pawar *
2295*c83a76b0SSuyog Pawar * @param[in] i4_pos_y : y Position where mv is required (w.r.t. curr layer)
2296*c83a76b0SSuyog Pawar *
2297*c83a76b0SSuyog Pawar * @param[in] i1_ref_id : reference id for which the candidate required
2298*c83a76b0SSuyog Pawar *
2299*c83a76b0SSuyog Pawar * @param[in] i4_result_id : result id for which the candidate required
2300*c83a76b0SSuyog Pawar * (0 : best result, 1 : next best)
2301*c83a76b0SSuyog Pawar *
2302*c83a76b0SSuyog Pawar * @return None
2303*c83a76b0SSuyog Pawar ********************************************************************************
2304*c83a76b0SSuyog Pawar */
2305*c83a76b0SSuyog Pawar
hme_project_coloc_candt_dyadic(search_node_t * ps_search_node,layer_ctxt_t * ps_curr_layer,layer_ctxt_t * ps_coarse_layer,S32 i4_pos_x,S32 i4_pos_y,S08 i1_ref_id,S32 i4_result_id)2306*c83a76b0SSuyog Pawar void hme_project_coloc_candt_dyadic(
2307*c83a76b0SSuyog Pawar search_node_t *ps_search_node,
2308*c83a76b0SSuyog Pawar layer_ctxt_t *ps_curr_layer,
2309*c83a76b0SSuyog Pawar layer_ctxt_t *ps_coarse_layer,
2310*c83a76b0SSuyog Pawar S32 i4_pos_x,
2311*c83a76b0SSuyog Pawar S32 i4_pos_y,
2312*c83a76b0SSuyog Pawar S08 i1_ref_id,
2313*c83a76b0SSuyog Pawar S32 i4_result_id)
2314*c83a76b0SSuyog Pawar {
2315*c83a76b0SSuyog Pawar S32 wd_c, ht_c, wd_p, ht_p;
2316*c83a76b0SSuyog Pawar S32 blksize_p, blk_x, blk_y, i4_offset;
2317*c83a76b0SSuyog Pawar layer_mv_t *ps_layer_mvbank;
2318*c83a76b0SSuyog Pawar hme_mv_t *ps_mv;
2319*c83a76b0SSuyog Pawar S08 *pi1_ref_idx;
2320*c83a76b0SSuyog Pawar
2321*c83a76b0SSuyog Pawar /* Width and ht of current and prev layers */
2322*c83a76b0SSuyog Pawar wd_c = ps_curr_layer->i4_wd;
2323*c83a76b0SSuyog Pawar ht_c = ps_curr_layer->i4_ht;
2324*c83a76b0SSuyog Pawar wd_p = ps_coarse_layer->i4_wd;
2325*c83a76b0SSuyog Pawar ht_p = ps_coarse_layer->i4_ht;
2326*c83a76b0SSuyog Pawar
2327*c83a76b0SSuyog Pawar ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2328*c83a76b0SSuyog Pawar /* blksize_p = log2(wd) + 1 */
2329*c83a76b0SSuyog Pawar blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
2330*c83a76b0SSuyog Pawar
2331*c83a76b0SSuyog Pawar /* ASSERT for valid sizes */
2332*c83a76b0SSuyog Pawar ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
2333*c83a76b0SSuyog Pawar
2334*c83a76b0SSuyog Pawar /* Safety check to avoid uninitialized access across temporal layers */
2335*c83a76b0SSuyog Pawar i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2336*c83a76b0SSuyog Pawar i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2337*c83a76b0SSuyog Pawar
2338*c83a76b0SSuyog Pawar /* Project the positions to prev layer */
2339*c83a76b0SSuyog Pawar /* TODO: convert these to scale factors at pic level */
2340*c83a76b0SSuyog Pawar blk_x = i4_pos_x >> blksize_p; // (2 * blksize_p);
2341*c83a76b0SSuyog Pawar blk_y = i4_pos_y >> blksize_p; // (2 * blksize_p);
2342*c83a76b0SSuyog Pawar
2343*c83a76b0SSuyog Pawar /* Pick up the mvs from the location */
2344*c83a76b0SSuyog Pawar i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2345*c83a76b0SSuyog Pawar i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2346*c83a76b0SSuyog Pawar
2347*c83a76b0SSuyog Pawar ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2348*c83a76b0SSuyog Pawar pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2349*c83a76b0SSuyog Pawar
2350*c83a76b0SSuyog Pawar ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2351*c83a76b0SSuyog Pawar pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2352*c83a76b0SSuyog Pawar
2353*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
2354*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
2355*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2356*c83a76b0SSuyog Pawar if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2357*c83a76b0SSuyog Pawar {
2358*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_id;
2359*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
2360*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
2361*c83a76b0SSuyog Pawar }
2362*c83a76b0SSuyog Pawar }
2363*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     void hme_project_coloc_candt_dyadic_implicit(
*                                              search_node_t *ps_search_node,
*                                              layer_ctxt_t *ps_curr_layer,
*                                              layer_ctxt_t *ps_coarse_layer,
*                                              S32 i4_pos_x,
*                                              S32 i4_pos_y,
*                                              S32 i4_num_act_ref_l0,
*                                              U08 u1_pred_dir,
*                                              U08 u1_default_ref_id,
*                                              S32 i4_result_id)
*
*  @brief  Dyadic projection of the colocated coarse layer candidate where the
*          section of the mv bank is chosen by prediction direction rather
*          than by an explicit reference id. The mv read from the coarse layer
*          mv bank is doubled.
*
*  @param[out] ps_search_node : receives the projected mv and ref idx. If the
*                 stored ref idx is negative, or the doubled x component equals
*                 INTRA_MV, a zero mv with ref idx = u1_default_ref_id is
*                 written instead.
*
*  @param[in]  ps_curr_layer : current layer context (only i4_wd and i4_ht
*                 are read, to clip the position)
*
*  @param[in]  ps_coarse_layer : coarser layer context whose mv bank is read
*
*  @param[in]  i4_pos_x : x Position where mv is required (w.r.t. curr layer)
*
*  @param[in]  i4_pos_y : y Position where mv is required (w.r.t. curr layer)
*
*  @param[in]  i4_num_act_ref_l0 : number of per-reference sections skipped in
*                 the mv bank when u1_pred_dir is 1
*
*  @param[in]  u1_pred_dir : when 1, the bank pointers are advanced by
*                 i4_num_act_ref_l0 sections; otherwise the first section
*                 is read
*
*  @param[in]  u1_default_ref_id : ref idx written when the stored entry is
*                 unusable
*
*  @param[in]  i4_result_id : result id for which the candidate is required
*
*  @return None
********************************************************************************
*/
void hme_project_coloc_candt_dyadic_implicit(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S32 i4_num_act_ref_l0,
    U08 u1_pred_dir,
    U08 u1_default_ref_id,
    S32 i4_result_id)
{
    S32 wd_c, ht_c;
    S32 blksize_p, blk_x, blk_y, i4_offset;
    layer_mv_t *ps_layer_mvbank;
    hme_mv_t *ps_mv;
    S08 *pi1_ref_idx;

    /* Width and ht of the current layer (used only for position clipping) */
    wd_c = ps_curr_layer->i4_wd;
    ht_c = ps_curr_layer->i4_ht;

    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
    /* Shift looked up from the coarse layer's block size */
    blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];

    /* ASSERT for valid sizes */
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));

    /* Safety check to avoid uninitialized access across temporal layers */
    /* NOTE(review): the upper bound subtracts the shift value, not a    */
    /* block dimension; kept as is, confirm this is intended.            */
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));

    /* Project the positions to prev layer */
    /* TODO: convert these to scale factors at pic level */
    blk_x = i4_pos_x >> blksize_p;
    blk_y = i4_pos_y >> blksize_p;

    /* Pick up the mvs from the location */
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);

    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

    /* For pred dir 1, skip over the first i4_num_act_ref_l0 sections */
    if(u1_pred_dir == 1)
    {
        ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
        pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
    }

    /* Dyadic projection: double the coarse layer mv. Multiplication is   */
    /* used instead of "<< 1" because left shifting a negative signed     */
    /* value is undefined behaviour in C; the result is the same.         */
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x * 2;
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y * 2;
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];

    /* Unusable entry: fall back to a zero mv on the default reference */
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
    {
        ps_search_node->i1_ref_idx = u1_default_ref_id;
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
    }
}
2424*c83a76b0SSuyog Pawar
/* Scales the four range limits of prm2 by 2^shift and stores them in prm1. */
/* prm1 and prm2 are objects (not pointers) exposing i2_min_x, i2_max_x,    */
/* i2_min_y and i2_max_y. Arguments are parenthesised so that expression    */
/* arguments expand correctly, and scaling is done by multiplication since  */
/* left shifting a negative limit is undefined behaviour in C.              */
/* The brace form is kept so existing call sites compile unchanged.         */
#define SCALE_RANGE_PRMS(prm1, prm2, shift)                                    \
    {                                                                          \
        (prm1).i2_min_x = (prm2).i2_min_x * (1 << (shift));                    \
        (prm1).i2_max_x = (prm2).i2_max_x * (1 << (shift));                    \
        (prm1).i2_min_y = (prm2).i2_min_y * (1 << (shift));                    \
        (prm1).i2_max_y = (prm2).i2_max_y * (1 << (shift));                    \
    }
2432*c83a76b0SSuyog Pawar
/* Pointer flavour of the range scaling macro: scales the four range limits */
/* pointed to by prm2 by 2^shift and stores them through prm1. Arguments    */
/* are parenthesised so that expressions such as "&x" expand correctly, and */
/* scaling is done by multiplication since left shifting a negative limit   */
/* is undefined behaviour in C.                                             */
/* The brace form is kept so existing call sites compile unchanged.         */
#define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift)                           \
    {                                                                          \
        (prm1)->i2_min_x = (prm2)->i2_min_x * (1 << (shift));                  \
        (prm1)->i2_max_x = (prm2)->i2_max_x * (1 << (shift));                  \
        (prm1)->i2_min_y = (prm2)->i2_min_y * (1 << (shift));                  \
        (prm1)->i2_max_y = (prm2)->i2_max_y * (1 << (shift));                  \
    }
2440*c83a76b0SSuyog Pawar
2441*c83a76b0SSuyog Pawar /**
2442*c83a76b0SSuyog Pawar ********************************************************************************
2443*c83a76b0SSuyog Pawar * @fn void hme_refine_frm_init(me_ctxt_t *ps_ctxt,
2444*c83a76b0SSuyog Pawar * refine_layer_prms_t *ps_refine_prms)
2445*c83a76b0SSuyog Pawar *
2446*c83a76b0SSuyog Pawar * @brief Frame init of refinemnet layers in ME
2447*c83a76b0SSuyog Pawar *
2448*c83a76b0SSuyog Pawar * @param[in,out] ps_ctxt: ME Handle
2449*c83a76b0SSuyog Pawar *
2450*c83a76b0SSuyog Pawar * @param[in] ps_refine_prms : refinement layer prms
2451*c83a76b0SSuyog Pawar *
2452*c83a76b0SSuyog Pawar * @return None
2453*c83a76b0SSuyog Pawar ********************************************************************************
2454*c83a76b0SSuyog Pawar */
hme_refine_frm_init(layer_ctxt_t * ps_curr_layer,refine_prms_t * ps_refine_prms,layer_ctxt_t * ps_coarse_layer)2455*c83a76b0SSuyog Pawar void hme_refine_frm_init(
2456*c83a76b0SSuyog Pawar layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
2457*c83a76b0SSuyog Pawar {
2458*c83a76b0SSuyog Pawar /* local variables */
2459*c83a76b0SSuyog Pawar BLK_SIZE_T e_result_blk_size = BLK_8x8;
2460*c83a76b0SSuyog Pawar S32 i4_num_ref_fpel, i4_num_ref_prev_layer;
2461*c83a76b0SSuyog Pawar
2462*c83a76b0SSuyog Pawar i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
2463*c83a76b0SSuyog Pawar
2464*c83a76b0SSuyog Pawar if(ps_refine_prms->explicit_ref)
2465*c83a76b0SSuyog Pawar {
2466*c83a76b0SSuyog Pawar i4_num_ref_fpel = i4_num_ref_prev_layer;
2467*c83a76b0SSuyog Pawar }
2468*c83a76b0SSuyog Pawar else
2469*c83a76b0SSuyog Pawar {
2470*c83a76b0SSuyog Pawar i4_num_ref_fpel = 2;
2471*c83a76b0SSuyog Pawar }
2472*c83a76b0SSuyog Pawar
2473*c83a76b0SSuyog Pawar if(ps_refine_prms->i4_enable_4x4_part)
2474*c83a76b0SSuyog Pawar {
2475*c83a76b0SSuyog Pawar e_result_blk_size = BLK_4x4;
2476*c83a76b0SSuyog Pawar }
2477*c83a76b0SSuyog Pawar
2478*c83a76b0SSuyog Pawar i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
2479*c83a76b0SSuyog Pawar
2480*c83a76b0SSuyog Pawar hme_init_mv_bank(
2481*c83a76b0SSuyog Pawar ps_curr_layer,
2482*c83a76b0SSuyog Pawar e_result_blk_size,
2483*c83a76b0SSuyog Pawar i4_num_ref_fpel,
2484*c83a76b0SSuyog Pawar ps_refine_prms->i4_num_mvbank_results,
2485*c83a76b0SSuyog Pawar ps_refine_prms->i4_layer_id > 0 ? 0 : 1);
2486*c83a76b0SSuyog Pawar }
2487*c83a76b0SSuyog Pawar
2488*c83a76b0SSuyog Pawar #if 1 //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION
2489*c83a76b0SSuyog Pawar /**
2490*c83a76b0SSuyog Pawar ********************************************************************************
2491*c83a76b0SSuyog Pawar * @fn void hme_init_clusters_16x16
2492*c83a76b0SSuyog Pawar * (
2493*c83a76b0SSuyog Pawar * cluster_16x16_blk_t *ps_cluster_blk_16x16
2494*c83a76b0SSuyog Pawar * )
2495*c83a76b0SSuyog Pawar *
2496*c83a76b0SSuyog Pawar * @brief Intialisations for the structs used in clustering algorithm
2497*c83a76b0SSuyog Pawar *
2498*c83a76b0SSuyog Pawar * @param[in/out] ps_cluster_blk_16x16: pointer to structure containing clusters
2499*c83a76b0SSuyog Pawar * of 16x16 block
2500*c83a76b0SSuyog Pawar *
2501*c83a76b0SSuyog Pawar * @return None
2502*c83a76b0SSuyog Pawar ********************************************************************************
2503*c83a76b0SSuyog Pawar */
2504*c83a76b0SSuyog Pawar static __inline void
hme_init_clusters_16x16(cluster_16x16_blk_t * ps_cluster_blk_16x16,S32 bidir_enabled)2505*c83a76b0SSuyog Pawar hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled)
2506*c83a76b0SSuyog Pawar {
2507*c83a76b0SSuyog Pawar S32 i;
2508*c83a76b0SSuyog Pawar
2509*c83a76b0SSuyog Pawar ps_cluster_blk_16x16->num_clusters = 0;
2510*c83a76b0SSuyog Pawar ps_cluster_blk_16x16->intra_mv_area = 0;
2511*c83a76b0SSuyog Pawar ps_cluster_blk_16x16->best_inter_cost = 0;
2512*c83a76b0SSuyog Pawar
2513*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_CLUSTERS_16x16; i++)
2514*c83a76b0SSuyog Pawar {
2515*c83a76b0SSuyog Pawar ps_cluster_blk_16x16->as_cluster_data[i].max_dist_from_centroid =
2516*c83a76b0SSuyog Pawar bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_16x16_B : MAX_DISTANCE_FROM_CENTROID_16x16;
2517*c83a76b0SSuyog Pawar
2518*c83a76b0SSuyog Pawar ps_cluster_blk_16x16->as_cluster_data[i].is_valid_cluster = 0;
2519*c83a76b0SSuyog Pawar
2520*c83a76b0SSuyog Pawar ps_cluster_blk_16x16->as_cluster_data[i].bi_mv_pixel_area = 0;
2521*c83a76b0SSuyog Pawar ps_cluster_blk_16x16->as_cluster_data[i].uni_mv_pixel_area = 0;
2522*c83a76b0SSuyog Pawar }
2523*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_REF; i++)
2524*c83a76b0SSuyog Pawar {
2525*c83a76b0SSuyog Pawar ps_cluster_blk_16x16->au1_num_clusters[i] = 0;
2526*c83a76b0SSuyog Pawar }
2527*c83a76b0SSuyog Pawar }
2528*c83a76b0SSuyog Pawar
2529*c83a76b0SSuyog Pawar /**
2530*c83a76b0SSuyog Pawar ********************************************************************************
2531*c83a76b0SSuyog Pawar * @fn void hme_init_clusters_32x32
2532*c83a76b0SSuyog Pawar * (
2533*c83a76b0SSuyog Pawar * cluster_32x32_blk_t *ps_cluster_blk_32x32
2534*c83a76b0SSuyog Pawar * )
2535*c83a76b0SSuyog Pawar *
2536*c83a76b0SSuyog Pawar * @brief Intialisations for the structs used in clustering algorithm
2537*c83a76b0SSuyog Pawar *
2538*c83a76b0SSuyog Pawar * @param[in/out] ps_cluster_blk_32x32: pointer to structure containing clusters
2539*c83a76b0SSuyog Pawar * of 32x32 block
2540*c83a76b0SSuyog Pawar *
2541*c83a76b0SSuyog Pawar * @return None
2542*c83a76b0SSuyog Pawar ********************************************************************************
2543*c83a76b0SSuyog Pawar */
2544*c83a76b0SSuyog Pawar static __inline void
hme_init_clusters_32x32(cluster_32x32_blk_t * ps_cluster_blk_32x32,S32 bidir_enabled)2545*c83a76b0SSuyog Pawar hme_init_clusters_32x32(cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled)
2546*c83a76b0SSuyog Pawar {
2547*c83a76b0SSuyog Pawar S32 i;
2548*c83a76b0SSuyog Pawar
2549*c83a76b0SSuyog Pawar ps_cluster_blk_32x32->num_clusters = 0;
2550*c83a76b0SSuyog Pawar ps_cluster_blk_32x32->intra_mv_area = 0;
2551*c83a76b0SSuyog Pawar ps_cluster_blk_32x32->best_alt_ref = -1;
2552*c83a76b0SSuyog Pawar ps_cluster_blk_32x32->best_uni_ref = -1;
2553*c83a76b0SSuyog Pawar ps_cluster_blk_32x32->best_inter_cost = 0;
2554*c83a76b0SSuyog Pawar ps_cluster_blk_32x32->num_clusters_with_weak_sdi_density = 0;
2555*c83a76b0SSuyog Pawar
2556*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_CLUSTERS_32x32; i++)
2557*c83a76b0SSuyog Pawar {
2558*c83a76b0SSuyog Pawar ps_cluster_blk_32x32->as_cluster_data[i].max_dist_from_centroid =
2559*c83a76b0SSuyog Pawar bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32;
2560*c83a76b0SSuyog Pawar ps_cluster_blk_32x32->as_cluster_data[i].is_valid_cluster = 0;
2561*c83a76b0SSuyog Pawar
2562*c83a76b0SSuyog Pawar ps_cluster_blk_32x32->as_cluster_data[i].bi_mv_pixel_area = 0;
2563*c83a76b0SSuyog Pawar ps_cluster_blk_32x32->as_cluster_data[i].uni_mv_pixel_area = 0;
2564*c83a76b0SSuyog Pawar }
2565*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_REF; i++)
2566*c83a76b0SSuyog Pawar {
2567*c83a76b0SSuyog Pawar ps_cluster_blk_32x32->au1_num_clusters[i] = 0;
2568*c83a76b0SSuyog Pawar }
2569*c83a76b0SSuyog Pawar }
2570*c83a76b0SSuyog Pawar
2571*c83a76b0SSuyog Pawar /**
2572*c83a76b0SSuyog Pawar ********************************************************************************
2573*c83a76b0SSuyog Pawar * @fn void hme_init_clusters_64x64
2574*c83a76b0SSuyog Pawar * (
2575*c83a76b0SSuyog Pawar * cluster_64x64_blk_t *ps_cluster_blk_64x64
2576*c83a76b0SSuyog Pawar * )
2577*c83a76b0SSuyog Pawar *
2578*c83a76b0SSuyog Pawar * @brief Intialisations for the structs used in clustering algorithm
2579*c83a76b0SSuyog Pawar *
2580*c83a76b0SSuyog Pawar * @param[in/out] ps_cluster_blk_64x64: pointer to structure containing clusters
2581*c83a76b0SSuyog Pawar * of 64x64 block
2582*c83a76b0SSuyog Pawar *
2583*c83a76b0SSuyog Pawar * @return None
2584*c83a76b0SSuyog Pawar ********************************************************************************
2585*c83a76b0SSuyog Pawar */
2586*c83a76b0SSuyog Pawar static __inline void
hme_init_clusters_64x64(cluster_64x64_blk_t * ps_cluster_blk_64x64,S32 bidir_enabled)2587*c83a76b0SSuyog Pawar hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled)
2588*c83a76b0SSuyog Pawar {
2589*c83a76b0SSuyog Pawar S32 i;
2590*c83a76b0SSuyog Pawar
2591*c83a76b0SSuyog Pawar ps_cluster_blk_64x64->num_clusters = 0;
2592*c83a76b0SSuyog Pawar ps_cluster_blk_64x64->intra_mv_area = 0;
2593*c83a76b0SSuyog Pawar ps_cluster_blk_64x64->best_alt_ref = -1;
2594*c83a76b0SSuyog Pawar ps_cluster_blk_64x64->best_uni_ref = -1;
2595*c83a76b0SSuyog Pawar ps_cluster_blk_64x64->best_inter_cost = 0;
2596*c83a76b0SSuyog Pawar
2597*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_CLUSTERS_64x64; i++)
2598*c83a76b0SSuyog Pawar {
2599*c83a76b0SSuyog Pawar ps_cluster_blk_64x64->as_cluster_data[i].max_dist_from_centroid =
2600*c83a76b0SSuyog Pawar bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64;
2601*c83a76b0SSuyog Pawar ps_cluster_blk_64x64->as_cluster_data[i].is_valid_cluster = 0;
2602*c83a76b0SSuyog Pawar
2603*c83a76b0SSuyog Pawar ps_cluster_blk_64x64->as_cluster_data[i].bi_mv_pixel_area = 0;
2604*c83a76b0SSuyog Pawar ps_cluster_blk_64x64->as_cluster_data[i].uni_mv_pixel_area = 0;
2605*c83a76b0SSuyog Pawar }
2606*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_REF; i++)
2607*c83a76b0SSuyog Pawar {
2608*c83a76b0SSuyog Pawar ps_cluster_blk_64x64->au1_num_clusters[i] = 0;
2609*c83a76b0SSuyog Pawar }
2610*c83a76b0SSuyog Pawar }
2611*c83a76b0SSuyog Pawar
2612*c83a76b0SSuyog Pawar /**
2613*c83a76b0SSuyog Pawar ********************************************************************************
2614*c83a76b0SSuyog Pawar * @fn void hme_sort_and_assign_top_ref_ids_areawise
2615*c83a76b0SSuyog Pawar * (
2616*c83a76b0SSuyog Pawar * ctb_cluster_info_t *ps_ctb_cluster_info
2617*c83a76b0SSuyog Pawar * )
2618*c83a76b0SSuyog Pawar *
2619*c83a76b0SSuyog Pawar * @brief Finds best_uni_ref and best_alt_ref
2620*c83a76b0SSuyog Pawar *
2621*c83a76b0SSuyog Pawar * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data
2622*c83a76b0SSuyog Pawar *
2623*c83a76b0SSuyog Pawar * @param[in] bidir_enabled: flag that indicates whether or not bi-pred is
2624*c83a76b0SSuyog Pawar * enabled
2625*c83a76b0SSuyog Pawar *
2626*c83a76b0SSuyog Pawar * @param[in] block_width: width of the block in pels
2627*c83a76b0SSuyog Pawar *
2628*c83a76b0SSuyog Pawar * @param[in] e_cu_pos: position of the block within the CTB
2629*c83a76b0SSuyog Pawar *
2630*c83a76b0SSuyog Pawar * @return None
2631*c83a76b0SSuyog Pawar ********************************************************************************
2632*c83a76b0SSuyog Pawar */
hme_sort_and_assign_top_ref_ids_areawise(ctb_cluster_info_t * ps_ctb_cluster_info,S32 bidir_enabled,S32 block_width,CU_POS_T e_cu_pos)2633*c83a76b0SSuyog Pawar void hme_sort_and_assign_top_ref_ids_areawise(
2634*c83a76b0SSuyog Pawar ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos)
2635*c83a76b0SSuyog Pawar {
2636*c83a76b0SSuyog Pawar cluster_32x32_blk_t *ps_32x32 = NULL;
2637*c83a76b0SSuyog Pawar cluster_64x64_blk_t *ps_64x64 = NULL;
2638*c83a76b0SSuyog Pawar cluster_data_t *ps_data;
2639*c83a76b0SSuyog Pawar
2640*c83a76b0SSuyog Pawar S32 j, k;
2641*c83a76b0SSuyog Pawar
2642*c83a76b0SSuyog Pawar S32 ai4_uni_area[MAX_NUM_REF];
2643*c83a76b0SSuyog Pawar S32 ai4_bi_area[MAX_NUM_REF];
2644*c83a76b0SSuyog Pawar S32 ai4_ref_id_found[MAX_NUM_REF];
2645*c83a76b0SSuyog Pawar S32 ai4_ref_id[MAX_NUM_REF];
2646*c83a76b0SSuyog Pawar
2647*c83a76b0SSuyog Pawar S32 best_uni_ref = -1, best_alt_ref = -1;
2648*c83a76b0SSuyog Pawar S32 num_clusters;
2649*c83a76b0SSuyog Pawar S32 num_ref = 0;
2650*c83a76b0SSuyog Pawar S32 num_clusters_evaluated = 0;
2651*c83a76b0SSuyog Pawar S32 is_cur_blk_valid;
2652*c83a76b0SSuyog Pawar
2653*c83a76b0SSuyog Pawar if(32 == block_width)
2654*c83a76b0SSuyog Pawar {
2655*c83a76b0SSuyog Pawar is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)) || 0;
2656*c83a76b0SSuyog Pawar ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos];
2657*c83a76b0SSuyog Pawar num_clusters = ps_32x32->num_clusters;
2658*c83a76b0SSuyog Pawar ps_data = &ps_32x32->as_cluster_data[0];
2659*c83a76b0SSuyog Pawar }
2660*c83a76b0SSuyog Pawar else
2661*c83a76b0SSuyog Pawar {
2662*c83a76b0SSuyog Pawar is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask == 0xf);
2663*c83a76b0SSuyog Pawar ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
2664*c83a76b0SSuyog Pawar num_clusters = ps_64x64->num_clusters;
2665*c83a76b0SSuyog Pawar ps_data = &ps_64x64->as_cluster_data[0];
2666*c83a76b0SSuyog Pawar }
2667*c83a76b0SSuyog Pawar
2668*c83a76b0SSuyog Pawar #if !ENABLE_4CTB_EVALUATION
2669*c83a76b0SSuyog Pawar if((num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
2670*c83a76b0SSuyog Pawar {
2671*c83a76b0SSuyog Pawar return;
2672*c83a76b0SSuyog Pawar }
2673*c83a76b0SSuyog Pawar #endif
2674*c83a76b0SSuyog Pawar if(num_clusters == 0)
2675*c83a76b0SSuyog Pawar {
2676*c83a76b0SSuyog Pawar return;
2677*c83a76b0SSuyog Pawar }
2678*c83a76b0SSuyog Pawar else if(!is_cur_blk_valid)
2679*c83a76b0SSuyog Pawar {
2680*c83a76b0SSuyog Pawar return;
2681*c83a76b0SSuyog Pawar }
2682*c83a76b0SSuyog Pawar
2683*c83a76b0SSuyog Pawar memset(ai4_uni_area, 0, sizeof(S32) * MAX_NUM_REF);
2684*c83a76b0SSuyog Pawar memset(ai4_bi_area, 0, sizeof(S32) * MAX_NUM_REF);
2685*c83a76b0SSuyog Pawar memset(ai4_ref_id_found, 0, sizeof(S32) * MAX_NUM_REF);
2686*c83a76b0SSuyog Pawar memset(ai4_ref_id, -1, sizeof(S32) * MAX_NUM_REF);
2687*c83a76b0SSuyog Pawar
2688*c83a76b0SSuyog Pawar for(j = 0; num_clusters_evaluated < num_clusters; j++, ps_data++)
2689*c83a76b0SSuyog Pawar {
2690*c83a76b0SSuyog Pawar S32 ref_id;
2691*c83a76b0SSuyog Pawar
2692*c83a76b0SSuyog Pawar if(!ps_data->is_valid_cluster)
2693*c83a76b0SSuyog Pawar {
2694*c83a76b0SSuyog Pawar continue;
2695*c83a76b0SSuyog Pawar }
2696*c83a76b0SSuyog Pawar
2697*c83a76b0SSuyog Pawar ref_id = ps_data->ref_id;
2698*c83a76b0SSuyog Pawar
2699*c83a76b0SSuyog Pawar num_clusters_evaluated++;
2700*c83a76b0SSuyog Pawar
2701*c83a76b0SSuyog Pawar ai4_uni_area[ref_id] += ps_data->uni_mv_pixel_area;
2702*c83a76b0SSuyog Pawar ai4_bi_area[ref_id] += ps_data->bi_mv_pixel_area;
2703*c83a76b0SSuyog Pawar
2704*c83a76b0SSuyog Pawar if(!ai4_ref_id_found[ref_id])
2705*c83a76b0SSuyog Pawar {
2706*c83a76b0SSuyog Pawar ai4_ref_id[ref_id] = ref_id;
2707*c83a76b0SSuyog Pawar ai4_ref_id_found[ref_id] = 1;
2708*c83a76b0SSuyog Pawar num_ref++;
2709*c83a76b0SSuyog Pawar }
2710*c83a76b0SSuyog Pawar }
2711*c83a76b0SSuyog Pawar
2712*c83a76b0SSuyog Pawar {
2713*c83a76b0SSuyog Pawar S32 ai4_ref_id_temp[MAX_NUM_REF];
2714*c83a76b0SSuyog Pawar
2715*c83a76b0SSuyog Pawar memcpy(ai4_ref_id_temp, ai4_ref_id, sizeof(S32) * MAX_NUM_REF);
2716*c83a76b0SSuyog Pawar
2717*c83a76b0SSuyog Pawar for(k = 1; k < MAX_NUM_REF; k++)
2718*c83a76b0SSuyog Pawar {
2719*c83a76b0SSuyog Pawar if(ai4_uni_area[k] > ai4_uni_area[0])
2720*c83a76b0SSuyog Pawar {
2721*c83a76b0SSuyog Pawar SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32);
2722*c83a76b0SSuyog Pawar SWAP_HME(ai4_ref_id_temp[k], ai4_ref_id_temp[0], S32);
2723*c83a76b0SSuyog Pawar }
2724*c83a76b0SSuyog Pawar }
2725*c83a76b0SSuyog Pawar
2726*c83a76b0SSuyog Pawar best_uni_ref = ai4_ref_id_temp[0];
2727*c83a76b0SSuyog Pawar }
2728*c83a76b0SSuyog Pawar
2729*c83a76b0SSuyog Pawar if(bidir_enabled)
2730*c83a76b0SSuyog Pawar {
2731*c83a76b0SSuyog Pawar for(k = 1; k < MAX_NUM_REF; k++)
2732*c83a76b0SSuyog Pawar {
2733*c83a76b0SSuyog Pawar if(ai4_bi_area[k] > ai4_bi_area[0])
2734*c83a76b0SSuyog Pawar {
2735*c83a76b0SSuyog Pawar SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32);
2736*c83a76b0SSuyog Pawar SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32);
2737*c83a76b0SSuyog Pawar }
2738*c83a76b0SSuyog Pawar }
2739*c83a76b0SSuyog Pawar
2740*c83a76b0SSuyog Pawar if(!ai4_bi_area[0])
2741*c83a76b0SSuyog Pawar {
2742*c83a76b0SSuyog Pawar best_alt_ref = -1;
2743*c83a76b0SSuyog Pawar
2744*c83a76b0SSuyog Pawar if(32 == block_width)
2745*c83a76b0SSuyog Pawar {
2746*c83a76b0SSuyog Pawar SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2747*c83a76b0SSuyog Pawar }
2748*c83a76b0SSuyog Pawar else
2749*c83a76b0SSuyog Pawar {
2750*c83a76b0SSuyog Pawar SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2751*c83a76b0SSuyog Pawar }
2752*c83a76b0SSuyog Pawar
2753*c83a76b0SSuyog Pawar return;
2754*c83a76b0SSuyog Pawar }
2755*c83a76b0SSuyog Pawar
2756*c83a76b0SSuyog Pawar if(best_uni_ref == ai4_ref_id[0])
2757*c83a76b0SSuyog Pawar {
2758*c83a76b0SSuyog Pawar for(k = 2; k < MAX_NUM_REF; k++)
2759*c83a76b0SSuyog Pawar {
2760*c83a76b0SSuyog Pawar if(ai4_bi_area[k] > ai4_bi_area[1])
2761*c83a76b0SSuyog Pawar {
2762*c83a76b0SSuyog Pawar SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32);
2763*c83a76b0SSuyog Pawar SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32);
2764*c83a76b0SSuyog Pawar }
2765*c83a76b0SSuyog Pawar }
2766*c83a76b0SSuyog Pawar
2767*c83a76b0SSuyog Pawar best_alt_ref = ai4_ref_id[1];
2768*c83a76b0SSuyog Pawar }
2769*c83a76b0SSuyog Pawar else
2770*c83a76b0SSuyog Pawar {
2771*c83a76b0SSuyog Pawar best_alt_ref = ai4_ref_id[0];
2772*c83a76b0SSuyog Pawar }
2773*c83a76b0SSuyog Pawar }
2774*c83a76b0SSuyog Pawar
2775*c83a76b0SSuyog Pawar if(32 == block_width)
2776*c83a76b0SSuyog Pawar {
2777*c83a76b0SSuyog Pawar SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2778*c83a76b0SSuyog Pawar }
2779*c83a76b0SSuyog Pawar else
2780*c83a76b0SSuyog Pawar {
2781*c83a76b0SSuyog Pawar SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2782*c83a76b0SSuyog Pawar }
2783*c83a76b0SSuyog Pawar }
2784*c83a76b0SSuyog Pawar
2785*c83a76b0SSuyog Pawar /**
2786*c83a76b0SSuyog Pawar ********************************************************************************
2787*c83a76b0SSuyog Pawar * @fn void hme_find_top_ref_ids
2788*c83a76b0SSuyog Pawar * (
2789*c83a76b0SSuyog Pawar * ctb_cluster_info_t *ps_ctb_cluster_info
2790*c83a76b0SSuyog Pawar * )
2791*c83a76b0SSuyog Pawar *
2792*c83a76b0SSuyog Pawar * @brief Finds best_uni_ref and best_alt_ref
2793*c83a76b0SSuyog Pawar *
2794*c83a76b0SSuyog Pawar * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data
2795*c83a76b0SSuyog Pawar *
2796*c83a76b0SSuyog Pawar * @return None
2797*c83a76b0SSuyog Pawar ********************************************************************************
2798*c83a76b0SSuyog Pawar */
hme_find_top_ref_ids(ctb_cluster_info_t * ps_ctb_cluster_info,S32 bidir_enabled,S32 block_width)2799*c83a76b0SSuyog Pawar void hme_find_top_ref_ids(
2800*c83a76b0SSuyog Pawar ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width)
2801*c83a76b0SSuyog Pawar {
2802*c83a76b0SSuyog Pawar S32 i;
2803*c83a76b0SSuyog Pawar
2804*c83a76b0SSuyog Pawar if(32 == block_width)
2805*c83a76b0SSuyog Pawar {
2806*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
2807*c83a76b0SSuyog Pawar {
2808*c83a76b0SSuyog Pawar hme_sort_and_assign_top_ref_ids_areawise(
2809*c83a76b0SSuyog Pawar ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)i);
2810*c83a76b0SSuyog Pawar }
2811*c83a76b0SSuyog Pawar }
2812*c83a76b0SSuyog Pawar else if(64 == block_width)
2813*c83a76b0SSuyog Pawar {
2814*c83a76b0SSuyog Pawar hme_sort_and_assign_top_ref_ids_areawise(
2815*c83a76b0SSuyog Pawar ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA);
2816*c83a76b0SSuyog Pawar }
2817*c83a76b0SSuyog Pawar }
2818*c83a76b0SSuyog Pawar
2819*c83a76b0SSuyog Pawar /**
2820*c83a76b0SSuyog Pawar ********************************************************************************
2821*c83a76b0SSuyog Pawar * @fn void hme_boot_out_outlier
2822*c83a76b0SSuyog Pawar * (
2823*c83a76b0SSuyog Pawar * ctb_cluster_info_t *ps_ctb_cluster_info
2824*c83a76b0SSuyog Pawar * )
2825*c83a76b0SSuyog Pawar *
2826*c83a76b0SSuyog Pawar * @brief Removes outlier clusters before CU tree population
2827*c83a76b0SSuyog Pawar *
2828*c83a76b0SSuyog Pawar * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data
2829*c83a76b0SSuyog Pawar *
2830*c83a76b0SSuyog Pawar * @return None
2831*c83a76b0SSuyog Pawar ********************************************************************************
2832*c83a76b0SSuyog Pawar */
hme_boot_out_outlier(ctb_cluster_info_t * ps_ctb_cluster_info,S32 blk_width)2833*c83a76b0SSuyog Pawar void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width)
2834*c83a76b0SSuyog Pawar {
2835*c83a76b0SSuyog Pawar cluster_32x32_blk_t *ps_32x32;
2836*c83a76b0SSuyog Pawar
2837*c83a76b0SSuyog Pawar S32 i;
2838*c83a76b0SSuyog Pawar
2839*c83a76b0SSuyog Pawar cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0];
2840*c83a76b0SSuyog Pawar
2841*c83a76b0SSuyog Pawar S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold;
2842*c83a76b0SSuyog Pawar
2843*c83a76b0SSuyog Pawar if(32 == blk_width)
2844*c83a76b0SSuyog Pawar {
2845*c83a76b0SSuyog Pawar /* 32x32 clusters */
2846*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
2847*c83a76b0SSuyog Pawar {
2848*c83a76b0SSuyog Pawar ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
2849*c83a76b0SSuyog Pawar
2850*c83a76b0SSuyog Pawar if(ps_32x32->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2851*c83a76b0SSuyog Pawar {
2852*c83a76b0SSuyog Pawar BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold);
2853*c83a76b0SSuyog Pawar }
2854*c83a76b0SSuyog Pawar }
2855*c83a76b0SSuyog Pawar }
2856*c83a76b0SSuyog Pawar else if(64 == blk_width)
2857*c83a76b0SSuyog Pawar {
2858*c83a76b0SSuyog Pawar /* 64x64 clusters */
2859*c83a76b0SSuyog Pawar if(ps_64x64->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2860*c83a76b0SSuyog Pawar {
2861*c83a76b0SSuyog Pawar BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold);
2862*c83a76b0SSuyog Pawar }
2863*c83a76b0SSuyog Pawar }
2864*c83a76b0SSuyog Pawar }
2865*c83a76b0SSuyog Pawar
2866*c83a76b0SSuyog Pawar /**
2867*c83a76b0SSuyog Pawar ********************************************************************************
2868*c83a76b0SSuyog Pawar * @fn void hme_update_cluster_attributes
2869*c83a76b0SSuyog Pawar * (
2870*c83a76b0SSuyog Pawar * cluster_data_t *ps_cluster_data,
2871*c83a76b0SSuyog Pawar * S32 mvx,
2872*c83a76b0SSuyog Pawar * S32 mvy,
2873*c83a76b0SSuyog Pawar * PART_ID_T e_part_id
2874*c83a76b0SSuyog Pawar * )
2875*c83a76b0SSuyog Pawar *
2876*c83a76b0SSuyog Pawar * @brief Implementation of the clustering algorithm
2877*c83a76b0SSuyog Pawar *
2878*c83a76b0SSuyog Pawar * @param[in/out] ps_cluster_data: pointer to cluster_data_t struct
2879*c83a76b0SSuyog Pawar *
2880*c83a76b0SSuyog Pawar * @param[in] mvx : x co-ordinate of the motion vector
2881*c83a76b0SSuyog Pawar *
2882*c83a76b0SSuyog Pawar * @param[in] mvy : y co-ordinate of the motion vector
2883*c83a76b0SSuyog Pawar *
2884*c83a76b0SSuyog Pawar * @param[in] ref_idx : ref_id of the motion vector
2885*c83a76b0SSuyog Pawar *
2886*c83a76b0SSuyog Pawar * @param[in] e_part_id : partition id of the motion vector
2887*c83a76b0SSuyog Pawar *
2888*c83a76b0SSuyog Pawar * @return None
2889*c83a76b0SSuyog Pawar ********************************************************************************
2890*c83a76b0SSuyog Pawar */
hme_update_cluster_attributes(cluster_data_t * ps_cluster_data,S32 mvx,S32 mvy,S32 mvdx,S32 mvdy,S32 ref_id,S32 sdi,U08 is_part_of_bi,PART_ID_T e_part_id)2891*c83a76b0SSuyog Pawar static __inline void hme_update_cluster_attributes(
2892*c83a76b0SSuyog Pawar cluster_data_t *ps_cluster_data,
2893*c83a76b0SSuyog Pawar S32 mvx,
2894*c83a76b0SSuyog Pawar S32 mvy,
2895*c83a76b0SSuyog Pawar S32 mvdx,
2896*c83a76b0SSuyog Pawar S32 mvdy,
2897*c83a76b0SSuyog Pawar S32 ref_id,
2898*c83a76b0SSuyog Pawar S32 sdi,
2899*c83a76b0SSuyog Pawar U08 is_part_of_bi,
2900*c83a76b0SSuyog Pawar PART_ID_T e_part_id)
2901*c83a76b0SSuyog Pawar {
2902*c83a76b0SSuyog Pawar LWORD64 i8_mvx_sum_q8;
2903*c83a76b0SSuyog Pawar LWORD64 i8_mvy_sum_q8;
2904*c83a76b0SSuyog Pawar
2905*c83a76b0SSuyog Pawar S32 centroid_posx_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
2906*c83a76b0SSuyog Pawar S32 centroid_posy_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
2907*c83a76b0SSuyog Pawar
2908*c83a76b0SSuyog Pawar if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
2909*c83a76b0SSuyog Pawar {
2910*c83a76b0SSuyog Pawar ps_cluster_data->min_x = mvx;
2911*c83a76b0SSuyog Pawar }
2912*c83a76b0SSuyog Pawar else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
2913*c83a76b0SSuyog Pawar {
2914*c83a76b0SSuyog Pawar ps_cluster_data->max_x = mvx;
2915*c83a76b0SSuyog Pawar }
2916*c83a76b0SSuyog Pawar
2917*c83a76b0SSuyog Pawar if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
2918*c83a76b0SSuyog Pawar {
2919*c83a76b0SSuyog Pawar ps_cluster_data->min_y = mvy;
2920*c83a76b0SSuyog Pawar }
2921*c83a76b0SSuyog Pawar else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
2922*c83a76b0SSuyog Pawar {
2923*c83a76b0SSuyog Pawar ps_cluster_data->max_y = mvy;
2924*c83a76b0SSuyog Pawar }
2925*c83a76b0SSuyog Pawar
2926*c83a76b0SSuyog Pawar {
2927*c83a76b0SSuyog Pawar S32 num_mvs = ps_cluster_data->num_mvs;
2928*c83a76b0SSuyog Pawar
2929*c83a76b0SSuyog Pawar ps_cluster_data->as_mv[num_mvs].pixel_count = gai4_partition_area[e_part_id];
2930*c83a76b0SSuyog Pawar ps_cluster_data->as_mv[num_mvs].mvx = mvx;
2931*c83a76b0SSuyog Pawar ps_cluster_data->as_mv[num_mvs].mvy = mvy;
2932*c83a76b0SSuyog Pawar
2933*c83a76b0SSuyog Pawar /***************************/
2934*c83a76b0SSuyog Pawar ps_cluster_data->as_mv[num_mvs].is_uni = !is_part_of_bi;
2935*c83a76b0SSuyog Pawar ps_cluster_data->as_mv[num_mvs].sdi = sdi;
2936*c83a76b0SSuyog Pawar /**************************/
2937*c83a76b0SSuyog Pawar }
2938*c83a76b0SSuyog Pawar
2939*c83a76b0SSuyog Pawar /* Updation of centroid */
2940*c83a76b0SSuyog Pawar {
2941*c83a76b0SSuyog Pawar i8_mvx_sum_q8 = (LWORD64)centroid_posx_q8 * ps_cluster_data->num_mvs + (mvx << 8);
2942*c83a76b0SSuyog Pawar i8_mvy_sum_q8 = (LWORD64)centroid_posy_q8 * ps_cluster_data->num_mvs + (mvy << 8);
2943*c83a76b0SSuyog Pawar
2944*c83a76b0SSuyog Pawar ps_cluster_data->num_mvs++;
2945*c83a76b0SSuyog Pawar
2946*c83a76b0SSuyog Pawar ps_cluster_data->s_centroid.i4_pos_x_q8 =
2947*c83a76b0SSuyog Pawar (WORD32)((i8_mvx_sum_q8) / ps_cluster_data->num_mvs);
2948*c83a76b0SSuyog Pawar ps_cluster_data->s_centroid.i4_pos_y_q8 =
2949*c83a76b0SSuyog Pawar (WORD32)((i8_mvy_sum_q8) / ps_cluster_data->num_mvs);
2950*c83a76b0SSuyog Pawar }
2951*c83a76b0SSuyog Pawar
2952*c83a76b0SSuyog Pawar ps_cluster_data->area_in_pixels += gai4_partition_area[e_part_id];
2953*c83a76b0SSuyog Pawar
2954*c83a76b0SSuyog Pawar if(is_part_of_bi)
2955*c83a76b0SSuyog Pawar {
2956*c83a76b0SSuyog Pawar ps_cluster_data->bi_mv_pixel_area += gai4_partition_area[e_part_id];
2957*c83a76b0SSuyog Pawar }
2958*c83a76b0SSuyog Pawar else
2959*c83a76b0SSuyog Pawar {
2960*c83a76b0SSuyog Pawar ps_cluster_data->uni_mv_pixel_area += gai4_partition_area[e_part_id];
2961*c83a76b0SSuyog Pawar }
2962*c83a76b0SSuyog Pawar }
2963*c83a76b0SSuyog Pawar
2964*c83a76b0SSuyog Pawar /**
2965*c83a76b0SSuyog Pawar ********************************************************************************
2966*c83a76b0SSuyog Pawar * @fn void hme_try_cluster_merge
2967*c83a76b0SSuyog Pawar * (
2968*c83a76b0SSuyog Pawar * cluster_data_t *ps_cluster_data,
2969*c83a76b0SSuyog Pawar * S32 *pi4_num_clusters,
2970*c83a76b0SSuyog Pawar * S32 idx_of_updated_cluster
2971*c83a76b0SSuyog Pawar * )
2972*c83a76b0SSuyog Pawar *
2973*c83a76b0SSuyog Pawar * @brief Implementation fo the clustering algorithm
2974*c83a76b0SSuyog Pawar *
2975*c83a76b0SSuyog Pawar * @param[in/out] ps_cluster_data: pointer to cluster_data_t struct
2976*c83a76b0SSuyog Pawar *
2977*c83a76b0SSuyog Pawar * @param[in/out] pi4_num_clusters : pointer to number of clusters
2978*c83a76b0SSuyog Pawar *
2979*c83a76b0SSuyog Pawar * @param[in] idx_of_updated_cluster : index of the cluster most recently
2980*c83a76b0SSuyog Pawar * updated
2981*c83a76b0SSuyog Pawar *
2982*c83a76b0SSuyog Pawar * @return Nothing
2983*c83a76b0SSuyog Pawar ********************************************************************************
2984*c83a76b0SSuyog Pawar */
hme_try_cluster_merge(cluster_data_t * ps_cluster_data,U08 * pu1_num_clusters,S32 idx_of_updated_cluster)2985*c83a76b0SSuyog Pawar void hme_try_cluster_merge(
2986*c83a76b0SSuyog Pawar cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster)
2987*c83a76b0SSuyog Pawar {
2988*c83a76b0SSuyog Pawar centroid_t *ps_centroid;
2989*c83a76b0SSuyog Pawar
2990*c83a76b0SSuyog Pawar S32 cur_pos_x_q8;
2991*c83a76b0SSuyog Pawar S32 cur_pos_y_q8;
2992*c83a76b0SSuyog Pawar S32 i;
2993*c83a76b0SSuyog Pawar S32 max_dist_from_centroid;
2994*c83a76b0SSuyog Pawar S32 mvd;
2995*c83a76b0SSuyog Pawar S32 mvdx_q8;
2996*c83a76b0SSuyog Pawar S32 mvdx;
2997*c83a76b0SSuyog Pawar S32 mvdy_q8;
2998*c83a76b0SSuyog Pawar S32 mvdy;
2999*c83a76b0SSuyog Pawar S32 num_clusters, num_clusters_evaluated;
3000*c83a76b0SSuyog Pawar S32 other_pos_x_q8;
3001*c83a76b0SSuyog Pawar S32 other_pos_y_q8;
3002*c83a76b0SSuyog Pawar
3003*c83a76b0SSuyog Pawar cluster_data_t *ps_root = ps_cluster_data;
3004*c83a76b0SSuyog Pawar cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster];
3005*c83a76b0SSuyog Pawar centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid;
3006*c83a76b0SSuyog Pawar
3007*c83a76b0SSuyog Pawar /* Merge is superfluous if num_clusters is 1 */
3008*c83a76b0SSuyog Pawar if(*pu1_num_clusters == 1)
3009*c83a76b0SSuyog Pawar {
3010*c83a76b0SSuyog Pawar return;
3011*c83a76b0SSuyog Pawar }
3012*c83a76b0SSuyog Pawar
3013*c83a76b0SSuyog Pawar cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8;
3014*c83a76b0SSuyog Pawar cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8;
3015*c83a76b0SSuyog Pawar
3016*c83a76b0SSuyog Pawar max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid;
3017*c83a76b0SSuyog Pawar
3018*c83a76b0SSuyog Pawar num_clusters = *pu1_num_clusters;
3019*c83a76b0SSuyog Pawar num_clusters_evaluated = 0;
3020*c83a76b0SSuyog Pawar
3021*c83a76b0SSuyog Pawar for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++)
3022*c83a76b0SSuyog Pawar {
3023*c83a76b0SSuyog Pawar if(!ps_cluster_data->is_valid_cluster)
3024*c83a76b0SSuyog Pawar {
3025*c83a76b0SSuyog Pawar continue;
3026*c83a76b0SSuyog Pawar }
3027*c83a76b0SSuyog Pawar if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster))
3028*c83a76b0SSuyog Pawar {
3029*c83a76b0SSuyog Pawar num_clusters_evaluated++;
3030*c83a76b0SSuyog Pawar continue;
3031*c83a76b0SSuyog Pawar }
3032*c83a76b0SSuyog Pawar
3033*c83a76b0SSuyog Pawar ps_centroid = &ps_cluster_data->s_centroid;
3034*c83a76b0SSuyog Pawar
3035*c83a76b0SSuyog Pawar other_pos_x_q8 = ps_centroid->i4_pos_x_q8;
3036*c83a76b0SSuyog Pawar other_pos_y_q8 = ps_centroid->i4_pos_y_q8;
3037*c83a76b0SSuyog Pawar
3038*c83a76b0SSuyog Pawar mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8);
3039*c83a76b0SSuyog Pawar mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8);
3040*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3041*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3042*c83a76b0SSuyog Pawar
3043*c83a76b0SSuyog Pawar mvd = ABS(mvdx) + ABS(mvdy);
3044*c83a76b0SSuyog Pawar
3045*c83a76b0SSuyog Pawar if(mvd <= (max_dist_from_centroid >> 1))
3046*c83a76b0SSuyog Pawar {
3047*c83a76b0SSuyog Pawar /* 0 => no updates */
3048*c83a76b0SSuyog Pawar /* 1 => min updated */
3049*c83a76b0SSuyog Pawar /* 2 => max updated */
3050*c83a76b0SSuyog Pawar S32 minmax_x_update_id;
3051*c83a76b0SSuyog Pawar S32 minmax_y_update_id;
3052*c83a76b0SSuyog Pawar
3053*c83a76b0SSuyog Pawar LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs;
3054*c83a76b0SSuyog Pawar LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs;
3055*c83a76b0SSuyog Pawar LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs;
3056*c83a76b0SSuyog Pawar LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs;
3057*c83a76b0SSuyog Pawar
3058*c83a76b0SSuyog Pawar (*pu1_num_clusters)--;
3059*c83a76b0SSuyog Pawar
3060*c83a76b0SSuyog Pawar ps_cluster_data->is_valid_cluster = 0;
3061*c83a76b0SSuyog Pawar
3062*c83a76b0SSuyog Pawar memcpy(
3063*c83a76b0SSuyog Pawar &ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs],
3064*c83a76b0SSuyog Pawar ps_cluster_data->as_mv,
3065*c83a76b0SSuyog Pawar sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3066*c83a76b0SSuyog Pawar
3067*c83a76b0SSuyog Pawar ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs;
3068*c83a76b0SSuyog Pawar ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels;
3069*c83a76b0SSuyog Pawar ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3070*c83a76b0SSuyog Pawar ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3071*c83a76b0SSuyog Pawar i8_mv_x_sum_self += i8_mv_x_sum_cousin;
3072*c83a76b0SSuyog Pawar i8_mv_y_sum_self += i8_mv_y_sum_cousin;
3073*c83a76b0SSuyog Pawar
3074*c83a76b0SSuyog Pawar ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs);
3075*c83a76b0SSuyog Pawar ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs);
3076*c83a76b0SSuyog Pawar
3077*c83a76b0SSuyog Pawar minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x)
3078*c83a76b0SSuyog Pawar ? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2)
3079*c83a76b0SSuyog Pawar : 1;
3080*c83a76b0SSuyog Pawar minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y)
3081*c83a76b0SSuyog Pawar ? ((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2)
3082*c83a76b0SSuyog Pawar : 1;
3083*c83a76b0SSuyog Pawar
3084*c83a76b0SSuyog Pawar /* Updation of centroid spread */
3085*c83a76b0SSuyog Pawar switch(minmax_x_update_id + (minmax_y_update_id << 2))
3086*c83a76b0SSuyog Pawar {
3087*c83a76b0SSuyog Pawar case 1:
3088*c83a76b0SSuyog Pawar {
3089*c83a76b0SSuyog Pawar S32 mvd, mvd_q8;
3090*c83a76b0SSuyog Pawar
3091*c83a76b0SSuyog Pawar ps_cur_cluster->min_x = ps_cluster_data->min_x;
3092*c83a76b0SSuyog Pawar
3093*c83a76b0SSuyog Pawar mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3094*c83a76b0SSuyog Pawar mvd = (mvd_q8 + (1 << 7)) >> 8;
3095*c83a76b0SSuyog Pawar
3096*c83a76b0SSuyog Pawar if(mvd > (max_dist_from_centroid))
3097*c83a76b0SSuyog Pawar {
3098*c83a76b0SSuyog Pawar ps_cluster_data->max_dist_from_centroid = mvd;
3099*c83a76b0SSuyog Pawar }
3100*c83a76b0SSuyog Pawar break;
3101*c83a76b0SSuyog Pawar }
3102*c83a76b0SSuyog Pawar case 2:
3103*c83a76b0SSuyog Pawar {
3104*c83a76b0SSuyog Pawar S32 mvd, mvd_q8;
3105*c83a76b0SSuyog Pawar
3106*c83a76b0SSuyog Pawar ps_cur_cluster->max_x = ps_cluster_data->max_x;
3107*c83a76b0SSuyog Pawar
3108*c83a76b0SSuyog Pawar mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3109*c83a76b0SSuyog Pawar mvd = (mvd_q8 + (1 << 7)) >> 8;
3110*c83a76b0SSuyog Pawar
3111*c83a76b0SSuyog Pawar if(mvd > (max_dist_from_centroid))
3112*c83a76b0SSuyog Pawar {
3113*c83a76b0SSuyog Pawar ps_cluster_data->max_dist_from_centroid = mvd;
3114*c83a76b0SSuyog Pawar }
3115*c83a76b0SSuyog Pawar break;
3116*c83a76b0SSuyog Pawar }
3117*c83a76b0SSuyog Pawar case 4:
3118*c83a76b0SSuyog Pawar {
3119*c83a76b0SSuyog Pawar S32 mvd, mvd_q8;
3120*c83a76b0SSuyog Pawar
3121*c83a76b0SSuyog Pawar ps_cur_cluster->min_y = ps_cluster_data->min_y;
3122*c83a76b0SSuyog Pawar
3123*c83a76b0SSuyog Pawar mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3124*c83a76b0SSuyog Pawar mvd = (mvd_q8 + (1 << 7)) >> 8;
3125*c83a76b0SSuyog Pawar
3126*c83a76b0SSuyog Pawar if(mvd > (max_dist_from_centroid))
3127*c83a76b0SSuyog Pawar {
3128*c83a76b0SSuyog Pawar ps_cluster_data->max_dist_from_centroid = mvd;
3129*c83a76b0SSuyog Pawar }
3130*c83a76b0SSuyog Pawar break;
3131*c83a76b0SSuyog Pawar }
3132*c83a76b0SSuyog Pawar case 5:
3133*c83a76b0SSuyog Pawar {
3134*c83a76b0SSuyog Pawar S32 mvd;
3135*c83a76b0SSuyog Pawar S32 mvdx, mvdx_q8;
3136*c83a76b0SSuyog Pawar S32 mvdy, mvdy_q8;
3137*c83a76b0SSuyog Pawar
3138*c83a76b0SSuyog Pawar mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3139*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3140*c83a76b0SSuyog Pawar
3141*c83a76b0SSuyog Pawar mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3142*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3143*c83a76b0SSuyog Pawar
3144*c83a76b0SSuyog Pawar mvd = (mvdx > mvdy) ? mvdx : mvdy;
3145*c83a76b0SSuyog Pawar
3146*c83a76b0SSuyog Pawar ps_cur_cluster->min_x = ps_cluster_data->min_x;
3147*c83a76b0SSuyog Pawar ps_cur_cluster->min_y = ps_cluster_data->min_y;
3148*c83a76b0SSuyog Pawar
3149*c83a76b0SSuyog Pawar if(mvd > max_dist_from_centroid)
3150*c83a76b0SSuyog Pawar {
3151*c83a76b0SSuyog Pawar ps_cluster_data->max_dist_from_centroid = mvd;
3152*c83a76b0SSuyog Pawar }
3153*c83a76b0SSuyog Pawar break;
3154*c83a76b0SSuyog Pawar }
3155*c83a76b0SSuyog Pawar case 6:
3156*c83a76b0SSuyog Pawar {
3157*c83a76b0SSuyog Pawar S32 mvd;
3158*c83a76b0SSuyog Pawar S32 mvdx, mvdx_q8;
3159*c83a76b0SSuyog Pawar S32 mvdy, mvdy_q8;
3160*c83a76b0SSuyog Pawar
3161*c83a76b0SSuyog Pawar mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3162*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3163*c83a76b0SSuyog Pawar
3164*c83a76b0SSuyog Pawar mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3165*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3166*c83a76b0SSuyog Pawar
3167*c83a76b0SSuyog Pawar mvd = (mvdx > mvdy) ? mvdx : mvdy;
3168*c83a76b0SSuyog Pawar
3169*c83a76b0SSuyog Pawar ps_cur_cluster->max_x = ps_cluster_data->max_x;
3170*c83a76b0SSuyog Pawar ps_cur_cluster->min_y = ps_cluster_data->min_y;
3171*c83a76b0SSuyog Pawar
3172*c83a76b0SSuyog Pawar if(mvd > max_dist_from_centroid)
3173*c83a76b0SSuyog Pawar {
3174*c83a76b0SSuyog Pawar ps_cluster_data->max_dist_from_centroid = mvd;
3175*c83a76b0SSuyog Pawar }
3176*c83a76b0SSuyog Pawar break;
3177*c83a76b0SSuyog Pawar }
3178*c83a76b0SSuyog Pawar case 8:
3179*c83a76b0SSuyog Pawar {
3180*c83a76b0SSuyog Pawar S32 mvd, mvd_q8;
3181*c83a76b0SSuyog Pawar
3182*c83a76b0SSuyog Pawar ps_cur_cluster->max_y = ps_cluster_data->max_y;
3183*c83a76b0SSuyog Pawar
3184*c83a76b0SSuyog Pawar mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3185*c83a76b0SSuyog Pawar mvd = (mvd_q8 + (1 << 7)) >> 8;
3186*c83a76b0SSuyog Pawar
3187*c83a76b0SSuyog Pawar if(mvd > (max_dist_from_centroid))
3188*c83a76b0SSuyog Pawar {
3189*c83a76b0SSuyog Pawar ps_cluster_data->max_dist_from_centroid = mvd;
3190*c83a76b0SSuyog Pawar }
3191*c83a76b0SSuyog Pawar break;
3192*c83a76b0SSuyog Pawar }
3193*c83a76b0SSuyog Pawar case 9:
3194*c83a76b0SSuyog Pawar {
3195*c83a76b0SSuyog Pawar S32 mvd;
3196*c83a76b0SSuyog Pawar S32 mvdx, mvdx_q8;
3197*c83a76b0SSuyog Pawar S32 mvdy, mvdy_q8;
3198*c83a76b0SSuyog Pawar
3199*c83a76b0SSuyog Pawar mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3200*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3201*c83a76b0SSuyog Pawar
3202*c83a76b0SSuyog Pawar mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3203*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3204*c83a76b0SSuyog Pawar
3205*c83a76b0SSuyog Pawar mvd = (mvdx > mvdy) ? mvdx : mvdy;
3206*c83a76b0SSuyog Pawar
3207*c83a76b0SSuyog Pawar ps_cur_cluster->min_x = ps_cluster_data->min_x;
3208*c83a76b0SSuyog Pawar ps_cur_cluster->max_y = ps_cluster_data->max_y;
3209*c83a76b0SSuyog Pawar
3210*c83a76b0SSuyog Pawar if(mvd > max_dist_from_centroid)
3211*c83a76b0SSuyog Pawar {
3212*c83a76b0SSuyog Pawar ps_cluster_data->max_dist_from_centroid = mvd;
3213*c83a76b0SSuyog Pawar }
3214*c83a76b0SSuyog Pawar break;
3215*c83a76b0SSuyog Pawar }
3216*c83a76b0SSuyog Pawar case 10:
3217*c83a76b0SSuyog Pawar {
3218*c83a76b0SSuyog Pawar S32 mvd;
3219*c83a76b0SSuyog Pawar S32 mvdx, mvdx_q8;
3220*c83a76b0SSuyog Pawar S32 mvdy, mvdy_q8;
3221*c83a76b0SSuyog Pawar
3222*c83a76b0SSuyog Pawar mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3223*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3224*c83a76b0SSuyog Pawar
3225*c83a76b0SSuyog Pawar mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3226*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3227*c83a76b0SSuyog Pawar
3228*c83a76b0SSuyog Pawar mvd = (mvdx > mvdy) ? mvdx : mvdy;
3229*c83a76b0SSuyog Pawar
3230*c83a76b0SSuyog Pawar ps_cur_cluster->max_x = ps_cluster_data->max_x;
3231*c83a76b0SSuyog Pawar ps_cur_cluster->max_y = ps_cluster_data->max_y;
3232*c83a76b0SSuyog Pawar
3233*c83a76b0SSuyog Pawar if(mvd > ps_cluster_data->max_dist_from_centroid)
3234*c83a76b0SSuyog Pawar {
3235*c83a76b0SSuyog Pawar ps_cluster_data->max_dist_from_centroid = mvd;
3236*c83a76b0SSuyog Pawar }
3237*c83a76b0SSuyog Pawar break;
3238*c83a76b0SSuyog Pawar }
3239*c83a76b0SSuyog Pawar default:
3240*c83a76b0SSuyog Pawar {
3241*c83a76b0SSuyog Pawar break;
3242*c83a76b0SSuyog Pawar }
3243*c83a76b0SSuyog Pawar }
3244*c83a76b0SSuyog Pawar
3245*c83a76b0SSuyog Pawar hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster);
3246*c83a76b0SSuyog Pawar
3247*c83a76b0SSuyog Pawar return;
3248*c83a76b0SSuyog Pawar }
3249*c83a76b0SSuyog Pawar
3250*c83a76b0SSuyog Pawar num_clusters_evaluated++;
3251*c83a76b0SSuyog Pawar }
3252*c83a76b0SSuyog Pawar }
3253*c83a76b0SSuyog Pawar
3254*c83a76b0SSuyog Pawar /**
3255*c83a76b0SSuyog Pawar ********************************************************************************
3256*c83a76b0SSuyog Pawar * @fn void hme_find_and_update_clusters
3257*c83a76b0SSuyog Pawar * (
3258*c83a76b0SSuyog Pawar * cluster_data_t *ps_cluster_data,
3259*c83a76b0SSuyog Pawar * S32 *pi4_num_clusters,
3260*c83a76b0SSuyog Pawar * S32 mvx,
3261*c83a76b0SSuyog Pawar * S32 mvy,
3262*c83a76b0SSuyog Pawar * S32 ref_idx,
3263*c83a76b0SSuyog Pawar * PART_ID_T e_part_id
3264*c83a76b0SSuyog Pawar * )
3265*c83a76b0SSuyog Pawar *
3266*c83a76b0SSuyog Pawar * @brief Implementation fo the clustering algorithm
3267*c83a76b0SSuyog Pawar *
3268*c83a76b0SSuyog Pawar * @param[in/out] ps_cluster_data: pointer to cluster_data_t struct
3269*c83a76b0SSuyog Pawar *
3270*c83a76b0SSuyog Pawar * @param[in/out] pi4_num_clusters : pointer to number of clusters
3271*c83a76b0SSuyog Pawar *
3272*c83a76b0SSuyog Pawar * @param[in] mvx : x co-ordinate of the motion vector
3273*c83a76b0SSuyog Pawar *
3274*c83a76b0SSuyog Pawar * @param[in] mvy : y co-ordinate of the motion vector
3275*c83a76b0SSuyog Pawar *
3276*c83a76b0SSuyog Pawar * @param[in] ref_idx : ref_id of the motion vector
3277*c83a76b0SSuyog Pawar *
3278*c83a76b0SSuyog Pawar * @param[in] e_part_id : partition id of the motion vector
3279*c83a76b0SSuyog Pawar *
3280*c83a76b0SSuyog Pawar * @return None
3281*c83a76b0SSuyog Pawar ********************************************************************************
3282*c83a76b0SSuyog Pawar */
hme_find_and_update_clusters(cluster_data_t * ps_cluster_data,U08 * pu1_num_clusters,S16 i2_mv_x,S16 i2_mv_y,U08 i1_ref_idx,S32 i4_sdi,PART_ID_T e_part_id,U08 is_part_of_bi)3283*c83a76b0SSuyog Pawar void hme_find_and_update_clusters(
3284*c83a76b0SSuyog Pawar cluster_data_t *ps_cluster_data,
3285*c83a76b0SSuyog Pawar U08 *pu1_num_clusters,
3286*c83a76b0SSuyog Pawar S16 i2_mv_x,
3287*c83a76b0SSuyog Pawar S16 i2_mv_y,
3288*c83a76b0SSuyog Pawar U08 i1_ref_idx,
3289*c83a76b0SSuyog Pawar S32 i4_sdi,
3290*c83a76b0SSuyog Pawar PART_ID_T e_part_id,
3291*c83a76b0SSuyog Pawar U08 is_part_of_bi)
3292*c83a76b0SSuyog Pawar {
3293*c83a76b0SSuyog Pawar S32 i;
3294*c83a76b0SSuyog Pawar S32 min_mvd_cluster_id = -1;
3295*c83a76b0SSuyog Pawar S32 mvd, mvd_limit, mvdx, mvdy;
3296*c83a76b0SSuyog Pawar S32 min_mvdx, min_mvdy;
3297*c83a76b0SSuyog Pawar
3298*c83a76b0SSuyog Pawar S32 min_mvd = MAX_32BIT_VAL;
3299*c83a76b0SSuyog Pawar S32 num_clusters = *pu1_num_clusters;
3300*c83a76b0SSuyog Pawar
3301*c83a76b0SSuyog Pawar S32 mvx = i2_mv_x;
3302*c83a76b0SSuyog Pawar S32 mvy = i2_mv_y;
3303*c83a76b0SSuyog Pawar S32 ref_idx = i1_ref_idx;
3304*c83a76b0SSuyog Pawar S32 sdi = i4_sdi;
3305*c83a76b0SSuyog Pawar S32 new_cluster_idx = MAX_NUM_CLUSTERS_16x16;
3306*c83a76b0SSuyog Pawar
3307*c83a76b0SSuyog Pawar if(num_clusters == 0)
3308*c83a76b0SSuyog Pawar {
3309*c83a76b0SSuyog Pawar cluster_data_t *ps_data = &ps_cluster_data[num_clusters];
3310*c83a76b0SSuyog Pawar
3311*c83a76b0SSuyog Pawar ps_data->num_mvs = 1;
3312*c83a76b0SSuyog Pawar ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3313*c83a76b0SSuyog Pawar ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3314*c83a76b0SSuyog Pawar ps_data->ref_id = ref_idx;
3315*c83a76b0SSuyog Pawar ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3316*c83a76b0SSuyog Pawar ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3317*c83a76b0SSuyog Pawar ps_data->as_mv[0].mvx = mvx;
3318*c83a76b0SSuyog Pawar ps_data->as_mv[0].mvy = mvy;
3319*c83a76b0SSuyog Pawar
3320*c83a76b0SSuyog Pawar /***************************/
3321*c83a76b0SSuyog Pawar ps_data->as_mv[0].is_uni = !is_part_of_bi;
3322*c83a76b0SSuyog Pawar ps_data->as_mv[0].sdi = sdi;
3323*c83a76b0SSuyog Pawar if(is_part_of_bi)
3324*c83a76b0SSuyog Pawar {
3325*c83a76b0SSuyog Pawar ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3326*c83a76b0SSuyog Pawar }
3327*c83a76b0SSuyog Pawar else
3328*c83a76b0SSuyog Pawar {
3329*c83a76b0SSuyog Pawar ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3330*c83a76b0SSuyog Pawar }
3331*c83a76b0SSuyog Pawar /**************************/
3332*c83a76b0SSuyog Pawar ps_data->max_x = mvx;
3333*c83a76b0SSuyog Pawar ps_data->min_x = mvx;
3334*c83a76b0SSuyog Pawar ps_data->max_y = mvy;
3335*c83a76b0SSuyog Pawar ps_data->min_y = mvy;
3336*c83a76b0SSuyog Pawar
3337*c83a76b0SSuyog Pawar ps_data->is_valid_cluster = 1;
3338*c83a76b0SSuyog Pawar
3339*c83a76b0SSuyog Pawar *pu1_num_clusters = 1;
3340*c83a76b0SSuyog Pawar }
3341*c83a76b0SSuyog Pawar else
3342*c83a76b0SSuyog Pawar {
3343*c83a76b0SSuyog Pawar S32 num_clusters_evaluated = 0;
3344*c83a76b0SSuyog Pawar
3345*c83a76b0SSuyog Pawar for(i = 0; num_clusters_evaluated < num_clusters; i++)
3346*c83a76b0SSuyog Pawar {
3347*c83a76b0SSuyog Pawar cluster_data_t *ps_data = &ps_cluster_data[i];
3348*c83a76b0SSuyog Pawar
3349*c83a76b0SSuyog Pawar centroid_t *ps_centroid;
3350*c83a76b0SSuyog Pawar
3351*c83a76b0SSuyog Pawar S32 mvx_q8;
3352*c83a76b0SSuyog Pawar S32 mvy_q8;
3353*c83a76b0SSuyog Pawar S32 posx_q8;
3354*c83a76b0SSuyog Pawar S32 posy_q8;
3355*c83a76b0SSuyog Pawar S32 mvdx_q8;
3356*c83a76b0SSuyog Pawar S32 mvdy_q8;
3357*c83a76b0SSuyog Pawar
3358*c83a76b0SSuyog Pawar /* In anticipation of a possible merging of clusters */
3359*c83a76b0SSuyog Pawar if(ps_data->is_valid_cluster == 0)
3360*c83a76b0SSuyog Pawar {
3361*c83a76b0SSuyog Pawar new_cluster_idx = i;
3362*c83a76b0SSuyog Pawar continue;
3363*c83a76b0SSuyog Pawar }
3364*c83a76b0SSuyog Pawar
3365*c83a76b0SSuyog Pawar if(ref_idx != ps_data->ref_id)
3366*c83a76b0SSuyog Pawar {
3367*c83a76b0SSuyog Pawar num_clusters_evaluated++;
3368*c83a76b0SSuyog Pawar continue;
3369*c83a76b0SSuyog Pawar }
3370*c83a76b0SSuyog Pawar
3371*c83a76b0SSuyog Pawar ps_centroid = &ps_data->s_centroid;
3372*c83a76b0SSuyog Pawar posx_q8 = ps_centroid->i4_pos_x_q8;
3373*c83a76b0SSuyog Pawar posy_q8 = ps_centroid->i4_pos_y_q8;
3374*c83a76b0SSuyog Pawar
3375*c83a76b0SSuyog Pawar mvx_q8 = mvx << 8;
3376*c83a76b0SSuyog Pawar mvy_q8 = mvy << 8;
3377*c83a76b0SSuyog Pawar
3378*c83a76b0SSuyog Pawar mvdx_q8 = posx_q8 - mvx_q8;
3379*c83a76b0SSuyog Pawar mvdy_q8 = posy_q8 - mvy_q8;
3380*c83a76b0SSuyog Pawar
3381*c83a76b0SSuyog Pawar mvdx = (((mvdx_q8 + (1 << 7)) >> 8));
3382*c83a76b0SSuyog Pawar mvdy = (((mvdy_q8 + (1 << 7)) >> 8));
3383*c83a76b0SSuyog Pawar
3384*c83a76b0SSuyog Pawar mvd = ABS(mvdx) + ABS(mvdy);
3385*c83a76b0SSuyog Pawar
3386*c83a76b0SSuyog Pawar if(mvd < min_mvd)
3387*c83a76b0SSuyog Pawar {
3388*c83a76b0SSuyog Pawar min_mvd = mvd;
3389*c83a76b0SSuyog Pawar min_mvdx = mvdx;
3390*c83a76b0SSuyog Pawar min_mvdy = mvdy;
3391*c83a76b0SSuyog Pawar min_mvd_cluster_id = i;
3392*c83a76b0SSuyog Pawar }
3393*c83a76b0SSuyog Pawar
3394*c83a76b0SSuyog Pawar num_clusters_evaluated++;
3395*c83a76b0SSuyog Pawar }
3396*c83a76b0SSuyog Pawar
3397*c83a76b0SSuyog Pawar mvd_limit = (min_mvd_cluster_id == -1)
3398*c83a76b0SSuyog Pawar ? ps_cluster_data[0].max_dist_from_centroid
3399*c83a76b0SSuyog Pawar : ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid;
3400*c83a76b0SSuyog Pawar
3401*c83a76b0SSuyog Pawar /* This condition implies that min_mvd has been updated */
3402*c83a76b0SSuyog Pawar if(min_mvd <= mvd_limit)
3403*c83a76b0SSuyog Pawar {
3404*c83a76b0SSuyog Pawar hme_update_cluster_attributes(
3405*c83a76b0SSuyog Pawar &ps_cluster_data[min_mvd_cluster_id],
3406*c83a76b0SSuyog Pawar mvx,
3407*c83a76b0SSuyog Pawar mvy,
3408*c83a76b0SSuyog Pawar min_mvdx,
3409*c83a76b0SSuyog Pawar min_mvdy,
3410*c83a76b0SSuyog Pawar ref_idx,
3411*c83a76b0SSuyog Pawar sdi,
3412*c83a76b0SSuyog Pawar is_part_of_bi,
3413*c83a76b0SSuyog Pawar e_part_id);
3414*c83a76b0SSuyog Pawar
3415*c83a76b0SSuyog Pawar if(PRT_NxN == ge_part_id_to_part_type[e_part_id])
3416*c83a76b0SSuyog Pawar {
3417*c83a76b0SSuyog Pawar hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id);
3418*c83a76b0SSuyog Pawar }
3419*c83a76b0SSuyog Pawar }
3420*c83a76b0SSuyog Pawar else
3421*c83a76b0SSuyog Pawar {
3422*c83a76b0SSuyog Pawar cluster_data_t *ps_data = (new_cluster_idx == MAX_NUM_CLUSTERS_16x16)
3423*c83a76b0SSuyog Pawar ? &ps_cluster_data[num_clusters]
3424*c83a76b0SSuyog Pawar : &ps_cluster_data[new_cluster_idx];
3425*c83a76b0SSuyog Pawar
3426*c83a76b0SSuyog Pawar ps_data->num_mvs = 1;
3427*c83a76b0SSuyog Pawar ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3428*c83a76b0SSuyog Pawar ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3429*c83a76b0SSuyog Pawar ps_data->ref_id = ref_idx;
3430*c83a76b0SSuyog Pawar ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3431*c83a76b0SSuyog Pawar ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3432*c83a76b0SSuyog Pawar ps_data->as_mv[0].mvx = mvx;
3433*c83a76b0SSuyog Pawar ps_data->as_mv[0].mvy = mvy;
3434*c83a76b0SSuyog Pawar
3435*c83a76b0SSuyog Pawar /***************************/
3436*c83a76b0SSuyog Pawar ps_data->as_mv[0].is_uni = !is_part_of_bi;
3437*c83a76b0SSuyog Pawar ps_data->as_mv[0].sdi = sdi;
3438*c83a76b0SSuyog Pawar if(is_part_of_bi)
3439*c83a76b0SSuyog Pawar {
3440*c83a76b0SSuyog Pawar ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3441*c83a76b0SSuyog Pawar }
3442*c83a76b0SSuyog Pawar else
3443*c83a76b0SSuyog Pawar {
3444*c83a76b0SSuyog Pawar ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3445*c83a76b0SSuyog Pawar }
3446*c83a76b0SSuyog Pawar /**************************/
3447*c83a76b0SSuyog Pawar ps_data->max_x = mvx;
3448*c83a76b0SSuyog Pawar ps_data->min_x = mvx;
3449*c83a76b0SSuyog Pawar ps_data->max_y = mvy;
3450*c83a76b0SSuyog Pawar ps_data->min_y = mvy;
3451*c83a76b0SSuyog Pawar
3452*c83a76b0SSuyog Pawar ps_data->is_valid_cluster = 1;
3453*c83a76b0SSuyog Pawar
3454*c83a76b0SSuyog Pawar num_clusters++;
3455*c83a76b0SSuyog Pawar *pu1_num_clusters = num_clusters;
3456*c83a76b0SSuyog Pawar }
3457*c83a76b0SSuyog Pawar }
3458*c83a76b0SSuyog Pawar }
3459*c83a76b0SSuyog Pawar
3460*c83a76b0SSuyog Pawar /**
3461*c83a76b0SSuyog Pawar ********************************************************************************
3462*c83a76b0SSuyog Pawar * @fn void hme_update_32x32_cluster_attributes
3463*c83a76b0SSuyog Pawar * (
3464*c83a76b0SSuyog Pawar * cluster_32x32_blk_t *ps_blk_32x32,
3465*c83a76b0SSuyog Pawar * cluster_data_t *ps_cluster_data
3466*c83a76b0SSuyog Pawar * )
3467*c83a76b0SSuyog Pawar *
3468*c83a76b0SSuyog Pawar * @brief Updates attributes for 32x32 clusters based on the attributes of
3469*c83a76b0SSuyog Pawar * the constituent 16x16 clusters
3470*c83a76b0SSuyog Pawar *
3471*c83a76b0SSuyog Pawar * @param[out] ps_blk_32x32: structure containing 32x32 block results
3472*c83a76b0SSuyog Pawar *
3473*c83a76b0SSuyog Pawar * @param[in] ps_cluster_data : structure containing 16x16 block results
3474*c83a76b0SSuyog Pawar *
3475*c83a76b0SSuyog Pawar * @return None
3476*c83a76b0SSuyog Pawar ********************************************************************************
3477*c83a76b0SSuyog Pawar */
hme_update_32x32_cluster_attributes(cluster_32x32_blk_t * ps_blk_32x32,cluster_data_t * ps_cluster_data)3478*c83a76b0SSuyog Pawar void hme_update_32x32_cluster_attributes(
3479*c83a76b0SSuyog Pawar cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data)
3480*c83a76b0SSuyog Pawar {
3481*c83a76b0SSuyog Pawar cluster_data_t *ps_cur_cluster_32;
3482*c83a76b0SSuyog Pawar
3483*c83a76b0SSuyog Pawar S32 i;
3484*c83a76b0SSuyog Pawar S32 mvd_limit;
3485*c83a76b0SSuyog Pawar
3486*c83a76b0SSuyog Pawar S32 num_clusters = ps_blk_32x32->num_clusters;
3487*c83a76b0SSuyog Pawar
3488*c83a76b0SSuyog Pawar if(0 == num_clusters)
3489*c83a76b0SSuyog Pawar {
3490*c83a76b0SSuyog Pawar ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3491*c83a76b0SSuyog Pawar
3492*c83a76b0SSuyog Pawar ps_blk_32x32->num_clusters++;
3493*c83a76b0SSuyog Pawar ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3494*c83a76b0SSuyog Pawar
3495*c83a76b0SSuyog Pawar ps_cur_cluster_32->is_valid_cluster = 1;
3496*c83a76b0SSuyog Pawar
3497*c83a76b0SSuyog Pawar ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3498*c83a76b0SSuyog Pawar ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3499*c83a76b0SSuyog Pawar ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3500*c83a76b0SSuyog Pawar
3501*c83a76b0SSuyog Pawar memcpy(
3502*c83a76b0SSuyog Pawar ps_cur_cluster_32->as_mv,
3503*c83a76b0SSuyog Pawar ps_cluster_data->as_mv,
3504*c83a76b0SSuyog Pawar sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3505*c83a76b0SSuyog Pawar
3506*c83a76b0SSuyog Pawar ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3507*c83a76b0SSuyog Pawar
3508*c83a76b0SSuyog Pawar ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3509*c83a76b0SSuyog Pawar
3510*c83a76b0SSuyog Pawar ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3511*c83a76b0SSuyog Pawar ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3512*c83a76b0SSuyog Pawar ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3513*c83a76b0SSuyog Pawar ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3514*c83a76b0SSuyog Pawar
3515*c83a76b0SSuyog Pawar ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3516*c83a76b0SSuyog Pawar }
3517*c83a76b0SSuyog Pawar else
3518*c83a76b0SSuyog Pawar {
3519*c83a76b0SSuyog Pawar centroid_t *ps_centroid;
3520*c83a76b0SSuyog Pawar
3521*c83a76b0SSuyog Pawar S32 cur_posx_q8, cur_posy_q8;
3522*c83a76b0SSuyog Pawar S32 min_mvd_cluster_id = -1;
3523*c83a76b0SSuyog Pawar S32 mvd;
3524*c83a76b0SSuyog Pawar S32 mvdx;
3525*c83a76b0SSuyog Pawar S32 mvdy;
3526*c83a76b0SSuyog Pawar S32 mvdx_min;
3527*c83a76b0SSuyog Pawar S32 mvdy_min;
3528*c83a76b0SSuyog Pawar S32 mvdx_q8;
3529*c83a76b0SSuyog Pawar S32 mvdy_q8;
3530*c83a76b0SSuyog Pawar
3531*c83a76b0SSuyog Pawar S32 num_clusters_evaluated = 0;
3532*c83a76b0SSuyog Pawar
3533*c83a76b0SSuyog Pawar S32 mvd_min = MAX_32BIT_VAL;
3534*c83a76b0SSuyog Pawar
3535*c83a76b0SSuyog Pawar S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3536*c83a76b0SSuyog Pawar S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3537*c83a76b0SSuyog Pawar
3538*c83a76b0SSuyog Pawar for(i = 0; num_clusters_evaluated < num_clusters; i++)
3539*c83a76b0SSuyog Pawar {
3540*c83a76b0SSuyog Pawar ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i];
3541*c83a76b0SSuyog Pawar
3542*c83a76b0SSuyog Pawar if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id)
3543*c83a76b0SSuyog Pawar {
3544*c83a76b0SSuyog Pawar num_clusters_evaluated++;
3545*c83a76b0SSuyog Pawar continue;
3546*c83a76b0SSuyog Pawar }
3547*c83a76b0SSuyog Pawar if(!ps_cluster_data->is_valid_cluster)
3548*c83a76b0SSuyog Pawar {
3549*c83a76b0SSuyog Pawar continue;
3550*c83a76b0SSuyog Pawar }
3551*c83a76b0SSuyog Pawar
3552*c83a76b0SSuyog Pawar num_clusters_evaluated++;
3553*c83a76b0SSuyog Pawar
3554*c83a76b0SSuyog Pawar ps_centroid = &ps_cur_cluster_32->s_centroid;
3555*c83a76b0SSuyog Pawar
3556*c83a76b0SSuyog Pawar cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3557*c83a76b0SSuyog Pawar cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3558*c83a76b0SSuyog Pawar
3559*c83a76b0SSuyog Pawar mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3560*c83a76b0SSuyog Pawar mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3561*c83a76b0SSuyog Pawar
3562*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3563*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3564*c83a76b0SSuyog Pawar
3565*c83a76b0SSuyog Pawar mvd = ABS(mvdx) + ABS(mvdy);
3566*c83a76b0SSuyog Pawar
3567*c83a76b0SSuyog Pawar if(mvd < mvd_min)
3568*c83a76b0SSuyog Pawar {
3569*c83a76b0SSuyog Pawar mvd_min = mvd;
3570*c83a76b0SSuyog Pawar mvdx_min = mvdx;
3571*c83a76b0SSuyog Pawar mvdy_min = mvdy;
3572*c83a76b0SSuyog Pawar min_mvd_cluster_id = i;
3573*c83a76b0SSuyog Pawar }
3574*c83a76b0SSuyog Pawar }
3575*c83a76b0SSuyog Pawar
3576*c83a76b0SSuyog Pawar ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3577*c83a76b0SSuyog Pawar
3578*c83a76b0SSuyog Pawar mvd_limit = (min_mvd_cluster_id == -1)
3579*c83a76b0SSuyog Pawar ? ps_cur_cluster_32[0].max_dist_from_centroid
3580*c83a76b0SSuyog Pawar : ps_cur_cluster_32[min_mvd_cluster_id].max_dist_from_centroid;
3581*c83a76b0SSuyog Pawar
3582*c83a76b0SSuyog Pawar if(mvd_min <= mvd_limit)
3583*c83a76b0SSuyog Pawar {
3584*c83a76b0SSuyog Pawar LWORD64 i8_updated_posx;
3585*c83a76b0SSuyog Pawar LWORD64 i8_updated_posy;
3586*c83a76b0SSuyog Pawar WORD32 minmax_updated_x = 0;
3587*c83a76b0SSuyog Pawar WORD32 minmax_updated_y = 0;
3588*c83a76b0SSuyog Pawar
3589*c83a76b0SSuyog Pawar ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id];
3590*c83a76b0SSuyog Pawar
3591*c83a76b0SSuyog Pawar ps_centroid = &ps_cur_cluster_32->s_centroid;
3592*c83a76b0SSuyog Pawar
3593*c83a76b0SSuyog Pawar ps_cur_cluster_32->is_valid_cluster = 1;
3594*c83a76b0SSuyog Pawar
3595*c83a76b0SSuyog Pawar ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels;
3596*c83a76b0SSuyog Pawar ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3597*c83a76b0SSuyog Pawar ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3598*c83a76b0SSuyog Pawar
3599*c83a76b0SSuyog Pawar memcpy(
3600*c83a76b0SSuyog Pawar &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs],
3601*c83a76b0SSuyog Pawar ps_cluster_data->as_mv,
3602*c83a76b0SSuyog Pawar sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3603*c83a76b0SSuyog Pawar
3604*c83a76b0SSuyog Pawar if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8))
3605*c83a76b0SSuyog Pawar {
3606*c83a76b0SSuyog Pawar ps_cur_cluster_32->min_x = (mvx_inp_q8 + ((1 << 7))) >> 8;
3607*c83a76b0SSuyog Pawar minmax_updated_x = 1;
3608*c83a76b0SSuyog Pawar }
3609*c83a76b0SSuyog Pawar else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8))
3610*c83a76b0SSuyog Pawar {
3611*c83a76b0SSuyog Pawar ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3612*c83a76b0SSuyog Pawar minmax_updated_x = 2;
3613*c83a76b0SSuyog Pawar }
3614*c83a76b0SSuyog Pawar
3615*c83a76b0SSuyog Pawar if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8))
3616*c83a76b0SSuyog Pawar {
3617*c83a76b0SSuyog Pawar ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3618*c83a76b0SSuyog Pawar minmax_updated_y = 1;
3619*c83a76b0SSuyog Pawar }
3620*c83a76b0SSuyog Pawar else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8))
3621*c83a76b0SSuyog Pawar {
3622*c83a76b0SSuyog Pawar ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3623*c83a76b0SSuyog Pawar minmax_updated_y = 2;
3624*c83a76b0SSuyog Pawar }
3625*c83a76b0SSuyog Pawar
3626*c83a76b0SSuyog Pawar switch((minmax_updated_y << 2) + minmax_updated_x)
3627*c83a76b0SSuyog Pawar {
3628*c83a76b0SSuyog Pawar case 1:
3629*c83a76b0SSuyog Pawar {
3630*c83a76b0SSuyog Pawar S32 mvd, mvd_q8;
3631*c83a76b0SSuyog Pawar
3632*c83a76b0SSuyog Pawar mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3633*c83a76b0SSuyog Pawar mvd = (mvd_q8 + (1 << 7)) >> 8;
3634*c83a76b0SSuyog Pawar
3635*c83a76b0SSuyog Pawar if(mvd > (mvd_limit))
3636*c83a76b0SSuyog Pawar {
3637*c83a76b0SSuyog Pawar ps_cur_cluster_32->max_dist_from_centroid = mvd;
3638*c83a76b0SSuyog Pawar }
3639*c83a76b0SSuyog Pawar break;
3640*c83a76b0SSuyog Pawar }
3641*c83a76b0SSuyog Pawar case 2:
3642*c83a76b0SSuyog Pawar {
3643*c83a76b0SSuyog Pawar S32 mvd, mvd_q8;
3644*c83a76b0SSuyog Pawar
3645*c83a76b0SSuyog Pawar mvd_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3646*c83a76b0SSuyog Pawar mvd = (mvd_q8 + (1 << 7)) >> 8;
3647*c83a76b0SSuyog Pawar
3648*c83a76b0SSuyog Pawar if(mvd > (mvd_limit))
3649*c83a76b0SSuyog Pawar {
3650*c83a76b0SSuyog Pawar ps_cur_cluster_32->max_dist_from_centroid = mvd;
3651*c83a76b0SSuyog Pawar }
3652*c83a76b0SSuyog Pawar break;
3653*c83a76b0SSuyog Pawar }
3654*c83a76b0SSuyog Pawar case 4:
3655*c83a76b0SSuyog Pawar {
3656*c83a76b0SSuyog Pawar S32 mvd, mvd_q8;
3657*c83a76b0SSuyog Pawar
3658*c83a76b0SSuyog Pawar mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3659*c83a76b0SSuyog Pawar mvd = (mvd_q8 + (1 << 7)) >> 8;
3660*c83a76b0SSuyog Pawar
3661*c83a76b0SSuyog Pawar if(mvd > (mvd_limit))
3662*c83a76b0SSuyog Pawar {
3663*c83a76b0SSuyog Pawar ps_cur_cluster_32->max_dist_from_centroid = mvd;
3664*c83a76b0SSuyog Pawar }
3665*c83a76b0SSuyog Pawar break;
3666*c83a76b0SSuyog Pawar }
3667*c83a76b0SSuyog Pawar case 5:
3668*c83a76b0SSuyog Pawar {
3669*c83a76b0SSuyog Pawar S32 mvd;
3670*c83a76b0SSuyog Pawar S32 mvdx, mvdx_q8;
3671*c83a76b0SSuyog Pawar S32 mvdy, mvdy_q8;
3672*c83a76b0SSuyog Pawar
3673*c83a76b0SSuyog Pawar mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3674*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3675*c83a76b0SSuyog Pawar
3676*c83a76b0SSuyog Pawar mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3677*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3678*c83a76b0SSuyog Pawar
3679*c83a76b0SSuyog Pawar mvd = (mvdx > mvdy) ? mvdx : mvdy;
3680*c83a76b0SSuyog Pawar
3681*c83a76b0SSuyog Pawar if(mvd > mvd_limit)
3682*c83a76b0SSuyog Pawar {
3683*c83a76b0SSuyog Pawar ps_cur_cluster_32->max_dist_from_centroid = mvd;
3684*c83a76b0SSuyog Pawar }
3685*c83a76b0SSuyog Pawar break;
3686*c83a76b0SSuyog Pawar }
3687*c83a76b0SSuyog Pawar case 6:
3688*c83a76b0SSuyog Pawar {
3689*c83a76b0SSuyog Pawar S32 mvd;
3690*c83a76b0SSuyog Pawar S32 mvdx, mvdx_q8;
3691*c83a76b0SSuyog Pawar S32 mvdy, mvdy_q8;
3692*c83a76b0SSuyog Pawar
3693*c83a76b0SSuyog Pawar mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3694*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3695*c83a76b0SSuyog Pawar
3696*c83a76b0SSuyog Pawar mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3697*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3698*c83a76b0SSuyog Pawar
3699*c83a76b0SSuyog Pawar mvd = (mvdx > mvdy) ? mvdx : mvdy;
3700*c83a76b0SSuyog Pawar
3701*c83a76b0SSuyog Pawar if(mvd > mvd_limit)
3702*c83a76b0SSuyog Pawar {
3703*c83a76b0SSuyog Pawar ps_cur_cluster_32->max_dist_from_centroid = mvd;
3704*c83a76b0SSuyog Pawar }
3705*c83a76b0SSuyog Pawar break;
3706*c83a76b0SSuyog Pawar }
3707*c83a76b0SSuyog Pawar case 8:
3708*c83a76b0SSuyog Pawar {
3709*c83a76b0SSuyog Pawar S32 mvd, mvd_q8;
3710*c83a76b0SSuyog Pawar
3711*c83a76b0SSuyog Pawar mvd_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3712*c83a76b0SSuyog Pawar mvd = (mvd_q8 + (1 << 7)) >> 8;
3713*c83a76b0SSuyog Pawar
3714*c83a76b0SSuyog Pawar if(mvd > (mvd_limit))
3715*c83a76b0SSuyog Pawar {
3716*c83a76b0SSuyog Pawar ps_cur_cluster_32->max_dist_from_centroid = mvd;
3717*c83a76b0SSuyog Pawar }
3718*c83a76b0SSuyog Pawar break;
3719*c83a76b0SSuyog Pawar }
3720*c83a76b0SSuyog Pawar case 9:
3721*c83a76b0SSuyog Pawar {
3722*c83a76b0SSuyog Pawar S32 mvd;
3723*c83a76b0SSuyog Pawar S32 mvdx, mvdx_q8;
3724*c83a76b0SSuyog Pawar S32 mvdy, mvdy_q8;
3725*c83a76b0SSuyog Pawar
3726*c83a76b0SSuyog Pawar mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3727*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3728*c83a76b0SSuyog Pawar
3729*c83a76b0SSuyog Pawar mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3730*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3731*c83a76b0SSuyog Pawar
3732*c83a76b0SSuyog Pawar mvd = (mvdx > mvdy) ? mvdx : mvdy;
3733*c83a76b0SSuyog Pawar
3734*c83a76b0SSuyog Pawar if(mvd > mvd_limit)
3735*c83a76b0SSuyog Pawar {
3736*c83a76b0SSuyog Pawar ps_cur_cluster_32->max_dist_from_centroid = mvd;
3737*c83a76b0SSuyog Pawar }
3738*c83a76b0SSuyog Pawar break;
3739*c83a76b0SSuyog Pawar }
3740*c83a76b0SSuyog Pawar case 10:
3741*c83a76b0SSuyog Pawar {
3742*c83a76b0SSuyog Pawar S32 mvd;
3743*c83a76b0SSuyog Pawar S32 mvdx, mvdx_q8;
3744*c83a76b0SSuyog Pawar S32 mvdy, mvdy_q8;
3745*c83a76b0SSuyog Pawar
3746*c83a76b0SSuyog Pawar mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3747*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3748*c83a76b0SSuyog Pawar
3749*c83a76b0SSuyog Pawar mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3750*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3751*c83a76b0SSuyog Pawar
3752*c83a76b0SSuyog Pawar mvd = (mvdx > mvdy) ? mvdx : mvdy;
3753*c83a76b0SSuyog Pawar
3754*c83a76b0SSuyog Pawar if(mvd > ps_cur_cluster_32->max_dist_from_centroid)
3755*c83a76b0SSuyog Pawar {
3756*c83a76b0SSuyog Pawar ps_cur_cluster_32->max_dist_from_centroid = mvd;
3757*c83a76b0SSuyog Pawar }
3758*c83a76b0SSuyog Pawar break;
3759*c83a76b0SSuyog Pawar }
3760*c83a76b0SSuyog Pawar default:
3761*c83a76b0SSuyog Pawar {
3762*c83a76b0SSuyog Pawar break;
3763*c83a76b0SSuyog Pawar }
3764*c83a76b0SSuyog Pawar }
3765*c83a76b0SSuyog Pawar
3766*c83a76b0SSuyog Pawar i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) +
3767*c83a76b0SSuyog Pawar ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
3768*c83a76b0SSuyog Pawar i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) +
3769*c83a76b0SSuyog Pawar ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
3770*c83a76b0SSuyog Pawar
3771*c83a76b0SSuyog Pawar ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs;
3772*c83a76b0SSuyog Pawar
3773*c83a76b0SSuyog Pawar ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs);
3774*c83a76b0SSuyog Pawar ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs);
3775*c83a76b0SSuyog Pawar }
3776*c83a76b0SSuyog Pawar else if(num_clusters < MAX_NUM_CLUSTERS_32x32)
3777*c83a76b0SSuyog Pawar {
3778*c83a76b0SSuyog Pawar ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters];
3779*c83a76b0SSuyog Pawar
3780*c83a76b0SSuyog Pawar ps_blk_32x32->num_clusters++;
3781*c83a76b0SSuyog Pawar ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3782*c83a76b0SSuyog Pawar
3783*c83a76b0SSuyog Pawar ps_cur_cluster_32->is_valid_cluster = 1;
3784*c83a76b0SSuyog Pawar
3785*c83a76b0SSuyog Pawar ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3786*c83a76b0SSuyog Pawar ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3787*c83a76b0SSuyog Pawar ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3788*c83a76b0SSuyog Pawar
3789*c83a76b0SSuyog Pawar memcpy(
3790*c83a76b0SSuyog Pawar ps_cur_cluster_32->as_mv,
3791*c83a76b0SSuyog Pawar ps_cluster_data->as_mv,
3792*c83a76b0SSuyog Pawar sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3793*c83a76b0SSuyog Pawar
3794*c83a76b0SSuyog Pawar ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3795*c83a76b0SSuyog Pawar
3796*c83a76b0SSuyog Pawar ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3797*c83a76b0SSuyog Pawar
3798*c83a76b0SSuyog Pawar ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3799*c83a76b0SSuyog Pawar ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3800*c83a76b0SSuyog Pawar ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3801*c83a76b0SSuyog Pawar ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3802*c83a76b0SSuyog Pawar
3803*c83a76b0SSuyog Pawar ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3804*c83a76b0SSuyog Pawar }
3805*c83a76b0SSuyog Pawar }
3806*c83a76b0SSuyog Pawar }
3807*c83a76b0SSuyog Pawar
3808*c83a76b0SSuyog Pawar /**
3809*c83a76b0SSuyog Pawar ********************************************************************************
3810*c83a76b0SSuyog Pawar * @fn void hme_update_64x64_cluster_attributes
3811*c83a76b0SSuyog Pawar * (
3812*c83a76b0SSuyog Pawar * cluster_64x64_blk_t *ps_blk_32x32,
3813*c83a76b0SSuyog Pawar * cluster_data_t *ps_cluster_data
3814*c83a76b0SSuyog Pawar * )
3815*c83a76b0SSuyog Pawar *
3816*c83a76b0SSuyog Pawar * @brief Updates attributes for 64x64 clusters based on the attributes of
3817*c83a76b0SSuyog Pawar * the constituent 16x16 clusters
3818*c83a76b0SSuyog Pawar *
3819*c83a76b0SSuyog Pawar * @param[out] ps_blk_64x64: structure containing 64x64 block results
3820*c83a76b0SSuyog Pawar *
3821*c83a76b0SSuyog Pawar * @param[in] ps_cluster_data : structure containing 32x32 block results
3822*c83a76b0SSuyog Pawar *
3823*c83a76b0SSuyog Pawar * @return None
3824*c83a76b0SSuyog Pawar ********************************************************************************
3825*c83a76b0SSuyog Pawar */
hme_update_64x64_cluster_attributes(cluster_64x64_blk_t * ps_blk_64x64,cluster_data_t * ps_cluster_data)3826*c83a76b0SSuyog Pawar void hme_update_64x64_cluster_attributes(
3827*c83a76b0SSuyog Pawar cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data)
3828*c83a76b0SSuyog Pawar {
3829*c83a76b0SSuyog Pawar cluster_data_t *ps_cur_cluster_64;
3830*c83a76b0SSuyog Pawar
3831*c83a76b0SSuyog Pawar S32 i;
3832*c83a76b0SSuyog Pawar S32 mvd_limit;
3833*c83a76b0SSuyog Pawar
3834*c83a76b0SSuyog Pawar S32 num_clusters = ps_blk_64x64->num_clusters;
3835*c83a76b0SSuyog Pawar
3836*c83a76b0SSuyog Pawar if(0 == num_clusters)
3837*c83a76b0SSuyog Pawar {
3838*c83a76b0SSuyog Pawar ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[0];
3839*c83a76b0SSuyog Pawar
3840*c83a76b0SSuyog Pawar ps_blk_64x64->num_clusters++;
3841*c83a76b0SSuyog Pawar ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
3842*c83a76b0SSuyog Pawar
3843*c83a76b0SSuyog Pawar ps_cur_cluster_64->is_valid_cluster = 1;
3844*c83a76b0SSuyog Pawar
3845*c83a76b0SSuyog Pawar ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
3846*c83a76b0SSuyog Pawar ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3847*c83a76b0SSuyog Pawar ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3848*c83a76b0SSuyog Pawar
3849*c83a76b0SSuyog Pawar memcpy(
3850*c83a76b0SSuyog Pawar ps_cur_cluster_64->as_mv,
3851*c83a76b0SSuyog Pawar ps_cluster_data->as_mv,
3852*c83a76b0SSuyog Pawar sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3853*c83a76b0SSuyog Pawar
3854*c83a76b0SSuyog Pawar ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
3855*c83a76b0SSuyog Pawar
3856*c83a76b0SSuyog Pawar ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
3857*c83a76b0SSuyog Pawar
3858*c83a76b0SSuyog Pawar ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
3859*c83a76b0SSuyog Pawar ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
3860*c83a76b0SSuyog Pawar ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
3861*c83a76b0SSuyog Pawar ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
3862*c83a76b0SSuyog Pawar
3863*c83a76b0SSuyog Pawar ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
3864*c83a76b0SSuyog Pawar }
3865*c83a76b0SSuyog Pawar else
3866*c83a76b0SSuyog Pawar {
3867*c83a76b0SSuyog Pawar centroid_t *ps_centroid;
3868*c83a76b0SSuyog Pawar
3869*c83a76b0SSuyog Pawar S32 cur_posx_q8, cur_posy_q8;
3870*c83a76b0SSuyog Pawar S32 min_mvd_cluster_id = -1;
3871*c83a76b0SSuyog Pawar S32 mvd;
3872*c83a76b0SSuyog Pawar S32 mvdx;
3873*c83a76b0SSuyog Pawar S32 mvdy;
3874*c83a76b0SSuyog Pawar S32 mvdx_min;
3875*c83a76b0SSuyog Pawar S32 mvdy_min;
3876*c83a76b0SSuyog Pawar S32 mvdx_q8;
3877*c83a76b0SSuyog Pawar S32 mvdy_q8;
3878*c83a76b0SSuyog Pawar
3879*c83a76b0SSuyog Pawar S32 num_clusters_evaluated = 0;
3880*c83a76b0SSuyog Pawar
3881*c83a76b0SSuyog Pawar S32 mvd_min = MAX_32BIT_VAL;
3882*c83a76b0SSuyog Pawar
3883*c83a76b0SSuyog Pawar S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3884*c83a76b0SSuyog Pawar S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3885*c83a76b0SSuyog Pawar
3886*c83a76b0SSuyog Pawar for(i = 0; num_clusters_evaluated < num_clusters; i++)
3887*c83a76b0SSuyog Pawar {
3888*c83a76b0SSuyog Pawar ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[i];
3889*c83a76b0SSuyog Pawar
3890*c83a76b0SSuyog Pawar if(ps_cur_cluster_64->ref_id != ps_cluster_data->ref_id)
3891*c83a76b0SSuyog Pawar {
3892*c83a76b0SSuyog Pawar num_clusters_evaluated++;
3893*c83a76b0SSuyog Pawar continue;
3894*c83a76b0SSuyog Pawar }
3895*c83a76b0SSuyog Pawar
3896*c83a76b0SSuyog Pawar if(!ps_cur_cluster_64->is_valid_cluster)
3897*c83a76b0SSuyog Pawar {
3898*c83a76b0SSuyog Pawar continue;
3899*c83a76b0SSuyog Pawar }
3900*c83a76b0SSuyog Pawar
3901*c83a76b0SSuyog Pawar num_clusters_evaluated++;
3902*c83a76b0SSuyog Pawar
3903*c83a76b0SSuyog Pawar ps_centroid = &ps_cur_cluster_64->s_centroid;
3904*c83a76b0SSuyog Pawar
3905*c83a76b0SSuyog Pawar cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3906*c83a76b0SSuyog Pawar cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3907*c83a76b0SSuyog Pawar
3908*c83a76b0SSuyog Pawar mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3909*c83a76b0SSuyog Pawar mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3910*c83a76b0SSuyog Pawar
3911*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3912*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3913*c83a76b0SSuyog Pawar
3914*c83a76b0SSuyog Pawar mvd = ABS(mvdx) + ABS(mvdy);
3915*c83a76b0SSuyog Pawar
3916*c83a76b0SSuyog Pawar if(mvd < mvd_min)
3917*c83a76b0SSuyog Pawar {
3918*c83a76b0SSuyog Pawar mvd_min = mvd;
3919*c83a76b0SSuyog Pawar mvdx_min = mvdx;
3920*c83a76b0SSuyog Pawar mvdy_min = mvdy;
3921*c83a76b0SSuyog Pawar min_mvd_cluster_id = i;
3922*c83a76b0SSuyog Pawar }
3923*c83a76b0SSuyog Pawar }
3924*c83a76b0SSuyog Pawar
3925*c83a76b0SSuyog Pawar ps_cur_cluster_64 = ps_blk_64x64->as_cluster_data;
3926*c83a76b0SSuyog Pawar
3927*c83a76b0SSuyog Pawar mvd_limit = (min_mvd_cluster_id == -1)
3928*c83a76b0SSuyog Pawar ? ps_cur_cluster_64[0].max_dist_from_centroid
3929*c83a76b0SSuyog Pawar : ps_cur_cluster_64[min_mvd_cluster_id].max_dist_from_centroid;
3930*c83a76b0SSuyog Pawar
3931*c83a76b0SSuyog Pawar if(mvd_min <= mvd_limit)
3932*c83a76b0SSuyog Pawar {
3933*c83a76b0SSuyog Pawar LWORD64 i8_updated_posx;
3934*c83a76b0SSuyog Pawar LWORD64 i8_updated_posy;
3935*c83a76b0SSuyog Pawar WORD32 minmax_updated_x = 0;
3936*c83a76b0SSuyog Pawar WORD32 minmax_updated_y = 0;
3937*c83a76b0SSuyog Pawar
3938*c83a76b0SSuyog Pawar ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[min_mvd_cluster_id];
3939*c83a76b0SSuyog Pawar
3940*c83a76b0SSuyog Pawar ps_centroid = &ps_cur_cluster_64->s_centroid;
3941*c83a76b0SSuyog Pawar
3942*c83a76b0SSuyog Pawar ps_cur_cluster_64->is_valid_cluster = 1;
3943*c83a76b0SSuyog Pawar
3944*c83a76b0SSuyog Pawar ps_cur_cluster_64->area_in_pixels += ps_cluster_data->area_in_pixels;
3945*c83a76b0SSuyog Pawar ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3946*c83a76b0SSuyog Pawar ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3947*c83a76b0SSuyog Pawar
3948*c83a76b0SSuyog Pawar memcpy(
3949*c83a76b0SSuyog Pawar &ps_cur_cluster_64->as_mv[ps_cur_cluster_64->num_mvs],
3950*c83a76b0SSuyog Pawar ps_cluster_data->as_mv,
3951*c83a76b0SSuyog Pawar sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3952*c83a76b0SSuyog Pawar
3953*c83a76b0SSuyog Pawar if((mvdx_min > 0) && ((ps_cur_cluster_64->min_x << 8) > mvx_inp_q8))
3954*c83a76b0SSuyog Pawar {
3955*c83a76b0SSuyog Pawar ps_cur_cluster_64->min_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3956*c83a76b0SSuyog Pawar minmax_updated_x = 1;
3957*c83a76b0SSuyog Pawar }
3958*c83a76b0SSuyog Pawar else if((mvdx_min < 0) && ((ps_cur_cluster_64->max_x << 8) < mvx_inp_q8))
3959*c83a76b0SSuyog Pawar {
3960*c83a76b0SSuyog Pawar ps_cur_cluster_64->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3961*c83a76b0SSuyog Pawar minmax_updated_x = 2;
3962*c83a76b0SSuyog Pawar }
3963*c83a76b0SSuyog Pawar
3964*c83a76b0SSuyog Pawar if((mvdy_min > 0) && ((ps_cur_cluster_64->min_y << 8) > mvy_inp_q8))
3965*c83a76b0SSuyog Pawar {
3966*c83a76b0SSuyog Pawar ps_cur_cluster_64->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3967*c83a76b0SSuyog Pawar minmax_updated_y = 1;
3968*c83a76b0SSuyog Pawar }
3969*c83a76b0SSuyog Pawar else if((mvdy_min < 0) && ((ps_cur_cluster_64->max_y << 8) < mvy_inp_q8))
3970*c83a76b0SSuyog Pawar {
3971*c83a76b0SSuyog Pawar ps_cur_cluster_64->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3972*c83a76b0SSuyog Pawar minmax_updated_y = 2;
3973*c83a76b0SSuyog Pawar }
3974*c83a76b0SSuyog Pawar
3975*c83a76b0SSuyog Pawar switch((minmax_updated_y << 2) + minmax_updated_x)
3976*c83a76b0SSuyog Pawar {
3977*c83a76b0SSuyog Pawar case 1:
3978*c83a76b0SSuyog Pawar {
3979*c83a76b0SSuyog Pawar S32 mvd, mvd_q8;
3980*c83a76b0SSuyog Pawar
3981*c83a76b0SSuyog Pawar mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
3982*c83a76b0SSuyog Pawar mvd = (mvd_q8 + (1 << 7)) >> 8;
3983*c83a76b0SSuyog Pawar
3984*c83a76b0SSuyog Pawar if(mvd > (mvd_limit))
3985*c83a76b0SSuyog Pawar {
3986*c83a76b0SSuyog Pawar ps_cur_cluster_64->max_dist_from_centroid = mvd;
3987*c83a76b0SSuyog Pawar }
3988*c83a76b0SSuyog Pawar break;
3989*c83a76b0SSuyog Pawar }
3990*c83a76b0SSuyog Pawar case 2:
3991*c83a76b0SSuyog Pawar {
3992*c83a76b0SSuyog Pawar S32 mvd, mvd_q8;
3993*c83a76b0SSuyog Pawar
3994*c83a76b0SSuyog Pawar mvd_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
3995*c83a76b0SSuyog Pawar mvd = (mvd_q8 + (1 << 7)) >> 8;
3996*c83a76b0SSuyog Pawar
3997*c83a76b0SSuyog Pawar if(mvd > (mvd_limit))
3998*c83a76b0SSuyog Pawar {
3999*c83a76b0SSuyog Pawar ps_cur_cluster_64->max_dist_from_centroid = mvd;
4000*c83a76b0SSuyog Pawar }
4001*c83a76b0SSuyog Pawar break;
4002*c83a76b0SSuyog Pawar }
4003*c83a76b0SSuyog Pawar case 4:
4004*c83a76b0SSuyog Pawar {
4005*c83a76b0SSuyog Pawar S32 mvd, mvd_q8;
4006*c83a76b0SSuyog Pawar
4007*c83a76b0SSuyog Pawar mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4008*c83a76b0SSuyog Pawar mvd = (mvd_q8 + (1 << 7)) >> 8;
4009*c83a76b0SSuyog Pawar
4010*c83a76b0SSuyog Pawar if(mvd > (mvd_limit))
4011*c83a76b0SSuyog Pawar {
4012*c83a76b0SSuyog Pawar ps_cur_cluster_64->max_dist_from_centroid = mvd;
4013*c83a76b0SSuyog Pawar }
4014*c83a76b0SSuyog Pawar break;
4015*c83a76b0SSuyog Pawar }
4016*c83a76b0SSuyog Pawar case 5:
4017*c83a76b0SSuyog Pawar {
4018*c83a76b0SSuyog Pawar S32 mvd;
4019*c83a76b0SSuyog Pawar S32 mvdx, mvdx_q8;
4020*c83a76b0SSuyog Pawar S32 mvdy, mvdy_q8;
4021*c83a76b0SSuyog Pawar
4022*c83a76b0SSuyog Pawar mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4023*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4024*c83a76b0SSuyog Pawar
4025*c83a76b0SSuyog Pawar mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4026*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4027*c83a76b0SSuyog Pawar
4028*c83a76b0SSuyog Pawar mvd = (mvdx > mvdy) ? mvdx : mvdy;
4029*c83a76b0SSuyog Pawar
4030*c83a76b0SSuyog Pawar if(mvd > mvd_limit)
4031*c83a76b0SSuyog Pawar {
4032*c83a76b0SSuyog Pawar ps_cur_cluster_64->max_dist_from_centroid = mvd;
4033*c83a76b0SSuyog Pawar }
4034*c83a76b0SSuyog Pawar break;
4035*c83a76b0SSuyog Pawar }
4036*c83a76b0SSuyog Pawar case 6:
4037*c83a76b0SSuyog Pawar {
4038*c83a76b0SSuyog Pawar S32 mvd;
4039*c83a76b0SSuyog Pawar S32 mvdx, mvdx_q8;
4040*c83a76b0SSuyog Pawar S32 mvdy, mvdy_q8;
4041*c83a76b0SSuyog Pawar
4042*c83a76b0SSuyog Pawar mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4043*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4044*c83a76b0SSuyog Pawar
4045*c83a76b0SSuyog Pawar mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4046*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4047*c83a76b0SSuyog Pawar
4048*c83a76b0SSuyog Pawar mvd = (mvdx > mvdy) ? mvdx : mvdy;
4049*c83a76b0SSuyog Pawar
4050*c83a76b0SSuyog Pawar if(mvd > mvd_limit)
4051*c83a76b0SSuyog Pawar {
4052*c83a76b0SSuyog Pawar ps_cur_cluster_64->max_dist_from_centroid = mvd;
4053*c83a76b0SSuyog Pawar }
4054*c83a76b0SSuyog Pawar break;
4055*c83a76b0SSuyog Pawar }
4056*c83a76b0SSuyog Pawar case 8:
4057*c83a76b0SSuyog Pawar {
4058*c83a76b0SSuyog Pawar S32 mvd, mvd_q8;
4059*c83a76b0SSuyog Pawar
4060*c83a76b0SSuyog Pawar mvd_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4061*c83a76b0SSuyog Pawar mvd = (mvd_q8 + (1 << 7)) >> 8;
4062*c83a76b0SSuyog Pawar
4063*c83a76b0SSuyog Pawar if(mvd > (mvd_limit))
4064*c83a76b0SSuyog Pawar {
4065*c83a76b0SSuyog Pawar ps_cur_cluster_64->max_dist_from_centroid = mvd;
4066*c83a76b0SSuyog Pawar }
4067*c83a76b0SSuyog Pawar break;
4068*c83a76b0SSuyog Pawar }
4069*c83a76b0SSuyog Pawar case 9:
4070*c83a76b0SSuyog Pawar {
4071*c83a76b0SSuyog Pawar S32 mvd;
4072*c83a76b0SSuyog Pawar S32 mvdx, mvdx_q8;
4073*c83a76b0SSuyog Pawar S32 mvdy, mvdy_q8;
4074*c83a76b0SSuyog Pawar
4075*c83a76b0SSuyog Pawar mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4076*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4077*c83a76b0SSuyog Pawar
4078*c83a76b0SSuyog Pawar mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4079*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4080*c83a76b0SSuyog Pawar
4081*c83a76b0SSuyog Pawar mvd = (mvdx > mvdy) ? mvdx : mvdy;
4082*c83a76b0SSuyog Pawar
4083*c83a76b0SSuyog Pawar if(mvd > mvd_limit)
4084*c83a76b0SSuyog Pawar {
4085*c83a76b0SSuyog Pawar ps_cur_cluster_64->max_dist_from_centroid = mvd;
4086*c83a76b0SSuyog Pawar }
4087*c83a76b0SSuyog Pawar break;
4088*c83a76b0SSuyog Pawar }
4089*c83a76b0SSuyog Pawar case 10:
4090*c83a76b0SSuyog Pawar {
4091*c83a76b0SSuyog Pawar S32 mvd;
4092*c83a76b0SSuyog Pawar S32 mvdx, mvdx_q8;
4093*c83a76b0SSuyog Pawar S32 mvdy, mvdy_q8;
4094*c83a76b0SSuyog Pawar
4095*c83a76b0SSuyog Pawar mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4096*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4097*c83a76b0SSuyog Pawar
4098*c83a76b0SSuyog Pawar mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4099*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4100*c83a76b0SSuyog Pawar
4101*c83a76b0SSuyog Pawar mvd = (mvdx > mvdy) ? mvdx : mvdy;
4102*c83a76b0SSuyog Pawar
4103*c83a76b0SSuyog Pawar if(mvd > ps_cur_cluster_64->max_dist_from_centroid)
4104*c83a76b0SSuyog Pawar {
4105*c83a76b0SSuyog Pawar ps_cur_cluster_64->max_dist_from_centroid = mvd;
4106*c83a76b0SSuyog Pawar }
4107*c83a76b0SSuyog Pawar break;
4108*c83a76b0SSuyog Pawar }
4109*c83a76b0SSuyog Pawar default:
4110*c83a76b0SSuyog Pawar {
4111*c83a76b0SSuyog Pawar break;
4112*c83a76b0SSuyog Pawar }
4113*c83a76b0SSuyog Pawar }
4114*c83a76b0SSuyog Pawar
4115*c83a76b0SSuyog Pawar i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_64->num_mvs) +
4116*c83a76b0SSuyog Pawar ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
4117*c83a76b0SSuyog Pawar i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_64->num_mvs) +
4118*c83a76b0SSuyog Pawar ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
4119*c83a76b0SSuyog Pawar
4120*c83a76b0SSuyog Pawar ps_cur_cluster_64->num_mvs += ps_cluster_data->num_mvs;
4121*c83a76b0SSuyog Pawar
4122*c83a76b0SSuyog Pawar ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_64->num_mvs);
4123*c83a76b0SSuyog Pawar ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_64->num_mvs);
4124*c83a76b0SSuyog Pawar }
4125*c83a76b0SSuyog Pawar else if(num_clusters < MAX_NUM_CLUSTERS_64x64)
4126*c83a76b0SSuyog Pawar {
4127*c83a76b0SSuyog Pawar ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[num_clusters];
4128*c83a76b0SSuyog Pawar
4129*c83a76b0SSuyog Pawar ps_blk_64x64->num_clusters++;
4130*c83a76b0SSuyog Pawar ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
4131*c83a76b0SSuyog Pawar
4132*c83a76b0SSuyog Pawar ps_cur_cluster_64->is_valid_cluster = 1;
4133*c83a76b0SSuyog Pawar
4134*c83a76b0SSuyog Pawar ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
4135*c83a76b0SSuyog Pawar ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
4136*c83a76b0SSuyog Pawar ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
4137*c83a76b0SSuyog Pawar
4138*c83a76b0SSuyog Pawar memcpy(
4139*c83a76b0SSuyog Pawar &ps_cur_cluster_64->as_mv[0],
4140*c83a76b0SSuyog Pawar ps_cluster_data->as_mv,
4141*c83a76b0SSuyog Pawar sizeof(mv_data_t) * ps_cluster_data->num_mvs);
4142*c83a76b0SSuyog Pawar
4143*c83a76b0SSuyog Pawar ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
4144*c83a76b0SSuyog Pawar
4145*c83a76b0SSuyog Pawar ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
4146*c83a76b0SSuyog Pawar
4147*c83a76b0SSuyog Pawar ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
4148*c83a76b0SSuyog Pawar ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
4149*c83a76b0SSuyog Pawar ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
4150*c83a76b0SSuyog Pawar ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
4151*c83a76b0SSuyog Pawar
4152*c83a76b0SSuyog Pawar ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
4153*c83a76b0SSuyog Pawar }
4154*c83a76b0SSuyog Pawar }
4155*c83a76b0SSuyog Pawar }
4156*c83a76b0SSuyog Pawar
4157*c83a76b0SSuyog Pawar /**
4158*c83a76b0SSuyog Pawar ********************************************************************************
4159*c83a76b0SSuyog Pawar * @fn void hme_update_32x32_clusters
4160*c83a76b0SSuyog Pawar * (
4161*c83a76b0SSuyog Pawar * cluster_32x32_blk_t *ps_blk_32x32,
4162*c83a76b0SSuyog Pawar * cluster_16x16_blk_t *ps_blk_16x16
4163*c83a76b0SSuyog Pawar * )
4164*c83a76b0SSuyog Pawar *
4165*c83a76b0SSuyog Pawar * @brief Updates attributes for 32x32 clusters based on the attributes of
4166*c83a76b0SSuyog Pawar * the constituent 16x16 clusters
4167*c83a76b0SSuyog Pawar *
4168*c83a76b0SSuyog Pawar * @param[out] ps_blk_32x32: structure containing 32x32 block results
4169*c83a76b0SSuyog Pawar *
4170*c83a76b0SSuyog Pawar * @param[in] ps_blk_16x16 : structure containing 16x16 block results
4171*c83a76b0SSuyog Pawar *
4172*c83a76b0SSuyog Pawar * @return None
4173*c83a76b0SSuyog Pawar ********************************************************************************
4174*c83a76b0SSuyog Pawar */
4175*c83a76b0SSuyog Pawar static __inline void
hme_update_32x32_clusters(cluster_32x32_blk_t * ps_blk_32x32,cluster_16x16_blk_t * ps_blk_16x16)4176*c83a76b0SSuyog Pawar hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16)
4177*c83a76b0SSuyog Pawar {
4178*c83a76b0SSuyog Pawar cluster_16x16_blk_t *ps_blk_16x16_cur;
4179*c83a76b0SSuyog Pawar cluster_data_t *ps_cur_cluster;
4180*c83a76b0SSuyog Pawar
4181*c83a76b0SSuyog Pawar S32 i, j;
4182*c83a76b0SSuyog Pawar S32 num_clusters_cur_16x16_blk;
4183*c83a76b0SSuyog Pawar
4184*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
4185*c83a76b0SSuyog Pawar {
4186*c83a76b0SSuyog Pawar S32 num_clusters_evaluated = 0;
4187*c83a76b0SSuyog Pawar
4188*c83a76b0SSuyog Pawar ps_blk_16x16_cur = &ps_blk_16x16[i];
4189*c83a76b0SSuyog Pawar
4190*c83a76b0SSuyog Pawar num_clusters_cur_16x16_blk = ps_blk_16x16_cur->num_clusters;
4191*c83a76b0SSuyog Pawar
4192*c83a76b0SSuyog Pawar ps_blk_32x32->intra_mv_area += ps_blk_16x16_cur->intra_mv_area;
4193*c83a76b0SSuyog Pawar
4194*c83a76b0SSuyog Pawar ps_blk_32x32->best_inter_cost += ps_blk_16x16_cur->best_inter_cost;
4195*c83a76b0SSuyog Pawar
4196*c83a76b0SSuyog Pawar for(j = 0; num_clusters_evaluated < num_clusters_cur_16x16_blk; j++)
4197*c83a76b0SSuyog Pawar {
4198*c83a76b0SSuyog Pawar ps_cur_cluster = &ps_blk_16x16_cur->as_cluster_data[j];
4199*c83a76b0SSuyog Pawar
4200*c83a76b0SSuyog Pawar if(!ps_cur_cluster->is_valid_cluster)
4201*c83a76b0SSuyog Pawar {
4202*c83a76b0SSuyog Pawar continue;
4203*c83a76b0SSuyog Pawar }
4204*c83a76b0SSuyog Pawar
4205*c83a76b0SSuyog Pawar hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cur_cluster);
4206*c83a76b0SSuyog Pawar
4207*c83a76b0SSuyog Pawar num_clusters_evaluated++;
4208*c83a76b0SSuyog Pawar }
4209*c83a76b0SSuyog Pawar }
4210*c83a76b0SSuyog Pawar }
4211*c83a76b0SSuyog Pawar
4212*c83a76b0SSuyog Pawar /**
4213*c83a76b0SSuyog Pawar ********************************************************************************
4214*c83a76b0SSuyog Pawar * @fn void hme_update_64x64_clusters
4215*c83a76b0SSuyog Pawar * (
4216*c83a76b0SSuyog Pawar * cluster_64x64_blk_t *ps_blk_64x64,
4217*c83a76b0SSuyog Pawar * cluster_32x32_blk_t *ps_blk_32x32
4218*c83a76b0SSuyog Pawar * )
4219*c83a76b0SSuyog Pawar *
4220*c83a76b0SSuyog Pawar * @brief Updates attributes for 64x64 clusters based on the attributes of
4221*c83a76b0SSuyog Pawar * the constituent 16x16 clusters
4222*c83a76b0SSuyog Pawar *
4223*c83a76b0SSuyog Pawar * @param[out] ps_blk_64x64: structure containing 32x32 block results
4224*c83a76b0SSuyog Pawar *
4225*c83a76b0SSuyog Pawar * @param[in] ps_blk_32x32 : structure containing 16x16 block results
4226*c83a76b0SSuyog Pawar *
4227*c83a76b0SSuyog Pawar * @return None
4228*c83a76b0SSuyog Pawar ********************************************************************************
4229*c83a76b0SSuyog Pawar */
4230*c83a76b0SSuyog Pawar static __inline void
hme_update_64x64_clusters(cluster_64x64_blk_t * ps_blk_64x64,cluster_32x32_blk_t * ps_blk_32x32)4231*c83a76b0SSuyog Pawar hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32)
4232*c83a76b0SSuyog Pawar {
4233*c83a76b0SSuyog Pawar cluster_32x32_blk_t *ps_blk_32x32_cur;
4234*c83a76b0SSuyog Pawar cluster_data_t *ps_cur_cluster;
4235*c83a76b0SSuyog Pawar
4236*c83a76b0SSuyog Pawar S32 i, j;
4237*c83a76b0SSuyog Pawar S32 num_clusters_cur_32x32_blk;
4238*c83a76b0SSuyog Pawar
4239*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
4240*c83a76b0SSuyog Pawar {
4241*c83a76b0SSuyog Pawar S32 num_clusters_evaluated = 0;
4242*c83a76b0SSuyog Pawar
4243*c83a76b0SSuyog Pawar ps_blk_32x32_cur = &ps_blk_32x32[i];
4244*c83a76b0SSuyog Pawar
4245*c83a76b0SSuyog Pawar num_clusters_cur_32x32_blk = ps_blk_32x32_cur->num_clusters;
4246*c83a76b0SSuyog Pawar
4247*c83a76b0SSuyog Pawar ps_blk_64x64->intra_mv_area += ps_blk_32x32_cur->intra_mv_area;
4248*c83a76b0SSuyog Pawar ps_blk_64x64->best_inter_cost += ps_blk_32x32_cur->best_inter_cost;
4249*c83a76b0SSuyog Pawar
4250*c83a76b0SSuyog Pawar for(j = 0; num_clusters_evaluated < num_clusters_cur_32x32_blk; j++)
4251*c83a76b0SSuyog Pawar {
4252*c83a76b0SSuyog Pawar ps_cur_cluster = &ps_blk_32x32_cur->as_cluster_data[j];
4253*c83a76b0SSuyog Pawar
4254*c83a76b0SSuyog Pawar if(!ps_cur_cluster->is_valid_cluster)
4255*c83a76b0SSuyog Pawar {
4256*c83a76b0SSuyog Pawar continue;
4257*c83a76b0SSuyog Pawar }
4258*c83a76b0SSuyog Pawar
4259*c83a76b0SSuyog Pawar hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cur_cluster);
4260*c83a76b0SSuyog Pawar
4261*c83a76b0SSuyog Pawar num_clusters_evaluated++;
4262*c83a76b0SSuyog Pawar }
4263*c83a76b0SSuyog Pawar }
4264*c83a76b0SSuyog Pawar }
4265*c83a76b0SSuyog Pawar
4266*c83a76b0SSuyog Pawar /**
4267*c83a76b0SSuyog Pawar ********************************************************************************
4268*c83a76b0SSuyog Pawar * @fn void hme_try_merge_clusters_blksize_gt_16
4269*c83a76b0SSuyog Pawar * (
4270*c83a76b0SSuyog Pawar * cluster_data_t *ps_cluster_data,
4271*c83a76b0SSuyog Pawar * S32 num_clusters
4272*c83a76b0SSuyog Pawar * )
4273*c83a76b0SSuyog Pawar *
4274*c83a76b0SSuyog Pawar * @brief Merging clusters from blocks of size 32x32 and greater
4275*c83a76b0SSuyog Pawar *
4276*c83a76b0SSuyog Pawar * @param[in/out] ps_cluster_data: structure containing cluster data
4277*c83a76b0SSuyog Pawar *
4278*c83a76b0SSuyog Pawar * @param[in/out] pi4_num_clusters : pointer to number of clusters
4279*c83a76b0SSuyog Pawar *
4280*c83a76b0SSuyog Pawar * @return Success or failure
4281*c83a76b0SSuyog Pawar ********************************************************************************
4282*c83a76b0SSuyog Pawar */
hme_try_merge_clusters_blksize_gt_16(cluster_data_t * ps_cluster_data,S32 num_clusters)4283*c83a76b0SSuyog Pawar S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
4284*c83a76b0SSuyog Pawar {
4285*c83a76b0SSuyog Pawar centroid_t *ps_cur_centroid;
4286*c83a76b0SSuyog Pawar cluster_data_t *ps_cur_cluster;
4287*c83a76b0SSuyog Pawar
4288*c83a76b0SSuyog Pawar S32 i, mvd;
4289*c83a76b0SSuyog Pawar S32 mvdx, mvdy, mvdx_q8, mvdy_q8;
4290*c83a76b0SSuyog Pawar
4291*c83a76b0SSuyog Pawar centroid_t *ps_centroid = &ps_cluster_data->s_centroid;
4292*c83a76b0SSuyog Pawar
4293*c83a76b0SSuyog Pawar S32 mvd_limit = ps_cluster_data->max_dist_from_centroid;
4294*c83a76b0SSuyog Pawar S32 ref_id = ps_cluster_data->ref_id;
4295*c83a76b0SSuyog Pawar
4296*c83a76b0SSuyog Pawar S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8;
4297*c83a76b0SSuyog Pawar S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
4298*c83a76b0SSuyog Pawar S32 num_clusters_evaluated = 1;
4299*c83a76b0SSuyog Pawar S32 ret_value = 0;
4300*c83a76b0SSuyog Pawar
4301*c83a76b0SSuyog Pawar if(1 >= num_clusters)
4302*c83a76b0SSuyog Pawar {
4303*c83a76b0SSuyog Pawar return ret_value;
4304*c83a76b0SSuyog Pawar }
4305*c83a76b0SSuyog Pawar
4306*c83a76b0SSuyog Pawar for(i = 1; num_clusters_evaluated < num_clusters; i++)
4307*c83a76b0SSuyog Pawar {
4308*c83a76b0SSuyog Pawar S32 cur_posx_q8;
4309*c83a76b0SSuyog Pawar S32 cur_posy_q8;
4310*c83a76b0SSuyog Pawar
4311*c83a76b0SSuyog Pawar ps_cur_cluster = &ps_cluster_data[i];
4312*c83a76b0SSuyog Pawar
4313*c83a76b0SSuyog Pawar if((ref_id != ps_cur_cluster->ref_id))
4314*c83a76b0SSuyog Pawar {
4315*c83a76b0SSuyog Pawar num_clusters_evaluated++;
4316*c83a76b0SSuyog Pawar continue;
4317*c83a76b0SSuyog Pawar }
4318*c83a76b0SSuyog Pawar
4319*c83a76b0SSuyog Pawar if((!ps_cur_cluster->is_valid_cluster))
4320*c83a76b0SSuyog Pawar {
4321*c83a76b0SSuyog Pawar continue;
4322*c83a76b0SSuyog Pawar }
4323*c83a76b0SSuyog Pawar
4324*c83a76b0SSuyog Pawar num_clusters_evaluated++;
4325*c83a76b0SSuyog Pawar
4326*c83a76b0SSuyog Pawar ps_cur_centroid = &ps_cur_cluster->s_centroid;
4327*c83a76b0SSuyog Pawar
4328*c83a76b0SSuyog Pawar cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
4329*c83a76b0SSuyog Pawar cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;
4330*c83a76b0SSuyog Pawar
4331*c83a76b0SSuyog Pawar mvdx_q8 = cur_posx_q8 - node0_posx_q8;
4332*c83a76b0SSuyog Pawar mvdy_q8 = cur_posy_q8 - node0_posy_q8;
4333*c83a76b0SSuyog Pawar
4334*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4335*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4336*c83a76b0SSuyog Pawar
4337*c83a76b0SSuyog Pawar mvd = ABS(mvdx) + ABS(mvdy);
4338*c83a76b0SSuyog Pawar
4339*c83a76b0SSuyog Pawar if(mvd <= (mvd_limit >> 1))
4340*c83a76b0SSuyog Pawar {
4341*c83a76b0SSuyog Pawar LWORD64 i8_updated_posx;
4342*c83a76b0SSuyog Pawar LWORD64 i8_updated_posy;
4343*c83a76b0SSuyog Pawar WORD32 minmax_updated_x = 0;
4344*c83a76b0SSuyog Pawar WORD32 minmax_updated_y = 0;
4345*c83a76b0SSuyog Pawar
4346*c83a76b0SSuyog Pawar ps_cur_cluster->is_valid_cluster = 0;
4347*c83a76b0SSuyog Pawar
4348*c83a76b0SSuyog Pawar ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
4349*c83a76b0SSuyog Pawar ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
4350*c83a76b0SSuyog Pawar ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;
4351*c83a76b0SSuyog Pawar
4352*c83a76b0SSuyog Pawar memcpy(
4353*c83a76b0SSuyog Pawar &ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
4354*c83a76b0SSuyog Pawar ps_cur_cluster->as_mv,
4355*c83a76b0SSuyog Pawar sizeof(mv_data_t) * ps_cur_cluster->num_mvs);
4356*c83a76b0SSuyog Pawar
4357*c83a76b0SSuyog Pawar if(mvdx > 0)
4358*c83a76b0SSuyog Pawar {
4359*c83a76b0SSuyog Pawar ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
4360*c83a76b0SSuyog Pawar minmax_updated_x = 1;
4361*c83a76b0SSuyog Pawar }
4362*c83a76b0SSuyog Pawar else
4363*c83a76b0SSuyog Pawar {
4364*c83a76b0SSuyog Pawar ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
4365*c83a76b0SSuyog Pawar minmax_updated_x = 2;
4366*c83a76b0SSuyog Pawar }
4367*c83a76b0SSuyog Pawar
4368*c83a76b0SSuyog Pawar if(mvdy > 0)
4369*c83a76b0SSuyog Pawar {
4370*c83a76b0SSuyog Pawar ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
4371*c83a76b0SSuyog Pawar minmax_updated_y = 1;
4372*c83a76b0SSuyog Pawar }
4373*c83a76b0SSuyog Pawar else
4374*c83a76b0SSuyog Pawar {
4375*c83a76b0SSuyog Pawar ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
4376*c83a76b0SSuyog Pawar minmax_updated_y = 2;
4377*c83a76b0SSuyog Pawar }
4378*c83a76b0SSuyog Pawar
4379*c83a76b0SSuyog Pawar switch((minmax_updated_y << 2) + minmax_updated_x)
4380*c83a76b0SSuyog Pawar {
4381*c83a76b0SSuyog Pawar case 1:
4382*c83a76b0SSuyog Pawar {
4383*c83a76b0SSuyog Pawar S32 mvd, mvd_q8;
4384*c83a76b0SSuyog Pawar
4385*c83a76b0SSuyog Pawar mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4386*c83a76b0SSuyog Pawar mvd = (mvd_q8 + (1 << 7)) >> 8;
4387*c83a76b0SSuyog Pawar
4388*c83a76b0SSuyog Pawar if(mvd > (mvd_limit))
4389*c83a76b0SSuyog Pawar {
4390*c83a76b0SSuyog Pawar ps_cluster_data->max_dist_from_centroid = mvd;
4391*c83a76b0SSuyog Pawar }
4392*c83a76b0SSuyog Pawar break;
4393*c83a76b0SSuyog Pawar }
4394*c83a76b0SSuyog Pawar case 2:
4395*c83a76b0SSuyog Pawar {
4396*c83a76b0SSuyog Pawar S32 mvd, mvd_q8;
4397*c83a76b0SSuyog Pawar
4398*c83a76b0SSuyog Pawar mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4399*c83a76b0SSuyog Pawar mvd = (mvd_q8 + (1 << 7)) >> 8;
4400*c83a76b0SSuyog Pawar
4401*c83a76b0SSuyog Pawar if(mvd > (mvd_limit))
4402*c83a76b0SSuyog Pawar {
4403*c83a76b0SSuyog Pawar ps_cluster_data->max_dist_from_centroid = mvd;
4404*c83a76b0SSuyog Pawar }
4405*c83a76b0SSuyog Pawar break;
4406*c83a76b0SSuyog Pawar }
4407*c83a76b0SSuyog Pawar case 4:
4408*c83a76b0SSuyog Pawar {
4409*c83a76b0SSuyog Pawar S32 mvd, mvd_q8;
4410*c83a76b0SSuyog Pawar
4411*c83a76b0SSuyog Pawar mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4412*c83a76b0SSuyog Pawar mvd = (mvd_q8 + (1 << 7)) >> 8;
4413*c83a76b0SSuyog Pawar
4414*c83a76b0SSuyog Pawar if(mvd > (mvd_limit))
4415*c83a76b0SSuyog Pawar {
4416*c83a76b0SSuyog Pawar ps_cluster_data->max_dist_from_centroid = mvd;
4417*c83a76b0SSuyog Pawar }
4418*c83a76b0SSuyog Pawar break;
4419*c83a76b0SSuyog Pawar }
4420*c83a76b0SSuyog Pawar case 5:
4421*c83a76b0SSuyog Pawar {
4422*c83a76b0SSuyog Pawar S32 mvd;
4423*c83a76b0SSuyog Pawar S32 mvdx, mvdx_q8;
4424*c83a76b0SSuyog Pawar S32 mvdy, mvdy_q8;
4425*c83a76b0SSuyog Pawar
4426*c83a76b0SSuyog Pawar mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4427*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4428*c83a76b0SSuyog Pawar
4429*c83a76b0SSuyog Pawar mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4430*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4431*c83a76b0SSuyog Pawar
4432*c83a76b0SSuyog Pawar mvd = (mvdx > mvdy) ? mvdx : mvdy;
4433*c83a76b0SSuyog Pawar
4434*c83a76b0SSuyog Pawar if(mvd > mvd_limit)
4435*c83a76b0SSuyog Pawar {
4436*c83a76b0SSuyog Pawar ps_cluster_data->max_dist_from_centroid = mvd;
4437*c83a76b0SSuyog Pawar }
4438*c83a76b0SSuyog Pawar break;
4439*c83a76b0SSuyog Pawar }
4440*c83a76b0SSuyog Pawar case 6:
4441*c83a76b0SSuyog Pawar {
4442*c83a76b0SSuyog Pawar S32 mvd;
4443*c83a76b0SSuyog Pawar S32 mvdx, mvdx_q8;
4444*c83a76b0SSuyog Pawar S32 mvdy, mvdy_q8;
4445*c83a76b0SSuyog Pawar
4446*c83a76b0SSuyog Pawar mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4447*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4448*c83a76b0SSuyog Pawar
4449*c83a76b0SSuyog Pawar mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4450*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4451*c83a76b0SSuyog Pawar
4452*c83a76b0SSuyog Pawar mvd = (mvdx > mvdy) ? mvdx : mvdy;
4453*c83a76b0SSuyog Pawar
4454*c83a76b0SSuyog Pawar if(mvd > mvd_limit)
4455*c83a76b0SSuyog Pawar {
4456*c83a76b0SSuyog Pawar ps_cluster_data->max_dist_from_centroid = mvd;
4457*c83a76b0SSuyog Pawar }
4458*c83a76b0SSuyog Pawar break;
4459*c83a76b0SSuyog Pawar }
4460*c83a76b0SSuyog Pawar case 8:
4461*c83a76b0SSuyog Pawar {
4462*c83a76b0SSuyog Pawar S32 mvd, mvd_q8;
4463*c83a76b0SSuyog Pawar
4464*c83a76b0SSuyog Pawar mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4465*c83a76b0SSuyog Pawar mvd = (mvd_q8 + (1 << 7)) >> 8;
4466*c83a76b0SSuyog Pawar
4467*c83a76b0SSuyog Pawar if(mvd > (mvd_limit))
4468*c83a76b0SSuyog Pawar {
4469*c83a76b0SSuyog Pawar ps_cluster_data->max_dist_from_centroid = mvd;
4470*c83a76b0SSuyog Pawar }
4471*c83a76b0SSuyog Pawar break;
4472*c83a76b0SSuyog Pawar }
4473*c83a76b0SSuyog Pawar case 9:
4474*c83a76b0SSuyog Pawar {
4475*c83a76b0SSuyog Pawar S32 mvd;
4476*c83a76b0SSuyog Pawar S32 mvdx, mvdx_q8;
4477*c83a76b0SSuyog Pawar S32 mvdy, mvdy_q8;
4478*c83a76b0SSuyog Pawar
4479*c83a76b0SSuyog Pawar mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4480*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4481*c83a76b0SSuyog Pawar
4482*c83a76b0SSuyog Pawar mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4483*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4484*c83a76b0SSuyog Pawar
4485*c83a76b0SSuyog Pawar mvd = (mvdx > mvdy) ? mvdx : mvdy;
4486*c83a76b0SSuyog Pawar
4487*c83a76b0SSuyog Pawar if(mvd > mvd_limit)
4488*c83a76b0SSuyog Pawar {
4489*c83a76b0SSuyog Pawar ps_cluster_data->max_dist_from_centroid = mvd;
4490*c83a76b0SSuyog Pawar }
4491*c83a76b0SSuyog Pawar break;
4492*c83a76b0SSuyog Pawar }
4493*c83a76b0SSuyog Pawar case 10:
4494*c83a76b0SSuyog Pawar {
4495*c83a76b0SSuyog Pawar S32 mvd;
4496*c83a76b0SSuyog Pawar S32 mvdx, mvdx_q8;
4497*c83a76b0SSuyog Pawar S32 mvdy, mvdy_q8;
4498*c83a76b0SSuyog Pawar
4499*c83a76b0SSuyog Pawar mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4500*c83a76b0SSuyog Pawar mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4501*c83a76b0SSuyog Pawar
4502*c83a76b0SSuyog Pawar mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4503*c83a76b0SSuyog Pawar mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4504*c83a76b0SSuyog Pawar
4505*c83a76b0SSuyog Pawar mvd = (mvdx > mvdy) ? mvdx : mvdy;
4506*c83a76b0SSuyog Pawar
4507*c83a76b0SSuyog Pawar if(mvd > ps_cluster_data->max_dist_from_centroid)
4508*c83a76b0SSuyog Pawar {
4509*c83a76b0SSuyog Pawar ps_cluster_data->max_dist_from_centroid = mvd;
4510*c83a76b0SSuyog Pawar }
4511*c83a76b0SSuyog Pawar break;
4512*c83a76b0SSuyog Pawar }
4513*c83a76b0SSuyog Pawar default:
4514*c83a76b0SSuyog Pawar {
4515*c83a76b0SSuyog Pawar break;
4516*c83a76b0SSuyog Pawar }
4517*c83a76b0SSuyog Pawar }
4518*c83a76b0SSuyog Pawar
4519*c83a76b0SSuyog Pawar i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) +
4520*c83a76b0SSuyog Pawar ((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
4521*c83a76b0SSuyog Pawar i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
4522*c83a76b0SSuyog Pawar ((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);
4523*c83a76b0SSuyog Pawar
4524*c83a76b0SSuyog Pawar ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;
4525*c83a76b0SSuyog Pawar
4526*c83a76b0SSuyog Pawar ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs);
4527*c83a76b0SSuyog Pawar ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);
4528*c83a76b0SSuyog Pawar
4529*c83a76b0SSuyog Pawar if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters)
4530*c83a76b0SSuyog Pawar {
4531*c83a76b0SSuyog Pawar num_clusters--;
4532*c83a76b0SSuyog Pawar num_clusters_evaluated = 1;
4533*c83a76b0SSuyog Pawar i = 0;
4534*c83a76b0SSuyog Pawar ret_value++;
4535*c83a76b0SSuyog Pawar }
4536*c83a76b0SSuyog Pawar else
4537*c83a76b0SSuyog Pawar {
4538*c83a76b0SSuyog Pawar ret_value++;
4539*c83a76b0SSuyog Pawar
4540*c83a76b0SSuyog Pawar return ret_value;
4541*c83a76b0SSuyog Pawar }
4542*c83a76b0SSuyog Pawar }
4543*c83a76b0SSuyog Pawar }
4544*c83a76b0SSuyog Pawar
4545*c83a76b0SSuyog Pawar if(ret_value)
4546*c83a76b0SSuyog Pawar {
4547*c83a76b0SSuyog Pawar for(i = 1; i < (num_clusters + ret_value); i++)
4548*c83a76b0SSuyog Pawar {
4549*c83a76b0SSuyog Pawar if(ps_cluster_data[i].is_valid_cluster)
4550*c83a76b0SSuyog Pawar {
4551*c83a76b0SSuyog Pawar break;
4552*c83a76b0SSuyog Pawar }
4553*c83a76b0SSuyog Pawar }
4554*c83a76b0SSuyog Pawar if(i == (num_clusters + ret_value))
4555*c83a76b0SSuyog Pawar {
4556*c83a76b0SSuyog Pawar return ret_value;
4557*c83a76b0SSuyog Pawar }
4558*c83a76b0SSuyog Pawar }
4559*c83a76b0SSuyog Pawar else
4560*c83a76b0SSuyog Pawar {
4561*c83a76b0SSuyog Pawar i = 1;
4562*c83a76b0SSuyog Pawar }
4563*c83a76b0SSuyog Pawar
4564*c83a76b0SSuyog Pawar return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
4565*c83a76b0SSuyog Pawar ret_value;
4566*c83a76b0SSuyog Pawar }
4567*c83a76b0SSuyog Pawar
4568*c83a76b0SSuyog Pawar /**
4569*c83a76b0SSuyog Pawar ********************************************************************************
4570*c83a76b0SSuyog Pawar * @fn S32 hme_determine_validity_32x32
4571*c83a76b0SSuyog Pawar * (
4572*c83a76b0SSuyog Pawar * ctb_cluster_info_t *ps_ctb_cluster_info
4573*c83a76b0SSuyog Pawar * )
4574*c83a76b0SSuyog Pawar *
4575*c83a76b0SSuyog Pawar * @brief Determines whther current 32x32 block needs to be evaluated in enc_loop
4576*c83a76b0SSuyog Pawar * while recursing through the CU tree or not
4577*c83a76b0SSuyog Pawar *
4578*c83a76b0SSuyog Pawar * @param[in] ps_cluster_data: structure containing cluster data
4579*c83a76b0SSuyog Pawar *
4580*c83a76b0SSuyog Pawar * @return Success or failure
4581*c83a76b0SSuyog Pawar ********************************************************************************
4582*c83a76b0SSuyog Pawar */
hme_determine_validity_32x32(ctb_cluster_info_t * ps_ctb_cluster_info,S32 * pi4_children_nodes_required,S32 blk_validity_wrt_pic_bndry,S32 parent_blk_validity_wrt_pic_bndry)4583*c83a76b0SSuyog Pawar __inline S32 hme_determine_validity_32x32(
4584*c83a76b0SSuyog Pawar ctb_cluster_info_t *ps_ctb_cluster_info,
4585*c83a76b0SSuyog Pawar S32 *pi4_children_nodes_required,
4586*c83a76b0SSuyog Pawar S32 blk_validity_wrt_pic_bndry,
4587*c83a76b0SSuyog Pawar S32 parent_blk_validity_wrt_pic_bndry)
4588*c83a76b0SSuyog Pawar {
4589*c83a76b0SSuyog Pawar cluster_data_t *ps_data;
4590*c83a76b0SSuyog Pawar
4591*c83a76b0SSuyog Pawar cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4592*c83a76b0SSuyog Pawar cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4593*c83a76b0SSuyog Pawar
4594*c83a76b0SSuyog Pawar S32 num_clusters = ps_32x32_blk->num_clusters;
4595*c83a76b0SSuyog Pawar S32 num_clusters_parent = ps_64x64_blk->num_clusters;
4596*c83a76b0SSuyog Pawar
4597*c83a76b0SSuyog Pawar if(!blk_validity_wrt_pic_bndry)
4598*c83a76b0SSuyog Pawar {
4599*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 1;
4600*c83a76b0SSuyog Pawar return 0;
4601*c83a76b0SSuyog Pawar }
4602*c83a76b0SSuyog Pawar
4603*c83a76b0SSuyog Pawar if(!parent_blk_validity_wrt_pic_bndry)
4604*c83a76b0SSuyog Pawar {
4605*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 1;
4606*c83a76b0SSuyog Pawar return 1;
4607*c83a76b0SSuyog Pawar }
4608*c83a76b0SSuyog Pawar
4609*c83a76b0SSuyog Pawar if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4610*c83a76b0SSuyog Pawar {
4611*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 1;
4612*c83a76b0SSuyog Pawar return 0;
4613*c83a76b0SSuyog Pawar }
4614*c83a76b0SSuyog Pawar
4615*c83a76b0SSuyog Pawar if(num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4616*c83a76b0SSuyog Pawar {
4617*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 1;
4618*c83a76b0SSuyog Pawar
4619*c83a76b0SSuyog Pawar return 1;
4620*c83a76b0SSuyog Pawar }
4621*c83a76b0SSuyog Pawar else if(num_clusters_parent < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4622*c83a76b0SSuyog Pawar {
4623*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 0;
4624*c83a76b0SSuyog Pawar
4625*c83a76b0SSuyog Pawar return 1;
4626*c83a76b0SSuyog Pawar }
4627*c83a76b0SSuyog Pawar else
4628*c83a76b0SSuyog Pawar {
4629*c83a76b0SSuyog Pawar if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4630*c83a76b0SSuyog Pawar {
4631*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 0;
4632*c83a76b0SSuyog Pawar return 1;
4633*c83a76b0SSuyog Pawar }
4634*c83a76b0SSuyog Pawar else
4635*c83a76b0SSuyog Pawar {
4636*c83a76b0SSuyog Pawar S32 i;
4637*c83a76b0SSuyog Pawar
4638*c83a76b0SSuyog Pawar S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 4;
4639*c83a76b0SSuyog Pawar S32 min_area = MAX_32BIT_VAL;
4640*c83a76b0SSuyog Pawar S32 num_clusters_evaluated = 0;
4641*c83a76b0SSuyog Pawar
4642*c83a76b0SSuyog Pawar for(i = 0; num_clusters_evaluated < num_clusters; i++)
4643*c83a76b0SSuyog Pawar {
4644*c83a76b0SSuyog Pawar ps_data = &ps_32x32_blk->as_cluster_data[i];
4645*c83a76b0SSuyog Pawar
4646*c83a76b0SSuyog Pawar if(!ps_data->is_valid_cluster)
4647*c83a76b0SSuyog Pawar {
4648*c83a76b0SSuyog Pawar continue;
4649*c83a76b0SSuyog Pawar }
4650*c83a76b0SSuyog Pawar
4651*c83a76b0SSuyog Pawar num_clusters_evaluated++;
4652*c83a76b0SSuyog Pawar
4653*c83a76b0SSuyog Pawar if(ps_data->area_in_pixels < min_area)
4654*c83a76b0SSuyog Pawar {
4655*c83a76b0SSuyog Pawar min_area = ps_data->area_in_pixels;
4656*c83a76b0SSuyog Pawar }
4657*c83a76b0SSuyog Pawar }
4658*c83a76b0SSuyog Pawar
4659*c83a76b0SSuyog Pawar if((min_area << 4) < area_of_parent)
4660*c83a76b0SSuyog Pawar {
4661*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 1;
4662*c83a76b0SSuyog Pawar return 0;
4663*c83a76b0SSuyog Pawar }
4664*c83a76b0SSuyog Pawar else
4665*c83a76b0SSuyog Pawar {
4666*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 0;
4667*c83a76b0SSuyog Pawar return 1;
4668*c83a76b0SSuyog Pawar }
4669*c83a76b0SSuyog Pawar }
4670*c83a76b0SSuyog Pawar }
4671*c83a76b0SSuyog Pawar }
4672*c83a76b0SSuyog Pawar
4673*c83a76b0SSuyog Pawar /**
4674*c83a76b0SSuyog Pawar ********************************************************************************
4675*c83a76b0SSuyog Pawar * @fn S32 hme_determine_validity_16x16
4676*c83a76b0SSuyog Pawar * (
4677*c83a76b0SSuyog Pawar * ctb_cluster_info_t *ps_ctb_cluster_info
4678*c83a76b0SSuyog Pawar * )
4679*c83a76b0SSuyog Pawar *
4680*c83a76b0SSuyog Pawar * @brief Determines whther current 16x16 block needs to be evaluated in enc_loop
4681*c83a76b0SSuyog Pawar * while recursing through the CU tree or not
4682*c83a76b0SSuyog Pawar *
4683*c83a76b0SSuyog Pawar * @param[in] ps_cluster_data: structure containing cluster data
4684*c83a76b0SSuyog Pawar *
4685*c83a76b0SSuyog Pawar * @return Success or failure
4686*c83a76b0SSuyog Pawar ********************************************************************************
4687*c83a76b0SSuyog Pawar */
hme_determine_validity_16x16(ctb_cluster_info_t * ps_ctb_cluster_info,S32 * pi4_children_nodes_required,S32 blk_validity_wrt_pic_bndry,S32 parent_blk_validity_wrt_pic_bndry)4688*c83a76b0SSuyog Pawar __inline S32 hme_determine_validity_16x16(
4689*c83a76b0SSuyog Pawar ctb_cluster_info_t *ps_ctb_cluster_info,
4690*c83a76b0SSuyog Pawar S32 *pi4_children_nodes_required,
4691*c83a76b0SSuyog Pawar S32 blk_validity_wrt_pic_bndry,
4692*c83a76b0SSuyog Pawar S32 parent_blk_validity_wrt_pic_bndry)
4693*c83a76b0SSuyog Pawar {
4694*c83a76b0SSuyog Pawar cluster_data_t *ps_data;
4695*c83a76b0SSuyog Pawar
4696*c83a76b0SSuyog Pawar cluster_16x16_blk_t *ps_16x16_blk = ps_ctb_cluster_info->ps_16x16_blk;
4697*c83a76b0SSuyog Pawar cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4698*c83a76b0SSuyog Pawar cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4699*c83a76b0SSuyog Pawar
4700*c83a76b0SSuyog Pawar S32 num_clusters = ps_16x16_blk->num_clusters;
4701*c83a76b0SSuyog Pawar S32 num_clusters_parent = ps_32x32_blk->num_clusters;
4702*c83a76b0SSuyog Pawar S32 num_clusters_grandparent = ps_64x64_blk->num_clusters;
4703*c83a76b0SSuyog Pawar
4704*c83a76b0SSuyog Pawar if(!blk_validity_wrt_pic_bndry)
4705*c83a76b0SSuyog Pawar {
4706*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 1;
4707*c83a76b0SSuyog Pawar return 0;
4708*c83a76b0SSuyog Pawar }
4709*c83a76b0SSuyog Pawar
4710*c83a76b0SSuyog Pawar if(!parent_blk_validity_wrt_pic_bndry)
4711*c83a76b0SSuyog Pawar {
4712*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 1;
4713*c83a76b0SSuyog Pawar return 1;
4714*c83a76b0SSuyog Pawar }
4715*c83a76b0SSuyog Pawar
4716*c83a76b0SSuyog Pawar if((num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
4717*c83a76b0SSuyog Pawar (num_clusters_grandparent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
4718*c83a76b0SSuyog Pawar {
4719*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 1;
4720*c83a76b0SSuyog Pawar return 1;
4721*c83a76b0SSuyog Pawar }
4722*c83a76b0SSuyog Pawar
4723*c83a76b0SSuyog Pawar /* Implies nc_64 <= 3 when num_clusters_parent > 3 & */
4724*c83a76b0SSuyog Pawar /* implies nc_64 > 3 when num_clusters_parent < 3 & */
4725*c83a76b0SSuyog Pawar if(num_clusters_parent != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4726*c83a76b0SSuyog Pawar {
4727*c83a76b0SSuyog Pawar if(num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4728*c83a76b0SSuyog Pawar {
4729*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 0;
4730*c83a76b0SSuyog Pawar
4731*c83a76b0SSuyog Pawar return 1;
4732*c83a76b0SSuyog Pawar }
4733*c83a76b0SSuyog Pawar else
4734*c83a76b0SSuyog Pawar {
4735*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 1;
4736*c83a76b0SSuyog Pawar
4737*c83a76b0SSuyog Pawar return 0;
4738*c83a76b0SSuyog Pawar }
4739*c83a76b0SSuyog Pawar }
4740*c83a76b0SSuyog Pawar /* Implies nc_64 >= 3 */
4741*c83a76b0SSuyog Pawar else
4742*c83a76b0SSuyog Pawar {
4743*c83a76b0SSuyog Pawar if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4744*c83a76b0SSuyog Pawar {
4745*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 0;
4746*c83a76b0SSuyog Pawar return 1;
4747*c83a76b0SSuyog Pawar }
4748*c83a76b0SSuyog Pawar else if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4749*c83a76b0SSuyog Pawar {
4750*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 1;
4751*c83a76b0SSuyog Pawar return 0;
4752*c83a76b0SSuyog Pawar }
4753*c83a76b0SSuyog Pawar else
4754*c83a76b0SSuyog Pawar {
4755*c83a76b0SSuyog Pawar S32 i;
4756*c83a76b0SSuyog Pawar
4757*c83a76b0SSuyog Pawar S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 2;
4758*c83a76b0SSuyog Pawar S32 min_area = MAX_32BIT_VAL;
4759*c83a76b0SSuyog Pawar S32 num_clusters_evaluated = 0;
4760*c83a76b0SSuyog Pawar
4761*c83a76b0SSuyog Pawar for(i = 0; num_clusters_evaluated < num_clusters; i++)
4762*c83a76b0SSuyog Pawar {
4763*c83a76b0SSuyog Pawar ps_data = &ps_16x16_blk->as_cluster_data[i];
4764*c83a76b0SSuyog Pawar
4765*c83a76b0SSuyog Pawar if(!ps_data->is_valid_cluster)
4766*c83a76b0SSuyog Pawar {
4767*c83a76b0SSuyog Pawar continue;
4768*c83a76b0SSuyog Pawar }
4769*c83a76b0SSuyog Pawar
4770*c83a76b0SSuyog Pawar num_clusters_evaluated++;
4771*c83a76b0SSuyog Pawar
4772*c83a76b0SSuyog Pawar if(ps_data->area_in_pixels < min_area)
4773*c83a76b0SSuyog Pawar {
4774*c83a76b0SSuyog Pawar min_area = ps_data->area_in_pixels;
4775*c83a76b0SSuyog Pawar }
4776*c83a76b0SSuyog Pawar }
4777*c83a76b0SSuyog Pawar
4778*c83a76b0SSuyog Pawar if((min_area << 4) < area_of_parent)
4779*c83a76b0SSuyog Pawar {
4780*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 1;
4781*c83a76b0SSuyog Pawar return 0;
4782*c83a76b0SSuyog Pawar }
4783*c83a76b0SSuyog Pawar else
4784*c83a76b0SSuyog Pawar {
4785*c83a76b0SSuyog Pawar *pi4_children_nodes_required = 0;
4786*c83a76b0SSuyog Pawar return 1;
4787*c83a76b0SSuyog Pawar }
4788*c83a76b0SSuyog Pawar }
4789*c83a76b0SSuyog Pawar }
4790*c83a76b0SSuyog Pawar }
4791*c83a76b0SSuyog Pawar
4792*c83a76b0SSuyog Pawar /**
4793*c83a76b0SSuyog Pawar ********************************************************************************
4794*c83a76b0SSuyog Pawar * @fn void hme_build_cu_tree
4795*c83a76b0SSuyog Pawar * (
4796*c83a76b0SSuyog Pawar * ctb_cluster_info_t *ps_ctb_cluster_info,
4797*c83a76b0SSuyog Pawar * cur_ctb_cu_tree_t *ps_cu_tree,
4798*c83a76b0SSuyog Pawar * S32 tree_depth,
4799*c83a76b0SSuyog Pawar * CU_POS_T e_grand_parent_blk_pos,
4800*c83a76b0SSuyog Pawar * CU_POS_T e_parent_blk_pos,
4801*c83a76b0SSuyog Pawar * CU_POS_T e_cur_blk_pos
4802*c83a76b0SSuyog Pawar * )
4803*c83a76b0SSuyog Pawar *
4804*c83a76b0SSuyog Pawar * @brief Recursive function for CU tree initialisation
4805*c83a76b0SSuyog Pawar *
4806*c83a76b0SSuyog Pawar * @param[in] ps_ctb_cluster_info: structure containing pointers to clusters
4807*c83a76b0SSuyog Pawar * corresponding to all block sizes from 64x64
4808*c83a76b0SSuyog Pawar * to 16x16
4809*c83a76b0SSuyog Pawar *
4810*c83a76b0SSuyog Pawar * @param[in] e_parent_blk_pos: position of parent block wrt its parent, if
4811*c83a76b0SSuyog Pawar * applicable
4812*c83a76b0SSuyog Pawar *
4813*c83a76b0SSuyog Pawar * @param[in] e_cur_blk_pos: position of current block wrt parent
4814*c83a76b0SSuyog Pawar *
4815*c83a76b0SSuyog Pawar * @param[out] ps_cu_tree : represents CU tree used in CU recursion
4816*c83a76b0SSuyog Pawar *
4817*c83a76b0SSuyog Pawar * @param[in] tree_depth : specifies depth of the CU tree
4818*c83a76b0SSuyog Pawar *
4819*c83a76b0SSuyog Pawar * @return Nothing
4820*c83a76b0SSuyog Pawar ********************************************************************************
4821*c83a76b0SSuyog Pawar */
void hme_build_cu_tree(
    ctb_cluster_info_t *ps_ctb_cluster_info,
    cur_ctb_cu_tree_t *ps_cu_tree,
    S32 tree_depth,
    CU_POS_T e_grandparent_blk_pos,
    CU_POS_T e_parent_blk_pos,
    CU_POS_T e_cur_blk_pos)
{
    /* The tree root and the running node counter both live in the cluster */
    /* info; everything else is forwarded untouched to the common          */
    /* initialiser, which does the actual work.                            */
    cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root;

    ihevce_cu_tree_init(
        ps_cu_tree,
        ps_tree_root,
        &ps_ctb_cluster_info->nodes_created_in_cu_tree,
        tree_depth,
        e_grandparent_blk_pos,
        e_parent_blk_pos,
        e_cur_blk_pos);
}
4839*c83a76b0SSuyog Pawar
4840*c83a76b0SSuyog Pawar /**
4841*c83a76b0SSuyog Pawar ********************************************************************************
4842*c83a76b0SSuyog Pawar * @fn S32 hme_sdi_based_cluster_spread_eligibility
4843*c83a76b0SSuyog Pawar * (
4844*c83a76b0SSuyog Pawar * cluster_32x32_blk_t *ps_blk_32x32
4845*c83a76b0SSuyog Pawar * )
4846*c83a76b0SSuyog Pawar *
4847*c83a76b0SSuyog Pawar * @brief Determines whether the spread of high SDI MV's around each cluster
4848*c83a76b0SSuyog Pawar * center is below a pre-determined threshold
4849*c83a76b0SSuyog Pawar *
4850*c83a76b0SSuyog Pawar * @param[in] ps_blk_32x32: structure containing pointers to clusters
4851*c83a76b0SSuyog Pawar * corresponding to all block sizes from 64x64
4852*c83a76b0SSuyog Pawar * to 16x16
4853*c83a76b0SSuyog Pawar *
4854*c83a76b0SSuyog Pawar * @return 1 if the spread is constrained, else 0
4855*c83a76b0SSuyog Pawar ********************************************************************************
4856*c83a76b0SSuyog Pawar */
4857*c83a76b0SSuyog Pawar __inline S32
hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t * ps_blk_32x32,S32 sdi_threshold)4858*c83a76b0SSuyog Pawar hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold)
4859*c83a76b0SSuyog Pawar {
4860*c83a76b0SSuyog Pawar S32 cumulative_mv_distance;
4861*c83a76b0SSuyog Pawar S32 i, j;
4862*c83a76b0SSuyog Pawar S32 num_high_sdi_mvs;
4863*c83a76b0SSuyog Pawar
4864*c83a76b0SSuyog Pawar S32 num_clusters = ps_blk_32x32->num_clusters;
4865*c83a76b0SSuyog Pawar
4866*c83a76b0SSuyog Pawar for(i = 0; i < num_clusters; i++)
4867*c83a76b0SSuyog Pawar {
4868*c83a76b0SSuyog Pawar cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[i];
4869*c83a76b0SSuyog Pawar
4870*c83a76b0SSuyog Pawar num_high_sdi_mvs = 0;
4871*c83a76b0SSuyog Pawar cumulative_mv_distance = 0;
4872*c83a76b0SSuyog Pawar
4873*c83a76b0SSuyog Pawar for(j = 0; j < ps_data->num_mvs; j++)
4874*c83a76b0SSuyog Pawar {
4875*c83a76b0SSuyog Pawar mv_data_t *ps_mv = &ps_data->as_mv[j];
4876*c83a76b0SSuyog Pawar
4877*c83a76b0SSuyog Pawar if(ps_mv->sdi >= sdi_threshold)
4878*c83a76b0SSuyog Pawar {
4879*c83a76b0SSuyog Pawar num_high_sdi_mvs++;
4880*c83a76b0SSuyog Pawar
4881*c83a76b0SSuyog Pawar COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance);
4882*c83a76b0SSuyog Pawar }
4883*c83a76b0SSuyog Pawar }
4884*c83a76b0SSuyog Pawar
4885*c83a76b0SSuyog Pawar if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * num_high_sdi_mvs))
4886*c83a76b0SSuyog Pawar {
4887*c83a76b0SSuyog Pawar return 0;
4888*c83a76b0SSuyog Pawar }
4889*c83a76b0SSuyog Pawar }
4890*c83a76b0SSuyog Pawar
4891*c83a76b0SSuyog Pawar return 1;
4892*c83a76b0SSuyog Pawar }
4893*c83a76b0SSuyog Pawar
4894*c83a76b0SSuyog Pawar /**
4895*c83a76b0SSuyog Pawar ********************************************************************************
4896*c83a76b0SSuyog Pawar * @fn S32 hme_populate_cu_tree
4897*c83a76b0SSuyog Pawar * (
4898*c83a76b0SSuyog Pawar * ctb_cluster_info_t *ps_ctb_cluster_info,
4899*c83a76b0SSuyog Pawar * ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
4900*c83a76b0SSuyog Pawar * cur_ctb_cu_tree_t *ps_cu_tree,
4901*c83a76b0SSuyog Pawar * S32 tree_depth,
4902*c83a76b0SSuyog Pawar * CU_POS_T e_parent_blk_pos,
4903*c83a76b0SSuyog Pawar * CU_POS_T e_cur_blk_pos
4904*c83a76b0SSuyog Pawar * )
4905*c83a76b0SSuyog Pawar *
4906*c83a76b0SSuyog Pawar * @brief Recursive function for CU tree population based on output of
4907*c83a76b0SSuyog Pawar * clustering algorithm
4908*c83a76b0SSuyog Pawar *
4909*c83a76b0SSuyog Pawar * @param[in] ps_ctb_cluster_info: structure containing pointers to clusters
4910*c83a76b0SSuyog Pawar * corresponding to all block sizes from 64x64
4911*c83a76b0SSuyog Pawar * to 16x16
4912*c83a76b0SSuyog Pawar *
4913*c83a76b0SSuyog Pawar * @param[in] e_parent_blk_pos: position of parent block wrt its parent, if
4914*c83a76b0SSuyog Pawar applicable
4915*c83a76b0SSuyog Pawar *
4916*c83a76b0SSuyog Pawar * @param[in] e_cur_blk_pos: position of current block wrt parent
4917*c83a76b0SSuyog Pawar *
4918*c83a76b0SSuyog Pawar * @param[in] ps_cur_ipe_ctb : output container for ipe analyses
4919*c83a76b0SSuyog Pawar *
4920*c83a76b0SSuyog Pawar * @param[out] ps_cu_tree : represents CU tree used in CU recursion
4921*c83a76b0SSuyog Pawar *
4922*c83a76b0SSuyog Pawar * @param[in] tree_depth : specifies depth of the CU tree
4923*c83a76b0SSuyog Pawar *
4924*c83a76b0SSuyog Pawar * @param[in] ipe_decision_precedence : specifies whether precedence should
4925*c83a76b0SSuyog Pawar * be given to decisions made either by IPE(1) or clustering algos.
4926*c83a76b0SSuyog Pawar *
4927*c83a76b0SSuyog Pawar * @return 1 if re-evaluation of parent node's validity is not required,
4928*c83a76b0SSuyog Pawar else 0
4929*c83a76b0SSuyog Pawar ********************************************************************************
4930*c83a76b0SSuyog Pawar */
hme_populate_cu_tree(ctb_cluster_info_t * ps_ctb_cluster_info,cur_ctb_cu_tree_t * ps_cu_tree,S32 tree_depth,ME_QUALITY_PRESETS_T e_quality_preset,CU_POS_T e_grandparent_blk_pos,CU_POS_T e_parent_blk_pos,CU_POS_T e_cur_blk_pos)4931*c83a76b0SSuyog Pawar void hme_populate_cu_tree(
4932*c83a76b0SSuyog Pawar ctb_cluster_info_t *ps_ctb_cluster_info,
4933*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_cu_tree,
4934*c83a76b0SSuyog Pawar S32 tree_depth,
4935*c83a76b0SSuyog Pawar ME_QUALITY_PRESETS_T e_quality_preset,
4936*c83a76b0SSuyog Pawar CU_POS_T e_grandparent_blk_pos,
4937*c83a76b0SSuyog Pawar CU_POS_T e_parent_blk_pos,
4938*c83a76b0SSuyog Pawar CU_POS_T e_cur_blk_pos)
4939*c83a76b0SSuyog Pawar {
4940*c83a76b0SSuyog Pawar S32 area_of_cur_blk;
4941*c83a76b0SSuyog Pawar S32 area_limit_for_me_decision_precedence;
4942*c83a76b0SSuyog Pawar S32 children_nodes_required;
4943*c83a76b0SSuyog Pawar S32 intra_mv_area;
4944*c83a76b0SSuyog Pawar S32 intra_eval_enable;
4945*c83a76b0SSuyog Pawar S32 inter_eval_enable;
4946*c83a76b0SSuyog Pawar S32 ipe_decision_precedence;
4947*c83a76b0SSuyog Pawar S32 node_validity;
4948*c83a76b0SSuyog Pawar S32 num_clusters;
4949*c83a76b0SSuyog Pawar
4950*c83a76b0SSuyog Pawar ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb;
4951*c83a76b0SSuyog Pawar
4952*c83a76b0SSuyog Pawar if(NULL == ps_cu_tree)
4953*c83a76b0SSuyog Pawar {
4954*c83a76b0SSuyog Pawar return;
4955*c83a76b0SSuyog Pawar }
4956*c83a76b0SSuyog Pawar
4957*c83a76b0SSuyog Pawar switch(tree_depth)
4958*c83a76b0SSuyog Pawar {
4959*c83a76b0SSuyog Pawar case 0:
4960*c83a76b0SSuyog Pawar {
4961*c83a76b0SSuyog Pawar /* 64x64 block */
4962*c83a76b0SSuyog Pawar S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
4963*c83a76b0SSuyog Pawar
4964*c83a76b0SSuyog Pawar cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
4965*c83a76b0SSuyog Pawar
4966*c83a76b0SSuyog Pawar area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4;
4967*c83a76b0SSuyog Pawar area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
4968*c83a76b0SSuyog Pawar children_nodes_required = 0;
4969*c83a76b0SSuyog Pawar intra_mv_area = ps_blk_64x64->intra_mv_area;
4970*c83a76b0SSuyog Pawar
4971*c83a76b0SSuyog Pawar ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
4972*c83a76b0SSuyog Pawar
4973*c83a76b0SSuyog Pawar intra_eval_enable = ipe_decision_precedence;
4974*c83a76b0SSuyog Pawar inter_eval_enable = !!ps_blk_64x64->num_clusters;
4975*c83a76b0SSuyog Pawar
4976*c83a76b0SSuyog Pawar #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4977*c83a76b0SSuyog Pawar if(e_quality_preset >= ME_HIGH_QUALITY)
4978*c83a76b0SSuyog Pawar {
4979*c83a76b0SSuyog Pawar inter_eval_enable = 1;
4980*c83a76b0SSuyog Pawar node_validity = (blk_32x32_mask == 0xf);
4981*c83a76b0SSuyog Pawar #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
4982*c83a76b0SSuyog Pawar ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
4983*c83a76b0SSuyog Pawar #endif
4984*c83a76b0SSuyog Pawar break;
4985*c83a76b0SSuyog Pawar }
4986*c83a76b0SSuyog Pawar #endif
4987*c83a76b0SSuyog Pawar
4988*c83a76b0SSuyog Pawar #if ENABLE_4CTB_EVALUATION
4989*c83a76b0SSuyog Pawar node_validity = (blk_32x32_mask == 0xf);
4990*c83a76b0SSuyog Pawar
4991*c83a76b0SSuyog Pawar break;
4992*c83a76b0SSuyog Pawar #else
4993*c83a76b0SSuyog Pawar {
4994*c83a76b0SSuyog Pawar S32 i;
4995*c83a76b0SSuyog Pawar
4996*c83a76b0SSuyog Pawar num_clusters = ps_blk_64x64->num_clusters;
4997*c83a76b0SSuyog Pawar
4998*c83a76b0SSuyog Pawar node_validity = (ipe_decision_precedence)
4999*c83a76b0SSuyog Pawar ? (!ps_cur_ipe_ctb->u1_split_flag)
5000*c83a76b0SSuyog Pawar : (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK);
5001*c83a76b0SSuyog Pawar
5002*c83a76b0SSuyog Pawar for(i = 0; i < MAX_NUM_REF; i++)
5003*c83a76b0SSuyog Pawar {
5004*c83a76b0SSuyog Pawar node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <=
5005*c83a76b0SSuyog Pawar MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5006*c83a76b0SSuyog Pawar }
5007*c83a76b0SSuyog Pawar
5008*c83a76b0SSuyog Pawar node_validity = node_validity && (blk_32x32_mask == 0xf);
5009*c83a76b0SSuyog Pawar }
5010*c83a76b0SSuyog Pawar break;
5011*c83a76b0SSuyog Pawar #endif
5012*c83a76b0SSuyog Pawar }
5013*c83a76b0SSuyog Pawar case 1:
5014*c83a76b0SSuyog Pawar {
5015*c83a76b0SSuyog Pawar /* 32x32 block */
5016*c83a76b0SSuyog Pawar S32 is_percent_intra_area_gt_threshold;
5017*c83a76b0SSuyog Pawar
5018*c83a76b0SSuyog Pawar cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos];
5019*c83a76b0SSuyog Pawar
5020*c83a76b0SSuyog Pawar S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
5021*c83a76b0SSuyog Pawar
5022*c83a76b0SSuyog Pawar #if !ENABLE_4CTB_EVALUATION
5023*c83a76b0SSuyog Pawar S32 best_inter_cost = ps_blk_32x32->best_inter_cost;
5024*c83a76b0SSuyog Pawar S32 best_intra_cost =
5025*c83a76b0SSuyog Pawar ((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5026*c83a76b0SSuyog Pawar ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier *
5027*c83a76b0SSuyog Pawar 4) < 0)
5028*c83a76b0SSuyog Pawar ? MAX_32BIT_VAL
5029*c83a76b0SSuyog Pawar : (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5030*c83a76b0SSuyog Pawar ps_ctb_cluster_info->i4_frame_qstep *
5031*c83a76b0SSuyog Pawar ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4);
5032*c83a76b0SSuyog Pawar S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5033*c83a76b0SSuyog Pawar S32 cost_differential = (best_inter_cost - best_cost);
5034*c83a76b0SSuyog Pawar #endif
5035*c83a76b0SSuyog Pawar
5036*c83a76b0SSuyog Pawar area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2;
5037*c83a76b0SSuyog Pawar area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5038*c83a76b0SSuyog Pawar intra_mv_area = ps_blk_32x32->intra_mv_area;
5039*c83a76b0SSuyog Pawar is_percent_intra_area_gt_threshold =
5040*c83a76b0SSuyog Pawar (intra_mv_area > area_limit_for_me_decision_precedence);
5041*c83a76b0SSuyog Pawar ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5042*c83a76b0SSuyog Pawar
5043*c83a76b0SSuyog Pawar intra_eval_enable = ipe_decision_precedence;
5044*c83a76b0SSuyog Pawar inter_eval_enable = !!ps_blk_32x32->num_clusters;
5045*c83a76b0SSuyog Pawar children_nodes_required = 1;
5046*c83a76b0SSuyog Pawar
5047*c83a76b0SSuyog Pawar #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5048*c83a76b0SSuyog Pawar if(e_quality_preset >= ME_HIGH_QUALITY)
5049*c83a76b0SSuyog Pawar {
5050*c83a76b0SSuyog Pawar inter_eval_enable = 1;
5051*c83a76b0SSuyog Pawar node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5052*c83a76b0SSuyog Pawar #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5053*c83a76b0SSuyog Pawar ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
5054*c83a76b0SSuyog Pawar #endif
5055*c83a76b0SSuyog Pawar break;
5056*c83a76b0SSuyog Pawar }
5057*c83a76b0SSuyog Pawar #endif
5058*c83a76b0SSuyog Pawar
5059*c83a76b0SSuyog Pawar #if ENABLE_4CTB_EVALUATION
5060*c83a76b0SSuyog Pawar node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5061*c83a76b0SSuyog Pawar
5062*c83a76b0SSuyog Pawar break;
5063*c83a76b0SSuyog Pawar #else
5064*c83a76b0SSuyog Pawar {
5065*c83a76b0SSuyog Pawar S32 i;
5066*c83a76b0SSuyog Pawar num_clusters = ps_blk_32x32->num_clusters;
5067*c83a76b0SSuyog Pawar
5068*c83a76b0SSuyog Pawar if(ipe_decision_precedence)
5069*c83a76b0SSuyog Pawar {
5070*c83a76b0SSuyog Pawar node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
5071*c83a76b0SSuyog Pawar node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5072*c83a76b0SSuyog Pawar }
5073*c83a76b0SSuyog Pawar else
5074*c83a76b0SSuyog Pawar {
5075*c83a76b0SSuyog Pawar node_validity =
5076*c83a76b0SSuyog Pawar ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) &&
5077*c83a76b0SSuyog Pawar (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
5078*c83a76b0SSuyog Pawar (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5079*c83a76b0SSuyog Pawar
5080*c83a76b0SSuyog Pawar for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++)
5081*c83a76b0SSuyog Pawar {
5082*c83a76b0SSuyog Pawar node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <=
5083*c83a76b0SSuyog Pawar MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5084*c83a76b0SSuyog Pawar }
5085*c83a76b0SSuyog Pawar
5086*c83a76b0SSuyog Pawar if(node_validity)
5087*c83a76b0SSuyog Pawar {
5088*c83a76b0SSuyog Pawar node_validity = node_validity &&
5089*c83a76b0SSuyog Pawar hme_sdi_based_cluster_spread_eligibility(
5090*c83a76b0SSuyog Pawar ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold);
5091*c83a76b0SSuyog Pawar }
5092*c83a76b0SSuyog Pawar }
5093*c83a76b0SSuyog Pawar }
5094*c83a76b0SSuyog Pawar
5095*c83a76b0SSuyog Pawar break;
5096*c83a76b0SSuyog Pawar #endif
5097*c83a76b0SSuyog Pawar }
5098*c83a76b0SSuyog Pawar case 2:
5099*c83a76b0SSuyog Pawar {
5100*c83a76b0SSuyog Pawar cluster_16x16_blk_t *ps_blk_16x16 =
5101*c83a76b0SSuyog Pawar &ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)];
5102*c83a76b0SSuyog Pawar
5103*c83a76b0SSuyog Pawar S32 blk_8x8_mask =
5104*c83a76b0SSuyog Pawar ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5105*c83a76b0SSuyog Pawar
5106*c83a76b0SSuyog Pawar area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N];
5107*c83a76b0SSuyog Pawar area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5108*c83a76b0SSuyog Pawar children_nodes_required = 1;
5109*c83a76b0SSuyog Pawar intra_mv_area = ps_blk_16x16->intra_mv_area;
5110*c83a76b0SSuyog Pawar ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5111*c83a76b0SSuyog Pawar num_clusters = ps_blk_16x16->num_clusters;
5112*c83a76b0SSuyog Pawar
5113*c83a76b0SSuyog Pawar intra_eval_enable = ipe_decision_precedence;
5114*c83a76b0SSuyog Pawar inter_eval_enable = 1;
5115*c83a76b0SSuyog Pawar
5116*c83a76b0SSuyog Pawar #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5117*c83a76b0SSuyog Pawar if(e_quality_preset >= ME_HIGH_QUALITY)
5118*c83a76b0SSuyog Pawar {
5119*c83a76b0SSuyog Pawar node_validity =
5120*c83a76b0SSuyog Pawar !ps_ctb_cluster_info
5121*c83a76b0SSuyog Pawar ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5122*c83a76b0SSuyog Pawar children_nodes_required = !node_validity;
5123*c83a76b0SSuyog Pawar break;
5124*c83a76b0SSuyog Pawar }
5125*c83a76b0SSuyog Pawar #endif
5126*c83a76b0SSuyog Pawar
5127*c83a76b0SSuyog Pawar #if ENABLE_4CTB_EVALUATION
5128*c83a76b0SSuyog Pawar node_validity = (blk_8x8_mask == 0xf);
5129*c83a76b0SSuyog Pawar
5130*c83a76b0SSuyog Pawar #if ENABLE_CU_TREE_CULLING
5131*c83a76b0SSuyog Pawar {
5132*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_32x32_root = NULL;
5133*c83a76b0SSuyog Pawar
5134*c83a76b0SSuyog Pawar switch(e_parent_blk_pos)
5135*c83a76b0SSuyog Pawar {
5136*c83a76b0SSuyog Pawar case POS_TL:
5137*c83a76b0SSuyog Pawar {
5138*c83a76b0SSuyog Pawar ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5139*c83a76b0SSuyog Pawar
5140*c83a76b0SSuyog Pawar break;
5141*c83a76b0SSuyog Pawar }
5142*c83a76b0SSuyog Pawar case POS_TR:
5143*c83a76b0SSuyog Pawar {
5144*c83a76b0SSuyog Pawar ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5145*c83a76b0SSuyog Pawar
5146*c83a76b0SSuyog Pawar break;
5147*c83a76b0SSuyog Pawar }
5148*c83a76b0SSuyog Pawar case POS_BL:
5149*c83a76b0SSuyog Pawar {
5150*c83a76b0SSuyog Pawar ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5151*c83a76b0SSuyog Pawar
5152*c83a76b0SSuyog Pawar break;
5153*c83a76b0SSuyog Pawar }
5154*c83a76b0SSuyog Pawar case POS_BR:
5155*c83a76b0SSuyog Pawar {
5156*c83a76b0SSuyog Pawar ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5157*c83a76b0SSuyog Pawar
5158*c83a76b0SSuyog Pawar break;
5159*c83a76b0SSuyog Pawar }
5160*c83a76b0SSuyog Pawar default:
5161*c83a76b0SSuyog Pawar {
5162*c83a76b0SSuyog Pawar DBG_PRINTF("Invalid block position %d\n", e_parent_blk_pos);
5163*c83a76b0SSuyog Pawar break;
5164*c83a76b0SSuyog Pawar }
5165*c83a76b0SSuyog Pawar }
5166*c83a76b0SSuyog Pawar
5167*c83a76b0SSuyog Pawar if(ps_32x32_root->is_node_valid)
5168*c83a76b0SSuyog Pawar {
5169*c83a76b0SSuyog Pawar node_validity =
5170*c83a76b0SSuyog Pawar node_validity &&
5171*c83a76b0SSuyog Pawar !ps_ctb_cluster_info
5172*c83a76b0SSuyog Pawar ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5173*c83a76b0SSuyog Pawar children_nodes_required = !node_validity;
5174*c83a76b0SSuyog Pawar }
5175*c83a76b0SSuyog Pawar }
5176*c83a76b0SSuyog Pawar #endif
5177*c83a76b0SSuyog Pawar
5178*c83a76b0SSuyog Pawar break;
5179*c83a76b0SSuyog Pawar #else
5180*c83a76b0SSuyog Pawar
5181*c83a76b0SSuyog Pawar if(ipe_decision_precedence)
5182*c83a76b0SSuyog Pawar {
5183*c83a76b0SSuyog Pawar S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
5184*c83a76b0SSuyog Pawar .as_intra16_analyse[e_cur_blk_pos]
5185*c83a76b0SSuyog Pawar .b1_merge_flag);
5186*c83a76b0SSuyog Pawar S32 valid_flag = (blk_8x8_mask == 0xf);
5187*c83a76b0SSuyog Pawar
5188*c83a76b0SSuyog Pawar node_validity = merge_flag_16 && valid_flag;
5189*c83a76b0SSuyog Pawar }
5190*c83a76b0SSuyog Pawar else
5191*c83a76b0SSuyog Pawar {
5192*c83a76b0SSuyog Pawar node_validity = (blk_8x8_mask == 0xf);
5193*c83a76b0SSuyog Pawar }
5194*c83a76b0SSuyog Pawar
5195*c83a76b0SSuyog Pawar break;
5196*c83a76b0SSuyog Pawar #endif
5197*c83a76b0SSuyog Pawar }
5198*c83a76b0SSuyog Pawar case 3:
5199*c83a76b0SSuyog Pawar {
5200*c83a76b0SSuyog Pawar S32 blk_8x8_mask =
5201*c83a76b0SSuyog Pawar ps_ctb_cluster_info
5202*c83a76b0SSuyog Pawar ->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos];
5203*c83a76b0SSuyog Pawar S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
5204*c83a76b0SSuyog Pawar .as_intra16_analyse[e_parent_blk_pos]
5205*c83a76b0SSuyog Pawar .b1_merge_flag);
5206*c83a76b0SSuyog Pawar S32 merge_flag_32 =
5207*c83a76b0SSuyog Pawar (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag);
5208*c83a76b0SSuyog Pawar
5209*c83a76b0SSuyog Pawar intra_eval_enable = !merge_flag_16 || !merge_flag_32;
5210*c83a76b0SSuyog Pawar inter_eval_enable = 1;
5211*c83a76b0SSuyog Pawar children_nodes_required = 0;
5212*c83a76b0SSuyog Pawar
5213*c83a76b0SSuyog Pawar #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5214*c83a76b0SSuyog Pawar if(e_quality_preset >= ME_HIGH_QUALITY)
5215*c83a76b0SSuyog Pawar {
5216*c83a76b0SSuyog Pawar node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5217*c83a76b0SSuyog Pawar break;
5218*c83a76b0SSuyog Pawar }
5219*c83a76b0SSuyog Pawar #endif
5220*c83a76b0SSuyog Pawar
5221*c83a76b0SSuyog Pawar #if ENABLE_4CTB_EVALUATION
5222*c83a76b0SSuyog Pawar node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5223*c83a76b0SSuyog Pawar
5224*c83a76b0SSuyog Pawar break;
5225*c83a76b0SSuyog Pawar #else
5226*c83a76b0SSuyog Pawar {
5227*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_32x32_root;
5228*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_16x16_root;
5229*c83a76b0SSuyog Pawar cluster_32x32_blk_t *ps_32x32_blk;
5230*c83a76b0SSuyog Pawar
5231*c83a76b0SSuyog Pawar switch(e_grandparent_blk_pos)
5232*c83a76b0SSuyog Pawar {
5233*c83a76b0SSuyog Pawar case POS_TL:
5234*c83a76b0SSuyog Pawar {
5235*c83a76b0SSuyog Pawar ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5236*c83a76b0SSuyog Pawar
5237*c83a76b0SSuyog Pawar break;
5238*c83a76b0SSuyog Pawar }
5239*c83a76b0SSuyog Pawar case POS_TR:
5240*c83a76b0SSuyog Pawar {
5241*c83a76b0SSuyog Pawar ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5242*c83a76b0SSuyog Pawar
5243*c83a76b0SSuyog Pawar break;
5244*c83a76b0SSuyog Pawar }
5245*c83a76b0SSuyog Pawar case POS_BL:
5246*c83a76b0SSuyog Pawar {
5247*c83a76b0SSuyog Pawar ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5248*c83a76b0SSuyog Pawar
5249*c83a76b0SSuyog Pawar break;
5250*c83a76b0SSuyog Pawar }
5251*c83a76b0SSuyog Pawar case POS_BR:
5252*c83a76b0SSuyog Pawar {
5253*c83a76b0SSuyog Pawar ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5254*c83a76b0SSuyog Pawar
5255*c83a76b0SSuyog Pawar break;
5256*c83a76b0SSuyog Pawar }
5257*c83a76b0SSuyog Pawar default:
5258*c83a76b0SSuyog Pawar {
5259*c83a76b0SSuyog Pawar DBG_PRINTF("Invalid block position %d\n", e_grandparent_blk_pos);
5260*c83a76b0SSuyog Pawar break;
5261*c83a76b0SSuyog Pawar }
5262*c83a76b0SSuyog Pawar }
5263*c83a76b0SSuyog Pawar
5264*c83a76b0SSuyog Pawar switch(e_parent_blk_pos)
5265*c83a76b0SSuyog Pawar {
5266*c83a76b0SSuyog Pawar case POS_TL:
5267*c83a76b0SSuyog Pawar {
5268*c83a76b0SSuyog Pawar ps_16x16_root = ps_32x32_root->ps_child_node_tl;
5269*c83a76b0SSuyog Pawar
5270*c83a76b0SSuyog Pawar break;
5271*c83a76b0SSuyog Pawar }
5272*c83a76b0SSuyog Pawar case POS_TR:
5273*c83a76b0SSuyog Pawar {
5274*c83a76b0SSuyog Pawar ps_16x16_root = ps_32x32_root->ps_child_node_tr;
5275*c83a76b0SSuyog Pawar
5276*c83a76b0SSuyog Pawar break;
5277*c83a76b0SSuyog Pawar }
5278*c83a76b0SSuyog Pawar case POS_BL:
5279*c83a76b0SSuyog Pawar {
5280*c83a76b0SSuyog Pawar ps_16x16_root = ps_32x32_root->ps_child_node_bl;
5281*c83a76b0SSuyog Pawar
5282*c83a76b0SSuyog Pawar break;
5283*c83a76b0SSuyog Pawar }
5284*c83a76b0SSuyog Pawar case POS_BR:
5285*c83a76b0SSuyog Pawar {
5286*c83a76b0SSuyog Pawar ps_16x16_root = ps_32x32_root->ps_child_node_br;
5287*c83a76b0SSuyog Pawar
5288*c83a76b0SSuyog Pawar break;
5289*c83a76b0SSuyog Pawar }
5290*c83a76b0SSuyog Pawar default:
5291*c83a76b0SSuyog Pawar {
5292*c83a76b0SSuyog Pawar DBG_PRINTF("Invalid block position %d\n", e_parent_blk_pos);
5293*c83a76b0SSuyog Pawar break;
5294*c83a76b0SSuyog Pawar }
5295*c83a76b0SSuyog Pawar }
5296*c83a76b0SSuyog Pawar
5297*c83a76b0SSuyog Pawar ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos];
5298*c83a76b0SSuyog Pawar
5299*c83a76b0SSuyog Pawar node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) &&
5300*c83a76b0SSuyog Pawar ((!ps_32x32_root->is_node_valid) ||
5301*c83a76b0SSuyog Pawar (ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) ||
5302*c83a76b0SSuyog Pawar (!ps_16x16_root->is_node_valid));
5303*c83a76b0SSuyog Pawar
5304*c83a76b0SSuyog Pawar break;
5305*c83a76b0SSuyog Pawar }
5306*c83a76b0SSuyog Pawar #endif
5307*c83a76b0SSuyog Pawar }
5308*c83a76b0SSuyog Pawar }
5309*c83a76b0SSuyog Pawar
5310*c83a76b0SSuyog Pawar /* Fill the current cu_tree node */
5311*c83a76b0SSuyog Pawar ps_cu_tree->is_node_valid = node_validity;
5312*c83a76b0SSuyog Pawar ps_cu_tree->u1_intra_eval_enable = intra_eval_enable;
5313*c83a76b0SSuyog Pawar ps_cu_tree->u1_inter_eval_enable = inter_eval_enable;
5314*c83a76b0SSuyog Pawar
5315*c83a76b0SSuyog Pawar if(children_nodes_required)
5316*c83a76b0SSuyog Pawar {
5317*c83a76b0SSuyog Pawar tree_depth++;
5318*c83a76b0SSuyog Pawar
5319*c83a76b0SSuyog Pawar hme_populate_cu_tree(
5320*c83a76b0SSuyog Pawar ps_ctb_cluster_info,
5321*c83a76b0SSuyog Pawar ps_cu_tree->ps_child_node_tl,
5322*c83a76b0SSuyog Pawar tree_depth,
5323*c83a76b0SSuyog Pawar e_quality_preset,
5324*c83a76b0SSuyog Pawar e_parent_blk_pos,
5325*c83a76b0SSuyog Pawar e_cur_blk_pos,
5326*c83a76b0SSuyog Pawar POS_TL);
5327*c83a76b0SSuyog Pawar
5328*c83a76b0SSuyog Pawar hme_populate_cu_tree(
5329*c83a76b0SSuyog Pawar ps_ctb_cluster_info,
5330*c83a76b0SSuyog Pawar ps_cu_tree->ps_child_node_tr,
5331*c83a76b0SSuyog Pawar tree_depth,
5332*c83a76b0SSuyog Pawar e_quality_preset,
5333*c83a76b0SSuyog Pawar e_parent_blk_pos,
5334*c83a76b0SSuyog Pawar e_cur_blk_pos,
5335*c83a76b0SSuyog Pawar POS_TR);
5336*c83a76b0SSuyog Pawar
5337*c83a76b0SSuyog Pawar hme_populate_cu_tree(
5338*c83a76b0SSuyog Pawar ps_ctb_cluster_info,
5339*c83a76b0SSuyog Pawar ps_cu_tree->ps_child_node_bl,
5340*c83a76b0SSuyog Pawar tree_depth,
5341*c83a76b0SSuyog Pawar e_quality_preset,
5342*c83a76b0SSuyog Pawar e_parent_blk_pos,
5343*c83a76b0SSuyog Pawar e_cur_blk_pos,
5344*c83a76b0SSuyog Pawar POS_BL);
5345*c83a76b0SSuyog Pawar
5346*c83a76b0SSuyog Pawar hme_populate_cu_tree(
5347*c83a76b0SSuyog Pawar ps_ctb_cluster_info,
5348*c83a76b0SSuyog Pawar ps_cu_tree->ps_child_node_br,
5349*c83a76b0SSuyog Pawar tree_depth,
5350*c83a76b0SSuyog Pawar e_quality_preset,
5351*c83a76b0SSuyog Pawar e_parent_blk_pos,
5352*c83a76b0SSuyog Pawar e_cur_blk_pos,
5353*c83a76b0SSuyog Pawar POS_BR);
5354*c83a76b0SSuyog Pawar }
5355*c83a76b0SSuyog Pawar }
5356*c83a76b0SSuyog Pawar
5357*c83a76b0SSuyog Pawar /**
5358*c83a76b0SSuyog Pawar ********************************************************************************
5359*c83a76b0SSuyog Pawar * @fn void hme_analyse_mv_clustering
5360*c83a76b0SSuyog Pawar * (
5361*c83a76b0SSuyog Pawar * search_results_t *ps_search_results,
5362*c83a76b0SSuyog Pawar * ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
5363*c83a76b0SSuyog Pawar * cur_ctb_cu_tree_t *ps_cu_tree
5364*c83a76b0SSuyog Pawar * )
5365*c83a76b0SSuyog Pawar *
5366*c83a76b0SSuyog Pawar * @brief Implementation for the clustering algorithm
5367*c83a76b0SSuyog Pawar *
5368*c83a76b0SSuyog Pawar * @param[in] ps_search_results: structure containing 16x16 block results
5369*c83a76b0SSuyog Pawar *
5370*c83a76b0SSuyog Pawar * @param[in] ps_cur_ipe_ctb : output container for ipe analyses
5371*c83a76b0SSuyog Pawar *
5372*c83a76b0SSuyog Pawar * @param[out] ps_cu_tree : represents CU tree used in CU recursion
5373*c83a76b0SSuyog Pawar *
5374*c83a76b0SSuyog Pawar * @return None
5375*c83a76b0SSuyog Pawar ********************************************************************************
5376*c83a76b0SSuyog Pawar */
hme_analyse_mv_clustering(search_results_t * ps_search_results,inter_cu_results_t * ps_16x16_cu_results,inter_cu_results_t * ps_8x8_cu_results,ctb_cluster_info_t * ps_ctb_cluster_info,S08 * pi1_future_list,S08 * pi1_past_list,S32 bidir_enabled,ME_QUALITY_PRESETS_T e_quality_preset)5377*c83a76b0SSuyog Pawar void hme_analyse_mv_clustering(
5378*c83a76b0SSuyog Pawar search_results_t *ps_search_results,
5379*c83a76b0SSuyog Pawar inter_cu_results_t *ps_16x16_cu_results,
5380*c83a76b0SSuyog Pawar inter_cu_results_t *ps_8x8_cu_results,
5381*c83a76b0SSuyog Pawar ctb_cluster_info_t *ps_ctb_cluster_info,
5382*c83a76b0SSuyog Pawar S08 *pi1_future_list,
5383*c83a76b0SSuyog Pawar S08 *pi1_past_list,
5384*c83a76b0SSuyog Pawar S32 bidir_enabled,
5385*c83a76b0SSuyog Pawar ME_QUALITY_PRESETS_T e_quality_preset)
5386*c83a76b0SSuyog Pawar {
5387*c83a76b0SSuyog Pawar cluster_16x16_blk_t *ps_blk_16x16;
5388*c83a76b0SSuyog Pawar cluster_32x32_blk_t *ps_blk_32x32;
5389*c83a76b0SSuyog Pawar cluster_64x64_blk_t *ps_blk_64x64;
5390*c83a76b0SSuyog Pawar
5391*c83a76b0SSuyog Pawar part_type_results_t *ps_best_result;
5392*c83a76b0SSuyog Pawar pu_result_t *aps_part_result[MAX_NUM_PARTS];
5393*c83a76b0SSuyog Pawar pu_result_t *aps_inferior_parts[MAX_NUM_PARTS];
5394*c83a76b0SSuyog Pawar
5395*c83a76b0SSuyog Pawar PART_ID_T e_part_id;
5396*c83a76b0SSuyog Pawar PART_TYPE_T e_part_type;
5397*c83a76b0SSuyog Pawar
5398*c83a76b0SSuyog Pawar S32 enable_64x64_merge;
5399*c83a76b0SSuyog Pawar S32 i, j, k;
5400*c83a76b0SSuyog Pawar S32 mvx, mvy;
5401*c83a76b0SSuyog Pawar S32 num_parts;
5402*c83a76b0SSuyog Pawar S32 ref_idx;
5403*c83a76b0SSuyog Pawar S32 ai4_pred_mode[MAX_NUM_PARTS];
5404*c83a76b0SSuyog Pawar
5405*c83a76b0SSuyog Pawar S32 num_32x32_merges = 0;
5406*c83a76b0SSuyog Pawar
5407*c83a76b0SSuyog Pawar /*****************************************/
5408*c83a76b0SSuyog Pawar /*****************************************/
5409*c83a76b0SSuyog Pawar /********* Enter ye who is HQ ************/
5410*c83a76b0SSuyog Pawar /*****************************************/
5411*c83a76b0SSuyog Pawar /*****************************************/
5412*c83a76b0SSuyog Pawar
5413*c83a76b0SSuyog Pawar ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
5414*c83a76b0SSuyog Pawar
5415*c83a76b0SSuyog Pawar /* Initialise data in each of the clusters */
5416*c83a76b0SSuyog Pawar for(i = 0; i < 16; i++)
5417*c83a76b0SSuyog Pawar {
5418*c83a76b0SSuyog Pawar ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5419*c83a76b0SSuyog Pawar
5420*c83a76b0SSuyog Pawar #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5421*c83a76b0SSuyog Pawar if(e_quality_preset < ME_HIGH_QUALITY)
5422*c83a76b0SSuyog Pawar {
5423*c83a76b0SSuyog Pawar hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5424*c83a76b0SSuyog Pawar }
5425*c83a76b0SSuyog Pawar else
5426*c83a76b0SSuyog Pawar {
5427*c83a76b0SSuyog Pawar ps_blk_16x16->best_inter_cost = 0;
5428*c83a76b0SSuyog Pawar ps_blk_16x16->intra_mv_area = 0;
5429*c83a76b0SSuyog Pawar }
5430*c83a76b0SSuyog Pawar #else
5431*c83a76b0SSuyog Pawar hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5432*c83a76b0SSuyog Pawar #endif
5433*c83a76b0SSuyog Pawar }
5434*c83a76b0SSuyog Pawar
5435*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
5436*c83a76b0SSuyog Pawar {
5437*c83a76b0SSuyog Pawar ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5438*c83a76b0SSuyog Pawar
5439*c83a76b0SSuyog Pawar #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5440*c83a76b0SSuyog Pawar if(e_quality_preset < ME_HIGH_QUALITY)
5441*c83a76b0SSuyog Pawar {
5442*c83a76b0SSuyog Pawar hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5443*c83a76b0SSuyog Pawar }
5444*c83a76b0SSuyog Pawar else
5445*c83a76b0SSuyog Pawar {
5446*c83a76b0SSuyog Pawar ps_blk_32x32->best_inter_cost = 0;
5447*c83a76b0SSuyog Pawar ps_blk_32x32->intra_mv_area = 0;
5448*c83a76b0SSuyog Pawar }
5449*c83a76b0SSuyog Pawar #else
5450*c83a76b0SSuyog Pawar hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5451*c83a76b0SSuyog Pawar #endif
5452*c83a76b0SSuyog Pawar }
5453*c83a76b0SSuyog Pawar
5454*c83a76b0SSuyog Pawar #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5455*c83a76b0SSuyog Pawar if(e_quality_preset < ME_HIGH_QUALITY)
5456*c83a76b0SSuyog Pawar {
5457*c83a76b0SSuyog Pawar hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5458*c83a76b0SSuyog Pawar }
5459*c83a76b0SSuyog Pawar else
5460*c83a76b0SSuyog Pawar {
5461*c83a76b0SSuyog Pawar ps_blk_64x64->best_inter_cost = 0;
5462*c83a76b0SSuyog Pawar ps_blk_64x64->intra_mv_area = 0;
5463*c83a76b0SSuyog Pawar }
5464*c83a76b0SSuyog Pawar #else
5465*c83a76b0SSuyog Pawar hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5466*c83a76b0SSuyog Pawar #endif
5467*c83a76b0SSuyog Pawar
5468*c83a76b0SSuyog Pawar /* Initialise data for all nodes in the CU tree */
5469*c83a76b0SSuyog Pawar hme_build_cu_tree(
5470*c83a76b0SSuyog Pawar ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA);
5471*c83a76b0SSuyog Pawar
5472*c83a76b0SSuyog Pawar if(e_quality_preset >= ME_HIGH_QUALITY)
5473*c83a76b0SSuyog Pawar {
5474*c83a76b0SSuyog Pawar memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08));
5475*c83a76b0SSuyog Pawar }
5476*c83a76b0SSuyog Pawar
5477*c83a76b0SSuyog Pawar #if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8
5478*c83a76b0SSuyog Pawar return;
5479*c83a76b0SSuyog Pawar #endif
5480*c83a76b0SSuyog Pawar
5481*c83a76b0SSuyog Pawar for(i = 0; i < 16; i++)
5482*c83a76b0SSuyog Pawar {
5483*c83a76b0SSuyog Pawar S32 blk_8x8_mask;
5484*c83a76b0SSuyog Pawar S32 is_16x16_blk_valid;
5485*c83a76b0SSuyog Pawar S32 num_clusters_updated;
5486*c83a76b0SSuyog Pawar S32 num_clusters;
5487*c83a76b0SSuyog Pawar
5488*c83a76b0SSuyog Pawar blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i];
5489*c83a76b0SSuyog Pawar
5490*c83a76b0SSuyog Pawar ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5491*c83a76b0SSuyog Pawar
5492*c83a76b0SSuyog Pawar is_16x16_blk_valid = (blk_8x8_mask == 0xf);
5493*c83a76b0SSuyog Pawar
5494*c83a76b0SSuyog Pawar if(is_16x16_blk_valid)
5495*c83a76b0SSuyog Pawar {
5496*c83a76b0SSuyog Pawar /* Use 8x8 data when 16x16 CU is split */
5497*c83a76b0SSuyog Pawar if(ps_search_results[i].u1_split_flag)
5498*c83a76b0SSuyog Pawar {
5499*c83a76b0SSuyog Pawar S32 blk_8x8_idx = i << 2;
5500*c83a76b0SSuyog Pawar
5501*c83a76b0SSuyog Pawar num_parts = 4;
5502*c83a76b0SSuyog Pawar e_part_type = PRT_NxN;
5503*c83a76b0SSuyog Pawar
5504*c83a76b0SSuyog Pawar for(j = 0; j < num_parts; j++, blk_8x8_idx++)
5505*c83a76b0SSuyog Pawar {
5506*c83a76b0SSuyog Pawar /* Only 2Nx2N partition supported for 8x8 block */
5507*c83a76b0SSuyog Pawar ASSERT(
5508*c83a76b0SSuyog Pawar ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type ==
5509*c83a76b0SSuyog Pawar ((PART_TYPE_T)PRT_2Nx2N));
5510*c83a76b0SSuyog Pawar
5511*c83a76b0SSuyog Pawar aps_part_result[j] =
5512*c83a76b0SSuyog Pawar &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0];
5513*c83a76b0SSuyog Pawar aps_inferior_parts[j] =
5514*c83a76b0SSuyog Pawar &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0];
5515*c83a76b0SSuyog Pawar ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5516*c83a76b0SSuyog Pawar }
5517*c83a76b0SSuyog Pawar }
5518*c83a76b0SSuyog Pawar else
5519*c83a76b0SSuyog Pawar {
5520*c83a76b0SSuyog Pawar ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0];
5521*c83a76b0SSuyog Pawar
5522*c83a76b0SSuyog Pawar e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type;
5523*c83a76b0SSuyog Pawar num_parts = gau1_num_parts_in_part_type[e_part_type];
5524*c83a76b0SSuyog Pawar
5525*c83a76b0SSuyog Pawar for(j = 0; j < num_parts; j++)
5526*c83a76b0SSuyog Pawar {
5527*c83a76b0SSuyog Pawar aps_part_result[j] = &ps_best_result->as_pu_results[j];
5528*c83a76b0SSuyog Pawar aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j];
5529*c83a76b0SSuyog Pawar ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5530*c83a76b0SSuyog Pawar }
5531*c83a76b0SSuyog Pawar
5532*c83a76b0SSuyog Pawar ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0;
5533*c83a76b0SSuyog Pawar }
5534*c83a76b0SSuyog Pawar
5535*c83a76b0SSuyog Pawar for(j = 0; j < num_parts; j++)
5536*c83a76b0SSuyog Pawar {
5537*c83a76b0SSuyog Pawar pu_result_t *ps_part_result = aps_part_result[j];
5538*c83a76b0SSuyog Pawar
5539*c83a76b0SSuyog Pawar S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1);
5540*c83a76b0SSuyog Pawar
5541*c83a76b0SSuyog Pawar e_part_id = ge_part_type_to_part_id[e_part_type][j];
5542*c83a76b0SSuyog Pawar
5543*c83a76b0SSuyog Pawar /* Skip clustering if best mode is intra */
5544*c83a76b0SSuyog Pawar if((ps_part_result->pu.b1_intra_flag))
5545*c83a76b0SSuyog Pawar {
5546*c83a76b0SSuyog Pawar ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id];
5547*c83a76b0SSuyog Pawar ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost;
5548*c83a76b0SSuyog Pawar continue;
5549*c83a76b0SSuyog Pawar }
5550*c83a76b0SSuyog Pawar else
5551*c83a76b0SSuyog Pawar {
5552*c83a76b0SSuyog Pawar ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost;
5553*c83a76b0SSuyog Pawar }
5554*c83a76b0SSuyog Pawar
5555*c83a76b0SSuyog Pawar #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5556*c83a76b0SSuyog Pawar if(e_quality_preset >= ME_HIGH_QUALITY)
5557*c83a76b0SSuyog Pawar {
5558*c83a76b0SSuyog Pawar continue;
5559*c83a76b0SSuyog Pawar }
5560*c83a76b0SSuyog Pawar #endif
5561*c83a76b0SSuyog Pawar
5562*c83a76b0SSuyog Pawar for(k = 0; k < num_mvs; k++)
5563*c83a76b0SSuyog Pawar {
5564*c83a76b0SSuyog Pawar mv_t *ps_mv;
5565*c83a76b0SSuyog Pawar
5566*c83a76b0SSuyog Pawar pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv;
5567*c83a76b0SSuyog Pawar
5568*c83a76b0SSuyog Pawar S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0);
5569*c83a76b0SSuyog Pawar
5570*c83a76b0SSuyog Pawar ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv);
5571*c83a76b0SSuyog Pawar
5572*c83a76b0SSuyog Pawar mvx = ps_mv->i2_mvx;
5573*c83a76b0SSuyog Pawar mvy = ps_mv->i2_mvy;
5574*c83a76b0SSuyog Pawar
5575*c83a76b0SSuyog Pawar ref_idx = (is_l0_mv) ? pi1_past_list[ps_pu_mv->i1_l0_ref_idx]
5576*c83a76b0SSuyog Pawar : pi1_future_list[ps_pu_mv->i1_l1_ref_idx];
5577*c83a76b0SSuyog Pawar
5578*c83a76b0SSuyog Pawar num_clusters = ps_blk_16x16->num_clusters;
5579*c83a76b0SSuyog Pawar
5580*c83a76b0SSuyog Pawar hme_find_and_update_clusters(
5581*c83a76b0SSuyog Pawar ps_blk_16x16->as_cluster_data,
5582*c83a76b0SSuyog Pawar &(ps_blk_16x16->num_clusters),
5583*c83a76b0SSuyog Pawar mvx,
5584*c83a76b0SSuyog Pawar mvy,
5585*c83a76b0SSuyog Pawar ref_idx,
5586*c83a76b0SSuyog Pawar ps_part_result->i4_sdi,
5587*c83a76b0SSuyog Pawar e_part_id,
5588*c83a76b0SSuyog Pawar (ai4_pred_mode[j] == 2));
5589*c83a76b0SSuyog Pawar
5590*c83a76b0SSuyog Pawar num_clusters_updated = (ps_blk_16x16->num_clusters);
5591*c83a76b0SSuyog Pawar
5592*c83a76b0SSuyog Pawar ps_blk_16x16->au1_num_clusters[ref_idx] +=
5593*c83a76b0SSuyog Pawar (num_clusters_updated - num_clusters);
5594*c83a76b0SSuyog Pawar }
5595*c83a76b0SSuyog Pawar }
5596*c83a76b0SSuyog Pawar }
5597*c83a76b0SSuyog Pawar }
5598*c83a76b0SSuyog Pawar
5599*c83a76b0SSuyog Pawar /* Search for 32x32 clusters */
5600*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
5601*c83a76b0SSuyog Pawar {
5602*c83a76b0SSuyog Pawar S32 num_clusters_merged;
5603*c83a76b0SSuyog Pawar
5604*c83a76b0SSuyog Pawar S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0;
5605*c83a76b0SSuyog Pawar
5606*c83a76b0SSuyog Pawar if(is_32x32_blk_valid)
5607*c83a76b0SSuyog Pawar {
5608*c83a76b0SSuyog Pawar ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5609*c83a76b0SSuyog Pawar ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2];
5610*c83a76b0SSuyog Pawar
5611*c83a76b0SSuyog Pawar #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5612*c83a76b0SSuyog Pawar if(e_quality_preset >= ME_HIGH_QUALITY)
5613*c83a76b0SSuyog Pawar {
5614*c83a76b0SSuyog Pawar for(j = 0; j < 4; j++, ps_blk_16x16++)
5615*c83a76b0SSuyog Pawar {
5616*c83a76b0SSuyog Pawar ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area;
5617*c83a76b0SSuyog Pawar
5618*c83a76b0SSuyog Pawar ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost;
5619*c83a76b0SSuyog Pawar }
5620*c83a76b0SSuyog Pawar continue;
5621*c83a76b0SSuyog Pawar }
5622*c83a76b0SSuyog Pawar #endif
5623*c83a76b0SSuyog Pawar
5624*c83a76b0SSuyog Pawar hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16);
5625*c83a76b0SSuyog Pawar
5626*c83a76b0SSuyog Pawar if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
5627*c83a76b0SSuyog Pawar {
5628*c83a76b0SSuyog Pawar num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5629*c83a76b0SSuyog Pawar ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters));
5630*c83a76b0SSuyog Pawar
5631*c83a76b0SSuyog Pawar if(num_clusters_merged)
5632*c83a76b0SSuyog Pawar {
5633*c83a76b0SSuyog Pawar ps_blk_32x32->num_clusters -= num_clusters_merged;
5634*c83a76b0SSuyog Pawar
5635*c83a76b0SSuyog Pawar UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32);
5636*c83a76b0SSuyog Pawar }
5637*c83a76b0SSuyog Pawar }
5638*c83a76b0SSuyog Pawar }
5639*c83a76b0SSuyog Pawar }
5640*c83a76b0SSuyog Pawar
5641*c83a76b0SSuyog Pawar #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5642*c83a76b0SSuyog Pawar /* Eliminate outlier 32x32 clusters */
5643*c83a76b0SSuyog Pawar if(e_quality_preset < ME_HIGH_QUALITY)
5644*c83a76b0SSuyog Pawar #endif
5645*c83a76b0SSuyog Pawar {
5646*c83a76b0SSuyog Pawar hme_boot_out_outlier(ps_ctb_cluster_info, 32);
5647*c83a76b0SSuyog Pawar
5648*c83a76b0SSuyog Pawar /* Find best_uni_ref and best_alt_ref */
5649*c83a76b0SSuyog Pawar hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32);
5650*c83a76b0SSuyog Pawar }
5651*c83a76b0SSuyog Pawar
5652*c83a76b0SSuyog Pawar /* Populate the CU tree for depths 1 and higher */
5653*c83a76b0SSuyog Pawar {
5654*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root;
5655*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl;
5656*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr;
5657*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl;
5658*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_br = ps_tree_root->ps_child_node_br;
5659*c83a76b0SSuyog Pawar
5660*c83a76b0SSuyog Pawar hme_populate_cu_tree(
5661*c83a76b0SSuyog Pawar ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL);
5662*c83a76b0SSuyog Pawar
5663*c83a76b0SSuyog Pawar num_32x32_merges += (ps_tl->is_node_valid == 1);
5664*c83a76b0SSuyog Pawar
5665*c83a76b0SSuyog Pawar hme_populate_cu_tree(
5666*c83a76b0SSuyog Pawar ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR);
5667*c83a76b0SSuyog Pawar
5668*c83a76b0SSuyog Pawar num_32x32_merges += (ps_tr->is_node_valid == 1);
5669*c83a76b0SSuyog Pawar
5670*c83a76b0SSuyog Pawar hme_populate_cu_tree(
5671*c83a76b0SSuyog Pawar ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL);
5672*c83a76b0SSuyog Pawar
5673*c83a76b0SSuyog Pawar num_32x32_merges += (ps_bl->is_node_valid == 1);
5674*c83a76b0SSuyog Pawar
5675*c83a76b0SSuyog Pawar hme_populate_cu_tree(
5676*c83a76b0SSuyog Pawar ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR);
5677*c83a76b0SSuyog Pawar
5678*c83a76b0SSuyog Pawar num_32x32_merges += (ps_br->is_node_valid == 1);
5679*c83a76b0SSuyog Pawar }
5680*c83a76b0SSuyog Pawar
5681*c83a76b0SSuyog Pawar #if !ENABLE_4CTB_EVALUATION
5682*c83a76b0SSuyog Pawar if(e_quality_preset < ME_HIGH_QUALITY)
5683*c83a76b0SSuyog Pawar {
5684*c83a76b0SSuyog Pawar enable_64x64_merge = (num_32x32_merges >= 3);
5685*c83a76b0SSuyog Pawar }
5686*c83a76b0SSuyog Pawar #else
5687*c83a76b0SSuyog Pawar if(e_quality_preset < ME_HIGH_QUALITY)
5688*c83a76b0SSuyog Pawar {
5689*c83a76b0SSuyog Pawar enable_64x64_merge = 1;
5690*c83a76b0SSuyog Pawar }
5691*c83a76b0SSuyog Pawar #endif
5692*c83a76b0SSuyog Pawar
5693*c83a76b0SSuyog Pawar #if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5694*c83a76b0SSuyog Pawar if(e_quality_preset >= ME_HIGH_QUALITY)
5695*c83a76b0SSuyog Pawar {
5696*c83a76b0SSuyog Pawar enable_64x64_merge = 1;
5697*c83a76b0SSuyog Pawar }
5698*c83a76b0SSuyog Pawar #else
5699*c83a76b0SSuyog Pawar if(e_quality_preset >= ME_HIGH_QUALITY)
5700*c83a76b0SSuyog Pawar {
5701*c83a76b0SSuyog Pawar enable_64x64_merge = (num_32x32_merges >= 3);
5702*c83a76b0SSuyog Pawar }
5703*c83a76b0SSuyog Pawar #endif
5704*c83a76b0SSuyog Pawar
5705*c83a76b0SSuyog Pawar if(enable_64x64_merge)
5706*c83a76b0SSuyog Pawar {
5707*c83a76b0SSuyog Pawar S32 num_clusters_merged;
5708*c83a76b0SSuyog Pawar
5709*c83a76b0SSuyog Pawar ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0];
5710*c83a76b0SSuyog Pawar
5711*c83a76b0SSuyog Pawar #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5712*c83a76b0SSuyog Pawar if(e_quality_preset >= ME_HIGH_QUALITY)
5713*c83a76b0SSuyog Pawar {
5714*c83a76b0SSuyog Pawar for(j = 0; j < 4; j++, ps_blk_32x32++)
5715*c83a76b0SSuyog Pawar {
5716*c83a76b0SSuyog Pawar ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area;
5717*c83a76b0SSuyog Pawar
5718*c83a76b0SSuyog Pawar ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost;
5719*c83a76b0SSuyog Pawar }
5720*c83a76b0SSuyog Pawar }
5721*c83a76b0SSuyog Pawar else
5722*c83a76b0SSuyog Pawar #endif
5723*c83a76b0SSuyog Pawar {
5724*c83a76b0SSuyog Pawar hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32);
5725*c83a76b0SSuyog Pawar
5726*c83a76b0SSuyog Pawar if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
5727*c83a76b0SSuyog Pawar {
5728*c83a76b0SSuyog Pawar num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5729*c83a76b0SSuyog Pawar ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters));
5730*c83a76b0SSuyog Pawar
5731*c83a76b0SSuyog Pawar if(num_clusters_merged)
5732*c83a76b0SSuyog Pawar {
5733*c83a76b0SSuyog Pawar ps_blk_64x64->num_clusters -= num_clusters_merged;
5734*c83a76b0SSuyog Pawar
5735*c83a76b0SSuyog Pawar UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64);
5736*c83a76b0SSuyog Pawar }
5737*c83a76b0SSuyog Pawar }
5738*c83a76b0SSuyog Pawar }
5739*c83a76b0SSuyog Pawar
5740*c83a76b0SSuyog Pawar #if !ENABLE_4CTB_EVALUATION
5741*c83a76b0SSuyog Pawar if(e_quality_preset < ME_HIGH_QUALITY)
5742*c83a76b0SSuyog Pawar {
5743*c83a76b0SSuyog Pawar S32 best_inter_cost = ps_blk_64x64->best_inter_cost;
5744*c83a76b0SSuyog Pawar S32 best_intra_cost =
5745*c83a76b0SSuyog Pawar ((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5746*c83a76b0SSuyog Pawar ps_ctb_cluster_info->i4_frame_qstep *
5747*c83a76b0SSuyog Pawar ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0)
5748*c83a76b0SSuyog Pawar ? MAX_32BIT_VAL
5749*c83a76b0SSuyog Pawar : (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5750*c83a76b0SSuyog Pawar ps_ctb_cluster_info->i4_frame_qstep *
5751*c83a76b0SSuyog Pawar ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16);
5752*c83a76b0SSuyog Pawar S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5753*c83a76b0SSuyog Pawar S32 cost_differential = (best_inter_cost - best_cost);
5754*c83a76b0SSuyog Pawar
5755*c83a76b0SSuyog Pawar enable_64x64_merge =
5756*c83a76b0SSuyog Pawar ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential));
5757*c83a76b0SSuyog Pawar }
5758*c83a76b0SSuyog Pawar #endif
5759*c83a76b0SSuyog Pawar }
5760*c83a76b0SSuyog Pawar
5761*c83a76b0SSuyog Pawar if(enable_64x64_merge)
5762*c83a76b0SSuyog Pawar {
5763*c83a76b0SSuyog Pawar #if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5764*c83a76b0SSuyog Pawar if(e_quality_preset < ME_HIGH_QUALITY)
5765*c83a76b0SSuyog Pawar #endif
5766*c83a76b0SSuyog Pawar {
5767*c83a76b0SSuyog Pawar hme_boot_out_outlier(ps_ctb_cluster_info, 64);
5768*c83a76b0SSuyog Pawar
5769*c83a76b0SSuyog Pawar hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64);
5770*c83a76b0SSuyog Pawar }
5771*c83a76b0SSuyog Pawar
5772*c83a76b0SSuyog Pawar hme_populate_cu_tree(
5773*c83a76b0SSuyog Pawar ps_ctb_cluster_info,
5774*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root,
5775*c83a76b0SSuyog Pawar 0,
5776*c83a76b0SSuyog Pawar e_quality_preset,
5777*c83a76b0SSuyog Pawar POS_NA,
5778*c83a76b0SSuyog Pawar POS_NA,
5779*c83a76b0SSuyog Pawar POS_NA);
5780*c83a76b0SSuyog Pawar }
5781*c83a76b0SSuyog Pawar }
5782*c83a76b0SSuyog Pawar #endif
5783*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     void hme_merge_prms_init(hme_merge_prms_t *ps_prms, ...)
*
*  @brief  Fills the merge parameter structure used to merge four child
*          blocks into one larger block. When e_blk_size is BLK_32x32 the
*          children are four consecutive 16x16 search results and the merge
*          output is a 32x32 search result; for any other e_blk_size the
*          children are the four 32x32 search results and the merge output
*          is the 64x64 search result.
*
*  @param[out] ps_prms : merge parameters to be populated
*
*  @param[in]  ps_curr_layer : layer context, stored as is
*
*  @param[in]  ps_refine_prms : refinement prms; supplies i4_use_rec_in_fpel
*              and the number of input results
*
*  @param[in]  ps_me_ctxt : ME frame context holding the search results,
*              8x8 CU results, past/future lists, ctb size and cluster info
*
*  @param[in]  ps_range_prms_rec : range prms array (MAX_NUM_REF entries are
*              referenced) used when i4_use_rec_in_fpel is non zero
*
*  @param[in]  ps_range_prms_inp : range prms array (MAX_NUM_REF entries are
*              referenced) used when i4_use_rec_in_fpel is zero
*
*  @param[in]  pps_mv_grid : mv grid pointer, stored as is
*
*  @param[in]  ps_inter_ctb_prms : inter ctb prms, stored as is
*
*  @param[in]  i4_num_pred_dir : stored as the number of refs for merge
*
*  @param[in]  i4_32x32_id : index into as_search_results_32x32; read only
*              when e_blk_size is BLK_32x32
*
*  @param[in]  e_blk_size : size of the block produced by the merge
*
*  @param[in]  e_me_quality_presets : quality preset, stored as is
*
*  @return None
********************************************************************************
*/
static __inline void hme_merge_prms_init(
    hme_merge_prms_t *ps_prms,
    layer_ctxt_t *ps_curr_layer,
    refine_prms_t *ps_refine_prms,
    me_frm_ctxt_t *ps_me_ctxt,
    range_prms_t *ps_range_prms_rec,
    range_prms_t *ps_range_prms_inp,
    mv_grid_t **pps_mv_grid,
    inter_ctb_prms_t *ps_inter_ctb_prms,
    S32 i4_num_pred_dir,
    S32 i4_32x32_id,
    BLK_SIZE_T e_blk_size,
    ME_QUALITY_PRESETS_T e_me_quality_presets)
{
    WORD32 i4_ref;
    S32 i4_rec_in_fpel = ps_refine_prms->i4_use_rec_in_fpel;

    /* Range prms come from the recon set when fpel uses recon, else input */
    range_prms_t *ps_range_base = i4_rec_in_fpel ? ps_range_prms_rec : ps_range_prms_inp;

    /* Handles and settings that are copied through unchanged */
    ps_prms->ps_layer_ctxt = ps_curr_layer;
    ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms;
    ps_prms->pps_mv_grid = pps_mv_grid;
    ps_prms->e_quality_preset = e_me_quality_presets;

    /* Number of reference pics in which to do merge */
    ps_prms->i4_num_ref = i4_num_pred_dir;

    /* Segmentation info from previous layers is currently not enabled */
    ps_prms->i4_seg_info_avail = 0;
    ps_prms->i4_part_mask = 0;

    ps_prms->i4_use_rec = i4_rec_in_fpel;
    ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;

    /* Fields picked up directly from the ME frame context */
    ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size;
    ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list;
    ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list;
    ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info;

    if(BLK_32x32 != e_blk_size)
    {
        /* 64x64 merge: children are the four 32x32 results (tl, tr, bl, br) */
        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_32x32[0];
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_32x32[1];
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_32x32[2];
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_32x32[3];

        /* Merge output */
        ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64;

        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results;
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0];
        ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16;
    }
    else
    {
        /* 32x32 merge: children are four consecutive 16x16 results that */
        /* start at index (4 * i4_32x32_id), in tl, tr, bl, br order     */
        S32 i4_first_16x16 = i4_32x32_id << 2;

        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_16x16[i4_first_16x16];
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_16x16[i4_first_16x16 + 1];
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_16x16[i4_first_16x16 + 2];
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_16x16[i4_first_16x16 + 3];

        /* Merge output */
        ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id];

        /* This could be lesser than the number of 16x16 results generated. */
        /* For now, it is kept the same                                     */
        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results;
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4];
        ps_prms->ps_results_grandchild = NULL;
    }

    /* One mv range entry per reference, all taken from the chosen array */
    for(i4_ref = 0; i4_ref < MAX_NUM_REF; i4_ref++)
    {
        ps_prms->aps_mv_range[i4_ref] = &ps_range_base[i4_ref];
    }
}
5876*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     void hme_refine(me_ctxt_t *ps_thrd_ctxt,
*                          refine_prms_t *ps_refine_prms,
*                          PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
*                          layer_ctxt_t *ps_coarse_layer,
*                          multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
*                          S32 lyr_job_type,
*                          S32 thrd_id,
*                          S32 me_frm_id,
*                          pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
*
*  @brief  Top level entry point for refinement ME
*
*  @param[in,out]  ps_thrd_ctxt : ME handle; the frame context for this call
*                  is picked from its aps_me_frm_prms array
*
*  @param[in]  ps_refine_prms : refinement layer prms
*
*  @param[in]  pf_ext_update_fxn : external update function pointer
*
*  @param[in]  ps_coarse_layer : coarse layer context
*
*  @param[in]  ps_multi_thrd_ctxt : multi thread context
*
*  @param[in]  lyr_job_type : layer job type
*
*  @param[in]  thrd_id : thread id
*
*  @param[in]  me_frm_id : ME frame id; used modulo MAX_NUM_ME_PARALLEL to
*              select the frame context
*
*  @param[in]  ps_l0_ipe_input : L0 IPE input context
*
*  @return None
********************************************************************************
*/
hme_refine(me_ctxt_t * ps_thrd_ctxt,refine_prms_t * ps_refine_prms,PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,layer_ctxt_t * ps_coarse_layer,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,S32 lyr_job_type,S32 thrd_id,S32 me_frm_id,pre_enc_L0_ipe_encloop_ctxt_t * ps_l0_ipe_input)5891*c83a76b0SSuyog Pawar void hme_refine(
5892*c83a76b0SSuyog Pawar me_ctxt_t *ps_thrd_ctxt,
5893*c83a76b0SSuyog Pawar refine_prms_t *ps_refine_prms,
5894*c83a76b0SSuyog Pawar PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
5895*c83a76b0SSuyog Pawar layer_ctxt_t *ps_coarse_layer,
5896*c83a76b0SSuyog Pawar multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
5897*c83a76b0SSuyog Pawar S32 lyr_job_type,
5898*c83a76b0SSuyog Pawar S32 thrd_id,
5899*c83a76b0SSuyog Pawar S32 me_frm_id,
5900*c83a76b0SSuyog Pawar pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
5901*c83a76b0SSuyog Pawar {
5902*c83a76b0SSuyog Pawar inter_ctb_prms_t s_common_frm_prms;
5903*c83a76b0SSuyog Pawar
5904*c83a76b0SSuyog Pawar BLK_SIZE_T e_search_blk_size, e_result_blk_size;
5905*c83a76b0SSuyog Pawar WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL;
5906*c83a76b0SSuyog Pawar me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
5907*c83a76b0SSuyog Pawar ME_QUALITY_PRESETS_T e_me_quality_presets =
5908*c83a76b0SSuyog Pawar ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
5909*c83a76b0SSuyog Pawar
5910*c83a76b0SSuyog Pawar WORD32 num_rows_proc = 0;
5911*c83a76b0SSuyog Pawar WORD32 num_act_ref_pics;
5912*c83a76b0SSuyog Pawar WORD16 i2_prev_enc_frm_max_mv_y;
5913*c83a76b0SSuyog Pawar WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p;
5914*c83a76b0SSuyog Pawar
5915*c83a76b0SSuyog Pawar /*************************************************************************/
5916*c83a76b0SSuyog Pawar /* Complexity of search: Low to High */
5917*c83a76b0SSuyog Pawar /*************************************************************************/
5918*c83a76b0SSuyog Pawar SEARCH_COMPLEXITY_T e_search_complexity;
5919*c83a76b0SSuyog Pawar
5920*c83a76b0SSuyog Pawar /*************************************************************************/
5921*c83a76b0SSuyog Pawar /* to store the PU results which are passed to the decide_part_types */
5922*c83a76b0SSuyog Pawar /* as input prms. Multiplied by 4 as the max number of Ref in a List is 4*/
5923*c83a76b0SSuyog Pawar /*************************************************************************/
5924*c83a76b0SSuyog Pawar
5925*c83a76b0SSuyog Pawar pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST];
5926*c83a76b0SSuyog Pawar inter_pu_results_t as_inter_pu_results[4];
5927*c83a76b0SSuyog Pawar inter_pu_results_t *ps_pu_results = as_inter_pu_results;
5928*c83a76b0SSuyog Pawar
5929*c83a76b0SSuyog Pawar /*************************************************************************/
5930*c83a76b0SSuyog Pawar /* Config parameter structures for various ME submodules */
5931*c83a76b0SSuyog Pawar /*************************************************************************/
5932*c83a76b0SSuyog Pawar hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr;
5933*c83a76b0SSuyog Pawar hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br;
5934*c83a76b0SSuyog Pawar hme_merge_prms_t s_merge_prms_64x64;
5935*c83a76b0SSuyog Pawar hme_search_prms_t s_search_prms_blk;
5936*c83a76b0SSuyog Pawar mvbank_update_prms_t s_mv_update_prms;
5937*c83a76b0SSuyog Pawar hme_ctb_prms_t s_ctb_prms;
5938*c83a76b0SSuyog Pawar hme_subpel_prms_t s_subpel_prms;
5939*c83a76b0SSuyog Pawar fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt;
5940*c83a76b0SSuyog Pawar ctb_cluster_info_t *ps_ctb_cluster_info;
5941*c83a76b0SSuyog Pawar fpel_srch_cand_init_data_t s_srch_cand_init_data;
5942*c83a76b0SSuyog Pawar
5943*c83a76b0SSuyog Pawar /* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */
5944*c83a76b0SSuyog Pawar S32 en_merge_32x32;
5945*c83a76b0SSuyog Pawar /* 5 lsb's specify whether or not merge algorithm is required */
5946*c83a76b0SSuyog Pawar /* to be executed or not. Relevant only in PQ. Ought to be */
5947*c83a76b0SSuyog Pawar /* used in conjunction with en_merge_32x32 and */
5948*c83a76b0SSuyog Pawar /* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */
5949*c83a76b0SSuyog Pawar /* required when all children are deemed to be intras */
5950*c83a76b0SSuyog Pawar S32 en_merge_execution;
5951*c83a76b0SSuyog Pawar
5952*c83a76b0SSuyog Pawar /*************************************************************************/
5953*c83a76b0SSuyog Pawar /* All types of search candidates for predictor based search. */
5954*c83a76b0SSuyog Pawar /*************************************************************************/
5955*c83a76b0SSuyog Pawar S32 num_init_candts = 0;
5956*c83a76b0SSuyog Pawar S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
5957*c83a76b0SSuyog Pawar S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
5958*c83a76b0SSuyog Pawar search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
5959*c83a76b0SSuyog Pawar search_node_t as_top_neighbours[4], as_left_neighbours[3];
5960*c83a76b0SSuyog Pawar
5961*c83a76b0SSuyog Pawar pf_get_wt_inp fp_get_wt_inp;
5962*c83a76b0SSuyog Pawar
5963*c83a76b0SSuyog Pawar search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
5964*c83a76b0SSuyog Pawar U32 au4_unique_node_map[MAP_X_MAX * 2];
5965*c83a76b0SSuyog Pawar
5966*c83a76b0SSuyog Pawar /* Controls the boundary attributes of CTB, whether it has 64x64 or not */
5967*c83a76b0SSuyog Pawar ctb_boundary_attrs_t *ps_ctb_bound_attrs;
5968*c83a76b0SSuyog Pawar
5969*c83a76b0SSuyog Pawar /*************************************************************************/
5970*c83a76b0SSuyog Pawar /* points to the search results for the blk level search (8x8/16x16) */
5971*c83a76b0SSuyog Pawar /*************************************************************************/
5972*c83a76b0SSuyog Pawar search_results_t *ps_search_results;
5973*c83a76b0SSuyog Pawar
5974*c83a76b0SSuyog Pawar /*************************************************************************/
5975*c83a76b0SSuyog Pawar /* Coordinates */
5976*c83a76b0SSuyog Pawar /*************************************************************************/
5977*c83a76b0SSuyog Pawar S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb;
5978*c83a76b0SSuyog Pawar S32 pos_x, pos_y;
5979*c83a76b0SSuyog Pawar S32 blk_id_in_full_ctb;
5980*c83a76b0SSuyog Pawar
5981*c83a76b0SSuyog Pawar /*************************************************************************/
5982*c83a76b0SSuyog Pawar /* Related to dimensions of block being searched and pic dimensions */
5983*c83a76b0SSuyog Pawar /*************************************************************************/
5984*c83a76b0SSuyog Pawar S32 blk_4x4_to_16x16;
5985*c83a76b0SSuyog Pawar S32 blk_wd, blk_ht, blk_size_shift;
5986*c83a76b0SSuyog Pawar S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
5987*c83a76b0SSuyog Pawar S32 num_results_prev_layer;
5988*c83a76b0SSuyog Pawar
5989*c83a76b0SSuyog Pawar /*************************************************************************/
5990*c83a76b0SSuyog Pawar /* Size of a basic unit for this layer. For non encode layers, we search */
5991*c83a76b0SSuyog Pawar /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
5992*c83a76b0SSuyog Pawar /* basic unit size is the ctb size. */
5993*c83a76b0SSuyog Pawar /*************************************************************************/
5994*c83a76b0SSuyog Pawar S32 unit_size;
5995*c83a76b0SSuyog Pawar
5996*c83a76b0SSuyog Pawar /*************************************************************************/
5997*c83a76b0SSuyog Pawar /* Local variable storing results of any 4 CU merge to bigger CU */
5998*c83a76b0SSuyog Pawar /*************************************************************************/
5999*c83a76b0SSuyog Pawar CU_MERGE_RESULT_T e_merge_result;
6000*c83a76b0SSuyog Pawar
6001*c83a76b0SSuyog Pawar /*************************************************************************/
6002*c83a76b0SSuyog Pawar /* This mv grid stores results during and after fpel search, during */
6003*c83a76b0SSuyog Pawar /* merge, subpel and bidirect refinements stages. 2 instances of this are*/
6004*c83a76b0SSuyog Pawar /* meant for the 2 directions of search (l0 and l1). */
6005*c83a76b0SSuyog Pawar /*************************************************************************/
6006*c83a76b0SSuyog Pawar mv_grid_t *aps_mv_grid[2];
6007*c83a76b0SSuyog Pawar
6008*c83a76b0SSuyog Pawar /*************************************************************************/
6009*c83a76b0SSuyog Pawar /* Pointers to context in current and coarser layers */
6010*c83a76b0SSuyog Pawar /*************************************************************************/
6011*c83a76b0SSuyog Pawar layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
6012*c83a76b0SSuyog Pawar
6013*c83a76b0SSuyog Pawar /*************************************************************************/
6014*c83a76b0SSuyog Pawar /* to store mv range per blk, and picture limit, allowed search range */
6015*c83a76b0SSuyog Pawar /* range prms in hpel and qpel units as well */
6016*c83a76b0SSuyog Pawar /*************************************************************************/
6017*c83a76b0SSuyog Pawar range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF];
6018*c83a76b0SSuyog Pawar range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
6019*c83a76b0SSuyog Pawar range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF];
6020*c83a76b0SSuyog Pawar
6021*c83a76b0SSuyog Pawar /*************************************************************************/
6022*c83a76b0SSuyog Pawar /* These variables are used to track number of references at different */
6023*c83a76b0SSuyog Pawar /* stages of ME. */
6024*c83a76b0SSuyog Pawar /*************************************************************************/
6025*c83a76b0SSuyog Pawar S32 i4_num_pred_dir;
6026*c83a76b0SSuyog Pawar S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
6027*c83a76b0SSuyog Pawar S32 lambda_recon = ps_refine_prms->lambda_recon;
6028*c83a76b0SSuyog Pawar
6029*c83a76b0SSuyog Pawar /* Counts successful merge to 32x32 every CTB (0-4) */
6030*c83a76b0SSuyog Pawar S32 merge_count_32x32;
6031*c83a76b0SSuyog Pawar
6032*c83a76b0SSuyog Pawar S32 ai4_id_coloc[14], ai4_id_Z[2];
6033*c83a76b0SSuyog Pawar U08 au1_search_candidate_list_index[2];
6034*c83a76b0SSuyog Pawar S32 ai4_num_coloc_cands[2];
6035*c83a76b0SSuyog Pawar U08 u1_pred_dir, u1_pred_dir_ctr;
6036*c83a76b0SSuyog Pawar
6037*c83a76b0SSuyog Pawar /*************************************************************************/
6038*c83a76b0SSuyog Pawar /* Input pointer and stride */
6039*c83a76b0SSuyog Pawar /*************************************************************************/
6040*c83a76b0SSuyog Pawar U08 *pu1_inp;
6041*c83a76b0SSuyog Pawar S32 i4_inp_stride;
6042*c83a76b0SSuyog Pawar S32 end_of_frame;
6043*c83a76b0SSuyog Pawar S32 num_sync_units_in_row, num_sync_units_in_tile;
6044*c83a76b0SSuyog Pawar
6045*c83a76b0SSuyog Pawar /*************************************************************************/
6046*c83a76b0SSuyog Pawar /* Indicates whether all 4 8x8 blks are valid in the 16x16 blk in the */
6047*c83a76b0SSuyog Pawar /* encode layer. If not 15, then 1 or more 8x8 blks not valid. Means that*/
6048*c83a76b0SSuyog Pawar /* we need to stop merges and force 8x8 CUs for that 16x16 blk */
6049*c83a76b0SSuyog Pawar /*************************************************************************/
6050*c83a76b0SSuyog Pawar S32 blk_8x8_mask;
6051*c83a76b0SSuyog Pawar S32 ai4_blk_8x8_mask[16];
6052*c83a76b0SSuyog Pawar U08 au1_is_64x64Blk_noisy[1];
6053*c83a76b0SSuyog Pawar U08 au1_is_32x32Blk_noisy[4];
6054*c83a76b0SSuyog Pawar U08 au1_is_16x16Blk_noisy[16];
6055*c83a76b0SSuyog Pawar
6056*c83a76b0SSuyog Pawar ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
6057*c83a76b0SSuyog Pawar ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
6058*c83a76b0SSuyog Pawar ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
6059*c83a76b0SSuyog Pawar ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
6060*c83a76b0SSuyog Pawar
6061*c83a76b0SSuyog Pawar ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
6062*c83a76b0SSuyog Pawar
6063*c83a76b0SSuyog Pawar /*************************************************************************/
6064*c83a76b0SSuyog Pawar /* Pointers to current and coarse layer are needed for projection */
6065*c83a76b0SSuyog Pawar /* Pointer to prev layer are needed for other candts like coloc */
6066*c83a76b0SSuyog Pawar /*************************************************************************/
6067*c83a76b0SSuyog Pawar ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
6068*c83a76b0SSuyog Pawar
6069*c83a76b0SSuyog Pawar ps_prev_layer = hme_get_past_layer_ctxt(
6070*c83a76b0SSuyog Pawar ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, ps_multi_thrd_ctxt->i4_num_me_frm_pllel);
6071*c83a76b0SSuyog Pawar
6072*c83a76b0SSuyog Pawar num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
6073*c83a76b0SSuyog Pawar
6074*c83a76b0SSuyog Pawar /* Function pointer is selected based on the C vs X86 macro */
6075*c83a76b0SSuyog Pawar
6076*c83a76b0SSuyog Pawar fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb;
6077*c83a76b0SSuyog Pawar
6078*c83a76b0SSuyog Pawar i4_inp_stride = ps_curr_layer->i4_inp_stride;
6079*c83a76b0SSuyog Pawar i4_pic_wd = ps_curr_layer->i4_wd;
6080*c83a76b0SSuyog Pawar i4_pic_ht = ps_curr_layer->i4_ht;
6081*c83a76b0SSuyog Pawar e_search_complexity = ps_refine_prms->e_search_complexity;
6082*c83a76b0SSuyog Pawar end_of_frame = 0;
6083*c83a76b0SSuyog Pawar
6084*c83a76b0SSuyog Pawar /* This points to all the initial candts */
6085*c83a76b0SSuyog Pawar ps_search_candts = &as_search_candts[0];
6086*c83a76b0SSuyog Pawar
6087*c83a76b0SSuyog Pawar /* mv grid being a huge structure is part of context */
6088*c83a76b0SSuyog Pawar aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0];
6089*c83a76b0SSuyog Pawar aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1];
6090*c83a76b0SSuyog Pawar
6091*c83a76b0SSuyog Pawar /*************************************************************************/
6092*c83a76b0SSuyog Pawar /* If the current layer is encoded (since it may be multicast or final */
6093*c83a76b0SSuyog Pawar /* layer (finest)), then we use 16x16 blk size with some selected parts */
6094*c83a76b0SSuyog Pawar /* If the current layer is not encoded, then we use 8x8 blk size, with */
6095*c83a76b0SSuyog Pawar /* enable or disable of 4x4 partitions depending on the input prms */
6096*c83a76b0SSuyog Pawar /*************************************************************************/
6097*c83a76b0SSuyog Pawar e_search_blk_size = BLK_16x16;
6098*c83a76b0SSuyog Pawar blk_wd = blk_ht = 16;
6099*c83a76b0SSuyog Pawar blk_size_shift = 4;
6100*c83a76b0SSuyog Pawar e_result_blk_size = BLK_8x8;
6101*c83a76b0SSuyog Pawar s_mv_update_prms.i4_shift = 1;
6102*c83a76b0SSuyog Pawar
6103*c83a76b0SSuyog Pawar if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4)
6104*c83a76b0SSuyog Pawar {
6105*c83a76b0SSuyog Pawar blk_4x4_to_16x16 = 1;
6106*c83a76b0SSuyog Pawar }
6107*c83a76b0SSuyog Pawar else
6108*c83a76b0SSuyog Pawar {
6109*c83a76b0SSuyog Pawar blk_4x4_to_16x16 = 0;
6110*c83a76b0SSuyog Pawar }
6111*c83a76b0SSuyog Pawar
6112*c83a76b0SSuyog Pawar unit_size = 1 << ps_ctxt->log_ctb_size;
6113*c83a76b0SSuyog Pawar s_search_prms_blk.i4_inp_stride = unit_size;
6114*c83a76b0SSuyog Pawar
6115*c83a76b0SSuyog Pawar /* This is required to properly update the layer mv bank */
6116*c83a76b0SSuyog Pawar s_mv_update_prms.e_search_blk_size = e_search_blk_size;
6117*c83a76b0SSuyog Pawar s_search_prms_blk.e_blk_size = e_search_blk_size;
6118*c83a76b0SSuyog Pawar
6119*c83a76b0SSuyog Pawar /*************************************************************************/
6120*c83a76b0SSuyog Pawar /* If current layer is explicit, then the number of ref frames are to */
6121*c83a76b0SSuyog Pawar /* be same as previous layer. Else it will be 2 */
6122*c83a76b0SSuyog Pawar /*************************************************************************/
6123*c83a76b0SSuyog Pawar i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
6124*c83a76b0SSuyog Pawar i4_num_pred_dir =
6125*c83a76b0SSuyog Pawar (ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) +
6126*c83a76b0SSuyog Pawar 1;
6127*c83a76b0SSuyog Pawar
6128*c83a76b0SSuyog Pawar #if USE_MODIFIED == 1
6129*c83a76b0SSuyog Pawar s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6130*c83a76b0SSuyog Pawar #else
6131*c83a76b0SSuyog Pawar s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6132*c83a76b0SSuyog Pawar #endif
6133*c83a76b0SSuyog Pawar
6134*c83a76b0SSuyog Pawar i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer);
6135*c83a76b0SSuyog Pawar if(i4_num_ref_prev_layer <= 2)
6136*c83a76b0SSuyog Pawar {
6137*c83a76b0SSuyog Pawar i4_num_ref_each_dir = 1;
6138*c83a76b0SSuyog Pawar }
6139*c83a76b0SSuyog Pawar else
6140*c83a76b0SSuyog Pawar {
6141*c83a76b0SSuyog Pawar i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
6142*c83a76b0SSuyog Pawar }
6143*c83a76b0SSuyog Pawar
6144*c83a76b0SSuyog Pawar s_mv_update_prms.i4_num_ref = i4_num_pred_dir;
6145*c83a76b0SSuyog Pawar s_mv_update_prms.i4_num_results_to_store =
6146*c83a76b0SSuyog Pawar MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref
6147*c83a76b0SSuyog Pawar : (i4_num_act_ref_l0 > 1) + 1,
6148*c83a76b0SSuyog Pawar ps_refine_prms->i4_num_results_per_part);
6149*c83a76b0SSuyog Pawar
6150*c83a76b0SSuyog Pawar /*************************************************************************/
6151*c83a76b0SSuyog Pawar /* Initialization of merge params for 16x16 to 32x32 merge. */
6152*c83a76b0SSuyog Pawar /* There are 4 32x32 units in a CTB, so 4 param structures initialized */
6153*c83a76b0SSuyog Pawar /*************************************************************************/
6154*c83a76b0SSuyog Pawar {
6155*c83a76b0SSuyog Pawar hme_merge_prms_t *aps_merge_prms[4];
6156*c83a76b0SSuyog Pawar aps_merge_prms[0] = &s_merge_prms_32x32_tl;
6157*c83a76b0SSuyog Pawar aps_merge_prms[1] = &s_merge_prms_32x32_tr;
6158*c83a76b0SSuyog Pawar aps_merge_prms[2] = &s_merge_prms_32x32_bl;
6159*c83a76b0SSuyog Pawar aps_merge_prms[3] = &s_merge_prms_32x32_br;
6160*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
6161*c83a76b0SSuyog Pawar {
6162*c83a76b0SSuyog Pawar hme_merge_prms_init(
6163*c83a76b0SSuyog Pawar aps_merge_prms[i],
6164*c83a76b0SSuyog Pawar ps_curr_layer,
6165*c83a76b0SSuyog Pawar ps_refine_prms,
6166*c83a76b0SSuyog Pawar ps_ctxt,
6167*c83a76b0SSuyog Pawar as_range_prms_rec,
6168*c83a76b0SSuyog Pawar as_range_prms_inp,
6169*c83a76b0SSuyog Pawar &aps_mv_grid[0],
6170*c83a76b0SSuyog Pawar &s_common_frm_prms,
6171*c83a76b0SSuyog Pawar i4_num_pred_dir,
6172*c83a76b0SSuyog Pawar i,
6173*c83a76b0SSuyog Pawar BLK_32x32,
6174*c83a76b0SSuyog Pawar e_me_quality_presets);
6175*c83a76b0SSuyog Pawar }
6176*c83a76b0SSuyog Pawar }
6177*c83a76b0SSuyog Pawar
6178*c83a76b0SSuyog Pawar /*************************************************************************/
6179*c83a76b0SSuyog Pawar /* Initialization of merge params for 32x32 to 64x64 merge. */
6180*c83a76b0SSuyog Pawar /* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB */
6181*c83a76b0SSuyog Pawar /*************************************************************************/
6182*c83a76b0SSuyog Pawar {
6183*c83a76b0SSuyog Pawar hme_merge_prms_init(
6184*c83a76b0SSuyog Pawar &s_merge_prms_64x64,
6185*c83a76b0SSuyog Pawar ps_curr_layer,
6186*c83a76b0SSuyog Pawar ps_refine_prms,
6187*c83a76b0SSuyog Pawar ps_ctxt,
6188*c83a76b0SSuyog Pawar as_range_prms_rec,
6189*c83a76b0SSuyog Pawar as_range_prms_inp,
6190*c83a76b0SSuyog Pawar &aps_mv_grid[0],
6191*c83a76b0SSuyog Pawar &s_common_frm_prms,
6192*c83a76b0SSuyog Pawar i4_num_pred_dir,
6193*c83a76b0SSuyog Pawar 0,
6194*c83a76b0SSuyog Pawar BLK_64x64,
6195*c83a76b0SSuyog Pawar e_me_quality_presets);
6196*c83a76b0SSuyog Pawar }
6197*c83a76b0SSuyog Pawar
6198*c83a76b0SSuyog Pawar /* Pointers to cu_results are initialised here */
6199*c83a76b0SSuyog Pawar {
6200*c83a76b0SSuyog Pawar WORD32 i;
6201*c83a76b0SSuyog Pawar
6202*c83a76b0SSuyog Pawar ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results;
6203*c83a76b0SSuyog Pawar
6204*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
6205*c83a76b0SSuyog Pawar {
6206*c83a76b0SSuyog Pawar ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i];
6207*c83a76b0SSuyog Pawar }
6208*c83a76b0SSuyog Pawar
6209*c83a76b0SSuyog Pawar for(i = 0; i < 16; i++)
6210*c83a76b0SSuyog Pawar {
6211*c83a76b0SSuyog Pawar ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i];
6212*c83a76b0SSuyog Pawar }
6213*c83a76b0SSuyog Pawar }
6214*c83a76b0SSuyog Pawar
6215*c83a76b0SSuyog Pawar /*************************************************************************/
6216*c83a76b0SSuyog Pawar /* SUBPEL Params initialized here */
6217*c83a76b0SSuyog Pawar /*************************************************************************/
6218*c83a76b0SSuyog Pawar {
6219*c83a76b0SSuyog Pawar s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0];
6220*c83a76b0SSuyog Pawar s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0];
6221*c83a76b0SSuyog Pawar s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64;
6222*c83a76b0SSuyog Pawar
6223*c83a76b0SSuyog Pawar s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results;
6224*c83a76b0SSuyog Pawar s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results;
6225*c83a76b0SSuyog Pawar s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results;
6226*c83a76b0SSuyog Pawar
6227*c83a76b0SSuyog Pawar s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine;
6228*c83a76b0SSuyog Pawar s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine;
6229*c83a76b0SSuyog Pawar
6230*c83a76b0SSuyog Pawar s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel;
6231*c83a76b0SSuyog Pawar
6232*c83a76b0SSuyog Pawar s_subpel_prms.i4_inp_stride = unit_size;
6233*c83a76b0SSuyog Pawar
6234*c83a76b0SSuyog Pawar s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N;
6235*c83a76b0SSuyog Pawar s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN;
6236*c83a76b0SSuyog Pawar s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold;
6237*c83a76b0SSuyog Pawar
6238*c83a76b0SSuyog Pawar s_subpel_prms.pf_qpel_interp = ps_me_optimised_function_list->pf_qpel_interp_avg_generic;
6239*c83a76b0SSuyog Pawar
6240*c83a76b0SSuyog Pawar {
6241*c83a76b0SSuyog Pawar WORD32 ref_ctr;
6242*c83a76b0SSuyog Pawar for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6243*c83a76b0SSuyog Pawar {
6244*c83a76b0SSuyog Pawar s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr];
6245*c83a76b0SSuyog Pawar s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr];
6246*c83a76b0SSuyog Pawar }
6247*c83a76b0SSuyog Pawar }
6248*c83a76b0SSuyog Pawar s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck;
6249*c83a76b0SSuyog Pawar
6250*c83a76b0SSuyog Pawar #if USE_MODIFIED == 0
6251*c83a76b0SSuyog Pawar s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6252*c83a76b0SSuyog Pawar #else
6253*c83a76b0SSuyog Pawar s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6254*c83a76b0SSuyog Pawar #endif
6255*c83a76b0SSuyog Pawar s_subpel_prms.e_me_quality_presets = e_me_quality_presets;
6256*c83a76b0SSuyog Pawar
6257*c83a76b0SSuyog Pawar /* BI Refinement done only if this field is 1 */
6258*c83a76b0SSuyog Pawar s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled;
6259*c83a76b0SSuyog Pawar
6260*c83a76b0SSuyog Pawar s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
6261*c83a76b0SSuyog Pawar
6262*c83a76b0SSuyog Pawar s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6263*c83a76b0SSuyog Pawar s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6264*c83a76b0SSuyog Pawar s_subpel_prms.u1_max_num_subpel_refine_centers =
6265*c83a76b0SSuyog Pawar ps_refine_prms->u1_max_num_subpel_refine_centers;
6266*c83a76b0SSuyog Pawar }
6267*c83a76b0SSuyog Pawar
6268*c83a76b0SSuyog Pawar /* inter_ctb_prms_t struct initialisation */
6269*c83a76b0SSuyog Pawar {
6270*c83a76b0SSuyog Pawar inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms;
6271*c83a76b0SSuyog Pawar hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms;
6272*c83a76b0SSuyog Pawar
6273*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0;
6274*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1;
6275*c83a76b0SSuyog Pawar ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc;
6276*c83a76b0SSuyog Pawar ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth;
6277*c83a76b0SSuyog Pawar ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets;
6278*c83a76b0SSuyog Pawar ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled;
6279*c83a76b0SSuyog Pawar ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride;
6280*c83a76b0SSuyog Pawar ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref;
6281*c83a76b0SSuyog Pawar ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd;
6282*c83a76b0SSuyog Pawar ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride;
6283*c83a76b0SSuyog Pawar ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6284*c83a76b0SSuyog Pawar ps_inter_ctb_prms->u1_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6285*c83a76b0SSuyog Pawar ps_inter_ctb_prms->i4_lamda = lambda_recon;
6286*c83a76b0SSuyog Pawar ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift;
6287*c83a76b0SSuyog Pawar ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8;
6288*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt;
6289*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list;
6290*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list;
6291*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance;
6292*c83a76b0SSuyog Pawar ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands =
6293*c83a76b0SSuyog Pawar ps_refine_prms->u1_max_2nx2n_tu_recur_cands;
6294*c83a76b0SSuyog Pawar }
6295*c83a76b0SSuyog Pawar
6296*c83a76b0SSuyog Pawar for(i = 0; i < MAX_INIT_CANDTS; i++)
6297*c83a76b0SSuyog Pawar {
6298*c83a76b0SSuyog Pawar ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
6299*c83a76b0SSuyog Pawar ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i];
6300*c83a76b0SSuyog Pawar
6301*c83a76b0SSuyog Pawar INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
6302*c83a76b0SSuyog Pawar }
6303*c83a76b0SSuyog Pawar num_act_ref_pics =
6304*c83a76b0SSuyog Pawar ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6305*c83a76b0SSuyog Pawar
6306*c83a76b0SSuyog Pawar if(num_act_ref_pics)
6307*c83a76b0SSuyog Pawar {
6308*c83a76b0SSuyog Pawar hme_search_cand_data_init(
6309*c83a76b0SSuyog Pawar ai4_id_Z,
6310*c83a76b0SSuyog Pawar ai4_id_coloc,
6311*c83a76b0SSuyog Pawar ai4_num_coloc_cands,
6312*c83a76b0SSuyog Pawar au1_search_candidate_list_index,
6313*c83a76b0SSuyog Pawar i4_num_act_ref_l0,
6314*c83a76b0SSuyog Pawar i4_num_act_ref_l1,
6315*c83a76b0SSuyog Pawar ps_ctxt->s_frm_prms.bidir_enabled,
6316*c83a76b0SSuyog Pawar blk_4x4_to_16x16);
6317*c83a76b0SSuyog Pawar }
6318*c83a76b0SSuyog Pawar
6319*c83a76b0SSuyog Pawar if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1))
6320*c83a76b0SSuyog Pawar {
6321*c83a76b0SSuyog Pawar ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6322*c83a76b0SSuyog Pawar ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1];
6323*c83a76b0SSuyog Pawar }
6324*c83a76b0SSuyog Pawar else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1))
6325*c83a76b0SSuyog Pawar {
6326*c83a76b0SSuyog Pawar ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6327*c83a76b0SSuyog Pawar }
6328*c83a76b0SSuyog Pawar
6329*c83a76b0SSuyog Pawar for(i = 0; i < 3; i++)
6330*c83a76b0SSuyog Pawar {
6331*c83a76b0SSuyog Pawar search_node_t *ps_search_node;
6332*c83a76b0SSuyog Pawar ps_search_node = &as_left_neighbours[i];
6333*c83a76b0SSuyog Pawar INIT_SEARCH_NODE(ps_search_node, 0);
6334*c83a76b0SSuyog Pawar ps_search_node = &as_top_neighbours[i];
6335*c83a76b0SSuyog Pawar INIT_SEARCH_NODE(ps_search_node, 0);
6336*c83a76b0SSuyog Pawar }
6337*c83a76b0SSuyog Pawar
6338*c83a76b0SSuyog Pawar INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
6339*c83a76b0SSuyog Pawar as_left_neighbours[2].u1_is_avail = 0;
6340*c83a76b0SSuyog Pawar
6341*c83a76b0SSuyog Pawar /*************************************************************************/
6342*c83a76b0SSuyog Pawar /* Initialize all the search results structure here. We update all the */
6343*c83a76b0SSuyog Pawar /* search results to default values, and configure things like blk sizes */
6344*c83a76b0SSuyog Pawar /*************************************************************************/
6345*c83a76b0SSuyog Pawar if(num_act_ref_pics)
6346*c83a76b0SSuyog Pawar {
6347*c83a76b0SSuyog Pawar S32 i4_x, i4_y;
6348*c83a76b0SSuyog Pawar /* 16x16 results */
6349*c83a76b0SSuyog Pawar for(i = 0; i < 16; i++)
6350*c83a76b0SSuyog Pawar {
6351*c83a76b0SSuyog Pawar search_results_t *ps_search_results;
6352*c83a76b0SSuyog Pawar S32 pred_lx;
6353*c83a76b0SSuyog Pawar ps_search_results = &ps_ctxt->as_search_results_16x16[i];
6354*c83a76b0SSuyog Pawar i4_x = (S32)gau1_encode_to_raster_x[i];
6355*c83a76b0SSuyog Pawar i4_y = (S32)gau1_encode_to_raster_y[i];
6356*c83a76b0SSuyog Pawar i4_x <<= 4;
6357*c83a76b0SSuyog Pawar i4_y <<= 4;
6358*c83a76b0SSuyog Pawar
6359*c83a76b0SSuyog Pawar hme_init_search_results(
6360*c83a76b0SSuyog Pawar ps_search_results,
6361*c83a76b0SSuyog Pawar i4_num_pred_dir,
6362*c83a76b0SSuyog Pawar ps_refine_prms->i4_num_fpel_results,
6363*c83a76b0SSuyog Pawar ps_refine_prms->i4_num_results_per_part,
6364*c83a76b0SSuyog Pawar e_search_blk_size,
6365*c83a76b0SSuyog Pawar i4_x,
6366*c83a76b0SSuyog Pawar i4_y,
6367*c83a76b0SSuyog Pawar &ps_ctxt->au1_is_past[0]);
6368*c83a76b0SSuyog Pawar
6369*c83a76b0SSuyog Pawar for(pred_lx = 0; pred_lx < 2; pred_lx++)
6370*c83a76b0SSuyog Pawar {
6371*c83a76b0SSuyog Pawar pred_ctxt_t *ps_pred_ctxt;
6372*c83a76b0SSuyog Pawar
6373*c83a76b0SSuyog Pawar ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6374*c83a76b0SSuyog Pawar
6375*c83a76b0SSuyog Pawar hme_init_pred_ctxt_encode(
6376*c83a76b0SSuyog Pawar ps_pred_ctxt,
6377*c83a76b0SSuyog Pawar ps_search_results,
6378*c83a76b0SSuyog Pawar ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6379*c83a76b0SSuyog Pawar ps_search_candts[ai4_id_Z[0]].ps_search_node,
6380*c83a76b0SSuyog Pawar aps_mv_grid[pred_lx],
6381*c83a76b0SSuyog Pawar pred_lx,
6382*c83a76b0SSuyog Pawar lambda_recon,
6383*c83a76b0SSuyog Pawar ps_refine_prms->lambda_q_shift,
6384*c83a76b0SSuyog Pawar &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6385*c83a76b0SSuyog Pawar &ps_ctxt->ai2_ref_scf[0]);
6386*c83a76b0SSuyog Pawar }
6387*c83a76b0SSuyog Pawar }
6388*c83a76b0SSuyog Pawar
6389*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
6390*c83a76b0SSuyog Pawar {
6391*c83a76b0SSuyog Pawar search_results_t *ps_search_results;
6392*c83a76b0SSuyog Pawar S32 pred_lx;
6393*c83a76b0SSuyog Pawar ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6394*c83a76b0SSuyog Pawar
6395*c83a76b0SSuyog Pawar i4_x = (S32)gau1_encode_to_raster_x[i];
6396*c83a76b0SSuyog Pawar i4_y = (S32)gau1_encode_to_raster_y[i];
6397*c83a76b0SSuyog Pawar i4_x <<= 5;
6398*c83a76b0SSuyog Pawar i4_y <<= 5;
6399*c83a76b0SSuyog Pawar
6400*c83a76b0SSuyog Pawar hme_init_search_results(
6401*c83a76b0SSuyog Pawar ps_search_results,
6402*c83a76b0SSuyog Pawar i4_num_pred_dir,
6403*c83a76b0SSuyog Pawar ps_refine_prms->i4_num_32x32_merge_results,
6404*c83a76b0SSuyog Pawar ps_refine_prms->i4_num_results_per_part,
6405*c83a76b0SSuyog Pawar BLK_32x32,
6406*c83a76b0SSuyog Pawar i4_x,
6407*c83a76b0SSuyog Pawar i4_y,
6408*c83a76b0SSuyog Pawar &ps_ctxt->au1_is_past[0]);
6409*c83a76b0SSuyog Pawar
6410*c83a76b0SSuyog Pawar for(pred_lx = 0; pred_lx < 2; pred_lx++)
6411*c83a76b0SSuyog Pawar {
6412*c83a76b0SSuyog Pawar pred_ctxt_t *ps_pred_ctxt;
6413*c83a76b0SSuyog Pawar
6414*c83a76b0SSuyog Pawar ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6415*c83a76b0SSuyog Pawar
6416*c83a76b0SSuyog Pawar hme_init_pred_ctxt_encode(
6417*c83a76b0SSuyog Pawar ps_pred_ctxt,
6418*c83a76b0SSuyog Pawar ps_search_results,
6419*c83a76b0SSuyog Pawar ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6420*c83a76b0SSuyog Pawar ps_search_candts[ai4_id_Z[0]].ps_search_node,
6421*c83a76b0SSuyog Pawar aps_mv_grid[pred_lx],
6422*c83a76b0SSuyog Pawar pred_lx,
6423*c83a76b0SSuyog Pawar lambda_recon,
6424*c83a76b0SSuyog Pawar ps_refine_prms->lambda_q_shift,
6425*c83a76b0SSuyog Pawar &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6426*c83a76b0SSuyog Pawar &ps_ctxt->ai2_ref_scf[0]);
6427*c83a76b0SSuyog Pawar }
6428*c83a76b0SSuyog Pawar }
6429*c83a76b0SSuyog Pawar
6430*c83a76b0SSuyog Pawar {
6431*c83a76b0SSuyog Pawar search_results_t *ps_search_results;
6432*c83a76b0SSuyog Pawar S32 pred_lx;
6433*c83a76b0SSuyog Pawar ps_search_results = &ps_ctxt->s_search_results_64x64;
6434*c83a76b0SSuyog Pawar
6435*c83a76b0SSuyog Pawar hme_init_search_results(
6436*c83a76b0SSuyog Pawar ps_search_results,
6437*c83a76b0SSuyog Pawar i4_num_pred_dir,
6438*c83a76b0SSuyog Pawar ps_refine_prms->i4_num_64x64_merge_results,
6439*c83a76b0SSuyog Pawar ps_refine_prms->i4_num_results_per_part,
6440*c83a76b0SSuyog Pawar BLK_64x64,
6441*c83a76b0SSuyog Pawar 0,
6442*c83a76b0SSuyog Pawar 0,
6443*c83a76b0SSuyog Pawar &ps_ctxt->au1_is_past[0]);
6444*c83a76b0SSuyog Pawar
6445*c83a76b0SSuyog Pawar for(pred_lx = 0; pred_lx < 2; pred_lx++)
6446*c83a76b0SSuyog Pawar {
6447*c83a76b0SSuyog Pawar pred_ctxt_t *ps_pred_ctxt;
6448*c83a76b0SSuyog Pawar
6449*c83a76b0SSuyog Pawar ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6450*c83a76b0SSuyog Pawar
6451*c83a76b0SSuyog Pawar hme_init_pred_ctxt_encode(
6452*c83a76b0SSuyog Pawar ps_pred_ctxt,
6453*c83a76b0SSuyog Pawar ps_search_results,
6454*c83a76b0SSuyog Pawar ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6455*c83a76b0SSuyog Pawar ps_search_candts[ai4_id_Z[0]].ps_search_node,
6456*c83a76b0SSuyog Pawar aps_mv_grid[pred_lx],
6457*c83a76b0SSuyog Pawar pred_lx,
6458*c83a76b0SSuyog Pawar lambda_recon,
6459*c83a76b0SSuyog Pawar ps_refine_prms->lambda_q_shift,
6460*c83a76b0SSuyog Pawar &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6461*c83a76b0SSuyog Pawar &ps_ctxt->ai2_ref_scf[0]);
6462*c83a76b0SSuyog Pawar }
6463*c83a76b0SSuyog Pawar }
6464*c83a76b0SSuyog Pawar }
6465*c83a76b0SSuyog Pawar
6466*c83a76b0SSuyog Pawar /* Initialise the structure used in clustering */
6467*c83a76b0SSuyog Pawar if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6468*c83a76b0SSuyog Pawar {
6469*c83a76b0SSuyog Pawar ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info;
6470*c83a76b0SSuyog Pawar
6471*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16;
6472*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32;
6473*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64;
6474*c83a76b0SSuyog Pawar ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask;
6475*c83a76b0SSuyog Pawar ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold;
6476*c83a76b0SSuyog Pawar ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep;
6477*c83a76b0SSuyog Pawar ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16;
6478*c83a76b0SSuyog Pawar }
6479*c83a76b0SSuyog Pawar
6480*c83a76b0SSuyog Pawar /*********************************************************************/
6481*c83a76b0SSuyog Pawar /* Initialize the dyn. search range params. for each reference index */
6482*c83a76b0SSuyog Pawar /* in current layer ctxt */
6483*c83a76b0SSuyog Pawar /*********************************************************************/
6484*c83a76b0SSuyog Pawar
6485*c83a76b0SSuyog Pawar /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
6486*c83a76b0SSuyog Pawar if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
6487*c83a76b0SSuyog Pawar {
6488*c83a76b0SSuyog Pawar WORD32 ref_ctr;
6489*c83a76b0SSuyog Pawar /* set no. of act ref in L0 for further use at frame level */
6490*c83a76b0SSuyog Pawar ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 =
6491*c83a76b0SSuyog Pawar ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6492*c83a76b0SSuyog Pawar
6493*c83a76b0SSuyog Pawar for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
6494*c83a76b0SSuyog Pawar {
6495*c83a76b0SSuyog Pawar INIT_DYN_SEARCH_PRMS(
6496*c83a76b0SSuyog Pawar &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr],
6497*c83a76b0SSuyog Pawar ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
6498*c83a76b0SSuyog Pawar }
6499*c83a76b0SSuyog Pawar }
6500*c83a76b0SSuyog Pawar /*************************************************************************/
6501*c83a76b0SSuyog Pawar /* Now that the candidates have been ordered, to choose the right number */
6502*c83a76b0SSuyog Pawar /* of initial candidates. */
6503*c83a76b0SSuyog Pawar /*************************************************************************/
6504*c83a76b0SSuyog Pawar if(blk_4x4_to_16x16)
6505*c83a76b0SSuyog Pawar {
6506*c83a76b0SSuyog Pawar if(i4_num_ref_prev_layer > 2)
6507*c83a76b0SSuyog Pawar {
6508*c83a76b0SSuyog Pawar if(e_search_complexity == SEARCH_CX_LOW)
6509*c83a76b0SSuyog Pawar num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6510*c83a76b0SSuyog Pawar else if(e_search_complexity == SEARCH_CX_MED)
6511*c83a76b0SSuyog Pawar num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6512*c83a76b0SSuyog Pawar else if(e_search_complexity == SEARCH_CX_HIGH)
6513*c83a76b0SSuyog Pawar num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6514*c83a76b0SSuyog Pawar else
6515*c83a76b0SSuyog Pawar ASSERT(0);
6516*c83a76b0SSuyog Pawar }
6517*c83a76b0SSuyog Pawar else if(i4_num_ref_prev_layer == 2)
6518*c83a76b0SSuyog Pawar {
6519*c83a76b0SSuyog Pawar if(e_search_complexity == SEARCH_CX_LOW)
6520*c83a76b0SSuyog Pawar num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6521*c83a76b0SSuyog Pawar else if(e_search_complexity == SEARCH_CX_MED)
6522*c83a76b0SSuyog Pawar num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6523*c83a76b0SSuyog Pawar else if(e_search_complexity == SEARCH_CX_HIGH)
6524*c83a76b0SSuyog Pawar num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6525*c83a76b0SSuyog Pawar else
6526*c83a76b0SSuyog Pawar ASSERT(0);
6527*c83a76b0SSuyog Pawar }
6528*c83a76b0SSuyog Pawar else
6529*c83a76b0SSuyog Pawar {
6530*c83a76b0SSuyog Pawar if(e_search_complexity == SEARCH_CX_LOW)
6531*c83a76b0SSuyog Pawar num_init_candts = 5;
6532*c83a76b0SSuyog Pawar else if(e_search_complexity == SEARCH_CX_MED)
6533*c83a76b0SSuyog Pawar num_init_candts = 12;
6534*c83a76b0SSuyog Pawar else if(e_search_complexity == SEARCH_CX_HIGH)
6535*c83a76b0SSuyog Pawar num_init_candts = 19;
6536*c83a76b0SSuyog Pawar else
6537*c83a76b0SSuyog Pawar ASSERT(0);
6538*c83a76b0SSuyog Pawar }
6539*c83a76b0SSuyog Pawar }
6540*c83a76b0SSuyog Pawar else
6541*c83a76b0SSuyog Pawar {
6542*c83a76b0SSuyog Pawar if(i4_num_ref_prev_layer > 2)
6543*c83a76b0SSuyog Pawar {
6544*c83a76b0SSuyog Pawar if(e_search_complexity == SEARCH_CX_LOW)
6545*c83a76b0SSuyog Pawar num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6546*c83a76b0SSuyog Pawar else if(e_search_complexity == SEARCH_CX_MED)
6547*c83a76b0SSuyog Pawar num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6548*c83a76b0SSuyog Pawar else if(e_search_complexity == SEARCH_CX_HIGH)
6549*c83a76b0SSuyog Pawar num_init_candts = 18 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6550*c83a76b0SSuyog Pawar else
6551*c83a76b0SSuyog Pawar ASSERT(0);
6552*c83a76b0SSuyog Pawar }
6553*c83a76b0SSuyog Pawar else if(i4_num_ref_prev_layer == 2)
6554*c83a76b0SSuyog Pawar {
6555*c83a76b0SSuyog Pawar if(e_search_complexity == SEARCH_CX_LOW)
6556*c83a76b0SSuyog Pawar num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6557*c83a76b0SSuyog Pawar else if(e_search_complexity == SEARCH_CX_MED)
6558*c83a76b0SSuyog Pawar num_init_candts = 11 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6559*c83a76b0SSuyog Pawar else if(e_search_complexity == SEARCH_CX_HIGH)
6560*c83a76b0SSuyog Pawar num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6561*c83a76b0SSuyog Pawar else
6562*c83a76b0SSuyog Pawar ASSERT(0);
6563*c83a76b0SSuyog Pawar }
6564*c83a76b0SSuyog Pawar else
6565*c83a76b0SSuyog Pawar {
6566*c83a76b0SSuyog Pawar if(e_search_complexity == SEARCH_CX_LOW)
6567*c83a76b0SSuyog Pawar num_init_candts = 5;
6568*c83a76b0SSuyog Pawar else if(e_search_complexity == SEARCH_CX_MED)
6569*c83a76b0SSuyog Pawar num_init_candts = 11;
6570*c83a76b0SSuyog Pawar else if(e_search_complexity == SEARCH_CX_HIGH)
6571*c83a76b0SSuyog Pawar num_init_candts = 16;
6572*c83a76b0SSuyog Pawar else
6573*c83a76b0SSuyog Pawar ASSERT(0);
6574*c83a76b0SSuyog Pawar }
6575*c83a76b0SSuyog Pawar }
6576*c83a76b0SSuyog Pawar
6577*c83a76b0SSuyog Pawar /*************************************************************************/
6578*c83a76b0SSuyog Pawar /* The following search parameters are fixed throughout the search across*/
6579*c83a76b0SSuyog Pawar /* all blks. So these are configured outside processing loop */
6580*c83a76b0SSuyog Pawar /*************************************************************************/
6581*c83a76b0SSuyog Pawar s_search_prms_blk.i4_num_init_candts = num_init_candts;
6582*c83a76b0SSuyog Pawar s_search_prms_blk.i4_start_step = 1;
6583*c83a76b0SSuyog Pawar s_search_prms_blk.i4_use_satd = 0;
6584*c83a76b0SSuyog Pawar s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
6585*c83a76b0SSuyog Pawar /* we use recon only for encoded layers, otherwise it is not available */
6586*c83a76b0SSuyog Pawar s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
6587*c83a76b0SSuyog Pawar
6588*c83a76b0SSuyog Pawar s_search_prms_blk.ps_search_candts = ps_search_candts;
6589*c83a76b0SSuyog Pawar if(s_search_prms_blk.i4_use_rec)
6590*c83a76b0SSuyog Pawar {
6591*c83a76b0SSuyog Pawar WORD32 ref_ctr;
6592*c83a76b0SSuyog Pawar for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6593*c83a76b0SSuyog Pawar s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr];
6594*c83a76b0SSuyog Pawar }
6595*c83a76b0SSuyog Pawar else
6596*c83a76b0SSuyog Pawar {
6597*c83a76b0SSuyog Pawar WORD32 ref_ctr;
6598*c83a76b0SSuyog Pawar for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6599*c83a76b0SSuyog Pawar s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr];
6600*c83a76b0SSuyog Pawar }
6601*c83a76b0SSuyog Pawar
6602*c83a76b0SSuyog Pawar /*************************************************************************/
6603*c83a76b0SSuyog Pawar /* Initialize coordinates. Meaning as follows */
6604*c83a76b0SSuyog Pawar /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */
6605*c83a76b0SSuyog Pawar /* blk_y : same as above, y coord. */
6606*c83a76b0SSuyog Pawar /* num_blks_in_this_ctb : number of blks in this given ctb that starts */
6607*c83a76b0SSuyog Pawar /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */
6608*c83a76b0SSuyog Pawar /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left */
6609*c83a76b0SSuyog Pawar /* corner of the picture. Always multiple of 64. */
6610*c83a76b0SSuyog Pawar /* blk_id_in_ctb : encode order id of the blk in the ctb. */
6611*c83a76b0SSuyog Pawar /*************************************************************************/
6612*c83a76b0SSuyog Pawar blk_y = 0;
6613*c83a76b0SSuyog Pawar blk_id_in_ctb = 0;
6614*c83a76b0SSuyog Pawar i4_ctb_y = 0;
6615*c83a76b0SSuyog Pawar
6616*c83a76b0SSuyog Pawar /*************************************************************************/
6617*c83a76b0SSuyog Pawar /* Picture limit on all 4 sides. This will be used to set mv limits for */
6618*c83a76b0SSuyog Pawar /* every block given its coordinate. Note this assumes that the min amt */
6619*c83a76b0SSuyog Pawar /* of padding to right of pic is equal to the blk size. If we go all the */
6620*c83a76b0SSuyog Pawar /* way upto 64x64, then the min padding on right side of picture should */
6621*c83a76b0SSuyog Pawar /* be 64, and also on bottom side of picture. */
6622*c83a76b0SSuyog Pawar /*************************************************************************/
6623*c83a76b0SSuyog Pawar SET_PIC_LIMIT(
6624*c83a76b0SSuyog Pawar s_pic_limit_inp,
6625*c83a76b0SSuyog Pawar ps_curr_layer->i4_pad_x_rec,
6626*c83a76b0SSuyog Pawar ps_curr_layer->i4_pad_y_rec,
6627*c83a76b0SSuyog Pawar ps_curr_layer->i4_wd,
6628*c83a76b0SSuyog Pawar ps_curr_layer->i4_ht,
6629*c83a76b0SSuyog Pawar s_search_prms_blk.i4_num_steps_post_refine);
6630*c83a76b0SSuyog Pawar
6631*c83a76b0SSuyog Pawar SET_PIC_LIMIT(
6632*c83a76b0SSuyog Pawar s_pic_limit_rec,
6633*c83a76b0SSuyog Pawar ps_curr_layer->i4_pad_x_rec,
6634*c83a76b0SSuyog Pawar ps_curr_layer->i4_pad_y_rec,
6635*c83a76b0SSuyog Pawar ps_curr_layer->i4_wd,
6636*c83a76b0SSuyog Pawar ps_curr_layer->i4_ht,
6637*c83a76b0SSuyog Pawar s_search_prms_blk.i4_num_steps_post_refine);
6638*c83a76b0SSuyog Pawar
6639*c83a76b0SSuyog Pawar /*************************************************************************/
6640*c83a76b0SSuyog Pawar /* set the MV limit per ref. pic. */
6641*c83a76b0SSuyog Pawar /* - P pic. : Based on the config params. */
6642*c83a76b0SSuyog Pawar /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
6643*c83a76b0SSuyog Pawar /*************************************************************************/
6644*c83a76b0SSuyog Pawar hme_set_mv_limit_using_dvsr_data(
6645*c83a76b0SSuyog Pawar ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics);
6646*c83a76b0SSuyog Pawar s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands;
6647*c83a76b0SSuyog Pawar s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6648*c83a76b0SSuyog Pawar s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6649*c83a76b0SSuyog Pawar s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer;
6650*c83a76b0SSuyog Pawar s_srch_cand_init_data.ps_curr_layer = ps_curr_layer;
6651*c83a76b0SSuyog Pawar s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts;
6652*c83a76b0SSuyog Pawar s_srch_cand_init_data.ps_search_cands = ps_search_candts;
6653*c83a76b0SSuyog Pawar s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store;
6654*c83a76b0SSuyog Pawar s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0;
6655*c83a76b0SSuyog Pawar s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1;
6656*c83a76b0SSuyog Pawar s_srch_cand_init_data.e_search_blk_size = e_search_blk_size;
6657*c83a76b0SSuyog Pawar
6658*c83a76b0SSuyog Pawar while(0 == end_of_frame)
6659*c83a76b0SSuyog Pawar {
6660*c83a76b0SSuyog Pawar job_queue_t *ps_job;
6661*c83a76b0SSuyog Pawar frm_ctb_ctxt_t *ps_frm_ctb_prms;
6662*c83a76b0SSuyog Pawar ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6663*c83a76b0SSuyog Pawar
6664*c83a76b0SSuyog Pawar WORD32 i4_max_mv_x_in_ctb;
6665*c83a76b0SSuyog Pawar WORD32 i4_max_mv_y_in_ctb;
6666*c83a76b0SSuyog Pawar void *pv_dep_mngr_encloop_dep_me;
6667*c83a76b0SSuyog Pawar WORD32 offset_val, check_dep_pos, set_dep_pos;
6668*c83a76b0SSuyog Pawar WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0;
6669*c83a76b0SSuyog Pawar
6670*c83a76b0SSuyog Pawar pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me;
6671*c83a76b0SSuyog Pawar
6672*c83a76b0SSuyog Pawar ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms;
6673*c83a76b0SSuyog Pawar
6674*c83a76b0SSuyog Pawar /* Get the current row from the job queue */
6675*c83a76b0SSuyog Pawar ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
6676*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id);
6677*c83a76b0SSuyog Pawar
6678*c83a76b0SSuyog Pawar /* If all rows are done, set the end of process flag to 1, */
6679*c83a76b0SSuyog Pawar /* and the current row to -1 */
6680*c83a76b0SSuyog Pawar if(NULL == ps_job)
6681*c83a76b0SSuyog Pawar {
6682*c83a76b0SSuyog Pawar blk_y = -1;
6683*c83a76b0SSuyog Pawar i4_ctb_y = -1;
6684*c83a76b0SSuyog Pawar tile_col_idx = -1;
6685*c83a76b0SSuyog Pawar end_of_frame = 1;
6686*c83a76b0SSuyog Pawar
6687*c83a76b0SSuyog Pawar continue;
6688*c83a76b0SSuyog Pawar }
6689*c83a76b0SSuyog Pawar
6690*c83a76b0SSuyog Pawar /* set the output dependency after picking up the row */
6691*c83a76b0SSuyog Pawar ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id);
6692*c83a76b0SSuyog Pawar
6693*c83a76b0SSuyog Pawar /* Obtain the current row's details from the job */
6694*c83a76b0SSuyog Pawar {
6695*c83a76b0SSuyog Pawar ihevce_tile_params_t *ps_col_tile_params;
6696*c83a76b0SSuyog Pawar
6697*c83a76b0SSuyog Pawar i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
6698*c83a76b0SSuyog Pawar /* Obtain the current column tile index from the job */
6699*c83a76b0SSuyog Pawar tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx;
6700*c83a76b0SSuyog Pawar
6701*c83a76b0SSuyog Pawar /* in encode layer block are 16x16 and CTB is 64 x 64 */
6702*c83a76b0SSuyog Pawar /* note if ctb is 32x32 then this calc needs to be changed */
6703*c83a76b0SSuyog Pawar num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6704*c83a76b0SSuyog Pawar ps_ctxt->log_ctb_size;
6705*c83a76b0SSuyog Pawar
6706*c83a76b0SSuyog Pawar /* The tile parameter for the col. idx. Use only the properties
6707*c83a76b0SSuyog Pawar which is same for all the bottom tiles like width, start_x, etc.
6708*c83a76b0SSuyog Pawar Don't use height, start_y, etc. */
6709*c83a76b0SSuyog Pawar ps_col_tile_params =
6710*c83a76b0SSuyog Pawar ((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx);
6711*c83a76b0SSuyog Pawar /* in encode layer block are 16x16 and CTB is 64 x 64 */
6712*c83a76b0SSuyog Pawar /* note if ctb is 32x32 then this calc needs to be changed */
6713*c83a76b0SSuyog Pawar num_sync_units_in_tile =
6714*c83a76b0SSuyog Pawar (ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6715*c83a76b0SSuyog Pawar ps_ctxt->log_ctb_size;
6716*c83a76b0SSuyog Pawar
6717*c83a76b0SSuyog Pawar i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x;
6718*c83a76b0SSuyog Pawar i4_ctb_x = i4_first_ctb_x;
6719*c83a76b0SSuyog Pawar
6720*c83a76b0SSuyog Pawar if(!num_act_ref_pics)
6721*c83a76b0SSuyog Pawar {
6722*c83a76b0SSuyog Pawar for(i4_ctb_x = i4_first_ctb_x;
6723*c83a76b0SSuyog Pawar i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile);
6724*c83a76b0SSuyog Pawar i4_ctb_x++)
6725*c83a76b0SSuyog Pawar {
6726*c83a76b0SSuyog Pawar S32 blk_i = 0, blk_j = 0;
6727*c83a76b0SSuyog Pawar /* set the dependency for the corresponding row in enc loop */
6728*c83a76b0SSuyog Pawar ihevce_dmgr_set_row_row_sync(
6729*c83a76b0SSuyog Pawar pv_dep_mngr_encloop_dep_me,
6730*c83a76b0SSuyog Pawar (i4_ctb_x + 1),
6731*c83a76b0SSuyog Pawar i4_ctb_y,
6732*c83a76b0SSuyog Pawar tile_col_idx /* Col Tile No. */);
6733*c83a76b0SSuyog Pawar }
6734*c83a76b0SSuyog Pawar
6735*c83a76b0SSuyog Pawar continue;
6736*c83a76b0SSuyog Pawar }
6737*c83a76b0SSuyog Pawar
6738*c83a76b0SSuyog Pawar /* increment the number of rows proc */
6739*c83a76b0SSuyog Pawar num_rows_proc++;
6740*c83a76b0SSuyog Pawar
6741*c83a76b0SSuyog Pawar /* Set Variables for Dep. Checking and Setting */
6742*c83a76b0SSuyog Pawar set_dep_pos = i4_ctb_y + 1;
6743*c83a76b0SSuyog Pawar if(i4_ctb_y > 0)
6744*c83a76b0SSuyog Pawar {
6745*c83a76b0SSuyog Pawar offset_val = 2;
6746*c83a76b0SSuyog Pawar check_dep_pos = i4_ctb_y - 1;
6747*c83a76b0SSuyog Pawar }
6748*c83a76b0SSuyog Pawar else
6749*c83a76b0SSuyog Pawar {
6750*c83a76b0SSuyog Pawar /* First row should run without waiting */
6751*c83a76b0SSuyog Pawar offset_val = -1;
6752*c83a76b0SSuyog Pawar check_dep_pos = 0;
6753*c83a76b0SSuyog Pawar }
6754*c83a76b0SSuyog Pawar
6755*c83a76b0SSuyog Pawar /* row ctb out pointer */
6756*c83a76b0SSuyog Pawar ps_ctxt->ps_ctb_analyse_curr_row =
6757*c83a76b0SSuyog Pawar ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6758*c83a76b0SSuyog Pawar
6759*c83a76b0SSuyog Pawar /* Row level CU Tree buffer */
6760*c83a76b0SSuyog Pawar ps_ctxt->ps_cu_tree_curr_row =
6761*c83a76b0SSuyog Pawar ps_ctxt->ps_cu_tree_base +
6762*c83a76b0SSuyog Pawar i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE;
6763*c83a76b0SSuyog Pawar
6764*c83a76b0SSuyog Pawar ps_ctxt->ps_me_ctb_data_curr_row =
6765*c83a76b0SSuyog Pawar ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6766*c83a76b0SSuyog Pawar }
6767*c83a76b0SSuyog Pawar
6768*c83a76b0SSuyog Pawar /* This flag says the CTB under processing is at the start of tile in horz dir.*/
6769*c83a76b0SSuyog Pawar left_ctb_in_diff_tile = 1;
6770*c83a76b0SSuyog Pawar
6771*c83a76b0SSuyog Pawar /* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var, */
6772*c83a76b0SSuyog Pawar /* the shift value will be passed onto the functions wherever inv_wt is used so that inv_wt is appropriately shifted and multiplied */
6773*c83a76b0SSuyog Pawar {
6774*c83a76b0SSuyog Pawar S32 i4_ref_id, i4_bits_req;
6775*c83a76b0SSuyog Pawar
6776*c83a76b0SSuyog Pawar for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
6777*c83a76b0SSuyog Pawar ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
6778*c83a76b0SSuyog Pawar i4_ref_id++)
6779*c83a76b0SSuyog Pawar {
6780*c83a76b0SSuyog Pawar GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]);
6781*c83a76b0SSuyog Pawar
6782*c83a76b0SSuyog Pawar if(i4_bits_req > 12)
6783*c83a76b0SSuyog Pawar {
6784*c83a76b0SSuyog Pawar ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12);
6785*c83a76b0SSuyog Pawar }
6786*c83a76b0SSuyog Pawar else
6787*c83a76b0SSuyog Pawar {
6788*c83a76b0SSuyog Pawar ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0;
6789*c83a76b0SSuyog Pawar }
6790*c83a76b0SSuyog Pawar }
6791*c83a76b0SSuyog Pawar
6792*c83a76b0SSuyog Pawar s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val;
6793*c83a76b0SSuyog Pawar }
6794*c83a76b0SSuyog Pawar
6795*c83a76b0SSuyog Pawar /* if non-encode layer then i4_ctb_x will be same as blk_x */
6796*c83a76b0SSuyog Pawar /* loop over all the units in a row */
6797*c83a76b0SSuyog Pawar for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile);
6798*c83a76b0SSuyog Pawar i4_ctb_x++)
6799*c83a76b0SSuyog Pawar {
6800*c83a76b0SSuyog Pawar ihevce_ctb_noise_params *ps_ctb_noise_params =
6801*c83a76b0SSuyog Pawar &ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params;
6802*c83a76b0SSuyog Pawar
6803*c83a76b0SSuyog Pawar s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6;
6804*c83a76b0SSuyog Pawar s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6;
6805*c83a76b0SSuyog Pawar
6806*c83a76b0SSuyog Pawar ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6;
6807*c83a76b0SSuyog Pawar ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6;
6808*c83a76b0SSuyog Pawar /* Initialize ptr to current IPE CTB */
6809*c83a76b0SSuyog Pawar ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x +
6810*c83a76b0SSuyog Pawar i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6811*c83a76b0SSuyog Pawar {
6812*c83a76b0SSuyog Pawar ps_ctb_bound_attrs =
6813*c83a76b0SSuyog Pawar get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt);
6814*c83a76b0SSuyog Pawar
6815*c83a76b0SSuyog Pawar en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag;
6816*c83a76b0SSuyog Pawar num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb;
6817*c83a76b0SSuyog Pawar }
6818*c83a76b0SSuyog Pawar
6819*c83a76b0SSuyog Pawar /* Block to initialise pointers to part_type_results_t */
6820*c83a76b0SSuyog Pawar /* in each size-specific inter_cu_results_t */
6821*c83a76b0SSuyog Pawar {
6822*c83a76b0SSuyog Pawar WORD32 i;
6823*c83a76b0SSuyog Pawar
6824*c83a76b0SSuyog Pawar for(i = 0; i < 64; i++)
6825*c83a76b0SSuyog Pawar {
6826*c83a76b0SSuyog Pawar ps_ctxt->as_cu8x8_results[i].ps_best_results =
6827*c83a76b0SSuyog Pawar ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6828*c83a76b0SSuyog Pawar .as_8x8_block_data[i]
6829*c83a76b0SSuyog Pawar .as_best_results;
6830*c83a76b0SSuyog Pawar ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0;
6831*c83a76b0SSuyog Pawar }
6832*c83a76b0SSuyog Pawar
6833*c83a76b0SSuyog Pawar for(i = 0; i < 16; i++)
6834*c83a76b0SSuyog Pawar {
6835*c83a76b0SSuyog Pawar ps_ctxt->as_cu16x16_results[i].ps_best_results =
6836*c83a76b0SSuyog Pawar ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results;
6837*c83a76b0SSuyog Pawar ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0;
6838*c83a76b0SSuyog Pawar }
6839*c83a76b0SSuyog Pawar
6840*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
6841*c83a76b0SSuyog Pawar {
6842*c83a76b0SSuyog Pawar ps_ctxt->as_cu32x32_results[i].ps_best_results =
6843*c83a76b0SSuyog Pawar ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6844*c83a76b0SSuyog Pawar .as_32x32_block_data[i]
6845*c83a76b0SSuyog Pawar .as_best_results;
6846*c83a76b0SSuyog Pawar ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0;
6847*c83a76b0SSuyog Pawar }
6848*c83a76b0SSuyog Pawar
6849*c83a76b0SSuyog Pawar ps_ctxt->s_cu64x64_results.ps_best_results =
6850*c83a76b0SSuyog Pawar ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results;
6851*c83a76b0SSuyog Pawar ps_ctxt->s_cu64x64_results.u1_num_best_results = 0;
6852*c83a76b0SSuyog Pawar }
6853*c83a76b0SSuyog Pawar
6854*c83a76b0SSuyog Pawar if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6855*c83a76b0SSuyog Pawar {
6856*c83a76b0SSuyog Pawar ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32;
6857*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cur_ipe_ctb = ps_cur_ipe_ctb;
6858*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root =
6859*c83a76b0SSuyog Pawar ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
6860*c83a76b0SSuyog Pawar ps_ctb_cluster_info->nodes_created_in_cu_tree = 1;
6861*c83a76b0SSuyog Pawar }
6862*c83a76b0SSuyog Pawar
6863*c83a76b0SSuyog Pawar if(ME_PRISTINE_QUALITY != e_me_quality_presets)
6864*c83a76b0SSuyog Pawar {
6865*c83a76b0SSuyog Pawar S32 i4_nodes_created_in_cu_tree = 1;
6866*c83a76b0SSuyog Pawar
6867*c83a76b0SSuyog Pawar ihevce_cu_tree_init(
6868*c83a76b0SSuyog Pawar (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6869*c83a76b0SSuyog Pawar (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6870*c83a76b0SSuyog Pawar &i4_nodes_created_in_cu_tree,
6871*c83a76b0SSuyog Pawar 0,
6872*c83a76b0SSuyog Pawar POS_NA,
6873*c83a76b0SSuyog Pawar POS_NA,
6874*c83a76b0SSuyog Pawar POS_NA);
6875*c83a76b0SSuyog Pawar }
6876*c83a76b0SSuyog Pawar
6877*c83a76b0SSuyog Pawar memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32));
6878*c83a76b0SSuyog Pawar
6879*c83a76b0SSuyog Pawar if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
6880*c83a76b0SSuyog Pawar {
6881*c83a76b0SSuyog Pawar S32 j;
6882*c83a76b0SSuyog Pawar
6883*c83a76b0SSuyog Pawar ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6884*c83a76b0SSuyog Pawar
6885*c83a76b0SSuyog Pawar ps_cur_ipe_ctb =
6886*c83a76b0SSuyog Pawar ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row;
6887*c83a76b0SSuyog Pawar lambda_recon =
6888*c83a76b0SSuyog Pawar hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb);
6889*c83a76b0SSuyog Pawar
6890*c83a76b0SSuyog Pawar lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f);
6891*c83a76b0SSuyog Pawar
6892*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
6893*c83a76b0SSuyog Pawar {
6894*c83a76b0SSuyog Pawar ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6895*c83a76b0SSuyog Pawar
6896*c83a76b0SSuyog Pawar for(j = 0; j < 2; j++)
6897*c83a76b0SSuyog Pawar {
6898*c83a76b0SSuyog Pawar ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6899*c83a76b0SSuyog Pawar }
6900*c83a76b0SSuyog Pawar }
6901*c83a76b0SSuyog Pawar ps_search_results = &ps_ctxt->s_search_results_64x64;
6902*c83a76b0SSuyog Pawar
6903*c83a76b0SSuyog Pawar for(j = 0; j < 2; j++)
6904*c83a76b0SSuyog Pawar {
6905*c83a76b0SSuyog Pawar ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6906*c83a76b0SSuyog Pawar }
6907*c83a76b0SSuyog Pawar
6908*c83a76b0SSuyog Pawar s_common_frm_prms.i4_lamda = lambda_recon;
6909*c83a76b0SSuyog Pawar }
6910*c83a76b0SSuyog Pawar else
6911*c83a76b0SSuyog Pawar {
6912*c83a76b0SSuyog Pawar lambda_recon = ps_refine_prms->lambda_recon;
6913*c83a76b0SSuyog Pawar }
6914*c83a76b0SSuyog Pawar
6915*c83a76b0SSuyog Pawar /*********************************************************************/
6916*c83a76b0SSuyog Pawar /* replicate the inp buffer at blk or ctb level for each ref id, */
6917*c83a76b0SSuyog Pawar /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
6918*c83a76b0SSuyog Pawar /* thereby avoiding a bloat up of memory. If we did all references */
6919*c83a76b0SSuyog Pawar /* weighted pred, we will end up with a duplicate copy of each ref */
6920*c83a76b0SSuyog Pawar /* at each layer, since we need to preserve the original reference. */
6921*c83a76b0SSuyog Pawar /* ToDo: Need to observe performance with this mechanism and compare */
6922*c83a76b0SSuyog Pawar /* with case where ref is weighted. */
6923*c83a76b0SSuyog Pawar /*********************************************************************/
6924*c83a76b0SSuyog Pawar fp_get_wt_inp(
6925*c83a76b0SSuyog Pawar ps_curr_layer,
6926*c83a76b0SSuyog Pawar &ps_ctxt->s_wt_pred,
6927*c83a76b0SSuyog Pawar unit_size,
6928*c83a76b0SSuyog Pawar s_common_frm_prms.i4_ctb_x_off,
6929*c83a76b0SSuyog Pawar s_common_frm_prms.i4_ctb_y_off,
6930*c83a76b0SSuyog Pawar unit_size,
6931*c83a76b0SSuyog Pawar ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
6932*c83a76b0SSuyog Pawar ps_ctxt->i4_wt_pred_enable_flag);
6933*c83a76b0SSuyog Pawar
6934*c83a76b0SSuyog Pawar if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled)
6935*c83a76b0SSuyog Pawar {
6936*c83a76b0SSuyog Pawar #if TEMPORAL_NOISE_DETECT
6937*c83a76b0SSuyog Pawar {
6938*c83a76b0SSuyog Pawar WORD32 had_block_size = 16;
6939*c83a76b0SSuyog Pawar WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
6940*c83a76b0SSuyog Pawar ? 64
6941*c83a76b0SSuyog Pawar : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
6942*c83a76b0SSuyog Pawar WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
6943*c83a76b0SSuyog Pawar ? 64
6944*c83a76b0SSuyog Pawar : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
6945*c83a76b0SSuyog Pawar WORD32 num_pred_dir = i4_num_pred_dir;
6946*c83a76b0SSuyog Pawar WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off;
6947*c83a76b0SSuyog Pawar WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off;
6948*c83a76b0SSuyog Pawar
6949*c83a76b0SSuyog Pawar WORD32 i;
6950*c83a76b0SSuyog Pawar WORD32 noise_detected;
6951*c83a76b0SSuyog Pawar WORD32 ctb_size;
6952*c83a76b0SSuyog Pawar WORD32 num_comp_had_blocks;
6953*c83a76b0SSuyog Pawar WORD32 noisy_block_cnt;
6954*c83a76b0SSuyog Pawar WORD32 index_8x8_block;
6955*c83a76b0SSuyog Pawar WORD32 num_8x8_in_ctb_row;
6956*c83a76b0SSuyog Pawar
6957*c83a76b0SSuyog Pawar WORD32 ht_offset;
6958*c83a76b0SSuyog Pawar WORD32 wd_offset;
6959*c83a76b0SSuyog Pawar WORD32 block_ht;
6960*c83a76b0SSuyog Pawar WORD32 block_wd;
6961*c83a76b0SSuyog Pawar
6962*c83a76b0SSuyog Pawar WORD32 num_horz_blocks;
6963*c83a76b0SSuyog Pawar WORD32 num_vert_blocks;
6964*c83a76b0SSuyog Pawar
6965*c83a76b0SSuyog Pawar WORD32 mean;
6966*c83a76b0SSuyog Pawar UWORD32 variance_8x8;
6967*c83a76b0SSuyog Pawar
6968*c83a76b0SSuyog Pawar WORD32 hh_energy_percent;
6969*c83a76b0SSuyog Pawar
6970*c83a76b0SSuyog Pawar /* variables to hold the constant values. The variable values held are decided by the HAD block size */
6971*c83a76b0SSuyog Pawar WORD32 min_noisy_block_cnt;
6972*c83a76b0SSuyog Pawar WORD32 min_coeffs_above_avg;
6973*c83a76b0SSuyog Pawar WORD32 min_coeff_avg_energy;
6974*c83a76b0SSuyog Pawar
6975*c83a76b0SSuyog Pawar /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. block */
6976*c83a76b0SSuyog Pawar WORD32 i4_cu_x_off, i4_cu_y_off;
6977*c83a76b0SSuyog Pawar WORD32 is_noisy;
6978*c83a76b0SSuyog Pawar
6979*c83a76b0SSuyog Pawar /* initialise the variables holding the constants */
6980*c83a76b0SSuyog Pawar if(had_block_size == 8)
6981*c83a76b0SSuyog Pawar {
6982*c83a76b0SSuyog Pawar min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8; //6;//
6983*c83a76b0SSuyog Pawar min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8;
6984*c83a76b0SSuyog Pawar min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8;
6985*c83a76b0SSuyog Pawar }
6986*c83a76b0SSuyog Pawar else
6987*c83a76b0SSuyog Pawar {
6988*c83a76b0SSuyog Pawar min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16; //7;//
6989*c83a76b0SSuyog Pawar min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16;
6990*c83a76b0SSuyog Pawar min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16;
6991*c83a76b0SSuyog Pawar }
6992*c83a76b0SSuyog Pawar
6993*c83a76b0SSuyog Pawar /* initialize the variables */
6994*c83a76b0SSuyog Pawar noise_detected = 0;
6995*c83a76b0SSuyog Pawar noisy_block_cnt = 0;
6996*c83a76b0SSuyog Pawar hh_energy_percent = 0;
6997*c83a76b0SSuyog Pawar variance_8x8 = 0;
6998*c83a76b0SSuyog Pawar block_ht = ctb_height;
6999*c83a76b0SSuyog Pawar block_wd = ctb_width;
7000*c83a76b0SSuyog Pawar
7001*c83a76b0SSuyog Pawar mean = 0;
7002*c83a76b0SSuyog Pawar
7003*c83a76b0SSuyog Pawar ctb_size = block_ht * block_wd; //ctb_width * ctb_height;
7004*c83a76b0SSuyog Pawar num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
7005*c83a76b0SSuyog Pawar
7006*c83a76b0SSuyog Pawar num_horz_blocks = block_wd / had_block_size; //ctb_width / had_block_size;
7007*c83a76b0SSuyog Pawar num_vert_blocks = block_ht / had_block_size; //ctb_height / had_block_size;
7008*c83a76b0SSuyog Pawar
7009*c83a76b0SSuyog Pawar ht_offset = -had_block_size;
7010*c83a76b0SSuyog Pawar wd_offset = -had_block_size;
7011*c83a76b0SSuyog Pawar
7012*c83a76b0SSuyog Pawar num_8x8_in_ctb_row = block_wd / 8; // number of 8x8 in this ctb
7013*c83a76b0SSuyog Pawar for(i = 0; i < num_comp_had_blocks; i++)
7014*c83a76b0SSuyog Pawar {
7015*c83a76b0SSuyog Pawar if(i % num_horz_blocks == 0)
7016*c83a76b0SSuyog Pawar {
7017*c83a76b0SSuyog Pawar wd_offset = -had_block_size;
7018*c83a76b0SSuyog Pawar ht_offset += had_block_size;
7019*c83a76b0SSuyog Pawar }
7020*c83a76b0SSuyog Pawar wd_offset += had_block_size;
7021*c83a76b0SSuyog Pawar
7022*c83a76b0SSuyog Pawar /* CU level offsets */
7023*c83a76b0SSuyog Pawar i4_cu_x_off = i4_x_off + (i % 4) * 16; //+ (i % 4) * 16
7024*c83a76b0SSuyog Pawar i4_cu_y_off = i4_y_off + (i / 4) * 16;
7025*c83a76b0SSuyog Pawar
7026*c83a76b0SSuyog Pawar /* if 50 % or more of the CU is noisy then the return value is 1 */
7027*c83a76b0SSuyog Pawar is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7028*c83a76b0SSuyog Pawar ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7029*c83a76b0SSuyog Pawar (i % 4) * 16,
7030*c83a76b0SSuyog Pawar (i / 4) * 16,
7031*c83a76b0SSuyog Pawar 16);
7032*c83a76b0SSuyog Pawar
7033*c83a76b0SSuyog Pawar /* only if the CU is noisy then check the temporal noise detect call is made on the CU */
7034*c83a76b0SSuyog Pawar if(is_noisy)
7035*c83a76b0SSuyog Pawar {
7036*c83a76b0SSuyog Pawar index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row +
7037*c83a76b0SSuyog Pawar (i % num_horz_blocks) * 2;
7038*c83a76b0SSuyog Pawar noisy_block_cnt += ihevce_16x16block_temporal_noise_detect(
7039*c83a76b0SSuyog Pawar 16,
7040*c83a76b0SSuyog Pawar ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
7041*c83a76b0SSuyog Pawar ? 64
7042*c83a76b0SSuyog Pawar : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off,
7043*c83a76b0SSuyog Pawar ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
7044*c83a76b0SSuyog Pawar ? 64
7045*c83a76b0SSuyog Pawar : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off,
7046*c83a76b0SSuyog Pawar ps_ctb_noise_params,
7047*c83a76b0SSuyog Pawar &s_srch_cand_init_data,
7048*c83a76b0SSuyog Pawar &s_search_prms_blk,
7049*c83a76b0SSuyog Pawar ps_ctxt,
7050*c83a76b0SSuyog Pawar num_pred_dir,
7051*c83a76b0SSuyog Pawar i4_num_act_ref_l0,
7052*c83a76b0SSuyog Pawar i4_num_act_ref_l1,
7053*c83a76b0SSuyog Pawar i4_cu_x_off,
7054*c83a76b0SSuyog Pawar i4_cu_y_off,
7055*c83a76b0SSuyog Pawar &ps_ctxt->s_wt_pred,
7056*c83a76b0SSuyog Pawar unit_size,
7057*c83a76b0SSuyog Pawar index_8x8_block,
7058*c83a76b0SSuyog Pawar num_horz_blocks,
7059*c83a76b0SSuyog Pawar /*num_8x8_in_ctb_row*/ 8, // this should be a variable extra
7060*c83a76b0SSuyog Pawar i);
7061*c83a76b0SSuyog Pawar } /* if 16x16 is noisy */
7062*c83a76b0SSuyog Pawar } /* loop over for all 16x16*/
7063*c83a76b0SSuyog Pawar
7064*c83a76b0SSuyog Pawar if(noisy_block_cnt >= min_noisy_block_cnt)
7065*c83a76b0SSuyog Pawar {
7066*c83a76b0SSuyog Pawar noise_detected = 1;
7067*c83a76b0SSuyog Pawar }
7068*c83a76b0SSuyog Pawar
7069*c83a76b0SSuyog Pawar /* write back the noise presence detected for the current CTB to the structure */
7070*c83a76b0SSuyog Pawar ps_ctb_noise_params->i4_noise_present = noise_detected;
7071*c83a76b0SSuyog Pawar }
7072*c83a76b0SSuyog Pawar #endif
7073*c83a76b0SSuyog Pawar
7074*c83a76b0SSuyog Pawar #if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME
7075*c83a76b0SSuyog Pawar if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled &&
7076*c83a76b0SSuyog Pawar ps_ctb_noise_params->i4_noise_present)
7077*c83a76b0SSuyog Pawar {
7078*c83a76b0SSuyog Pawar memset(
7079*c83a76b0SSuyog Pawar ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7080*c83a76b0SSuyog Pawar 1,
7081*c83a76b0SSuyog Pawar sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7082*c83a76b0SSuyog Pawar }
7083*c83a76b0SSuyog Pawar #endif
7084*c83a76b0SSuyog Pawar
7085*c83a76b0SSuyog Pawar for(i = 0; i < 16; i++)
7086*c83a76b0SSuyog Pawar {
7087*c83a76b0SSuyog Pawar au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7088*c83a76b0SSuyog Pawar ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16);
7089*c83a76b0SSuyog Pawar }
7090*c83a76b0SSuyog Pawar
7091*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
7092*c83a76b0SSuyog Pawar {
7093*c83a76b0SSuyog Pawar au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7094*c83a76b0SSuyog Pawar ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32);
7095*c83a76b0SSuyog Pawar }
7096*c83a76b0SSuyog Pawar
7097*c83a76b0SSuyog Pawar for(i = 0; i < 1; i++)
7098*c83a76b0SSuyog Pawar {
7099*c83a76b0SSuyog Pawar au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7100*c83a76b0SSuyog Pawar ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64);
7101*c83a76b0SSuyog Pawar }
7102*c83a76b0SSuyog Pawar
7103*c83a76b0SSuyog Pawar if(ps_ctxt->s_frm_prms.bidir_enabled &&
7104*c83a76b0SSuyog Pawar (ps_ctxt->s_frm_prms.i4_temporal_layer_id <=
7105*c83a76b0SSuyog Pawar MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION))
7106*c83a76b0SSuyog Pawar {
7107*c83a76b0SSuyog Pawar ps_ctb_noise_params->i4_noise_present = 0;
7108*c83a76b0SSuyog Pawar memset(
7109*c83a76b0SSuyog Pawar ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7110*c83a76b0SSuyog Pawar 0,
7111*c83a76b0SSuyog Pawar sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7112*c83a76b0SSuyog Pawar }
7113*c83a76b0SSuyog Pawar
7114*c83a76b0SSuyog Pawar #if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7115*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
7116*c83a76b0SSuyog Pawar {
7117*c83a76b0SSuyog Pawar S32 j;
7118*c83a76b0SSuyog Pawar S32 lambda;
7119*c83a76b0SSuyog Pawar
7120*c83a76b0SSuyog Pawar if(au1_is_32x32Blk_noisy[i])
7121*c83a76b0SSuyog Pawar {
7122*c83a76b0SSuyog Pawar lambda = lambda_recon;
7123*c83a76b0SSuyog Pawar lambda =
7124*c83a76b0SSuyog Pawar ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7125*c83a76b0SSuyog Pawar
7126*c83a76b0SSuyog Pawar ps_search_results = &ps_ctxt->as_search_results_32x32[i];
7127*c83a76b0SSuyog Pawar
7128*c83a76b0SSuyog Pawar for(j = 0; j < 2; j++)
7129*c83a76b0SSuyog Pawar {
7130*c83a76b0SSuyog Pawar ps_search_results->as_pred_ctxt[j].lambda = lambda;
7131*c83a76b0SSuyog Pawar }
7132*c83a76b0SSuyog Pawar }
7133*c83a76b0SSuyog Pawar }
7134*c83a76b0SSuyog Pawar
7135*c83a76b0SSuyog Pawar {
7136*c83a76b0SSuyog Pawar S32 j;
7137*c83a76b0SSuyog Pawar S32 lambda;
7138*c83a76b0SSuyog Pawar
7139*c83a76b0SSuyog Pawar if(au1_is_64x64Blk_noisy[0])
7140*c83a76b0SSuyog Pawar {
7141*c83a76b0SSuyog Pawar lambda = lambda_recon;
7142*c83a76b0SSuyog Pawar lambda =
7143*c83a76b0SSuyog Pawar ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7144*c83a76b0SSuyog Pawar
7145*c83a76b0SSuyog Pawar ps_search_results = &ps_ctxt->s_search_results_64x64;
7146*c83a76b0SSuyog Pawar
7147*c83a76b0SSuyog Pawar for(j = 0; j < 2; j++)
7148*c83a76b0SSuyog Pawar {
7149*c83a76b0SSuyog Pawar ps_search_results->as_pred_ctxt[j].lambda = lambda;
7150*c83a76b0SSuyog Pawar }
7151*c83a76b0SSuyog Pawar }
7152*c83a76b0SSuyog Pawar }
7153*c83a76b0SSuyog Pawar #endif
7154*c83a76b0SSuyog Pawar if(au1_is_64x64Blk_noisy[0])
7155*c83a76b0SSuyog Pawar {
7156*c83a76b0SSuyog Pawar U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7157*c83a76b0SSuyog Pawar (s_common_frm_prms.i4_ctb_y_off *
7158*c83a76b0SSuyog Pawar ps_curr_layer->i4_inp_stride));
7159*c83a76b0SSuyog Pawar
7160*c83a76b0SSuyog Pawar hme_compute_sigmaX_and_sigmaXSquared(
7161*c83a76b0SSuyog Pawar pu1_inp,
7162*c83a76b0SSuyog Pawar ps_curr_layer->i4_inp_stride,
7163*c83a76b0SSuyog Pawar ps_ctxt->au4_4x4_src_sigmaX,
7164*c83a76b0SSuyog Pawar ps_ctxt->au4_4x4_src_sigmaXSquared,
7165*c83a76b0SSuyog Pawar 4,
7166*c83a76b0SSuyog Pawar 4,
7167*c83a76b0SSuyog Pawar 64,
7168*c83a76b0SSuyog Pawar 64,
7169*c83a76b0SSuyog Pawar 1,
7170*c83a76b0SSuyog Pawar 16);
7171*c83a76b0SSuyog Pawar }
7172*c83a76b0SSuyog Pawar else
7173*c83a76b0SSuyog Pawar {
7174*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
7175*c83a76b0SSuyog Pawar {
7176*c83a76b0SSuyog Pawar if(au1_is_32x32Blk_noisy[i])
7177*c83a76b0SSuyog Pawar {
7178*c83a76b0SSuyog Pawar U08 *pu1_inp =
7179*c83a76b0SSuyog Pawar ps_curr_layer->pu1_inp +
7180*c83a76b0SSuyog Pawar (s_common_frm_prms.i4_ctb_x_off +
7181*c83a76b0SSuyog Pawar (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride));
7182*c83a76b0SSuyog Pawar
7183*c83a76b0SSuyog Pawar U08 u1_cu_size = 32;
7184*c83a76b0SSuyog Pawar WORD32 i4_inp_buf_offset =
7185*c83a76b0SSuyog Pawar (((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) +
7186*c83a76b0SSuyog Pawar ((i % 2) * u1_cu_size));
7187*c83a76b0SSuyog Pawar
7188*c83a76b0SSuyog Pawar U16 u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128;
7189*c83a76b0SSuyog Pawar U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8;
7190*c83a76b0SSuyog Pawar S32 i4_sigma_arr_offset =
7191*c83a76b0SSuyog Pawar (((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) +
7192*c83a76b0SSuyog Pawar ((i % 2) * u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb));
7193*c83a76b0SSuyog Pawar
7194*c83a76b0SSuyog Pawar hme_compute_sigmaX_and_sigmaXSquared(
7195*c83a76b0SSuyog Pawar pu1_inp + i4_inp_buf_offset,
7196*c83a76b0SSuyog Pawar ps_curr_layer->i4_inp_stride,
7197*c83a76b0SSuyog Pawar ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset,
7198*c83a76b0SSuyog Pawar ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset,
7199*c83a76b0SSuyog Pawar 4,
7200*c83a76b0SSuyog Pawar 4,
7201*c83a76b0SSuyog Pawar 32,
7202*c83a76b0SSuyog Pawar 32,
7203*c83a76b0SSuyog Pawar 1,
7204*c83a76b0SSuyog Pawar 16);
7205*c83a76b0SSuyog Pawar }
7206*c83a76b0SSuyog Pawar else
7207*c83a76b0SSuyog Pawar {
7208*c83a76b0SSuyog Pawar S32 j;
7209*c83a76b0SSuyog Pawar
7210*c83a76b0SSuyog Pawar U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8;
7211*c83a76b0SSuyog Pawar U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2;
7212*c83a76b0SSuyog Pawar S32 i4_16x16_blk_start_index_in_i_th_32x32_blk =
7213*c83a76b0SSuyog Pawar (((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) +
7214*c83a76b0SSuyog Pawar ((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb));
7215*c83a76b0SSuyog Pawar
7216*c83a76b0SSuyog Pawar for(j = 0; j < 4; j++)
7217*c83a76b0SSuyog Pawar {
7218*c83a76b0SSuyog Pawar U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4;
7219*c83a76b0SSuyog Pawar U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1;
7220*c83a76b0SSuyog Pawar S32 i4_16x16_blk_index_in_ctb =
7221*c83a76b0SSuyog Pawar i4_16x16_blk_start_index_in_i_th_32x32_blk +
7222*c83a76b0SSuyog Pawar ((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) +
7223*c83a76b0SSuyog Pawar ((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk);
7224*c83a76b0SSuyog Pawar
7225*c83a76b0SSuyog Pawar //S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4);
7226*c83a76b0SSuyog Pawar
7227*c83a76b0SSuyog Pawar if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb])
7228*c83a76b0SSuyog Pawar {
7229*c83a76b0SSuyog Pawar U08 *pu1_inp =
7230*c83a76b0SSuyog Pawar ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7231*c83a76b0SSuyog Pawar (s_common_frm_prms.i4_ctb_y_off *
7232*c83a76b0SSuyog Pawar ps_curr_layer->i4_inp_stride));
7233*c83a76b0SSuyog Pawar
7234*c83a76b0SSuyog Pawar U08 u1_cu_size = 16;
7235*c83a76b0SSuyog Pawar WORD32 i4_inp_buf_offset =
7236*c83a76b0SSuyog Pawar (((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) +
7237*c83a76b0SSuyog Pawar ((i4_16x16_blk_index_in_ctb / 4) *
7238*c83a76b0SSuyog Pawar (u1_cu_size * ps_curr_layer->i4_inp_stride)));
7239*c83a76b0SSuyog Pawar
7240*c83a76b0SSuyog Pawar U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64;
7241*c83a76b0SSuyog Pawar U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4;
7242*c83a76b0SSuyog Pawar S32 i4_sigma_arr_offset =
7243*c83a76b0SSuyog Pawar (((i4_16x16_blk_index_in_ctb % 4) *
7244*c83a76b0SSuyog Pawar u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) +
7245*c83a76b0SSuyog Pawar ((i4_16x16_blk_index_in_ctb / 4) *
7246*c83a76b0SSuyog Pawar u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk));
7247*c83a76b0SSuyog Pawar
7248*c83a76b0SSuyog Pawar hme_compute_sigmaX_and_sigmaXSquared(
7249*c83a76b0SSuyog Pawar pu1_inp + i4_inp_buf_offset,
7250*c83a76b0SSuyog Pawar ps_curr_layer->i4_inp_stride,
7251*c83a76b0SSuyog Pawar (ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset),
7252*c83a76b0SSuyog Pawar (ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset),
7253*c83a76b0SSuyog Pawar 4,
7254*c83a76b0SSuyog Pawar 4,
7255*c83a76b0SSuyog Pawar 16,
7256*c83a76b0SSuyog Pawar 16,
7257*c83a76b0SSuyog Pawar 1,
7258*c83a76b0SSuyog Pawar 16);
7259*c83a76b0SSuyog Pawar }
7260*c83a76b0SSuyog Pawar }
7261*c83a76b0SSuyog Pawar }
7262*c83a76b0SSuyog Pawar }
7263*c83a76b0SSuyog Pawar }
7264*c83a76b0SSuyog Pawar }
7265*c83a76b0SSuyog Pawar else
7266*c83a76b0SSuyog Pawar {
7267*c83a76b0SSuyog Pawar memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy));
7268*c83a76b0SSuyog Pawar
7269*c83a76b0SSuyog Pawar memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy));
7270*c83a76b0SSuyog Pawar
7271*c83a76b0SSuyog Pawar memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy));
7272*c83a76b0SSuyog Pawar }
7273*c83a76b0SSuyog Pawar
7274*c83a76b0SSuyog Pawar for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
7275*c83a76b0SSuyog Pawar {
7276*c83a76b0SSuyog Pawar S32 ref_ctr;
7277*c83a76b0SSuyog Pawar U08 au1_pred_dir_searched[2];
7278*c83a76b0SSuyog Pawar U08 u1_is_cu_noisy;
7279*c83a76b0SSuyog Pawar ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
7280*c83a76b0SSuyog Pawar
7281*c83a76b0SSuyog Pawar {
7282*c83a76b0SSuyog Pawar blk_x = (i4_ctb_x << 2) +
7283*c83a76b0SSuyog Pawar (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x);
7284*c83a76b0SSuyog Pawar blk_y = (i4_ctb_y << 2) +
7285*c83a76b0SSuyog Pawar (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y);
7286*c83a76b0SSuyog Pawar
7287*c83a76b0SSuyog Pawar blk_id_in_full_ctb =
7288*c83a76b0SSuyog Pawar ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb;
7289*c83a76b0SSuyog Pawar blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask;
7290*c83a76b0SSuyog Pawar ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask;
7291*c83a76b0SSuyog Pawar s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6);
7292*c83a76b0SSuyog Pawar s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6);
7293*c83a76b0SSuyog Pawar }
7294*c83a76b0SSuyog Pawar
7295*c83a76b0SSuyog Pawar /* get the current input blk point */
7296*c83a76b0SSuyog Pawar pos_x = blk_x << blk_size_shift;
7297*c83a76b0SSuyog Pawar pos_y = blk_y << blk_size_shift;
7298*c83a76b0SSuyog Pawar pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
7299*c83a76b0SSuyog Pawar
7300*c83a76b0SSuyog Pawar /*********************************************************************/
7301*c83a76b0SSuyog Pawar /* For every blk in the picture, the search range needs to be derived*/
7302*c83a76b0SSuyog Pawar /* Any blk can have any mv, but practical search constraints are */
7303*c83a76b0SSuyog Pawar /* imposed by the picture boundary and amt of padding. */
7304*c83a76b0SSuyog Pawar /*********************************************************************/
7305*c83a76b0SSuyog Pawar /* MV limit is different based on ref. PIC */
7306*c83a76b0SSuyog Pawar for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7307*c83a76b0SSuyog Pawar {
7308*c83a76b0SSuyog Pawar if(!s_search_prms_blk.i4_use_rec)
7309*c83a76b0SSuyog Pawar {
7310*c83a76b0SSuyog Pawar hme_derive_search_range(
7311*c83a76b0SSuyog Pawar &as_range_prms_inp[ref_ctr],
7312*c83a76b0SSuyog Pawar &s_pic_limit_inp,
7313*c83a76b0SSuyog Pawar &as_mv_limit[ref_ctr],
7314*c83a76b0SSuyog Pawar pos_x,
7315*c83a76b0SSuyog Pawar pos_y,
7316*c83a76b0SSuyog Pawar blk_wd,
7317*c83a76b0SSuyog Pawar blk_ht);
7318*c83a76b0SSuyog Pawar }
7319*c83a76b0SSuyog Pawar else
7320*c83a76b0SSuyog Pawar {
7321*c83a76b0SSuyog Pawar hme_derive_search_range(
7322*c83a76b0SSuyog Pawar &as_range_prms_rec[ref_ctr],
7323*c83a76b0SSuyog Pawar &s_pic_limit_rec,
7324*c83a76b0SSuyog Pawar &as_mv_limit[ref_ctr],
7325*c83a76b0SSuyog Pawar pos_x,
7326*c83a76b0SSuyog Pawar pos_y,
7327*c83a76b0SSuyog Pawar blk_wd,
7328*c83a76b0SSuyog Pawar blk_ht);
7329*c83a76b0SSuyog Pawar }
7330*c83a76b0SSuyog Pawar }
7331*c83a76b0SSuyog Pawar s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
7332*c83a76b0SSuyog Pawar s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
7333*c83a76b0SSuyog Pawar /* Select search results from a suitable search result in the context */
7334*c83a76b0SSuyog Pawar {
7335*c83a76b0SSuyog Pawar ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb];
7336*c83a76b0SSuyog Pawar
7337*c83a76b0SSuyog Pawar if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
7338*c83a76b0SSuyog Pawar {
7339*c83a76b0SSuyog Pawar S32 i;
7340*c83a76b0SSuyog Pawar
7341*c83a76b0SSuyog Pawar for(i = 0; i < 2; i++)
7342*c83a76b0SSuyog Pawar {
7343*c83a76b0SSuyog Pawar ps_search_results->as_pred_ctxt[i].lambda = lambda_recon;
7344*c83a76b0SSuyog Pawar }
7345*c83a76b0SSuyog Pawar }
7346*c83a76b0SSuyog Pawar }
7347*c83a76b0SSuyog Pawar
7348*c83a76b0SSuyog Pawar u1_is_cu_noisy = au1_is_16x16Blk_noisy
7349*c83a76b0SSuyog Pawar [(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)];
7350*c83a76b0SSuyog Pawar
7351*c83a76b0SSuyog Pawar s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy;
7352*c83a76b0SSuyog Pawar
7353*c83a76b0SSuyog Pawar #if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7354*c83a76b0SSuyog Pawar if(u1_is_cu_noisy)
7355*c83a76b0SSuyog Pawar {
7356*c83a76b0SSuyog Pawar S32 j;
7357*c83a76b0SSuyog Pawar S32 lambda;
7358*c83a76b0SSuyog Pawar
7359*c83a76b0SSuyog Pawar lambda = lambda_recon;
7360*c83a76b0SSuyog Pawar lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7361*c83a76b0SSuyog Pawar
7362*c83a76b0SSuyog Pawar for(j = 0; j < 2; j++)
7363*c83a76b0SSuyog Pawar {
7364*c83a76b0SSuyog Pawar ps_search_results->as_pred_ctxt[j].lambda = lambda;
7365*c83a76b0SSuyog Pawar }
7366*c83a76b0SSuyog Pawar }
7367*c83a76b0SSuyog Pawar else
7368*c83a76b0SSuyog Pawar {
7369*c83a76b0SSuyog Pawar S32 j;
7370*c83a76b0SSuyog Pawar S32 lambda;
7371*c83a76b0SSuyog Pawar
7372*c83a76b0SSuyog Pawar lambda = lambda_recon;
7373*c83a76b0SSuyog Pawar
7374*c83a76b0SSuyog Pawar for(j = 0; j < 2; j++)
7375*c83a76b0SSuyog Pawar {
7376*c83a76b0SSuyog Pawar ps_search_results->as_pred_ctxt[j].lambda = lambda;
7377*c83a76b0SSuyog Pawar }
7378*c83a76b0SSuyog Pawar }
7379*c83a76b0SSuyog Pawar #endif
7380*c83a76b0SSuyog Pawar
7381*c83a76b0SSuyog Pawar s_search_prms_blk.ps_search_results = ps_search_results;
7382*c83a76b0SSuyog Pawar
7383*c83a76b0SSuyog Pawar s_search_prms_blk.i4_part_mask = hme_part_mask_populator(
7384*c83a76b0SSuyog Pawar pu1_inp,
7385*c83a76b0SSuyog Pawar i4_inp_stride,
7386*c83a76b0SSuyog Pawar ps_refine_prms->limit_active_partitions,
7387*c83a76b0SSuyog Pawar ps_ctxt->ps_hme_frm_prms->bidir_enabled,
7388*c83a76b0SSuyog Pawar ps_ctxt->u1_is_curFrame_a_refFrame,
7389*c83a76b0SSuyog Pawar blk_8x8_mask,
7390*c83a76b0SSuyog Pawar e_me_quality_presets);
7391*c83a76b0SSuyog Pawar
7392*c83a76b0SSuyog Pawar if(ME_PRISTINE_QUALITY == e_me_quality_presets)
7393*c83a76b0SSuyog Pawar {
7394*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] =
7395*c83a76b0SSuyog Pawar s_search_prms_blk.i4_part_mask;
7396*c83a76b0SSuyog Pawar }
7397*c83a76b0SSuyog Pawar
7398*c83a76b0SSuyog Pawar /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
7399*c83a76b0SSuyog Pawar {
7400*c83a76b0SSuyog Pawar /* Temporarily setting u1_num_active_ref to 2 (bidir */
7401*c83a76b0SSuyog Pawar /* enabled) or 1 for the sole purpose of the */
7402*c83a76b0SSuyog Pawar /* function called below; it is overwritten after */
7403*c83a76b0SSuyog Pawar ps_search_results->u1_num_active_ref = (ps_refine_prms->bidir_enabled) ? 2 : 1;
7404*c83a76b0SSuyog Pawar
7405*c83a76b0SSuyog Pawar hme_reset_search_results(
7406*c83a76b0SSuyog Pawar ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
7407*c83a76b0SSuyog Pawar
7408*c83a76b0SSuyog Pawar ps_search_results->u1_num_active_ref = i4_num_pred_dir;
7409*c83a76b0SSuyog Pawar }
7410*c83a76b0SSuyog Pawar
7411*c83a76b0SSuyog Pawar if(0 == blk_id_in_ctb)
7412*c83a76b0SSuyog Pawar {
7413*c83a76b0SSuyog Pawar UWORD8 u1_ctr;
7414*c83a76b0SSuyog Pawar for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
7415*c83a76b0SSuyog Pawar ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
7416*c83a76b0SSuyog Pawar u1_ctr++)
7417*c83a76b0SSuyog Pawar {
7418*c83a76b0SSuyog Pawar WORD32 i4_max_dep_ctb_y;
7419*c83a76b0SSuyog Pawar WORD32 i4_max_dep_ctb_x;
7420*c83a76b0SSuyog Pawar
7421*c83a76b0SSuyog Pawar /* Set max mv in ctb units */
7422*c83a76b0SSuyog Pawar i4_max_mv_x_in_ctb =
7423*c83a76b0SSuyog Pawar (ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7424*c83a76b0SSuyog Pawar ps_ctxt->log_ctb_size;
7425*c83a76b0SSuyog Pawar
7426*c83a76b0SSuyog Pawar i4_max_mv_y_in_ctb =
7427*c83a76b0SSuyog Pawar (as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7428*c83a76b0SSuyog Pawar ps_ctxt->log_ctb_size;
7429*c83a76b0SSuyog Pawar /********************************************************************/
7430*c83a76b0SSuyog Pawar /* Set max ctb_x and ctb_y dependency on reference picture */
7431*c83a76b0SSuyog Pawar /* Note +1 is due to delayed deblock, SAO, subpel plan dependency */
7432*c83a76b0SSuyog Pawar /********************************************************************/
7433*c83a76b0SSuyog Pawar i4_max_dep_ctb_x = CLIP3(
7434*c83a76b0SSuyog Pawar (i4_ctb_x + i4_max_mv_x_in_ctb + 1),
7435*c83a76b0SSuyog Pawar 0,
7436*c83a76b0SSuyog Pawar ps_frm_ctb_prms->i4_num_ctbs_horz - 1);
7437*c83a76b0SSuyog Pawar i4_max_dep_ctb_y = CLIP3(
7438*c83a76b0SSuyog Pawar (i4_ctb_y + i4_max_mv_y_in_ctb + 1),
7439*c83a76b0SSuyog Pawar 0,
7440*c83a76b0SSuyog Pawar ps_frm_ctb_prms->i4_num_ctbs_vert - 1);
7441*c83a76b0SSuyog Pawar
7442*c83a76b0SSuyog Pawar ihevce_dmgr_map_chk_sync(
7443*c83a76b0SSuyog Pawar ps_curr_layer->ppv_dep_mngr_recon[u1_ctr],
7444*c83a76b0SSuyog Pawar ps_ctxt->thrd_id,
7445*c83a76b0SSuyog Pawar i4_ctb_x,
7446*c83a76b0SSuyog Pawar i4_ctb_y,
7447*c83a76b0SSuyog Pawar i4_max_mv_x_in_ctb,
7448*c83a76b0SSuyog Pawar i4_max_mv_y_in_ctb);
7449*c83a76b0SSuyog Pawar }
7450*c83a76b0SSuyog Pawar }
7451*c83a76b0SSuyog Pawar
7452*c83a76b0SSuyog Pawar /* Loop across different Ref IDx */
7453*c83a76b0SSuyog Pawar for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++)
7454*c83a76b0SSuyog Pawar {
7455*c83a76b0SSuyog Pawar S32 resultid;
7456*c83a76b0SSuyog Pawar S08 u1_default_ref_id;
7457*c83a76b0SSuyog Pawar S32 i4_num_srch_cands = 0;
7458*c83a76b0SSuyog Pawar S32 i4_num_refinement_iterations;
7459*c83a76b0SSuyog Pawar S32 i4_refine_iter_ctr;
7460*c83a76b0SSuyog Pawar
7461*c83a76b0SSuyog Pawar if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) ||
7462*c83a76b0SSuyog Pawar (ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0))
7463*c83a76b0SSuyog Pawar {
7464*c83a76b0SSuyog Pawar u1_pred_dir = u1_pred_dir_ctr;
7465*c83a76b0SSuyog Pawar }
7466*c83a76b0SSuyog Pawar else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
7467*c83a76b0SSuyog Pawar {
7468*c83a76b0SSuyog Pawar u1_pred_dir = 1;
7469*c83a76b0SSuyog Pawar }
7470*c83a76b0SSuyog Pawar
7471*c83a76b0SSuyog Pawar u1_default_ref_id = (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list[0]
7472*c83a76b0SSuyog Pawar : ps_ctxt->ai1_future_list[0];
7473*c83a76b0SSuyog Pawar au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir;
7474*c83a76b0SSuyog Pawar
7475*c83a76b0SSuyog Pawar i4_num_srch_cands = 0;
7476*c83a76b0SSuyog Pawar resultid = 0;
7477*c83a76b0SSuyog Pawar
7478*c83a76b0SSuyog Pawar /* START OF NEW CTB MEANS FILL UP NEIGHBOURS IN 18x18 GRID */
7479*c83a76b0SSuyog Pawar if(0 == blk_id_in_ctb)
7480*c83a76b0SSuyog Pawar {
7481*c83a76b0SSuyog Pawar /*****************************************************************/
7482*c83a76b0SSuyog Pawar /* Initialize the mv grid with results of neighbours for the next*/
7483*c83a76b0SSuyog Pawar /* ctb. */
7484*c83a76b0SSuyog Pawar /*****************************************************************/
7485*c83a76b0SSuyog Pawar hme_fill_ctb_neighbour_mvs(
7486*c83a76b0SSuyog Pawar ps_curr_layer,
7487*c83a76b0SSuyog Pawar blk_x,
7488*c83a76b0SSuyog Pawar blk_y,
7489*c83a76b0SSuyog Pawar aps_mv_grid[u1_pred_dir],
7490*c83a76b0SSuyog Pawar u1_pred_dir_ctr,
7491*c83a76b0SSuyog Pawar u1_default_ref_id,
7492*c83a76b0SSuyog Pawar ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7493*c83a76b0SSuyog Pawar }
7494*c83a76b0SSuyog Pawar
7495*c83a76b0SSuyog Pawar s_search_prms_blk.i1_ref_idx = u1_pred_dir;
7496*c83a76b0SSuyog Pawar
7497*c83a76b0SSuyog Pawar {
7498*c83a76b0SSuyog Pawar if((blk_id_in_full_ctb % 4) == 0)
7499*c83a76b0SSuyog Pawar {
7500*c83a76b0SSuyog Pawar ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2]
7501*c83a76b0SSuyog Pawar .as_pred_ctxt[u1_pred_dir]
7502*c83a76b0SSuyog Pawar .proj_used = (blk_id_in_full_ctb == 8) ? 0 : 1;
7503*c83a76b0SSuyog Pawar }
7504*c83a76b0SSuyog Pawar
7505*c83a76b0SSuyog Pawar if(blk_id_in_full_ctb == 0)
7506*c83a76b0SSuyog Pawar {
7507*c83a76b0SSuyog Pawar ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1;
7508*c83a76b0SSuyog Pawar }
7509*c83a76b0SSuyog Pawar
7510*c83a76b0SSuyog Pawar ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used =
7511*c83a76b0SSuyog Pawar !gau1_encode_to_raster_y[blk_id_in_full_ctb];
7512*c83a76b0SSuyog Pawar }
7513*c83a76b0SSuyog Pawar
7514*c83a76b0SSuyog Pawar {
7515*c83a76b0SSuyog Pawar S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
7516*c83a76b0SSuyog Pawar S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
7517*c83a76b0SSuyog Pawar U08 u1_is_blk_at_ctb_boundary = !y;
7518*c83a76b0SSuyog Pawar
7519*c83a76b0SSuyog Pawar s_srch_cand_init_data.u1_is_left_available =
7520*c83a76b0SSuyog Pawar !(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off);
7521*c83a76b0SSuyog Pawar
7522*c83a76b0SSuyog Pawar if(u1_is_blk_at_ctb_boundary)
7523*c83a76b0SSuyog Pawar {
7524*c83a76b0SSuyog Pawar s_srch_cand_init_data.u1_is_topRight_available = 0;
7525*c83a76b0SSuyog Pawar s_srch_cand_init_data.u1_is_topLeft_available = 0;
7526*c83a76b0SSuyog Pawar s_srch_cand_init_data.u1_is_top_available = 0;
7527*c83a76b0SSuyog Pawar }
7528*c83a76b0SSuyog Pawar else
7529*c83a76b0SSuyog Pawar {
7530*c83a76b0SSuyog Pawar s_srch_cand_init_data.u1_is_topRight_available =
7531*c83a76b0SSuyog Pawar gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd);
7532*c83a76b0SSuyog Pawar s_srch_cand_init_data.u1_is_top_available = 1;
7533*c83a76b0SSuyog Pawar s_srch_cand_init_data.u1_is_topLeft_available =
7534*c83a76b0SSuyog Pawar s_srch_cand_init_data.u1_is_left_available;
7535*c83a76b0SSuyog Pawar }
7536*c83a76b0SSuyog Pawar }
7537*c83a76b0SSuyog Pawar
7538*c83a76b0SSuyog Pawar s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id;
7539*c83a76b0SSuyog Pawar s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1];
7540*c83a76b0SSuyog Pawar s_srch_cand_init_data.i4_pos_x = pos_x;
7541*c83a76b0SSuyog Pawar s_srch_cand_init_data.i4_pos_y = pos_y;
7542*c83a76b0SSuyog Pawar s_srch_cand_init_data.u1_pred_dir = u1_pred_dir;
7543*c83a76b0SSuyog Pawar s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr;
7544*c83a76b0SSuyog Pawar s_srch_cand_init_data.u1_search_candidate_list_index =
7545*c83a76b0SSuyog Pawar au1_search_candidate_list_index[u1_pred_dir];
7546*c83a76b0SSuyog Pawar
7547*c83a76b0SSuyog Pawar i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data);
7548*c83a76b0SSuyog Pawar
7549*c83a76b0SSuyog Pawar /* Note this block also clips the MV range for all candidates */
7550*c83a76b0SSuyog Pawar {
7551*c83a76b0SSuyog Pawar S08 i1_check_for_mult_refs;
7552*c83a76b0SSuyog Pawar
7553*c83a76b0SSuyog Pawar i1_check_for_mult_refs = u1_pred_dir ? (ps_ctxt->num_ref_future > 1)
7554*c83a76b0SSuyog Pawar : (ps_ctxt->num_ref_past > 1);
7555*c83a76b0SSuyog Pawar
7556*c83a76b0SSuyog Pawar ps_me_optimised_function_list->pf_mv_clipper(
7557*c83a76b0SSuyog Pawar &s_search_prms_blk,
7558*c83a76b0SSuyog Pawar i4_num_srch_cands,
7559*c83a76b0SSuyog Pawar i1_check_for_mult_refs,
7560*c83a76b0SSuyog Pawar ps_refine_prms->i4_num_steps_fpel_refine,
7561*c83a76b0SSuyog Pawar ps_refine_prms->i4_num_steps_hpel_refine,
7562*c83a76b0SSuyog Pawar ps_refine_prms->i4_num_steps_qpel_refine);
7563*c83a76b0SSuyog Pawar }
7564*c83a76b0SSuyog Pawar
7565*c83a76b0SSuyog Pawar #if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
7566*c83a76b0SSuyog Pawar i4_num_refinement_iterations =
7567*c83a76b0SSuyog Pawar ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1))
7568*c83a76b0SSuyog Pawar ? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0)
7569*c83a76b0SSuyog Pawar : 1;
7570*c83a76b0SSuyog Pawar #else
7571*c83a76b0SSuyog Pawar i4_num_refinement_iterations =
7572*c83a76b0SSuyog Pawar ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1;
7573*c83a76b0SSuyog Pawar #endif
7574*c83a76b0SSuyog Pawar
7575*c83a76b0SSuyog Pawar #if ENABLE_EXPLICIT_SEARCH_IN_PQ
7576*c83a76b0SSuyog Pawar if(e_me_quality_presets == ME_PRISTINE_QUALITY)
7577*c83a76b0SSuyog Pawar {
7578*c83a76b0SSuyog Pawar i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0
7579*c83a76b0SSuyog Pawar : i4_num_act_ref_l1;
7580*c83a76b0SSuyog Pawar }
7581*c83a76b0SSuyog Pawar #endif
7582*c83a76b0SSuyog Pawar
7583*c83a76b0SSuyog Pawar for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations;
7584*c83a76b0SSuyog Pawar i4_refine_iter_ctr++)
7585*c83a76b0SSuyog Pawar {
7586*c83a76b0SSuyog Pawar S32 center_x;
7587*c83a76b0SSuyog Pawar S32 center_y;
7588*c83a76b0SSuyog Pawar S32 center_ref_idx;
7589*c83a76b0SSuyog Pawar
7590*c83a76b0SSuyog Pawar S08 *pi1_pred_dir_to_ref_idx =
7591*c83a76b0SSuyog Pawar (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list;
7592*c83a76b0SSuyog Pawar
7593*c83a76b0SSuyog Pawar {
7594*c83a76b0SSuyog Pawar WORD32 i4_i;
7595*c83a76b0SSuyog Pawar
7596*c83a76b0SSuyog Pawar for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++)
7597*c83a76b0SSuyog Pawar {
7598*c83a76b0SSuyog Pawar ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7599*c83a76b0SSuyog Pawar ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7600*c83a76b0SSuyog Pawar ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
7601*c83a76b0SSuyog Pawar MAX_SIGNED_16BIT_VAL;
7602*c83a76b0SSuyog Pawar ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0;
7603*c83a76b0SSuyog Pawar ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0;
7604*c83a76b0SSuyog Pawar ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id;
7605*c83a76b0SSuyog Pawar
7606*c83a76b0SSuyog Pawar if(ps_refine_prms->i4_num_results_per_part == 2)
7607*c83a76b0SSuyog Pawar {
7608*c83a76b0SSuyog Pawar ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] =
7609*c83a76b0SSuyog Pawar MAX_SIGNED_16BIT_VAL;
7610*c83a76b0SSuyog Pawar ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] =
7611*c83a76b0SSuyog Pawar MAX_SIGNED_16BIT_VAL;
7612*c83a76b0SSuyog Pawar ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
7613*c83a76b0SSuyog Pawar MAX_SIGNED_16BIT_VAL;
7614*c83a76b0SSuyog Pawar ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0;
7615*c83a76b0SSuyog Pawar ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0;
7616*c83a76b0SSuyog Pawar ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id;
7617*c83a76b0SSuyog Pawar }
7618*c83a76b0SSuyog Pawar }
7619*c83a76b0SSuyog Pawar
7620*c83a76b0SSuyog Pawar s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt;
7621*c83a76b0SSuyog Pawar s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt;
7622*c83a76b0SSuyog Pawar }
7623*c83a76b0SSuyog Pawar
7624*c83a76b0SSuyog Pawar {
7625*c83a76b0SSuyog Pawar search_node_t *ps_coloc_node;
7626*c83a76b0SSuyog Pawar
7627*c83a76b0SSuyog Pawar S32 i = 0;
7628*c83a76b0SSuyog Pawar
7629*c83a76b0SSuyog Pawar if(i4_num_refinement_iterations > 1)
7630*c83a76b0SSuyog Pawar {
7631*c83a76b0SSuyog Pawar for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++)
7632*c83a76b0SSuyog Pawar {
7633*c83a76b0SSuyog Pawar ps_coloc_node =
7634*c83a76b0SSuyog Pawar s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]]
7635*c83a76b0SSuyog Pawar .ps_search_node;
7636*c83a76b0SSuyog Pawar
7637*c83a76b0SSuyog Pawar if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] ==
7638*c83a76b0SSuyog Pawar ps_coloc_node->i1_ref_idx)
7639*c83a76b0SSuyog Pawar {
7640*c83a76b0SSuyog Pawar break;
7641*c83a76b0SSuyog Pawar }
7642*c83a76b0SSuyog Pawar }
7643*c83a76b0SSuyog Pawar
7644*c83a76b0SSuyog Pawar if(i == ai4_num_coloc_cands[u1_pred_dir])
7645*c83a76b0SSuyog Pawar {
7646*c83a76b0SSuyog Pawar i = 0;
7647*c83a76b0SSuyog Pawar }
7648*c83a76b0SSuyog Pawar }
7649*c83a76b0SSuyog Pawar else
7650*c83a76b0SSuyog Pawar {
7651*c83a76b0SSuyog Pawar ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]]
7652*c83a76b0SSuyog Pawar .ps_search_node;
7653*c83a76b0SSuyog Pawar }
7654*c83a76b0SSuyog Pawar
7655*c83a76b0SSuyog Pawar hme_set_mvp_node(
7656*c83a76b0SSuyog Pawar ps_search_results,
7657*c83a76b0SSuyog Pawar ps_coloc_node,
7658*c83a76b0SSuyog Pawar u1_pred_dir,
7659*c83a76b0SSuyog Pawar (i4_num_refinement_iterations > 1)
7660*c83a76b0SSuyog Pawar ? pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr]
7661*c83a76b0SSuyog Pawar : u1_default_ref_id);
7662*c83a76b0SSuyog Pawar
7663*c83a76b0SSuyog Pawar center_x = ps_coloc_node->ps_mv->i2_mvx;
7664*c83a76b0SSuyog Pawar center_y = ps_coloc_node->ps_mv->i2_mvy;
7665*c83a76b0SSuyog Pawar center_ref_idx = ps_coloc_node->i1_ref_idx;
7666*c83a76b0SSuyog Pawar }
7667*c83a76b0SSuyog Pawar
7668*c83a76b0SSuyog Pawar /* Full-Pel search */
7669*c83a76b0SSuyog Pawar {
7670*c83a76b0SSuyog Pawar S32 num_unique_nodes;
7671*c83a76b0SSuyog Pawar
7672*c83a76b0SSuyog Pawar memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
7673*c83a76b0SSuyog Pawar
7674*c83a76b0SSuyog Pawar num_unique_nodes = hme_remove_duplicate_fpel_search_candidates(
7675*c83a76b0SSuyog Pawar as_unique_search_nodes,
7676*c83a76b0SSuyog Pawar s_search_prms_blk.ps_search_candts,
7677*c83a76b0SSuyog Pawar au4_unique_node_map,
7678*c83a76b0SSuyog Pawar pi1_pred_dir_to_ref_idx,
7679*c83a76b0SSuyog Pawar i4_num_srch_cands,
7680*c83a76b0SSuyog Pawar s_search_prms_blk.i4_num_init_candts,
7681*c83a76b0SSuyog Pawar i4_refine_iter_ctr,
7682*c83a76b0SSuyog Pawar i4_num_refinement_iterations,
7683*c83a76b0SSuyog Pawar i4_num_act_ref_l0,
7684*c83a76b0SSuyog Pawar center_ref_idx,
7685*c83a76b0SSuyog Pawar center_x,
7686*c83a76b0SSuyog Pawar center_y,
7687*c83a76b0SSuyog Pawar ps_ctxt->s_frm_prms.bidir_enabled,
7688*c83a76b0SSuyog Pawar e_me_quality_presets);
7689*c83a76b0SSuyog Pawar
7690*c83a76b0SSuyog Pawar /*************************************************************************/
7691*c83a76b0SSuyog Pawar /* This array stores the ids of the partitions whose */
7692*c83a76b0SSuyog Pawar /* SADs are updated. Since the partitions whose SADs are updated may not */
7693*c83a76b0SSuyog Pawar /* be in contiguous order, we supply another level of indirection. */
7694*c83a76b0SSuyog Pawar /*************************************************************************/
7695*c83a76b0SSuyog Pawar ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
7696*c83a76b0SSuyog Pawar s_search_prms_blk.i4_part_mask,
7697*c83a76b0SSuyog Pawar &ps_fullpel_refine_ctxt->ai4_part_id[0]);
7698*c83a76b0SSuyog Pawar
7699*c83a76b0SSuyog Pawar if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy)
7700*c83a76b0SSuyog Pawar {
7701*c83a76b0SSuyog Pawar S32 i;
7702*c83a76b0SSuyog Pawar /*i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values*/
7703*c83a76b0SSuyog Pawar S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) +
7704*c83a76b0SSuyog Pawar (s_search_prms_blk.i4_cu_y_off * 4);
7705*c83a76b0SSuyog Pawar
7706*c83a76b0SSuyog Pawar for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++)
7707*c83a76b0SSuyog Pawar {
7708*c83a76b0SSuyog Pawar S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i];
7709*c83a76b0SSuyog Pawar
7710*c83a76b0SSuyog Pawar hme_compute_final_sigma_of_pu_from_base_blocks(
7711*c83a76b0SSuyog Pawar ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
7712*c83a76b0SSuyog Pawar ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
7713*c83a76b0SSuyog Pawar au8_final_src_sigmaX,
7714*c83a76b0SSuyog Pawar au8_final_src_sigmaXSquared,
7715*c83a76b0SSuyog Pawar 16,
7716*c83a76b0SSuyog Pawar 4,
7717*c83a76b0SSuyog Pawar i4_part_id,
7718*c83a76b0SSuyog Pawar 16);
7719*c83a76b0SSuyog Pawar }
7720*c83a76b0SSuyog Pawar
7721*c83a76b0SSuyog Pawar s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7722*c83a76b0SSuyog Pawar s_common_frm_prms.pu8_part_src_sigmaXSquared =
7723*c83a76b0SSuyog Pawar au8_final_src_sigmaXSquared;
7724*c83a76b0SSuyog Pawar
7725*c83a76b0SSuyog Pawar s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7726*c83a76b0SSuyog Pawar s_search_prms_blk.pu8_part_src_sigmaXSquared =
7727*c83a76b0SSuyog Pawar au8_final_src_sigmaXSquared;
7728*c83a76b0SSuyog Pawar }
7729*c83a76b0SSuyog Pawar
7730*c83a76b0SSuyog Pawar if(0 == num_unique_nodes)
7731*c83a76b0SSuyog Pawar {
7732*c83a76b0SSuyog Pawar continue;
7733*c83a76b0SSuyog Pawar }
7734*c83a76b0SSuyog Pawar
7735*c83a76b0SSuyog Pawar if(num_unique_nodes >= 2)
7736*c83a76b0SSuyog Pawar {
7737*c83a76b0SSuyog Pawar s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7738*c83a76b0SSuyog Pawar s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
7739*c83a76b0SSuyog Pawar if(ps_ctxt->i4_pic_type != IV_P_FRAME)
7740*c83a76b0SSuyog Pawar {
7741*c83a76b0SSuyog Pawar if(ps_ctxt->i4_temporal_layer == 1)
7742*c83a76b0SSuyog Pawar {
7743*c83a76b0SSuyog Pawar hme_fullpel_cand_sifter(
7744*c83a76b0SSuyog Pawar &s_search_prms_blk,
7745*c83a76b0SSuyog Pawar ps_curr_layer,
7746*c83a76b0SSuyog Pawar &ps_ctxt->s_wt_pred,
7747*c83a76b0SSuyog Pawar ALPHA_FOR_NOISE_TERM_IN_ME,
7748*c83a76b0SSuyog Pawar u1_is_cu_noisy,
7749*c83a76b0SSuyog Pawar ps_me_optimised_function_list);
7750*c83a76b0SSuyog Pawar }
7751*c83a76b0SSuyog Pawar else
7752*c83a76b0SSuyog Pawar {
7753*c83a76b0SSuyog Pawar hme_fullpel_cand_sifter(
7754*c83a76b0SSuyog Pawar &s_search_prms_blk,
7755*c83a76b0SSuyog Pawar ps_curr_layer,
7756*c83a76b0SSuyog Pawar &ps_ctxt->s_wt_pred,
7757*c83a76b0SSuyog Pawar ALPHA_FOR_NOISE_TERM_IN_ME,
7758*c83a76b0SSuyog Pawar u1_is_cu_noisy,
7759*c83a76b0SSuyog Pawar ps_me_optimised_function_list);
7760*c83a76b0SSuyog Pawar }
7761*c83a76b0SSuyog Pawar }
7762*c83a76b0SSuyog Pawar else
7763*c83a76b0SSuyog Pawar {
7764*c83a76b0SSuyog Pawar hme_fullpel_cand_sifter(
7765*c83a76b0SSuyog Pawar &s_search_prms_blk,
7766*c83a76b0SSuyog Pawar ps_curr_layer,
7767*c83a76b0SSuyog Pawar &ps_ctxt->s_wt_pred,
7768*c83a76b0SSuyog Pawar ALPHA_FOR_NOISE_TERM_IN_ME_P,
7769*c83a76b0SSuyog Pawar u1_is_cu_noisy,
7770*c83a76b0SSuyog Pawar ps_me_optimised_function_list);
7771*c83a76b0SSuyog Pawar }
7772*c83a76b0SSuyog Pawar }
7773*c83a76b0SSuyog Pawar
7774*c83a76b0SSuyog Pawar s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7775*c83a76b0SSuyog Pawar
7776*c83a76b0SSuyog Pawar hme_fullpel_refine(
7777*c83a76b0SSuyog Pawar ps_refine_prms,
7778*c83a76b0SSuyog Pawar &s_search_prms_blk,
7779*c83a76b0SSuyog Pawar ps_curr_layer,
7780*c83a76b0SSuyog Pawar &ps_ctxt->s_wt_pred,
7781*c83a76b0SSuyog Pawar au4_unique_node_map,
7782*c83a76b0SSuyog Pawar num_unique_nodes,
7783*c83a76b0SSuyog Pawar blk_8x8_mask,
7784*c83a76b0SSuyog Pawar center_x,
7785*c83a76b0SSuyog Pawar center_y,
7786*c83a76b0SSuyog Pawar center_ref_idx,
7787*c83a76b0SSuyog Pawar e_me_quality_presets,
7788*c83a76b0SSuyog Pawar ps_me_optimised_function_list);
7789*c83a76b0SSuyog Pawar }
7790*c83a76b0SSuyog Pawar
7791*c83a76b0SSuyog Pawar /* Sub-Pel search */
7792*c83a76b0SSuyog Pawar {
7793*c83a76b0SSuyog Pawar hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7794*c83a76b0SSuyog Pawar
7795*c83a76b0SSuyog Pawar s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem(
7796*c83a76b0SSuyog Pawar &ps_ctxt->s_buf_mgr,
7797*c83a76b0SSuyog Pawar INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE);
7798*c83a76b0SSuyog Pawar /* MV limit is different based on ref. PIC */
7799*c83a76b0SSuyog Pawar for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7800*c83a76b0SSuyog Pawar {
7801*c83a76b0SSuyog Pawar SCALE_RANGE_PRMS(
7802*c83a76b0SSuyog Pawar as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
7803*c83a76b0SSuyog Pawar SCALE_RANGE_PRMS(
7804*c83a76b0SSuyog Pawar as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
7805*c83a76b0SSuyog Pawar }
7806*c83a76b0SSuyog Pawar s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6;
7807*c83a76b0SSuyog Pawar s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6;
7808*c83a76b0SSuyog Pawar
7809*c83a76b0SSuyog Pawar hme_subpel_refine_cu_hs(
7810*c83a76b0SSuyog Pawar &s_subpel_prms,
7811*c83a76b0SSuyog Pawar ps_curr_layer,
7812*c83a76b0SSuyog Pawar ps_search_results,
7813*c83a76b0SSuyog Pawar u1_pred_dir,
7814*c83a76b0SSuyog Pawar &ps_ctxt->s_wt_pred,
7815*c83a76b0SSuyog Pawar blk_8x8_mask,
7816*c83a76b0SSuyog Pawar ps_ctxt->ps_func_selector,
7817*c83a76b0SSuyog Pawar ps_cmn_utils_optimised_function_list,
7818*c83a76b0SSuyog Pawar ps_me_optimised_function_list);
7819*c83a76b0SSuyog Pawar }
7820*c83a76b0SSuyog Pawar }
7821*c83a76b0SSuyog Pawar }
7822*c83a76b0SSuyog Pawar /* Populate the new PU struct with the results post subpel refinement*/
7823*c83a76b0SSuyog Pawar {
7824*c83a76b0SSuyog Pawar inter_cu_results_t *ps_cu_results;
7825*c83a76b0SSuyog Pawar WORD32 best_inter_cost, intra_cost, posx, posy;
7826*c83a76b0SSuyog Pawar
7827*c83a76b0SSuyog Pawar UWORD8 intra_8x8_enabled = 0;
7828*c83a76b0SSuyog Pawar
7829*c83a76b0SSuyog Pawar /* cost of 16x16 cu parent */
7830*c83a76b0SSuyog Pawar WORD32 parent_cost = MAX_32BIT_VAL;
7831*c83a76b0SSuyog Pawar
7832*c83a76b0SSuyog Pawar /* cost of 8x8 cu children */
7833*c83a76b0SSuyog Pawar /*********************************************************************/
7834*c83a76b0SSuyog Pawar /* Assuming parent is not split, then we signal 1 bit for this parent*/
7835*c83a76b0SSuyog Pawar /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
7836*c83a76b0SSuyog Pawar /* So, 4*lambda is extra for children cost. */
7837*c83a76b0SSuyog Pawar /*********************************************************************/
7838*c83a76b0SSuyog Pawar WORD32 child_cost = 0;
7839*c83a76b0SSuyog Pawar
7840*c83a76b0SSuyog Pawar ps_cu_results = ps_search_results->ps_cu_results;
7841*c83a76b0SSuyog Pawar
7842*c83a76b0SSuyog Pawar /* Initialize the pu_results pointers to the first struct in the stack array */
7843*c83a76b0SSuyog Pawar ps_pu_results = as_inter_pu_results;
7844*c83a76b0SSuyog Pawar
7845*c83a76b0SSuyog Pawar hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7846*c83a76b0SSuyog Pawar
7847*c83a76b0SSuyog Pawar hme_populate_pus(
7848*c83a76b0SSuyog Pawar ps_thrd_ctxt,
7849*c83a76b0SSuyog Pawar ps_ctxt,
7850*c83a76b0SSuyog Pawar &s_subpel_prms,
7851*c83a76b0SSuyog Pawar ps_search_results,
7852*c83a76b0SSuyog Pawar ps_cu_results,
7853*c83a76b0SSuyog Pawar ps_pu_results,
7854*c83a76b0SSuyog Pawar &(as_pu_results[0][0][0]),
7855*c83a76b0SSuyog Pawar &s_common_frm_prms,
7856*c83a76b0SSuyog Pawar &ps_ctxt->s_wt_pred,
7857*c83a76b0SSuyog Pawar ps_curr_layer,
7858*c83a76b0SSuyog Pawar au1_pred_dir_searched,
7859*c83a76b0SSuyog Pawar i4_num_pred_dir);
7860*c83a76b0SSuyog Pawar
7861*c83a76b0SSuyog Pawar ps_cu_results->i4_inp_offset =
7862*c83a76b0SSuyog Pawar (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
7863*c83a76b0SSuyog Pawar
7864*c83a76b0SSuyog Pawar hme_decide_part_types(
7865*c83a76b0SSuyog Pawar ps_cu_results,
7866*c83a76b0SSuyog Pawar ps_pu_results,
7867*c83a76b0SSuyog Pawar &s_common_frm_prms,
7868*c83a76b0SSuyog Pawar ps_ctxt,
7869*c83a76b0SSuyog Pawar ps_cmn_utils_optimised_function_list,
7870*c83a76b0SSuyog Pawar ps_me_optimised_function_list
7871*c83a76b0SSuyog Pawar
7872*c83a76b0SSuyog Pawar );
7873*c83a76b0SSuyog Pawar
7874*c83a76b0SSuyog Pawar /* Update the min and max MVs of the dynamic search range for each ref. pic. */
7875*c83a76b0SSuyog Pawar /* Only for P pics: for P, is_i_pic and bidir_enabled are both 0; for I and B pics they are mutually exclusive */
7876*c83a76b0SSuyog Pawar if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
7877*c83a76b0SSuyog Pawar {
7878*c83a76b0SSuyog Pawar WORD32 res_ctr;
7879*c83a76b0SSuyog Pawar
7880*c83a76b0SSuyog Pawar for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++)
7881*c83a76b0SSuyog Pawar {
7882*c83a76b0SSuyog Pawar WORD32 num_part = 2, part_ctr;
7883*c83a76b0SSuyog Pawar part_type_results_t *ps_best_results =
7884*c83a76b0SSuyog Pawar &ps_cu_results->ps_best_results[res_ctr];
7885*c83a76b0SSuyog Pawar
7886*c83a76b0SSuyog Pawar if(PRT_2Nx2N == ps_best_results->u1_part_type)
7887*c83a76b0SSuyog Pawar num_part = 1;
7888*c83a76b0SSuyog Pawar
7889*c83a76b0SSuyog Pawar for(part_ctr = 0; part_ctr < num_part; part_ctr++)
7890*c83a76b0SSuyog Pawar {
7891*c83a76b0SSuyog Pawar pu_result_t *ps_pu_results =
7892*c83a76b0SSuyog Pawar &ps_best_results->as_pu_results[part_ctr];
7893*c83a76b0SSuyog Pawar
7894*c83a76b0SSuyog Pawar ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode);
7895*c83a76b0SSuyog Pawar
7896*c83a76b0SSuyog Pawar hme_update_dynamic_search_params(
7897*c83a76b0SSuyog Pawar &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p]
7898*c83a76b0SSuyog Pawar .as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx],
7899*c83a76b0SSuyog Pawar ps_pu_results->pu.mv.s_l0_mv.i2_mvy);
7900*c83a76b0SSuyog Pawar
7901*c83a76b0SSuyog Pawar /* Sanity Check */
7902*c83a76b0SSuyog Pawar ASSERT(
7903*c83a76b0SSuyog Pawar ps_pu_results->pu.mv.i1_l0_ref_idx <
7904*c83a76b0SSuyog Pawar ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7905*c83a76b0SSuyog Pawar
7906*c83a76b0SSuyog Pawar /* No L1 for P Pic. */
7907*c83a76b0SSuyog Pawar ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode);
7908*c83a76b0SSuyog Pawar /* No BI for P Pic. */
7909*c83a76b0SSuyog Pawar ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode);
7910*c83a76b0SSuyog Pawar }
7911*c83a76b0SSuyog Pawar }
7912*c83a76b0SSuyog Pawar }
7913*c83a76b0SSuyog Pawar
7914*c83a76b0SSuyog Pawar /*****************************************************************/
7915*c83a76b0SSuyog Pawar /* INSERT INTRA RESULTS AT 16x16 LEVEL. */
7916*c83a76b0SSuyog Pawar /*****************************************************************/
7917*c83a76b0SSuyog Pawar
7918*c83a76b0SSuyog Pawar #if DISABLE_INTRA_IN_BPICS
7919*c83a76b0SSuyog Pawar if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7920*c83a76b0SSuyog Pawar (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
7921*c83a76b0SSuyog Pawar #endif
7922*c83a76b0SSuyog Pawar {
7923*c83a76b0SSuyog Pawar if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy))
7924*c83a76b0SSuyog Pawar {
7925*c83a76b0SSuyog Pawar hme_insert_intra_nodes_post_bipred(
7926*c83a76b0SSuyog Pawar ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
7927*c83a76b0SSuyog Pawar }
7928*c83a76b0SSuyog Pawar }
7929*c83a76b0SSuyog Pawar
7930*c83a76b0SSuyog Pawar #if DISABLE_INTRA_IN_BPICS
7931*c83a76b0SSuyog Pawar if((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7932*c83a76b0SSuyog Pawar (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
7933*c83a76b0SSuyog Pawar {
7934*c83a76b0SSuyog Pawar intra_8x8_enabled = 0;
7935*c83a76b0SSuyog Pawar }
7936*c83a76b0SSuyog Pawar else
7937*c83a76b0SSuyog Pawar #endif
7938*c83a76b0SSuyog Pawar {
7939*c83a76b0SSuyog Pawar /*TRAQO intra flag updation*/
7940*c83a76b0SSuyog Pawar if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag)
7941*c83a76b0SSuyog Pawar {
7942*c83a76b0SSuyog Pawar best_inter_cost =
7943*c83a76b0SSuyog Pawar ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost;
7944*c83a76b0SSuyog Pawar intra_cost =
7945*c83a76b0SSuyog Pawar ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7946*c83a76b0SSuyog Pawar /*@16x16 level*/
7947*c83a76b0SSuyog Pawar posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x
7948*c83a76b0SSuyog Pawar << 2) >>
7949*c83a76b0SSuyog Pawar 4;
7950*c83a76b0SSuyog Pawar posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y
7951*c83a76b0SSuyog Pawar << 2) >>
7952*c83a76b0SSuyog Pawar 4;
7953*c83a76b0SSuyog Pawar }
7954*c83a76b0SSuyog Pawar else
7955*c83a76b0SSuyog Pawar {
7956*c83a76b0SSuyog Pawar best_inter_cost =
7957*c83a76b0SSuyog Pawar ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7958*c83a76b0SSuyog Pawar posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x
7959*c83a76b0SSuyog Pawar << 2) >>
7960*c83a76b0SSuyog Pawar 3;
7961*c83a76b0SSuyog Pawar posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y
7962*c83a76b0SSuyog Pawar << 2) >>
7963*c83a76b0SSuyog Pawar 3;
7964*c83a76b0SSuyog Pawar }
7965*c83a76b0SSuyog Pawar
7966*c83a76b0SSuyog Pawar /* Disable intra16/32/64 flags based on split flags recommended by IPE */
7967*c83a76b0SSuyog Pawar if(ps_cur_ipe_ctb->u1_split_flag)
7968*c83a76b0SSuyog Pawar {
7969*c83a76b0SSuyog Pawar /* Id of the 32x32 block, 16x16 block in a CTB */
7970*c83a76b0SSuyog Pawar WORD32 i4_32x32_id =
7971*c83a76b0SSuyog Pawar (ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5);
7972*c83a76b0SSuyog Pawar WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 +
7973*c83a76b0SSuyog Pawar ((ps_cu_results->u1_x_off >> 4) & 0x1);
7974*c83a76b0SSuyog Pawar
7975*c83a76b0SSuyog Pawar if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
7976*c83a76b0SSuyog Pawar {
7977*c83a76b0SSuyog Pawar if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7978*c83a76b0SSuyog Pawar .as_intra16_analyse[i4_16x16_id]
7979*c83a76b0SSuyog Pawar .b1_split_flag)
7980*c83a76b0SSuyog Pawar {
7981*c83a76b0SSuyog Pawar intra_8x8_enabled =
7982*c83a76b0SSuyog Pawar ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7983*c83a76b0SSuyog Pawar .as_intra16_analyse[i4_16x16_id]
7984*c83a76b0SSuyog Pawar .as_intra8_analyse[0]
7985*c83a76b0SSuyog Pawar .b1_valid_cu;
7986*c83a76b0SSuyog Pawar intra_8x8_enabled &=
7987*c83a76b0SSuyog Pawar ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7988*c83a76b0SSuyog Pawar .as_intra16_analyse[i4_16x16_id]
7989*c83a76b0SSuyog Pawar .as_intra8_analyse[1]
7990*c83a76b0SSuyog Pawar .b1_valid_cu;
7991*c83a76b0SSuyog Pawar intra_8x8_enabled &=
7992*c83a76b0SSuyog Pawar ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7993*c83a76b0SSuyog Pawar .as_intra16_analyse[i4_16x16_id]
7994*c83a76b0SSuyog Pawar .as_intra8_analyse[2]
7995*c83a76b0SSuyog Pawar .b1_valid_cu;
7996*c83a76b0SSuyog Pawar intra_8x8_enabled &=
7997*c83a76b0SSuyog Pawar ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7998*c83a76b0SSuyog Pawar .as_intra16_analyse[i4_16x16_id]
7999*c83a76b0SSuyog Pawar .as_intra8_analyse[3]
8000*c83a76b0SSuyog Pawar .b1_valid_cu;
8001*c83a76b0SSuyog Pawar }
8002*c83a76b0SSuyog Pawar }
8003*c83a76b0SSuyog Pawar }
8004*c83a76b0SSuyog Pawar }
8005*c83a76b0SSuyog Pawar
8006*c83a76b0SSuyog Pawar if(blk_8x8_mask == 0xf)
8007*c83a76b0SSuyog Pawar {
8008*c83a76b0SSuyog Pawar parent_cost =
8009*c83a76b0SSuyog Pawar ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost;
8010*c83a76b0SSuyog Pawar ps_search_results->u1_split_flag = 0;
8011*c83a76b0SSuyog Pawar }
8012*c83a76b0SSuyog Pawar else
8013*c83a76b0SSuyog Pawar {
8014*c83a76b0SSuyog Pawar ps_search_results->u1_split_flag = 1;
8015*c83a76b0SSuyog Pawar }
8016*c83a76b0SSuyog Pawar
8017*c83a76b0SSuyog Pawar ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2];
8018*c83a76b0SSuyog Pawar
8019*c83a76b0SSuyog Pawar if(s_common_frm_prms.u1_is_cu_noisy)
8020*c83a76b0SSuyog Pawar {
8021*c83a76b0SSuyog Pawar intra_8x8_enabled = 0;
8022*c83a76b0SSuyog Pawar }
8023*c83a76b0SSuyog Pawar
8024*c83a76b0SSuyog Pawar /* Evaluate 8x8 if the NxN part id is enabled or intra 8x8 is enabled */
8025*c83a76b0SSuyog Pawar if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled)
8026*c83a76b0SSuyog Pawar {
8027*c83a76b0SSuyog Pawar /* Populates the PU's for the 4 8x8's in one call */
8028*c83a76b0SSuyog Pawar hme_populate_pus_8x8_cu(
8029*c83a76b0SSuyog Pawar ps_thrd_ctxt,
8030*c83a76b0SSuyog Pawar ps_ctxt,
8031*c83a76b0SSuyog Pawar &s_subpel_prms,
8032*c83a76b0SSuyog Pawar ps_search_results,
8033*c83a76b0SSuyog Pawar ps_cu_results,
8034*c83a76b0SSuyog Pawar ps_pu_results,
8035*c83a76b0SSuyog Pawar &(as_pu_results[0][0][0]),
8036*c83a76b0SSuyog Pawar &s_common_frm_prms,
8037*c83a76b0SSuyog Pawar au1_pred_dir_searched,
8038*c83a76b0SSuyog Pawar i4_num_pred_dir,
8039*c83a76b0SSuyog Pawar blk_8x8_mask);
8040*c83a76b0SSuyog Pawar
8041*c83a76b0SSuyog Pawar /* Re-initialize the pu_results pointers to the first struct in the stack array */
8042*c83a76b0SSuyog Pawar ps_pu_results = as_inter_pu_results;
8043*c83a76b0SSuyog Pawar
8044*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
8045*c83a76b0SSuyog Pawar {
8046*c83a76b0SSuyog Pawar if((blk_8x8_mask & (1 << i)))
8047*c83a76b0SSuyog Pawar {
8048*c83a76b0SSuyog Pawar if(ps_cu_results->i4_part_mask)
8049*c83a76b0SSuyog Pawar {
8050*c83a76b0SSuyog Pawar hme_decide_part_types(
8051*c83a76b0SSuyog Pawar ps_cu_results,
8052*c83a76b0SSuyog Pawar ps_pu_results,
8053*c83a76b0SSuyog Pawar &s_common_frm_prms,
8054*c83a76b0SSuyog Pawar ps_ctxt,
8055*c83a76b0SSuyog Pawar ps_cmn_utils_optimised_function_list,
8056*c83a76b0SSuyog Pawar ps_me_optimised_function_list
8057*c83a76b0SSuyog Pawar
8058*c83a76b0SSuyog Pawar );
8059*c83a76b0SSuyog Pawar }
8060*c83a76b0SSuyog Pawar /*****************************************************************/
8061*c83a76b0SSuyog Pawar /* INSERT INTRA RESULTS AT 8x8 LEVEL. */
8062*c83a76b0SSuyog Pawar /*****************************************************************/
8063*c83a76b0SSuyog Pawar #if DISABLE_INTRA_IN_BPICS
8064*c83a76b0SSuyog Pawar if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
8065*c83a76b0SSuyog Pawar (ps_ctxt->s_frm_prms.i4_temporal_layer_id >
8066*c83a76b0SSuyog Pawar TEMPORAL_LAYER_DISABLE)))
8067*c83a76b0SSuyog Pawar #endif
8068*c83a76b0SSuyog Pawar {
8069*c83a76b0SSuyog Pawar if(!(DISABLE_INTRA_WHEN_NOISY &&
8070*c83a76b0SSuyog Pawar s_common_frm_prms.u1_is_cu_noisy))
8071*c83a76b0SSuyog Pawar {
8072*c83a76b0SSuyog Pawar hme_insert_intra_nodes_post_bipred(
8073*c83a76b0SSuyog Pawar ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
8074*c83a76b0SSuyog Pawar }
8075*c83a76b0SSuyog Pawar }
8076*c83a76b0SSuyog Pawar
8077*c83a76b0SSuyog Pawar child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost;
8078*c83a76b0SSuyog Pawar }
8079*c83a76b0SSuyog Pawar
8080*c83a76b0SSuyog Pawar ps_cu_results++;
8081*c83a76b0SSuyog Pawar ps_pu_results++;
8082*c83a76b0SSuyog Pawar }
8083*c83a76b0SSuyog Pawar
8084*c83a76b0SSuyog Pawar /* Compare 16x16 vs 8x8 cost */
8085*c83a76b0SSuyog Pawar if(child_cost < parent_cost)
8086*c83a76b0SSuyog Pawar {
8087*c83a76b0SSuyog Pawar ps_search_results->best_cu_cost = child_cost;
8088*c83a76b0SSuyog Pawar ps_search_results->u1_split_flag = 1;
8089*c83a76b0SSuyog Pawar }
8090*c83a76b0SSuyog Pawar }
8091*c83a76b0SSuyog Pawar }
8092*c83a76b0SSuyog Pawar
8093*c83a76b0SSuyog Pawar hme_update_mv_bank_encode(
8094*c83a76b0SSuyog Pawar ps_search_results,
8095*c83a76b0SSuyog Pawar ps_curr_layer->ps_layer_mvbank,
8096*c83a76b0SSuyog Pawar blk_x,
8097*c83a76b0SSuyog Pawar blk_y,
8098*c83a76b0SSuyog Pawar &s_mv_update_prms,
8099*c83a76b0SSuyog Pawar au1_pred_dir_searched,
8100*c83a76b0SSuyog Pawar i4_num_act_ref_l0);
8101*c83a76b0SSuyog Pawar
8102*c83a76b0SSuyog Pawar /*********************************************************************/
8103*c83a76b0SSuyog Pawar /* Map the best results to an MV Grid. This is a 18x18 grid that is */
8104*c83a76b0SSuyog Pawar /* useful for doing things like predictor for cost calculation or */
8105*c83a76b0SSuyog Pawar /* also for merge calculations if need be. */
8106*c83a76b0SSuyog Pawar /*********************************************************************/
8107*c83a76b0SSuyog Pawar hme_map_mvs_to_grid(
8108*c83a76b0SSuyog Pawar &aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir);
8109*c83a76b0SSuyog Pawar }
8110*c83a76b0SSuyog Pawar
8111*c83a76b0SSuyog Pawar /* Set the CU tree nodes appropriately */
8112*c83a76b0SSuyog Pawar if(e_me_quality_presets != ME_PRISTINE_QUALITY)
8113*c83a76b0SSuyog Pawar {
8114*c83a76b0SSuyog Pawar WORD32 i, j;
8115*c83a76b0SSuyog Pawar
8116*c83a76b0SSuyog Pawar for(i = 0; i < 16; i++)
8117*c83a76b0SSuyog Pawar {
8118*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_tree_node =
8119*c83a76b0SSuyog Pawar ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
8120*c83a76b0SSuyog Pawar search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i];
8121*c83a76b0SSuyog Pawar
8122*c83a76b0SSuyog Pawar switch(i >> 2)
8123*c83a76b0SSuyog Pawar {
8124*c83a76b0SSuyog Pawar case 0:
8125*c83a76b0SSuyog Pawar {
8126*c83a76b0SSuyog Pawar ps_tree_node = ps_tree_node->ps_child_node_tl;
8127*c83a76b0SSuyog Pawar
8128*c83a76b0SSuyog Pawar break;
8129*c83a76b0SSuyog Pawar }
8130*c83a76b0SSuyog Pawar case 1:
8131*c83a76b0SSuyog Pawar {
8132*c83a76b0SSuyog Pawar ps_tree_node = ps_tree_node->ps_child_node_tr;
8133*c83a76b0SSuyog Pawar
8134*c83a76b0SSuyog Pawar break;
8135*c83a76b0SSuyog Pawar }
8136*c83a76b0SSuyog Pawar case 2:
8137*c83a76b0SSuyog Pawar {
8138*c83a76b0SSuyog Pawar ps_tree_node = ps_tree_node->ps_child_node_bl;
8139*c83a76b0SSuyog Pawar
8140*c83a76b0SSuyog Pawar break;
8141*c83a76b0SSuyog Pawar }
8142*c83a76b0SSuyog Pawar case 3:
8143*c83a76b0SSuyog Pawar {
8144*c83a76b0SSuyog Pawar ps_tree_node = ps_tree_node->ps_child_node_br;
8145*c83a76b0SSuyog Pawar
8146*c83a76b0SSuyog Pawar break;
8147*c83a76b0SSuyog Pawar }
8148*c83a76b0SSuyog Pawar }
8149*c83a76b0SSuyog Pawar
8150*c83a76b0SSuyog Pawar switch(i % 4)
8151*c83a76b0SSuyog Pawar {
8152*c83a76b0SSuyog Pawar case 0:
8153*c83a76b0SSuyog Pawar {
8154*c83a76b0SSuyog Pawar ps_tree_node = ps_tree_node->ps_child_node_tl;
8155*c83a76b0SSuyog Pawar
8156*c83a76b0SSuyog Pawar break;
8157*c83a76b0SSuyog Pawar }
8158*c83a76b0SSuyog Pawar case 1:
8159*c83a76b0SSuyog Pawar {
8160*c83a76b0SSuyog Pawar ps_tree_node = ps_tree_node->ps_child_node_tr;
8161*c83a76b0SSuyog Pawar
8162*c83a76b0SSuyog Pawar break;
8163*c83a76b0SSuyog Pawar }
8164*c83a76b0SSuyog Pawar case 2:
8165*c83a76b0SSuyog Pawar {
8166*c83a76b0SSuyog Pawar ps_tree_node = ps_tree_node->ps_child_node_bl;
8167*c83a76b0SSuyog Pawar
8168*c83a76b0SSuyog Pawar break;
8169*c83a76b0SSuyog Pawar }
8170*c83a76b0SSuyog Pawar case 3:
8171*c83a76b0SSuyog Pawar {
8172*c83a76b0SSuyog Pawar ps_tree_node = ps_tree_node->ps_child_node_br;
8173*c83a76b0SSuyog Pawar
8174*c83a76b0SSuyog Pawar break;
8175*c83a76b0SSuyog Pawar }
8176*c83a76b0SSuyog Pawar }
8177*c83a76b0SSuyog Pawar
8178*c83a76b0SSuyog Pawar if(ai4_blk_8x8_mask[i] == 15)
8179*c83a76b0SSuyog Pawar {
8180*c83a76b0SSuyog Pawar if(!ps_results->u1_split_flag)
8181*c83a76b0SSuyog Pawar {
8182*c83a76b0SSuyog Pawar ps_tree_node->is_node_valid = 1;
8183*c83a76b0SSuyog Pawar NULLIFY_THE_CHILDREN_NODES(ps_tree_node);
8184*c83a76b0SSuyog Pawar }
8185*c83a76b0SSuyog Pawar else
8186*c83a76b0SSuyog Pawar {
8187*c83a76b0SSuyog Pawar ps_tree_node->is_node_valid = 0;
8188*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree_node);
8189*c83a76b0SSuyog Pawar }
8190*c83a76b0SSuyog Pawar }
8191*c83a76b0SSuyog Pawar else
8192*c83a76b0SSuyog Pawar {
8193*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_tree_child;
8194*c83a76b0SSuyog Pawar
8195*c83a76b0SSuyog Pawar ps_tree_node->is_node_valid = 0;
8196*c83a76b0SSuyog Pawar
8197*c83a76b0SSuyog Pawar for(j = 0; j < 4; j++)
8198*c83a76b0SSuyog Pawar {
8199*c83a76b0SSuyog Pawar switch(j)
8200*c83a76b0SSuyog Pawar {
8201*c83a76b0SSuyog Pawar case 0:
8202*c83a76b0SSuyog Pawar {
8203*c83a76b0SSuyog Pawar ps_tree_child = ps_tree_node->ps_child_node_tl;
8204*c83a76b0SSuyog Pawar
8205*c83a76b0SSuyog Pawar break;
8206*c83a76b0SSuyog Pawar }
8207*c83a76b0SSuyog Pawar case 1:
8208*c83a76b0SSuyog Pawar {
8209*c83a76b0SSuyog Pawar ps_tree_child = ps_tree_node->ps_child_node_tr;
8210*c83a76b0SSuyog Pawar
8211*c83a76b0SSuyog Pawar break;
8212*c83a76b0SSuyog Pawar }
8213*c83a76b0SSuyog Pawar case 2:
8214*c83a76b0SSuyog Pawar {
8215*c83a76b0SSuyog Pawar ps_tree_child = ps_tree_node->ps_child_node_bl;
8216*c83a76b0SSuyog Pawar
8217*c83a76b0SSuyog Pawar break;
8218*c83a76b0SSuyog Pawar }
8219*c83a76b0SSuyog Pawar case 3:
8220*c83a76b0SSuyog Pawar {
8221*c83a76b0SSuyog Pawar ps_tree_child = ps_tree_node->ps_child_node_br;
8222*c83a76b0SSuyog Pawar
8223*c83a76b0SSuyog Pawar break;
8224*c83a76b0SSuyog Pawar }
8225*c83a76b0SSuyog Pawar }
8226*c83a76b0SSuyog Pawar
8227*c83a76b0SSuyog Pawar ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j));
8228*c83a76b0SSuyog Pawar }
8229*c83a76b0SSuyog Pawar }
8230*c83a76b0SSuyog Pawar }
8231*c83a76b0SSuyog Pawar }
8232*c83a76b0SSuyog Pawar
8233*c83a76b0SSuyog Pawar if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8234*c83a76b0SSuyog Pawar {
8235*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root;
8236*c83a76b0SSuyog Pawar
8237*c83a76b0SSuyog Pawar hme_analyse_mv_clustering(
8238*c83a76b0SSuyog Pawar ps_ctxt->as_search_results_16x16,
8239*c83a76b0SSuyog Pawar ps_ctxt->as_cu16x16_results,
8240*c83a76b0SSuyog Pawar ps_ctxt->as_cu8x8_results,
8241*c83a76b0SSuyog Pawar ps_ctxt->ps_ctb_cluster_info,
8242*c83a76b0SSuyog Pawar ps_ctxt->ai1_future_list,
8243*c83a76b0SSuyog Pawar ps_ctxt->ai1_past_list,
8244*c83a76b0SSuyog Pawar ps_ctxt->s_frm_prms.bidir_enabled,
8245*c83a76b0SSuyog Pawar e_me_quality_presets);
8246*c83a76b0SSuyog Pawar
8247*c83a76b0SSuyog Pawar #if DISABLE_BLK_MERGE_WHEN_NOISY
8248*c83a76b0SSuyog Pawar ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0];
8249*c83a76b0SSuyog Pawar ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1];
8250*c83a76b0SSuyog Pawar ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2];
8251*c83a76b0SSuyog Pawar ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3];
8252*c83a76b0SSuyog Pawar ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0];
8253*c83a76b0SSuyog Pawar ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1];
8254*c83a76b0SSuyog Pawar ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2];
8255*c83a76b0SSuyog Pawar ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3];
8256*c83a76b0SSuyog Pawar ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0];
8257*c83a76b0SSuyog Pawar ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0];
8258*c83a76b0SSuyog Pawar #endif
8259*c83a76b0SSuyog Pawar
8260*c83a76b0SSuyog Pawar en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) |
8261*c83a76b0SSuyog Pawar (ps_tree->ps_child_node_tr->is_node_valid << 1) |
8262*c83a76b0SSuyog Pawar (ps_tree->ps_child_node_bl->is_node_valid << 2) |
8263*c83a76b0SSuyog Pawar (ps_tree->ps_child_node_br->is_node_valid << 3);
8264*c83a76b0SSuyog Pawar
8265*c83a76b0SSuyog Pawar en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) |
8266*c83a76b0SSuyog Pawar (ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) |
8267*c83a76b0SSuyog Pawar (ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) |
8268*c83a76b0SSuyog Pawar (ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) |
8269*c83a76b0SSuyog Pawar (ps_tree->u1_inter_eval_enable << 4);
8270*c83a76b0SSuyog Pawar }
8271*c83a76b0SSuyog Pawar else
8272*c83a76b0SSuyog Pawar {
8273*c83a76b0SSuyog Pawar en_merge_execution = 0x1f;
8274*c83a76b0SSuyog Pawar
8275*c83a76b0SSuyog Pawar #if DISABLE_BLK_MERGE_WHEN_NOISY
8276*c83a76b0SSuyog Pawar en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) |
8277*c83a76b0SSuyog Pawar ((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) |
8278*c83a76b0SSuyog Pawar ((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) |
8279*c83a76b0SSuyog Pawar ((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8));
8280*c83a76b0SSuyog Pawar #endif
8281*c83a76b0SSuyog Pawar }
8282*c83a76b0SSuyog Pawar
8283*c83a76b0SSuyog Pawar /* Re-initialize the pu_results pointers to the first struct in the stack array */
8284*c83a76b0SSuyog Pawar ps_pu_results = as_inter_pu_results;
8285*c83a76b0SSuyog Pawar
8286*c83a76b0SSuyog Pawar {
8287*c83a76b0SSuyog Pawar WORD32 ref_ctr;
8288*c83a76b0SSuyog Pawar
8289*c83a76b0SSuyog Pawar s_ctb_prms.i4_ctb_x = i4_ctb_x << 6;
8290*c83a76b0SSuyog Pawar s_ctb_prms.i4_ctb_y = i4_ctb_y << 6;
8291*c83a76b0SSuyog Pawar
8292*c83a76b0SSuyog Pawar /* MV limit is different based on ref. PIC */
8293*c83a76b0SSuyog Pawar for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8294*c83a76b0SSuyog Pawar {
8295*c83a76b0SSuyog Pawar SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
8296*c83a76b0SSuyog Pawar SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
8297*c83a76b0SSuyog Pawar }
8298*c83a76b0SSuyog Pawar
8299*c83a76b0SSuyog Pawar e_merge_result = CU_SPLIT;
8300*c83a76b0SSuyog Pawar merge_count_32x32 = 0;
8301*c83a76b0SSuyog Pawar
8302*c83a76b0SSuyog Pawar if((en_merge_32x32 & 1) && (en_merge_execution & 1))
8303*c83a76b0SSuyog Pawar {
8304*c83a76b0SSuyog Pawar range_prms_t *ps_pic_limit;
8305*c83a76b0SSuyog Pawar if(s_merge_prms_32x32_tl.i4_use_rec == 1)
8306*c83a76b0SSuyog Pawar {
8307*c83a76b0SSuyog Pawar ps_pic_limit = &s_pic_limit_rec;
8308*c83a76b0SSuyog Pawar }
8309*c83a76b0SSuyog Pawar else
8310*c83a76b0SSuyog Pawar {
8311*c83a76b0SSuyog Pawar ps_pic_limit = &s_pic_limit_inp;
8312*c83a76b0SSuyog Pawar }
8313*c83a76b0SSuyog Pawar /* MV limit is different based on ref. PIC */
8314*c83a76b0SSuyog Pawar for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8315*c83a76b0SSuyog Pawar {
8316*c83a76b0SSuyog Pawar hme_derive_search_range(
8317*c83a76b0SSuyog Pawar s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8318*c83a76b0SSuyog Pawar ps_pic_limit,
8319*c83a76b0SSuyog Pawar &as_mv_limit[ref_ctr],
8320*c83a76b0SSuyog Pawar i4_ctb_x << 6,
8321*c83a76b0SSuyog Pawar i4_ctb_y << 6,
8322*c83a76b0SSuyog Pawar 32,
8323*c83a76b0SSuyog Pawar 32);
8324*c83a76b0SSuyog Pawar
8325*c83a76b0SSuyog Pawar SCALE_RANGE_PRMS_POINTERS(
8326*c83a76b0SSuyog Pawar s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8327*c83a76b0SSuyog Pawar s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8328*c83a76b0SSuyog Pawar 2);
8329*c83a76b0SSuyog Pawar }
8330*c83a76b0SSuyog Pawar s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6;
8331*c83a76b0SSuyog Pawar s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6;
8332*c83a76b0SSuyog Pawar s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0];
8333*c83a76b0SSuyog Pawar
8334*c83a76b0SSuyog Pawar e_merge_result = hme_try_merge_high_speed(
8335*c83a76b0SSuyog Pawar ps_thrd_ctxt,
8336*c83a76b0SSuyog Pawar ps_ctxt,
8337*c83a76b0SSuyog Pawar ps_cur_ipe_ctb,
8338*c83a76b0SSuyog Pawar &s_subpel_prms,
8339*c83a76b0SSuyog Pawar &s_merge_prms_32x32_tl,
8340*c83a76b0SSuyog Pawar ps_pu_results,
8341*c83a76b0SSuyog Pawar &as_pu_results[0][0][0]);
8342*c83a76b0SSuyog Pawar
8343*c83a76b0SSuyog Pawar if(e_merge_result == CU_MERGED)
8344*c83a76b0SSuyog Pawar {
8345*c83a76b0SSuyog Pawar inter_cu_results_t *ps_cu_results =
8346*c83a76b0SSuyog Pawar s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results;
8347*c83a76b0SSuyog Pawar
8348*c83a76b0SSuyog Pawar if(!((ps_cu_results->u1_num_best_results == 1) &&
8349*c83a76b0SSuyog Pawar (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8350*c83a76b0SSuyog Pawar {
8351*c83a76b0SSuyog Pawar hme_map_mvs_to_grid(
8352*c83a76b0SSuyog Pawar &aps_mv_grid[0],
8353*c83a76b0SSuyog Pawar s_merge_prms_32x32_tl.ps_results_merge,
8354*c83a76b0SSuyog Pawar s_merge_prms_32x32_tl.au1_pred_dir_searched,
8355*c83a76b0SSuyog Pawar s_merge_prms_32x32_tl.i4_num_pred_dir_actual);
8356*c83a76b0SSuyog Pawar }
8357*c83a76b0SSuyog Pawar
8358*c83a76b0SSuyog Pawar if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8359*c83a76b0SSuyog Pawar {
8360*c83a76b0SSuyog Pawar ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8361*c83a76b0SSuyog Pawar .ps_child_node_tl->is_node_valid = 1;
8362*c83a76b0SSuyog Pawar NULLIFY_THE_CHILDREN_NODES(
8363*c83a76b0SSuyog Pawar ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8364*c83a76b0SSuyog Pawar .ps_child_node_tl);
8365*c83a76b0SSuyog Pawar }
8366*c83a76b0SSuyog Pawar
8367*c83a76b0SSuyog Pawar merge_count_32x32++;
8368*c83a76b0SSuyog Pawar e_merge_result = CU_SPLIT;
8369*c83a76b0SSuyog Pawar }
8370*c83a76b0SSuyog Pawar else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8371*c83a76b0SSuyog Pawar {
8372*c83a76b0SSuyog Pawar #if ENABLE_CU_TREE_CULLING
8373*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_tree =
8374*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8375*c83a76b0SSuyog Pawar
8376*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8377*c83a76b0SSuyog Pawar en_merge_execution = (en_merge_execution & (~(1 << 4)));
8378*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree);
8379*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8380*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8381*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8382*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8383*c83a76b0SSuyog Pawar #endif
8384*c83a76b0SSuyog Pawar }
8385*c83a76b0SSuyog Pawar }
8386*c83a76b0SSuyog Pawar else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1)))
8387*c83a76b0SSuyog Pawar {
8388*c83a76b0SSuyog Pawar #if ENABLE_CU_TREE_CULLING
8389*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_tree =
8390*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8391*c83a76b0SSuyog Pawar
8392*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree);
8393*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8394*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8395*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8396*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8397*c83a76b0SSuyog Pawar #endif
8398*c83a76b0SSuyog Pawar
8399*c83a76b0SSuyog Pawar if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY)
8400*c83a76b0SSuyog Pawar {
8401*c83a76b0SSuyog Pawar ps_tree->is_node_valid = 0;
8402*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8403*c83a76b0SSuyog Pawar en_merge_execution = (en_merge_execution & (~(1 << 4)));
8404*c83a76b0SSuyog Pawar }
8405*c83a76b0SSuyog Pawar }
8406*c83a76b0SSuyog Pawar
8407*c83a76b0SSuyog Pawar if((en_merge_32x32 & 2) && (en_merge_execution & 2))
8408*c83a76b0SSuyog Pawar {
8409*c83a76b0SSuyog Pawar range_prms_t *ps_pic_limit;
8410*c83a76b0SSuyog Pawar if(s_merge_prms_32x32_tr.i4_use_rec == 1)
8411*c83a76b0SSuyog Pawar {
8412*c83a76b0SSuyog Pawar ps_pic_limit = &s_pic_limit_rec;
8413*c83a76b0SSuyog Pawar }
8414*c83a76b0SSuyog Pawar else
8415*c83a76b0SSuyog Pawar {
8416*c83a76b0SSuyog Pawar ps_pic_limit = &s_pic_limit_inp;
8417*c83a76b0SSuyog Pawar }
8418*c83a76b0SSuyog Pawar /* MV limit is different based on ref. PIC */
8419*c83a76b0SSuyog Pawar for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8420*c83a76b0SSuyog Pawar {
8421*c83a76b0SSuyog Pawar hme_derive_search_range(
8422*c83a76b0SSuyog Pawar s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8423*c83a76b0SSuyog Pawar ps_pic_limit,
8424*c83a76b0SSuyog Pawar &as_mv_limit[ref_ctr],
8425*c83a76b0SSuyog Pawar (i4_ctb_x << 6) + 32,
8426*c83a76b0SSuyog Pawar i4_ctb_y << 6,
8427*c83a76b0SSuyog Pawar 32,
8428*c83a76b0SSuyog Pawar 32);
8429*c83a76b0SSuyog Pawar SCALE_RANGE_PRMS_POINTERS(
8430*c83a76b0SSuyog Pawar s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8431*c83a76b0SSuyog Pawar s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8432*c83a76b0SSuyog Pawar 2);
8433*c83a76b0SSuyog Pawar }
8434*c83a76b0SSuyog Pawar s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6;
8435*c83a76b0SSuyog Pawar s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6;
8436*c83a76b0SSuyog Pawar s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1];
8437*c83a76b0SSuyog Pawar
8438*c83a76b0SSuyog Pawar e_merge_result = hme_try_merge_high_speed(
8439*c83a76b0SSuyog Pawar ps_thrd_ctxt,
8440*c83a76b0SSuyog Pawar ps_ctxt,
8441*c83a76b0SSuyog Pawar ps_cur_ipe_ctb,
8442*c83a76b0SSuyog Pawar &s_subpel_prms,
8443*c83a76b0SSuyog Pawar &s_merge_prms_32x32_tr,
8444*c83a76b0SSuyog Pawar ps_pu_results,
8445*c83a76b0SSuyog Pawar &as_pu_results[0][0][0]);
8446*c83a76b0SSuyog Pawar
8447*c83a76b0SSuyog Pawar if(e_merge_result == CU_MERGED)
8448*c83a76b0SSuyog Pawar {
8449*c83a76b0SSuyog Pawar inter_cu_results_t *ps_cu_results =
8450*c83a76b0SSuyog Pawar s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results;
8451*c83a76b0SSuyog Pawar
8452*c83a76b0SSuyog Pawar if(!((ps_cu_results->u1_num_best_results == 1) &&
8453*c83a76b0SSuyog Pawar (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8454*c83a76b0SSuyog Pawar {
8455*c83a76b0SSuyog Pawar hme_map_mvs_to_grid(
8456*c83a76b0SSuyog Pawar &aps_mv_grid[0],
8457*c83a76b0SSuyog Pawar s_merge_prms_32x32_tr.ps_results_merge,
8458*c83a76b0SSuyog Pawar s_merge_prms_32x32_tr.au1_pred_dir_searched,
8459*c83a76b0SSuyog Pawar s_merge_prms_32x32_tr.i4_num_pred_dir_actual);
8460*c83a76b0SSuyog Pawar }
8461*c83a76b0SSuyog Pawar
8462*c83a76b0SSuyog Pawar if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8463*c83a76b0SSuyog Pawar {
8464*c83a76b0SSuyog Pawar ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8465*c83a76b0SSuyog Pawar .ps_child_node_tr->is_node_valid = 1;
8466*c83a76b0SSuyog Pawar NULLIFY_THE_CHILDREN_NODES(
8467*c83a76b0SSuyog Pawar ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8468*c83a76b0SSuyog Pawar .ps_child_node_tr);
8469*c83a76b0SSuyog Pawar }
8470*c83a76b0SSuyog Pawar
8471*c83a76b0SSuyog Pawar merge_count_32x32++;
8472*c83a76b0SSuyog Pawar e_merge_result = CU_SPLIT;
8473*c83a76b0SSuyog Pawar }
8474*c83a76b0SSuyog Pawar else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8475*c83a76b0SSuyog Pawar {
8476*c83a76b0SSuyog Pawar #if ENABLE_CU_TREE_CULLING
8477*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_tree =
8478*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8479*c83a76b0SSuyog Pawar
8480*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8481*c83a76b0SSuyog Pawar en_merge_execution = (en_merge_execution & (~(1 << 4)));
8482*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree);
8483*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8484*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8485*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8486*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8487*c83a76b0SSuyog Pawar #endif
8488*c83a76b0SSuyog Pawar }
8489*c83a76b0SSuyog Pawar }
8490*c83a76b0SSuyog Pawar else if((en_merge_32x32 & 2) && (!(en_merge_execution & 2)))
8491*c83a76b0SSuyog Pawar {
8492*c83a76b0SSuyog Pawar #if ENABLE_CU_TREE_CULLING
8493*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_tree =
8494*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8495*c83a76b0SSuyog Pawar
8496*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree);
8497*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8498*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8499*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8500*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8501*c83a76b0SSuyog Pawar #endif
8502*c83a76b0SSuyog Pawar
8503*c83a76b0SSuyog Pawar if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY)
8504*c83a76b0SSuyog Pawar {
8505*c83a76b0SSuyog Pawar ps_tree->is_node_valid = 0;
8506*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8507*c83a76b0SSuyog Pawar en_merge_execution = (en_merge_execution & (~(1 << 4)));
8508*c83a76b0SSuyog Pawar }
8509*c83a76b0SSuyog Pawar }
8510*c83a76b0SSuyog Pawar
8511*c83a76b0SSuyog Pawar if((en_merge_32x32 & 4) && (en_merge_execution & 4))
8512*c83a76b0SSuyog Pawar {
8513*c83a76b0SSuyog Pawar range_prms_t *ps_pic_limit;
8514*c83a76b0SSuyog Pawar if(s_merge_prms_32x32_bl.i4_use_rec == 1)
8515*c83a76b0SSuyog Pawar {
8516*c83a76b0SSuyog Pawar ps_pic_limit = &s_pic_limit_rec;
8517*c83a76b0SSuyog Pawar }
8518*c83a76b0SSuyog Pawar else
8519*c83a76b0SSuyog Pawar {
8520*c83a76b0SSuyog Pawar ps_pic_limit = &s_pic_limit_inp;
8521*c83a76b0SSuyog Pawar }
8522*c83a76b0SSuyog Pawar /* MV limit is different based on ref. PIC */
8523*c83a76b0SSuyog Pawar for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8524*c83a76b0SSuyog Pawar {
8525*c83a76b0SSuyog Pawar hme_derive_search_range(
8526*c83a76b0SSuyog Pawar s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8527*c83a76b0SSuyog Pawar ps_pic_limit,
8528*c83a76b0SSuyog Pawar &as_mv_limit[ref_ctr],
8529*c83a76b0SSuyog Pawar i4_ctb_x << 6,
8530*c83a76b0SSuyog Pawar (i4_ctb_y << 6) + 32,
8531*c83a76b0SSuyog Pawar 32,
8532*c83a76b0SSuyog Pawar 32);
8533*c83a76b0SSuyog Pawar SCALE_RANGE_PRMS_POINTERS(
8534*c83a76b0SSuyog Pawar s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8535*c83a76b0SSuyog Pawar s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8536*c83a76b0SSuyog Pawar 2);
8537*c83a76b0SSuyog Pawar }
8538*c83a76b0SSuyog Pawar s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6;
8539*c83a76b0SSuyog Pawar s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6;
8540*c83a76b0SSuyog Pawar s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2];
8541*c83a76b0SSuyog Pawar
8542*c83a76b0SSuyog Pawar e_merge_result = hme_try_merge_high_speed(
8543*c83a76b0SSuyog Pawar ps_thrd_ctxt,
8544*c83a76b0SSuyog Pawar ps_ctxt,
8545*c83a76b0SSuyog Pawar ps_cur_ipe_ctb,
8546*c83a76b0SSuyog Pawar &s_subpel_prms,
8547*c83a76b0SSuyog Pawar &s_merge_prms_32x32_bl,
8548*c83a76b0SSuyog Pawar ps_pu_results,
8549*c83a76b0SSuyog Pawar &as_pu_results[0][0][0]);
8550*c83a76b0SSuyog Pawar
8551*c83a76b0SSuyog Pawar if(e_merge_result == CU_MERGED)
8552*c83a76b0SSuyog Pawar {
8553*c83a76b0SSuyog Pawar inter_cu_results_t *ps_cu_results =
8554*c83a76b0SSuyog Pawar s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results;
8555*c83a76b0SSuyog Pawar
8556*c83a76b0SSuyog Pawar if(!((ps_cu_results->u1_num_best_results == 1) &&
8557*c83a76b0SSuyog Pawar (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8558*c83a76b0SSuyog Pawar {
8559*c83a76b0SSuyog Pawar hme_map_mvs_to_grid(
8560*c83a76b0SSuyog Pawar &aps_mv_grid[0],
8561*c83a76b0SSuyog Pawar s_merge_prms_32x32_bl.ps_results_merge,
8562*c83a76b0SSuyog Pawar s_merge_prms_32x32_bl.au1_pred_dir_searched,
8563*c83a76b0SSuyog Pawar s_merge_prms_32x32_bl.i4_num_pred_dir_actual);
8564*c83a76b0SSuyog Pawar }
8565*c83a76b0SSuyog Pawar
8566*c83a76b0SSuyog Pawar if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8567*c83a76b0SSuyog Pawar {
8568*c83a76b0SSuyog Pawar ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8569*c83a76b0SSuyog Pawar .ps_child_node_bl->is_node_valid = 1;
8570*c83a76b0SSuyog Pawar NULLIFY_THE_CHILDREN_NODES(
8571*c83a76b0SSuyog Pawar ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8572*c83a76b0SSuyog Pawar .ps_child_node_bl);
8573*c83a76b0SSuyog Pawar }
8574*c83a76b0SSuyog Pawar
8575*c83a76b0SSuyog Pawar merge_count_32x32++;
8576*c83a76b0SSuyog Pawar e_merge_result = CU_SPLIT;
8577*c83a76b0SSuyog Pawar }
8578*c83a76b0SSuyog Pawar else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8579*c83a76b0SSuyog Pawar {
8580*c83a76b0SSuyog Pawar #if ENABLE_CU_TREE_CULLING
8581*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_tree =
8582*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8583*c83a76b0SSuyog Pawar
8584*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8585*c83a76b0SSuyog Pawar en_merge_execution = (en_merge_execution & (~(1 << 4)));
8586*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree);
8587*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8588*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8589*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8590*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8591*c83a76b0SSuyog Pawar #endif
8592*c83a76b0SSuyog Pawar }
8593*c83a76b0SSuyog Pawar }
8594*c83a76b0SSuyog Pawar else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4)))
8595*c83a76b0SSuyog Pawar {
8596*c83a76b0SSuyog Pawar #if ENABLE_CU_TREE_CULLING
8597*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_tree =
8598*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8599*c83a76b0SSuyog Pawar
8600*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree);
8601*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8602*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8603*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8604*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8605*c83a76b0SSuyog Pawar #endif
8606*c83a76b0SSuyog Pawar
8607*c83a76b0SSuyog Pawar if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY)
8608*c83a76b0SSuyog Pawar {
8609*c83a76b0SSuyog Pawar ps_tree->is_node_valid = 0;
8610*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8611*c83a76b0SSuyog Pawar en_merge_execution = (en_merge_execution & (~(1 << 4)));
8612*c83a76b0SSuyog Pawar }
8613*c83a76b0SSuyog Pawar }
8614*c83a76b0SSuyog Pawar
8615*c83a76b0SSuyog Pawar if((en_merge_32x32 & 8) && (en_merge_execution & 8))
8616*c83a76b0SSuyog Pawar {
8617*c83a76b0SSuyog Pawar range_prms_t *ps_pic_limit;
8618*c83a76b0SSuyog Pawar if(s_merge_prms_32x32_br.i4_use_rec == 1)
8619*c83a76b0SSuyog Pawar {
8620*c83a76b0SSuyog Pawar ps_pic_limit = &s_pic_limit_rec;
8621*c83a76b0SSuyog Pawar }
8622*c83a76b0SSuyog Pawar else
8623*c83a76b0SSuyog Pawar {
8624*c83a76b0SSuyog Pawar ps_pic_limit = &s_pic_limit_inp;
8625*c83a76b0SSuyog Pawar }
8626*c83a76b0SSuyog Pawar /* MV limit is different based on ref. PIC */
8627*c83a76b0SSuyog Pawar for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8628*c83a76b0SSuyog Pawar {
8629*c83a76b0SSuyog Pawar hme_derive_search_range(
8630*c83a76b0SSuyog Pawar s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8631*c83a76b0SSuyog Pawar ps_pic_limit,
8632*c83a76b0SSuyog Pawar &as_mv_limit[ref_ctr],
8633*c83a76b0SSuyog Pawar (i4_ctb_x << 6) + 32,
8634*c83a76b0SSuyog Pawar (i4_ctb_y << 6) + 32,
8635*c83a76b0SSuyog Pawar 32,
8636*c83a76b0SSuyog Pawar 32);
8637*c83a76b0SSuyog Pawar
8638*c83a76b0SSuyog Pawar SCALE_RANGE_PRMS_POINTERS(
8639*c83a76b0SSuyog Pawar s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8640*c83a76b0SSuyog Pawar s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8641*c83a76b0SSuyog Pawar 2);
8642*c83a76b0SSuyog Pawar }
8643*c83a76b0SSuyog Pawar s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6;
8644*c83a76b0SSuyog Pawar s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6;
8645*c83a76b0SSuyog Pawar s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3];
8646*c83a76b0SSuyog Pawar
8647*c83a76b0SSuyog Pawar e_merge_result = hme_try_merge_high_speed(
8648*c83a76b0SSuyog Pawar ps_thrd_ctxt,
8649*c83a76b0SSuyog Pawar ps_ctxt,
8650*c83a76b0SSuyog Pawar ps_cur_ipe_ctb,
8651*c83a76b0SSuyog Pawar &s_subpel_prms,
8652*c83a76b0SSuyog Pawar &s_merge_prms_32x32_br,
8653*c83a76b0SSuyog Pawar ps_pu_results,
8654*c83a76b0SSuyog Pawar &as_pu_results[0][0][0]);
8655*c83a76b0SSuyog Pawar
8656*c83a76b0SSuyog Pawar if(e_merge_result == CU_MERGED)
8657*c83a76b0SSuyog Pawar {
8658*c83a76b0SSuyog Pawar /*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results;
8659*c83a76b0SSuyog Pawar
8660*c83a76b0SSuyog Pawar if(!((ps_cu_results->u1_num_best_results == 1) &&
8661*c83a76b0SSuyog Pawar (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8662*c83a76b0SSuyog Pawar {
8663*c83a76b0SSuyog Pawar hme_map_mvs_to_grid
8664*c83a76b0SSuyog Pawar (
8665*c83a76b0SSuyog Pawar &aps_mv_grid[0],
8666*c83a76b0SSuyog Pawar s_merge_prms_32x32_br.ps_results_merge,
8667*c83a76b0SSuyog Pawar s_merge_prms_32x32_br.au1_pred_dir_searched,
8668*c83a76b0SSuyog Pawar s_merge_prms_32x32_br.i4_num_pred_dir_actual
8669*c83a76b0SSuyog Pawar );
8670*c83a76b0SSuyog Pawar }*/
8671*c83a76b0SSuyog Pawar
8672*c83a76b0SSuyog Pawar if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8673*c83a76b0SSuyog Pawar {
8674*c83a76b0SSuyog Pawar ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8675*c83a76b0SSuyog Pawar .ps_child_node_br->is_node_valid = 1;
8676*c83a76b0SSuyog Pawar NULLIFY_THE_CHILDREN_NODES(
8677*c83a76b0SSuyog Pawar ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8678*c83a76b0SSuyog Pawar .ps_child_node_br);
8679*c83a76b0SSuyog Pawar }
8680*c83a76b0SSuyog Pawar
8681*c83a76b0SSuyog Pawar merge_count_32x32++;
8682*c83a76b0SSuyog Pawar e_merge_result = CU_SPLIT;
8683*c83a76b0SSuyog Pawar }
8684*c83a76b0SSuyog Pawar else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8685*c83a76b0SSuyog Pawar {
8686*c83a76b0SSuyog Pawar #if ENABLE_CU_TREE_CULLING
8687*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_tree =
8688*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8689*c83a76b0SSuyog Pawar
8690*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8691*c83a76b0SSuyog Pawar en_merge_execution = (en_merge_execution & (~(1 << 4)));
8692*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree);
8693*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8694*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8695*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8696*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8697*c83a76b0SSuyog Pawar #endif
8698*c83a76b0SSuyog Pawar }
8699*c83a76b0SSuyog Pawar }
8700*c83a76b0SSuyog Pawar else if((en_merge_32x32 & 8) && (!(en_merge_execution & 8)))
8701*c83a76b0SSuyog Pawar {
8702*c83a76b0SSuyog Pawar #if ENABLE_CU_TREE_CULLING
8703*c83a76b0SSuyog Pawar cur_ctb_cu_tree_t *ps_tree =
8704*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8705*c83a76b0SSuyog Pawar
8706*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree);
8707*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8708*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8709*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8710*c83a76b0SSuyog Pawar ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8711*c83a76b0SSuyog Pawar #endif
8712*c83a76b0SSuyog Pawar
8713*c83a76b0SSuyog Pawar if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY)
8714*c83a76b0SSuyog Pawar {
8715*c83a76b0SSuyog Pawar ps_tree->is_node_valid = 0;
8716*c83a76b0SSuyog Pawar ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8717*c83a76b0SSuyog Pawar en_merge_execution = (en_merge_execution & (~(1 << 4)));
8718*c83a76b0SSuyog Pawar }
8719*c83a76b0SSuyog Pawar }
8720*c83a76b0SSuyog Pawar
8721*c83a76b0SSuyog Pawar /* Try merging all 32x32 to 64x64 candts */
8722*c83a76b0SSuyog Pawar if(((en_merge_32x32 & 0xf) == 0xf) &&
8723*c83a76b0SSuyog Pawar (((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) ||
8724*c83a76b0SSuyog Pawar ((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY))))
8725*c83a76b0SSuyog Pawar if((((e_me_quality_presets == ME_XTREME_SPEED_25) &&
8726*c83a76b0SSuyog Pawar !DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) ||
8727*c83a76b0SSuyog Pawar (e_me_quality_presets != ME_XTREME_SPEED_25)))
8728*c83a76b0SSuyog Pawar {
8729*c83a76b0SSuyog Pawar range_prms_t *ps_pic_limit;
8730*c83a76b0SSuyog Pawar if(s_merge_prms_64x64.i4_use_rec == 1)
8731*c83a76b0SSuyog Pawar {
8732*c83a76b0SSuyog Pawar ps_pic_limit = &s_pic_limit_rec;
8733*c83a76b0SSuyog Pawar }
8734*c83a76b0SSuyog Pawar else
8735*c83a76b0SSuyog Pawar {
8736*c83a76b0SSuyog Pawar ps_pic_limit = &s_pic_limit_inp;
8737*c83a76b0SSuyog Pawar }
8738*c83a76b0SSuyog Pawar /* MV limit is different based on ref. PIC */
8739*c83a76b0SSuyog Pawar for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8740*c83a76b0SSuyog Pawar {
8741*c83a76b0SSuyog Pawar hme_derive_search_range(
8742*c83a76b0SSuyog Pawar s_merge_prms_64x64.aps_mv_range[ref_ctr],
8743*c83a76b0SSuyog Pawar ps_pic_limit,
8744*c83a76b0SSuyog Pawar &as_mv_limit[ref_ctr],
8745*c83a76b0SSuyog Pawar i4_ctb_x << 6,
8746*c83a76b0SSuyog Pawar i4_ctb_y << 6,
8747*c83a76b0SSuyog Pawar 64,
8748*c83a76b0SSuyog Pawar 64);
8749*c83a76b0SSuyog Pawar
8750*c83a76b0SSuyog Pawar SCALE_RANGE_PRMS_POINTERS(
8751*c83a76b0SSuyog Pawar s_merge_prms_64x64.aps_mv_range[ref_ctr],
8752*c83a76b0SSuyog Pawar s_merge_prms_64x64.aps_mv_range[ref_ctr],
8753*c83a76b0SSuyog Pawar 2);
8754*c83a76b0SSuyog Pawar }
8755*c83a76b0SSuyog Pawar s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6;
8756*c83a76b0SSuyog Pawar s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6;
8757*c83a76b0SSuyog Pawar s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0];
8758*c83a76b0SSuyog Pawar
8759*c83a76b0SSuyog Pawar e_merge_result = hme_try_merge_high_speed(
8760*c83a76b0SSuyog Pawar ps_thrd_ctxt,
8761*c83a76b0SSuyog Pawar ps_ctxt,
8762*c83a76b0SSuyog Pawar ps_cur_ipe_ctb,
8763*c83a76b0SSuyog Pawar &s_subpel_prms,
8764*c83a76b0SSuyog Pawar &s_merge_prms_64x64,
8765*c83a76b0SSuyog Pawar ps_pu_results,
8766*c83a76b0SSuyog Pawar &as_pu_results[0][0][0]);
8767*c83a76b0SSuyog Pawar
8768*c83a76b0SSuyog Pawar if((e_merge_result == CU_MERGED) &&
8769*c83a76b0SSuyog Pawar (ME_PRISTINE_QUALITY != e_me_quality_presets))
8770*c83a76b0SSuyog Pawar {
8771*c83a76b0SSuyog Pawar ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8772*c83a76b0SSuyog Pawar .is_node_valid = 1;
8773*c83a76b0SSuyog Pawar NULLIFY_THE_CHILDREN_NODES(
8774*c83a76b0SSuyog Pawar ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE));
8775*c83a76b0SSuyog Pawar }
8776*c83a76b0SSuyog Pawar else if(
8777*c83a76b0SSuyog Pawar (e_merge_result == CU_SPLIT) &&
8778*c83a76b0SSuyog Pawar (ME_PRISTINE_QUALITY == e_me_quality_presets))
8779*c83a76b0SSuyog Pawar {
8780*c83a76b0SSuyog Pawar ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8781*c83a76b0SSuyog Pawar .is_node_valid = 0;
8782*c83a76b0SSuyog Pawar }
8783*c83a76b0SSuyog Pawar }
8784*c83a76b0SSuyog Pawar
8785*c83a76b0SSuyog Pawar /*****************************************************************/
8786*c83a76b0SSuyog Pawar /* UPDATION OF RESULT TO EXTERNAL STRUCTURES */
8787*c83a76b0SSuyog Pawar /*****************************************************************/
8788*c83a76b0SSuyog Pawar pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y);
8789*c83a76b0SSuyog Pawar
8790*c83a76b0SSuyog Pawar {
8791*c83a76b0SSuyog Pawar #ifdef _DEBUG
8792*c83a76b0SSuyog Pawar S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
8793*c83a76b0SSuyog Pawar ? 64
8794*c83a76b0SSuyog Pawar : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
8795*c83a76b0SSuyog Pawar S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
8796*c83a76b0SSuyog Pawar ? 64
8797*c83a76b0SSuyog Pawar : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
8798*c83a76b0SSuyog Pawar ASSERT(
8799*c83a76b0SSuyog Pawar (wd * ht) ==
8800*c83a76b0SSuyog Pawar ihevce_compute_area_of_valid_cus_in_ctb(
8801*c83a76b0SSuyog Pawar &ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]));
8802*c83a76b0SSuyog Pawar #endif
8803*c83a76b0SSuyog Pawar }
8804*c83a76b0SSuyog Pawar }
8805*c83a76b0SSuyog Pawar
8806*c83a76b0SSuyog Pawar /* set the dependency for the corresponding row in enc loop */
8807*c83a76b0SSuyog Pawar ihevce_dmgr_set_row_row_sync(
8808*c83a76b0SSuyog Pawar pv_dep_mngr_encloop_dep_me,
8809*c83a76b0SSuyog Pawar (i4_ctb_x + 1),
8810*c83a76b0SSuyog Pawar i4_ctb_y,
8811*c83a76b0SSuyog Pawar tile_col_idx /* Col Tile No. */);
8812*c83a76b0SSuyog Pawar
8813*c83a76b0SSuyog Pawar left_ctb_in_diff_tile = 0;
8814*c83a76b0SSuyog Pawar }
8815*c83a76b0SSuyog Pawar }
8816*c83a76b0SSuyog Pawar }
8817*c83a76b0SSuyog Pawar
8818*c83a76b0SSuyog Pawar /**
8819*c83a76b0SSuyog Pawar ********************************************************************************
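8820*c83a76b0SSuyog Pawar * @fn void hme_refine_no_encode(coarse_me_ctxt_t *ps_ctxt, refine_prms_t *ps_refine_prms,
8821*c83a76b0SSuyog Pawar * multi_thrd_ctxt_t *ps_multi_thrd_ctxt, S32 lyr_job_type, WORD32 i4_ping_pong, void **ppv_dep_mngr_hme_sync)
8822*c83a76b0SSuyog Pawar * @brief Top level entry point for refinement ME of a layer that is not encoded (8x8 blk search)
8823*c83a76b0SSuyog Pawar * @param[in,out] ps_ctxt: ME Handle
8824*c83a76b0SSuyog Pawar * @param[in] ps_refine_prms : refinement layer prms
8825*c83a76b0SSuyog Pawar * @param[in] ps_multi_thrd_ctxt : multi thread ctxt; the current pre-enc input is read from it
8826*c83a76b0SSuyog Pawar * @param[in] lyr_job_type : job type of this layer (usage not documented here; TODO confirm)
8827*c83a76b0SSuyog Pawar * @param[in] i4_ping_pong : index of the aps_curr_inp_pre_enc entry used in ps_multi_thrd_ctxt
8828*c83a76b0SSuyog Pawar * @param[in] ppv_dep_mngr_hme_sync : presumably dependency manager handles for HME sync (confirm)
8829*c83a76b0SSuyog Pawar * @return None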
8830*c83a76b0SSuyog Pawar ********************************************************************************
8831*c83a76b0SSuyog Pawar */
8832*c83a76b0SSuyog Pawar void hme_refine_no_encode(
8833*c83a76b0SSuyog Pawar coarse_me_ctxt_t *ps_ctxt,
8834*c83a76b0SSuyog Pawar refine_prms_t *ps_refine_prms,
8835*c83a76b0SSuyog Pawar multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
8836*c83a76b0SSuyog Pawar S32 lyr_job_type,
8837*c83a76b0SSuyog Pawar WORD32 i4_ping_pong,
8838*c83a76b0SSuyog Pawar void **ppv_dep_mngr_hme_sync)
8839*c83a76b0SSuyog Pawar {
8840*c83a76b0SSuyog Pawar BLK_SIZE_T e_search_blk_size, e_result_blk_size;
8841*c83a76b0SSuyog Pawar ME_QUALITY_PRESETS_T e_me_quality_presets =
8842*c83a76b0SSuyog Pawar ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
8843*c83a76b0SSuyog Pawar
8844*c83a76b0SSuyog Pawar /*************************************************************************/
8845*c83a76b0SSuyog Pawar /* Complexity of search: Low to High */
8846*c83a76b0SSuyog Pawar /*************************************************************************/
8847*c83a76b0SSuyog Pawar SEARCH_COMPLEXITY_T e_search_complexity;
8848*c83a76b0SSuyog Pawar
8849*c83a76b0SSuyog Pawar /*************************************************************************/
8850*c83a76b0SSuyog Pawar /* Config parameter structures for various ME submodules */
8851*c83a76b0SSuyog Pawar /*************************************************************************/
8852*c83a76b0SSuyog Pawar hme_search_prms_t s_search_prms_blk;
8853*c83a76b0SSuyog Pawar mvbank_update_prms_t s_mv_update_prms;
8854*c83a76b0SSuyog Pawar
8855*c83a76b0SSuyog Pawar /*************************************************************************/
8856*c83a76b0SSuyog Pawar /* All types of search candidates for predictor based search. */
8857*c83a76b0SSuyog Pawar /*************************************************************************/
8858*c83a76b0SSuyog Pawar S32 num_init_candts = 0;
8859*c83a76b0SSuyog Pawar search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
8860*c83a76b0SSuyog Pawar search_node_t as_top_neighbours[4], as_left_neighbours[3];
8861*c83a76b0SSuyog Pawar search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr;
8862*c83a76b0SSuyog Pawar search_node_t *ps_candt_l, *ps_candt_t;
8863*c83a76b0SSuyog Pawar search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2];
8864*c83a76b0SSuyog Pawar search_node_t *ps_candt_prj_bl[2];
8865*c83a76b0SSuyog Pawar search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2];
8866*c83a76b0SSuyog Pawar search_node_t *ps_candt_prj_coloc[2];
8867*c83a76b0SSuyog Pawar
8868*c83a76b0SSuyog Pawar pf_get_wt_inp fp_get_wt_inp;
8869*c83a76b0SSuyog Pawar
8870*c83a76b0SSuyog Pawar search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
8871*c83a76b0SSuyog Pawar U32 au4_unique_node_map[MAP_X_MAX * 2];
8872*c83a76b0SSuyog Pawar
8873*c83a76b0SSuyog Pawar /*EIID */
8874*c83a76b0SSuyog Pawar WORD32 i4_num_inter_wins = 0; //debug code to find stat of
8875*c83a76b0SSuyog Pawar WORD32 i4_num_comparisions = 0; //debug code
8876*c83a76b0SSuyog Pawar WORD32 i4_threshold_multiplier;
8877*c83a76b0SSuyog Pawar WORD32 i4_threshold_divider;
8878*c83a76b0SSuyog Pawar WORD32 i4_temporal_layer =
8879*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id;
8880*c83a76b0SSuyog Pawar
8881*c83a76b0SSuyog Pawar /*************************************************************************/
8882*c83a76b0SSuyog Pawar /* points to the search results for the blk level search (8x8/16x16) */
8883*c83a76b0SSuyog Pawar /*************************************************************************/
8884*c83a76b0SSuyog Pawar search_results_t *ps_search_results;
8885*c83a76b0SSuyog Pawar
8886*c83a76b0SSuyog Pawar /*************************************************************************/
8887*c83a76b0SSuyog Pawar /* Coordinates */
8888*c83a76b0SSuyog Pawar /*************************************************************************/
8889*c83a76b0SSuyog Pawar S32 blk_x, i4_ctb_x, blk_id_in_ctb;
8890*c83a76b0SSuyog Pawar //S32 i4_ctb_y;
8891*c83a76b0SSuyog Pawar S32 pos_x, pos_y;
8892*c83a76b0SSuyog Pawar S32 blk_id_in_full_ctb;
8893*c83a76b0SSuyog Pawar S32 i4_num_srch_cands;
8894*c83a76b0SSuyog Pawar
8895*c83a76b0SSuyog Pawar S32 blk_y;
8896*c83a76b0SSuyog Pawar
8897*c83a76b0SSuyog Pawar /*************************************************************************/
8898*c83a76b0SSuyog Pawar /* Related to dimensions of block being searched and pic dimensions */
8899*c83a76b0SSuyog Pawar /*************************************************************************/
8900*c83a76b0SSuyog Pawar S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic;
8901*c83a76b0SSuyog Pawar S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
8902*c83a76b0SSuyog Pawar S32 num_results_prev_layer;
8903*c83a76b0SSuyog Pawar
8904*c83a76b0SSuyog Pawar /*************************************************************************/
8905*c83a76b0SSuyog Pawar /* Size of a basic unit for this layer. For non encode layers, we search */
8906*c83a76b0SSuyog Pawar /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
8907*c83a76b0SSuyog Pawar /* basic unit size is the ctb size. */
8908*c83a76b0SSuyog Pawar /*************************************************************************/
8909*c83a76b0SSuyog Pawar S32 unit_size;
8910*c83a76b0SSuyog Pawar
8911*c83a76b0SSuyog Pawar /*************************************************************************/
8912*c83a76b0SSuyog Pawar /* Pointers to context in current and coarser layers */
8913*c83a76b0SSuyog Pawar /*************************************************************************/
8914*c83a76b0SSuyog Pawar layer_ctxt_t *ps_curr_layer, *ps_coarse_layer;
8915*c83a76b0SSuyog Pawar
8916*c83a76b0SSuyog Pawar /*************************************************************************/
8917*c83a76b0SSuyog Pawar /* to store mv range per blk, and picture limit, allowed search range */
8918*c83a76b0SSuyog Pawar /* range prms in hpel and qpel units as well */
8919*c83a76b0SSuyog Pawar /*************************************************************************/
8920*c83a76b0SSuyog Pawar range_prms_t s_range_prms_inp, s_range_prms_rec;
8921*c83a76b0SSuyog Pawar range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
8922*c83a76b0SSuyog Pawar /*************************************************************************/
8923*c83a76b0SSuyog Pawar /* These variables are used to track number of references at different */
8924*c83a76b0SSuyog Pawar /* stages of ME. */
8925*c83a76b0SSuyog Pawar /*************************************************************************/
8926*c83a76b0SSuyog Pawar S32 i4_num_ref_fpel, i4_num_ref_before_merge;
8927*c83a76b0SSuyog Pawar S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
8928*c83a76b0SSuyog Pawar S32 lambda_inp = ps_refine_prms->lambda_inp;
8929*c83a76b0SSuyog Pawar
8930*c83a76b0SSuyog Pawar /*************************************************************************/
8931*c83a76b0SSuyog Pawar /* When a layer is implicit, it means that it searches on 1 or 2 ref idx */
8932*c83a76b0SSuyog Pawar /* Explicit means it searches on all active ref idx. */
8933*c83a76b0SSuyog Pawar /*************************************************************************/
8934*c83a76b0SSuyog Pawar S32 curr_layer_implicit, prev_layer_implicit;
8935*c83a76b0SSuyog Pawar
8936*c83a76b0SSuyog Pawar /*************************************************************************/
8937*c83a76b0SSuyog Pawar /* Variables for loop counts */
8938*c83a76b0SSuyog Pawar /*************************************************************************/
8939*c83a76b0SSuyog Pawar S32 id;
8940*c83a76b0SSuyog Pawar S08 i1_ref_idx;
8941*c83a76b0SSuyog Pawar
8942*c83a76b0SSuyog Pawar /*************************************************************************/
8943*c83a76b0SSuyog Pawar /* Input pointer and stride */
8944*c83a76b0SSuyog Pawar /*************************************************************************/
8945*c83a76b0SSuyog Pawar U08 *pu1_inp;
8946*c83a76b0SSuyog Pawar S32 i4_inp_stride;
8947*c83a76b0SSuyog Pawar
8948*c83a76b0SSuyog Pawar S32 end_of_frame;
8949*c83a76b0SSuyog Pawar
8950*c83a76b0SSuyog Pawar S32 num_sync_units_in_row;
8951*c83a76b0SSuyog Pawar
8952*c83a76b0SSuyog Pawar PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt;
8953*c83a76b0SSuyog Pawar ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
8954*c83a76b0SSuyog Pawar
8955*c83a76b0SSuyog Pawar /*************************************************************************/
8956*c83a76b0SSuyog Pawar /* Pointers to current and coarse layer are needed for projection */
8957*c83a76b0SSuyog Pawar /* Pointer to prev layer are needed for other candts like coloc */
8958*c83a76b0SSuyog Pawar /*************************************************************************/
8959*c83a76b0SSuyog Pawar ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
8960*c83a76b0SSuyog Pawar
8961*c83a76b0SSuyog Pawar ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1];
8962*c83a76b0SSuyog Pawar
8963*c83a76b0SSuyog Pawar num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
8964*c83a76b0SSuyog Pawar
8965*c83a76b0SSuyog Pawar /* Function pointer is selected based on the C vs X86 macro */
8966*c83a76b0SSuyog Pawar
8967*c83a76b0SSuyog Pawar fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
8968*c83a76b0SSuyog Pawar ->pf_get_wt_inp_8x8;
8969*c83a76b0SSuyog Pawar
8970*c83a76b0SSuyog Pawar i4_inp_stride = ps_curr_layer->i4_inp_stride;
8971*c83a76b0SSuyog Pawar i4_pic_wd = ps_curr_layer->i4_wd;
8972*c83a76b0SSuyog Pawar i4_pic_ht = ps_curr_layer->i4_ht;
8973*c83a76b0SSuyog Pawar e_search_complexity = ps_refine_prms->e_search_complexity;
8974*c83a76b0SSuyog Pawar
8975*c83a76b0SSuyog Pawar end_of_frame = 0;
8976*c83a76b0SSuyog Pawar
8977*c83a76b0SSuyog Pawar /* If the previous layer is non-encode layer, then use dyadic projection */
8978*c83a76b0SSuyog Pawar if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1])
8979*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic;
8980*c83a76b0SSuyog Pawar else
8981*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt = hme_project_coloc_candt;
8982*c83a76b0SSuyog Pawar
8983*c83a76b0SSuyog Pawar /* This points to all the initial candts */
8984*c83a76b0SSuyog Pawar ps_search_candts = &as_search_candts[0];
8985*c83a76b0SSuyog Pawar
8986*c83a76b0SSuyog Pawar {
8987*c83a76b0SSuyog Pawar e_search_blk_size = BLK_8x8;
8988*c83a76b0SSuyog Pawar blk_wd = blk_ht = 8;
8989*c83a76b0SSuyog Pawar blk_size_shift = 3;
8990*c83a76b0SSuyog Pawar s_mv_update_prms.i4_shift = 0;
8991*c83a76b0SSuyog Pawar /*********************************************************************/
8992*c83a76b0SSuyog Pawar /* In case we do not encode this layer, we search 8x8 with or without*/
8993*c83a76b0SSuyog Pawar /* enable 4x4 SAD. */
8994*c83a76b0SSuyog Pawar /*********************************************************************/
8995*c83a76b0SSuyog Pawar {
8996*c83a76b0SSuyog Pawar S32 i4_mask = (ENABLE_2Nx2N);
8997*c83a76b0SSuyog Pawar
8998*c83a76b0SSuyog Pawar e_result_blk_size = BLK_8x8;
8999*c83a76b0SSuyog Pawar if(ps_refine_prms->i4_enable_4x4_part)
9000*c83a76b0SSuyog Pawar {
9001*c83a76b0SSuyog Pawar i4_mask |= (ENABLE_NxN);
9002*c83a76b0SSuyog Pawar e_result_blk_size = BLK_4x4;
9003*c83a76b0SSuyog Pawar s_mv_update_prms.i4_shift = 1;
9004*c83a76b0SSuyog Pawar }
9005*c83a76b0SSuyog Pawar
9006*c83a76b0SSuyog Pawar s_search_prms_blk.i4_part_mask = i4_mask;
9007*c83a76b0SSuyog Pawar }
9008*c83a76b0SSuyog Pawar
9009*c83a76b0SSuyog Pawar unit_size = blk_wd;
9010*c83a76b0SSuyog Pawar s_search_prms_blk.i4_inp_stride = unit_size;
9011*c83a76b0SSuyog Pawar }
9012*c83a76b0SSuyog Pawar
9013*c83a76b0SSuyog Pawar /* This is required to properly update the layer mv bank */
9014*c83a76b0SSuyog Pawar s_mv_update_prms.e_search_blk_size = e_search_blk_size;
9015*c83a76b0SSuyog Pawar s_search_prms_blk.e_blk_size = e_search_blk_size;
9016*c83a76b0SSuyog Pawar
9017*c83a76b0SSuyog Pawar /*************************************************************************/
9018*c83a76b0SSuyog Pawar /* If current layer is explicit, then the number of ref frames are to */
9019*c83a76b0SSuyog Pawar /* be same as previous layer. Else it will be 2 */
9020*c83a76b0SSuyog Pawar /*************************************************************************/
9021*c83a76b0SSuyog Pawar i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
9022*c83a76b0SSuyog Pawar if(ps_refine_prms->explicit_ref)
9023*c83a76b0SSuyog Pawar {
9024*c83a76b0SSuyog Pawar curr_layer_implicit = 0;
9025*c83a76b0SSuyog Pawar i4_num_ref_fpel = i4_num_ref_prev_layer;
9026*c83a76b0SSuyog Pawar /* 100578 : Using same mv cost fun. for all presets. */
9027*c83a76b0SSuyog Pawar s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine;
9028*c83a76b0SSuyog Pawar }
9029*c83a76b0SSuyog Pawar else
9030*c83a76b0SSuyog Pawar {
9031*c83a76b0SSuyog Pawar i4_num_ref_fpel = 2;
9032*c83a76b0SSuyog Pawar curr_layer_implicit = 1;
9033*c83a76b0SSuyog Pawar {
9034*c83a76b0SSuyog Pawar if(ME_MEDIUM_SPEED > e_me_quality_presets)
9035*c83a76b0SSuyog Pawar {
9036*c83a76b0SSuyog Pawar s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit;
9037*c83a76b0SSuyog Pawar }
9038*c83a76b0SSuyog Pawar else
9039*c83a76b0SSuyog Pawar {
9040*c83a76b0SSuyog Pawar #if USE_MODIFIED == 1
9041*c83a76b0SSuyog Pawar s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
9042*c83a76b0SSuyog Pawar #else
9043*c83a76b0SSuyog Pawar s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
9044*c83a76b0SSuyog Pawar #endif
9045*c83a76b0SSuyog Pawar }
9046*c83a76b0SSuyog Pawar }
9047*c83a76b0SSuyog Pawar }
9048*c83a76b0SSuyog Pawar
9049*c83a76b0SSuyog Pawar i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
9050*c83a76b0SSuyog Pawar if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type ==
9051*c83a76b0SSuyog Pawar IV_IDR_FRAME ||
9052*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME)
9053*c83a76b0SSuyog Pawar {
9054*c83a76b0SSuyog Pawar i4_num_ref_fpel = 1;
9055*c83a76b0SSuyog Pawar }
9056*c83a76b0SSuyog Pawar if(i4_num_ref_prev_layer <= 2)
9057*c83a76b0SSuyog Pawar {
9058*c83a76b0SSuyog Pawar prev_layer_implicit = 1;
9059*c83a76b0SSuyog Pawar curr_layer_implicit = 1;
9060*c83a76b0SSuyog Pawar i4_num_ref_each_dir = 1;
9061*c83a76b0SSuyog Pawar }
9062*c83a76b0SSuyog Pawar else
9063*c83a76b0SSuyog Pawar {
9064*c83a76b0SSuyog Pawar /* It is assumed that we have equal number of references in each dir */
9065*c83a76b0SSuyog Pawar //ASSERT(!(i4_num_ref_prev_layer & 1));
9066*c83a76b0SSuyog Pawar prev_layer_implicit = 0;
9067*c83a76b0SSuyog Pawar i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
9068*c83a76b0SSuyog Pawar }
9069*c83a76b0SSuyog Pawar s_mv_update_prms.i4_num_ref = i4_num_ref_fpel;
9070*c83a76b0SSuyog Pawar s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
9071*c83a76b0SSuyog Pawar s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
9072*c83a76b0SSuyog Pawar
9073*c83a76b0SSuyog Pawar /* this can be kept to 1 or 2 */
9074*c83a76b0SSuyog Pawar i4_num_ref_before_merge = 2;
9075*c83a76b0SSuyog Pawar i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel);
9076*c83a76b0SSuyog Pawar
9077*c83a76b0SSuyog Pawar /* Set up place holders to hold the search nodes of each initial candt */
9078*c83a76b0SSuyog Pawar for(i = 0; i < MAX_INIT_CANDTS; i++)
9079*c83a76b0SSuyog Pawar {
9080*c83a76b0SSuyog Pawar ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
9081*c83a76b0SSuyog Pawar INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
9082*c83a76b0SSuyog Pawar }
9083*c83a76b0SSuyog Pawar
9084*c83a76b0SSuyog Pawar /* redundant, but doing it here since it is used in pred ctxt init */
9085*c83a76b0SSuyog Pawar ps_candt_zeromv = ps_search_candts[0].ps_search_node;
9086*c83a76b0SSuyog Pawar for(i = 0; i < 3; i++)
9087*c83a76b0SSuyog Pawar {
9088*c83a76b0SSuyog Pawar search_node_t *ps_search_node;
9089*c83a76b0SSuyog Pawar ps_search_node = &as_left_neighbours[i];
9090*c83a76b0SSuyog Pawar INIT_SEARCH_NODE(ps_search_node, 0);
9091*c83a76b0SSuyog Pawar ps_search_node = &as_top_neighbours[i];
9092*c83a76b0SSuyog Pawar INIT_SEARCH_NODE(ps_search_node, 0);
9093*c83a76b0SSuyog Pawar }
9094*c83a76b0SSuyog Pawar
9095*c83a76b0SSuyog Pawar INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
9096*c83a76b0SSuyog Pawar /* bottom left node always not available for the blk being searched */
9097*c83a76b0SSuyog Pawar as_left_neighbours[2].u1_is_avail = 0;
9098*c83a76b0SSuyog Pawar /*************************************************************************/
9099*c83a76b0SSuyog Pawar /* Initialize all the search results structure here. We update all the */
9100*c83a76b0SSuyog Pawar /* search results to default values, and configure things like blk sizes */
9101*c83a76b0SSuyog Pawar /*************************************************************************/
9102*c83a76b0SSuyog Pawar if(ps_refine_prms->i4_encode == 0)
9103*c83a76b0SSuyog Pawar {
9104*c83a76b0SSuyog Pawar S32 pred_lx;
9105*c83a76b0SSuyog Pawar search_results_t *ps_search_results;
9106*c83a76b0SSuyog Pawar
9107*c83a76b0SSuyog Pawar ps_search_results = &ps_ctxt->s_search_results_8x8;
9108*c83a76b0SSuyog Pawar hme_init_search_results(
9109*c83a76b0SSuyog Pawar ps_search_results,
9110*c83a76b0SSuyog Pawar i4_num_ref_fpel,
9111*c83a76b0SSuyog Pawar ps_refine_prms->i4_num_fpel_results,
9112*c83a76b0SSuyog Pawar ps_refine_prms->i4_num_results_per_part,
9113*c83a76b0SSuyog Pawar e_search_blk_size,
9114*c83a76b0SSuyog Pawar 0,
9115*c83a76b0SSuyog Pawar 0,
9116*c83a76b0SSuyog Pawar &ps_ctxt->au1_is_past[0]);
9117*c83a76b0SSuyog Pawar for(pred_lx = 0; pred_lx < 2; pred_lx++)
9118*c83a76b0SSuyog Pawar {
9119*c83a76b0SSuyog Pawar hme_init_pred_ctxt_no_encode(
9120*c83a76b0SSuyog Pawar &ps_search_results->as_pred_ctxt[pred_lx],
9121*c83a76b0SSuyog Pawar ps_search_results,
9122*c83a76b0SSuyog Pawar &as_top_neighbours[0],
9123*c83a76b0SSuyog Pawar &as_left_neighbours[0],
9124*c83a76b0SSuyog Pawar &ps_candt_prj_coloc[0],
9125*c83a76b0SSuyog Pawar ps_candt_zeromv,
9126*c83a76b0SSuyog Pawar ps_candt_zeromv,
9127*c83a76b0SSuyog Pawar pred_lx,
9128*c83a76b0SSuyog Pawar lambda_inp,
9129*c83a76b0SSuyog Pawar ps_refine_prms->lambda_q_shift,
9130*c83a76b0SSuyog Pawar &ps_ctxt->apu1_ref_bits_tlu_lc[0],
9131*c83a76b0SSuyog Pawar &ps_ctxt->ai2_ref_scf[0]);
9132*c83a76b0SSuyog Pawar }
9133*c83a76b0SSuyog Pawar }
9134*c83a76b0SSuyog Pawar
9135*c83a76b0SSuyog Pawar /*********************************************************************/
9136*c83a76b0SSuyog Pawar /* Initialize the dyn. search range params. for each reference index */
9137*c83a76b0SSuyog Pawar /* in current layer ctxt */
9138*c83a76b0SSuyog Pawar /*********************************************************************/
9139*c83a76b0SSuyog Pawar /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
9140*c83a76b0SSuyog Pawar if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
9141*c83a76b0SSuyog Pawar {
9142*c83a76b0SSuyog Pawar WORD32 ref_ctr;
9143*c83a76b0SSuyog Pawar
9144*c83a76b0SSuyog Pawar for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++)
9145*c83a76b0SSuyog Pawar {
9146*c83a76b0SSuyog Pawar INIT_DYN_SEARCH_PRMS(
9147*c83a76b0SSuyog Pawar &ps_ctxt->s_coarse_dyn_range_prms
9148*c83a76b0SSuyog Pawar .as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr],
9149*c83a76b0SSuyog Pawar ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
9150*c83a76b0SSuyog Pawar }
9151*c83a76b0SSuyog Pawar }
9152*c83a76b0SSuyog Pawar
9153*c83a76b0SSuyog Pawar /* Next set up initial candidates according to a given set of rules. */
9154*c83a76b0SSuyog Pawar /* The number of initial candidates affects the quality of ME in the */
9155*c83a76b0SSuyog Pawar /* case of motion with multiple degrees of freedom. In case of simple */
9156*c83a76b0SSuyog Pawar /* translational motion, a current and a few causal and non causal */
9157*c83a76b0SSuyog Pawar /* candts would suffice. More candidates help to cover more complex */
9158*c83a76b0SSuyog Pawar /* cases like partitions, rotation/zoom, occlusion in/out, fine motion */
9159*c83a76b0SSuyog Pawar /* where multiple ref helps etc. */
9160*c83a76b0SSuyog Pawar /* The candidate choice also depends on the following parameters. */
9161*c83a76b0SSuyog Pawar /* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH */
9162*c83a76b0SSuyog Pawar /* Whether we encode or not, and the type of search across reference */
9163*c83a76b0SSuyog Pawar /* i.e. the previous layer may have been explicit/implicit and curr */
9164*c83a76b0SSuyog Pawar /* layer may be explicit/implicit */
9165*c83a76b0SSuyog Pawar
9166*c83a76b0SSuyog Pawar /* 0, 0, L, T, projected coloc best always present by default */
9167*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets);
9168*c83a76b0SSuyog Pawar ps_candt_zeromv = ps_search_candts[id].ps_search_node;
9169*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 0;
9170*c83a76b0SSuyog Pawar ps_candt_zeromv->s_mv.i2_mvx = 0;
9171*c83a76b0SSuyog Pawar ps_candt_zeromv->s_mv.i2_mvy = 0;
9172*c83a76b0SSuyog Pawar
9173*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets);
9174*c83a76b0SSuyog Pawar ps_candt_l = ps_search_candts[id].ps_search_node;
9175*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 0;
9176*c83a76b0SSuyog Pawar
9177*c83a76b0SSuyog Pawar /* Even in ME_HIGH_SPEED mode, in layer 0, blocks */
9178*c83a76b0SSuyog Pawar /* not at the CTB boundary use the causal T and */
9179*c83a76b0SSuyog Pawar /* not the projected T, although the candidate is */
9180*c83a76b0SSuyog Pawar /* still pointed to by ps_candt_prj_t[0] */
9181*c83a76b0SSuyog Pawar if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9182*c83a76b0SSuyog Pawar {
9183*c83a76b0SSuyog Pawar /* Using Projected top to eliminate sync */
9184*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9185*c83a76b0SSuyog Pawar PROJECTED_TOP0, e_me_quality_presets);
9186*c83a76b0SSuyog Pawar ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node;
9187*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 1;
9188*c83a76b0SSuyog Pawar }
9189*c83a76b0SSuyog Pawar else
9190*c83a76b0SSuyog Pawar {
9191*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9192*c83a76b0SSuyog Pawar SPATIAL_TOP0, e_me_quality_presets);
9193*c83a76b0SSuyog Pawar ps_candt_t = ps_search_candts[id].ps_search_node;
9194*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 0;
9195*c83a76b0SSuyog Pawar }
9196*c83a76b0SSuyog Pawar
9197*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9198*c83a76b0SSuyog Pawar PROJECTED_COLOC0, e_me_quality_presets);
9199*c83a76b0SSuyog Pawar ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node;
9200*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 1;
9201*c83a76b0SSuyog Pawar
9202*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9203*c83a76b0SSuyog Pawar PROJECTED_COLOC1, e_me_quality_presets);
9204*c83a76b0SSuyog Pawar ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node;
9205*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 1;
9206*c83a76b0SSuyog Pawar
9207*c83a76b0SSuyog Pawar if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9208*c83a76b0SSuyog Pawar {
9209*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9210*c83a76b0SSuyog Pawar PROJECTED_TOP_RIGHT0, e_me_quality_presets);
9211*c83a76b0SSuyog Pawar ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node;
9212*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 1;
9213*c83a76b0SSuyog Pawar
9214*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9215*c83a76b0SSuyog Pawar PROJECTED_TOP_LEFT0, e_me_quality_presets);
9216*c83a76b0SSuyog Pawar ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node;
9217*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 1;
9218*c83a76b0SSuyog Pawar }
9219*c83a76b0SSuyog Pawar else
9220*c83a76b0SSuyog Pawar {
9221*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9222*c83a76b0SSuyog Pawar SPATIAL_TOP_RIGHT0, e_me_quality_presets);
9223*c83a76b0SSuyog Pawar ps_candt_tr = ps_search_candts[id].ps_search_node;
9224*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 0;
9225*c83a76b0SSuyog Pawar
9226*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9227*c83a76b0SSuyog Pawar SPATIAL_TOP_LEFT0, e_me_quality_presets);
9228*c83a76b0SSuyog Pawar ps_candt_tl = ps_search_candts[id].ps_search_node;
9229*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 0;
9230*c83a76b0SSuyog Pawar }
9231*c83a76b0SSuyog Pawar
9232*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9233*c83a76b0SSuyog Pawar PROJECTED_RIGHT0, e_me_quality_presets);
9234*c83a76b0SSuyog Pawar ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node;
9235*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 1;
9236*c83a76b0SSuyog Pawar
9237*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9238*c83a76b0SSuyog Pawar PROJECTED_BOTTOM0, e_me_quality_presets);
9239*c83a76b0SSuyog Pawar ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node;
9240*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 1;
9241*c83a76b0SSuyog Pawar
9242*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9243*c83a76b0SSuyog Pawar PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets);
9244*c83a76b0SSuyog Pawar ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node;
9245*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 1;
9246*c83a76b0SSuyog Pawar
9247*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9248*c83a76b0SSuyog Pawar PROJECTED_BOTTOM_LEFT0, e_me_quality_presets);
9249*c83a76b0SSuyog Pawar ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node;
9250*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 1;
9251*c83a76b0SSuyog Pawar
9252*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9253*c83a76b0SSuyog Pawar PROJECTED_RIGHT1, e_me_quality_presets);
9254*c83a76b0SSuyog Pawar ps_candt_prj_r[1] = ps_search_candts[id].ps_search_node;
9255*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 1;
9256*c83a76b0SSuyog Pawar
9257*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9258*c83a76b0SSuyog Pawar PROJECTED_BOTTOM1, e_me_quality_presets);
9259*c83a76b0SSuyog Pawar ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node;
9260*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 1;
9261*c83a76b0SSuyog Pawar
9262*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9263*c83a76b0SSuyog Pawar PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets);
9264*c83a76b0SSuyog Pawar ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node;
9265*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 1;
9266*c83a76b0SSuyog Pawar
9267*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9268*c83a76b0SSuyog Pawar PROJECTED_BOTTOM_LEFT1, e_me_quality_presets);
9269*c83a76b0SSuyog Pawar ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node;
9270*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 1;
9271*c83a76b0SSuyog Pawar
9272*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets);
9273*c83a76b0SSuyog Pawar ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node;
9274*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 1;
9275*c83a76b0SSuyog Pawar
9276*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9277*c83a76b0SSuyog Pawar PROJECTED_TOP_RIGHT1, e_me_quality_presets);
9278*c83a76b0SSuyog Pawar ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node;
9279*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 1;
9280*c83a76b0SSuyog Pawar
9281*c83a76b0SSuyog Pawar id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9282*c83a76b0SSuyog Pawar PROJECTED_TOP_LEFT1, e_me_quality_presets);
9283*c83a76b0SSuyog Pawar ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node;
9284*c83a76b0SSuyog Pawar ps_search_candts[id].u1_num_steps_refine = 1;
9285*c83a76b0SSuyog Pawar
9286*c83a76b0SSuyog Pawar /*************************************************************************/
9287*c83a76b0SSuyog Pawar /* Now that the candidates have been ordered, to choose the right number */
9288*c83a76b0SSuyog Pawar /* of initial candidates. */
9289*c83a76b0SSuyog Pawar /*************************************************************************/
9290*c83a76b0SSuyog Pawar if(curr_layer_implicit && !prev_layer_implicit)
9291*c83a76b0SSuyog Pawar {
9292*c83a76b0SSuyog Pawar if(e_search_complexity == SEARCH_CX_LOW)
9293*c83a76b0SSuyog Pawar num_init_candts = 7;
9294*c83a76b0SSuyog Pawar else if(e_search_complexity == SEARCH_CX_MED)
9295*c83a76b0SSuyog Pawar num_init_candts = 13;
9296*c83a76b0SSuyog Pawar else if(e_search_complexity == SEARCH_CX_HIGH)
9297*c83a76b0SSuyog Pawar num_init_candts = 18;
9298*c83a76b0SSuyog Pawar else
9299*c83a76b0SSuyog Pawar ASSERT(0);
9300*c83a76b0SSuyog Pawar }
9301*c83a76b0SSuyog Pawar else
9302*c83a76b0SSuyog Pawar {
9303*c83a76b0SSuyog Pawar if(e_search_complexity == SEARCH_CX_LOW)
9304*c83a76b0SSuyog Pawar num_init_candts = 5;
9305*c83a76b0SSuyog Pawar else if(e_search_complexity == SEARCH_CX_MED)
9306*c83a76b0SSuyog Pawar num_init_candts = 11;
9307*c83a76b0SSuyog Pawar else if(e_search_complexity == SEARCH_CX_HIGH)
9308*c83a76b0SSuyog Pawar num_init_candts = 16;
9309*c83a76b0SSuyog Pawar else
9310*c83a76b0SSuyog Pawar ASSERT(0);
9311*c83a76b0SSuyog Pawar }
9312*c83a76b0SSuyog Pawar
9313*c83a76b0SSuyog Pawar if(ME_XTREME_SPEED_25 == e_me_quality_presets)
9314*c83a76b0SSuyog Pawar {
9315*c83a76b0SSuyog Pawar num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25;
9316*c83a76b0SSuyog Pawar }
9317*c83a76b0SSuyog Pawar
9318*c83a76b0SSuyog Pawar /*************************************************************************/
9319*c83a76b0SSuyog Pawar /* The following search parameters are fixed throughout the search across*/
9320*c83a76b0SSuyog Pawar /* all blks. So these are configured outside processing loop */
9321*c83a76b0SSuyog Pawar /*************************************************************************/
9322*c83a76b0SSuyog Pawar s_search_prms_blk.i4_num_init_candts = num_init_candts;
9323*c83a76b0SSuyog Pawar s_search_prms_blk.i4_start_step = 1;
9324*c83a76b0SSuyog Pawar s_search_prms_blk.i4_use_satd = 0;
9325*c83a76b0SSuyog Pawar s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
9326*c83a76b0SSuyog Pawar /* we use recon only for encoded layers, otherwise it is not available */
9327*c83a76b0SSuyog Pawar s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
9328*c83a76b0SSuyog Pawar
9329*c83a76b0SSuyog Pawar s_search_prms_blk.ps_search_candts = ps_search_candts;
9330*c83a76b0SSuyog Pawar /* We use the same mv_range for all ref. pic. So assign to member 0 */
9331*c83a76b0SSuyog Pawar if(s_search_prms_blk.i4_use_rec)
9332*c83a76b0SSuyog Pawar s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec;
9333*c83a76b0SSuyog Pawar else
9334*c83a76b0SSuyog Pawar s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp;
9335*c83a76b0SSuyog Pawar /*************************************************************************/
9336*c83a76b0SSuyog Pawar /* Initialize coordinates. Meaning as follows */
9337*c83a76b0SSuyog Pawar /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */
9338*c83a76b0SSuyog Pawar /* blk_y : same as above, y coord. */
9339*c83a76b0SSuyog Pawar /* num_blks_in_this_ctb : number of blks in this given ctb that starts */
9340*c83a76b0SSuyog Pawar /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */
9341*c83a76b0SSuyog Pawar /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left */
9342*c83a76b0SSuyog Pawar /* corner of the picture. Always multiple of 64. */
9343*c83a76b0SSuyog Pawar /* blk_id_in_ctb : encode order id of the blk in the ctb. */
9344*c83a76b0SSuyog Pawar /*************************************************************************/
9345*c83a76b0SSuyog Pawar blk_y = 0;
9346*c83a76b0SSuyog Pawar blk_id_in_ctb = 0;
9347*c83a76b0SSuyog Pawar
9348*c83a76b0SSuyog Pawar GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
9349*c83a76b0SSuyog Pawar
9350*c83a76b0SSuyog Pawar /* Get the number of sync units in a row based on encode/non encode layer */
9351*c83a76b0SSuyog Pawar num_sync_units_in_row = num_blks_in_row;
9352*c83a76b0SSuyog Pawar
9353*c83a76b0SSuyog Pawar /*************************************************************************/
9354*c83a76b0SSuyog Pawar /* Picture limit on all 4 sides. This will be used to set mv limits for */
9355*c83a76b0SSuyog Pawar /* every block given its coordinate. Note this assumes that the min amt */
9356*c83a76b0SSuyog Pawar /* of padding to right of pic is equal to the blk size. If we go all the */
9357*c83a76b0SSuyog Pawar /* way upto 64x64, then the min padding on right side of picture should */
9358*c83a76b0SSuyog Pawar /* be 64, and also on bottom side of picture. */
9359*c83a76b0SSuyog Pawar /*************************************************************************/
9360*c83a76b0SSuyog Pawar SET_PIC_LIMIT(
9361*c83a76b0SSuyog Pawar s_pic_limit_inp,
9362*c83a76b0SSuyog Pawar ps_curr_layer->i4_pad_x_inp,
9363*c83a76b0SSuyog Pawar ps_curr_layer->i4_pad_y_inp,
9364*c83a76b0SSuyog Pawar ps_curr_layer->i4_wd,
9365*c83a76b0SSuyog Pawar ps_curr_layer->i4_ht,
9366*c83a76b0SSuyog Pawar s_search_prms_blk.i4_num_steps_post_refine);
9367*c83a76b0SSuyog Pawar
9368*c83a76b0SSuyog Pawar SET_PIC_LIMIT(
9369*c83a76b0SSuyog Pawar s_pic_limit_rec,
9370*c83a76b0SSuyog Pawar ps_curr_layer->i4_pad_x_rec,
9371*c83a76b0SSuyog Pawar ps_curr_layer->i4_pad_y_rec,
9372*c83a76b0SSuyog Pawar ps_curr_layer->i4_wd,
9373*c83a76b0SSuyog Pawar ps_curr_layer->i4_ht,
9374*c83a76b0SSuyog Pawar s_search_prms_blk.i4_num_steps_post_refine);
9375*c83a76b0SSuyog Pawar
9376*c83a76b0SSuyog Pawar /*************************************************************************/
9377*c83a76b0SSuyog Pawar /* set the MV limit per ref. pic. */
9378*c83a76b0SSuyog Pawar /* - P pic. : Based on the config params. */
9379*c83a76b0SSuyog Pawar /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
9380*c83a76b0SSuyog Pawar /*************************************************************************/
9381*c83a76b0SSuyog Pawar {
9382*c83a76b0SSuyog Pawar WORD32 ref_ctr;
9383*c83a76b0SSuyog Pawar /* Only for B/b pic. */
9384*c83a76b0SSuyog Pawar if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
9385*c83a76b0SSuyog Pawar {
9386*c83a76b0SSuyog Pawar WORD16 i2_mv_y_per_poc, i2_max_mv_y;
9387*c83a76b0SSuyog Pawar WORD32 cur_poc, ref_poc, abs_poc_diff;
9388*c83a76b0SSuyog Pawar
9389*c83a76b0SSuyog Pawar cur_poc = ps_ctxt->i4_curr_poc;
9390*c83a76b0SSuyog Pawar
9391*c83a76b0SSuyog Pawar /* Get abs MAX for symmetric search */
9392*c83a76b0SSuyog Pawar i2_mv_y_per_poc = MAX(
9393*c83a76b0SSuyog Pawar ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id],
9394*c83a76b0SSuyog Pawar (ABS(ps_ctxt->s_coarse_dyn_range_prms
9395*c83a76b0SSuyog Pawar .i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id])));
9396*c83a76b0SSuyog Pawar
9397*c83a76b0SSuyog Pawar for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9398*c83a76b0SSuyog Pawar {
9399*c83a76b0SSuyog Pawar ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
9400*c83a76b0SSuyog Pawar abs_poc_diff = ABS((cur_poc - ref_poc));
9401*c83a76b0SSuyog Pawar /* Get the cur. max MV based on POC distance */
9402*c83a76b0SSuyog Pawar i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
9403*c83a76b0SSuyog Pawar i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);
9404*c83a76b0SSuyog Pawar
9405*c83a76b0SSuyog Pawar as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9406*c83a76b0SSuyog Pawar as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
9407*c83a76b0SSuyog Pawar as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9408*c83a76b0SSuyog Pawar as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
9409*c83a76b0SSuyog Pawar }
9410*c83a76b0SSuyog Pawar }
9411*c83a76b0SSuyog Pawar else
9412*c83a76b0SSuyog Pawar {
9413*c83a76b0SSuyog Pawar /* Set the Config. File Params for P pic. */
9414*c83a76b0SSuyog Pawar for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9415*c83a76b0SSuyog Pawar {
9416*c83a76b0SSuyog Pawar as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9417*c83a76b0SSuyog Pawar as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
9418*c83a76b0SSuyog Pawar as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9419*c83a76b0SSuyog Pawar as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
9420*c83a76b0SSuyog Pawar }
9421*c83a76b0SSuyog Pawar }
9422*c83a76b0SSuyog Pawar }
9423*c83a76b0SSuyog Pawar
9424*c83a76b0SSuyog Pawar /* EIID: Calculate threshold based on quality preset and/or temporal layers */
9425*c83a76b0SSuyog Pawar if(e_me_quality_presets == ME_MEDIUM_SPEED)
9426*c83a76b0SSuyog Pawar {
9427*c83a76b0SSuyog Pawar i4_threshold_multiplier = 1;
9428*c83a76b0SSuyog Pawar i4_threshold_divider = 4;
9429*c83a76b0SSuyog Pawar }
9430*c83a76b0SSuyog Pawar else if(e_me_quality_presets == ME_HIGH_SPEED)
9431*c83a76b0SSuyog Pawar {
9432*c83a76b0SSuyog Pawar i4_threshold_multiplier = 1;
9433*c83a76b0SSuyog Pawar i4_threshold_divider = 2;
9434*c83a76b0SSuyog Pawar }
9435*c83a76b0SSuyog Pawar else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25))
9436*c83a76b0SSuyog Pawar {
9437*c83a76b0SSuyog Pawar #if OLD_XTREME_SPEED
9438*c83a76b0SSuyog Pawar /* Hard coding the temporal ID value to 1, if it is older xtreme speed */
9439*c83a76b0SSuyog Pawar i4_temporal_layer = 1;
9440*c83a76b0SSuyog Pawar #endif
9441*c83a76b0SSuyog Pawar if(i4_temporal_layer == 0)
9442*c83a76b0SSuyog Pawar {
9443*c83a76b0SSuyog Pawar i4_threshold_multiplier = 3;
9444*c83a76b0SSuyog Pawar i4_threshold_divider = 4;
9445*c83a76b0SSuyog Pawar }
9446*c83a76b0SSuyog Pawar else if(i4_temporal_layer == 1)
9447*c83a76b0SSuyog Pawar {
9448*c83a76b0SSuyog Pawar i4_threshold_multiplier = 3;
9449*c83a76b0SSuyog Pawar i4_threshold_divider = 4;
9450*c83a76b0SSuyog Pawar }
9451*c83a76b0SSuyog Pawar else if(i4_temporal_layer == 2)
9452*c83a76b0SSuyog Pawar {
9453*c83a76b0SSuyog Pawar i4_threshold_multiplier = 1;
9454*c83a76b0SSuyog Pawar i4_threshold_divider = 1;
9455*c83a76b0SSuyog Pawar }
9456*c83a76b0SSuyog Pawar else
9457*c83a76b0SSuyog Pawar {
9458*c83a76b0SSuyog Pawar i4_threshold_multiplier = 5;
9459*c83a76b0SSuyog Pawar i4_threshold_divider = 4;
9460*c83a76b0SSuyog Pawar }
9461*c83a76b0SSuyog Pawar }
9462*c83a76b0SSuyog Pawar else if(e_me_quality_presets == ME_HIGH_QUALITY)
9463*c83a76b0SSuyog Pawar {
9464*c83a76b0SSuyog Pawar i4_threshold_multiplier = 1;
9465*c83a76b0SSuyog Pawar i4_threshold_divider = 1;
9466*c83a76b0SSuyog Pawar }
9467*c83a76b0SSuyog Pawar
9468*c83a76b0SSuyog Pawar /*************************************************************************/
9469*c83a76b0SSuyog Pawar /*************************************************************************/
9470*c83a76b0SSuyog Pawar /*************************************************************************/
9471*c83a76b0SSuyog Pawar /* START OF THE CORE LOOP */
9472*c83a76b0SSuyog Pawar /* If Encode is 0, then we just loop over each blk */
9473*c83a76b0SSuyog Pawar /*************************************************************************/
9474*c83a76b0SSuyog Pawar /*************************************************************************/
9475*c83a76b0SSuyog Pawar /*************************************************************************/
9476*c83a76b0SSuyog Pawar while(0 == end_of_frame)
9477*c83a76b0SSuyog Pawar {
9478*c83a76b0SSuyog Pawar job_queue_t *ps_job;
9479*c83a76b0SSuyog Pawar ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row; //EIID
9480*c83a76b0SSuyog Pawar WORD32 i4_ctb_row_ctr; //counter to calculate CTB row counter. It's (row_ctr /4)
9481*c83a76b0SSuyog Pawar WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4; //calculations verified for L1 only
9482*c83a76b0SSuyog Pawar //+3 to get ceil values when divided by 4
9483*c83a76b0SSuyog Pawar WORD32 i4_num_4x4_blocks_in_ctb_at_l1 =
9484*c83a76b0SSuyog Pawar 8 * 8; //considering CTB size 32x32 at L1. hardcoded for now
9485*c83a76b0SSuyog Pawar //if there is variable for ctb size use that and this variable can be derived
9486*c83a76b0SSuyog Pawar WORD32 offset_val, check_dep_pos, set_dep_pos;
9487*c83a76b0SSuyog Pawar void *pv_hme_dep_mngr;
9488*c83a76b0SSuyog Pawar ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row;
9489*c83a76b0SSuyog Pawar
9490*c83a76b0SSuyog Pawar /* Get the current layer HME Dep Mngr */
9491*c83a76b0SSuyog Pawar /* Note : Use layer_id - 1 in HME layers */
9492*c83a76b0SSuyog Pawar
9493*c83a76b0SSuyog Pawar pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1];
9494*c83a76b0SSuyog Pawar
9495*c83a76b0SSuyog Pawar /* Get the current row from the job queue */
9496*c83a76b0SSuyog Pawar ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
9497*c83a76b0SSuyog Pawar ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong);
9498*c83a76b0SSuyog Pawar
9499*c83a76b0SSuyog Pawar /* If all rows are done, set the end of process flag to 1, */
9500*c83a76b0SSuyog Pawar /* and the current row to -1 */
9501*c83a76b0SSuyog Pawar if(NULL == ps_job)
9502*c83a76b0SSuyog Pawar {
9503*c83a76b0SSuyog Pawar blk_y = -1;
9504*c83a76b0SSuyog Pawar end_of_frame = 1;
9505*c83a76b0SSuyog Pawar
9506*c83a76b0SSuyog Pawar continue;
9507*c83a76b0SSuyog Pawar }
9508*c83a76b0SSuyog Pawar
9509*c83a76b0SSuyog Pawar if(1 == ps_ctxt->s_frm_prms.is_i_pic)
9510*c83a76b0SSuyog Pawar {
9511*c83a76b0SSuyog Pawar /* set the output dependency of current row */
9512*c83a76b0SSuyog Pawar ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
9513*c83a76b0SSuyog Pawar continue;
9514*c83a76b0SSuyog Pawar }
9515*c83a76b0SSuyog Pawar
9516*c83a76b0SSuyog Pawar blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
9517*c83a76b0SSuyog Pawar blk_x = 0;
9518*c83a76b0SSuyog Pawar i4_ctb_x = 0;
9519*c83a76b0SSuyog Pawar
9520*c83a76b0SSuyog Pawar /* wait for Corresponding Pre intra Job to be completed */
9521*c83a76b0SSuyog Pawar if(1 == ps_refine_prms->i4_layer_id)
9522*c83a76b0SSuyog Pawar {
9523*c83a76b0SSuyog Pawar volatile UWORD32 i4_l1_done;
9524*c83a76b0SSuyog Pawar volatile UWORD32 *pi4_l1_done;
9525*c83a76b0SSuyog Pawar pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt
9526*c83a76b0SSuyog Pawar ->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2];
9527*c83a76b0SSuyog Pawar i4_l1_done = *pi4_l1_done;
9528*c83a76b0SSuyog Pawar while(!i4_l1_done)
9529*c83a76b0SSuyog Pawar {
9530*c83a76b0SSuyog Pawar i4_l1_done = *pi4_l1_done;
9531*c83a76b0SSuyog Pawar }
9532*c83a76b0SSuyog Pawar }
9533*c83a76b0SSuyog Pawar /* Set Variables for Dep. Checking and Setting */
9534*c83a76b0SSuyog Pawar set_dep_pos = blk_y + 1;
9535*c83a76b0SSuyog Pawar if(blk_y > 0)
9536*c83a76b0SSuyog Pawar {
9537*c83a76b0SSuyog Pawar offset_val = 2;
9538*c83a76b0SSuyog Pawar check_dep_pos = blk_y - 1;
9539*c83a76b0SSuyog Pawar }
9540*c83a76b0SSuyog Pawar else
9541*c83a76b0SSuyog Pawar {
9542*c83a76b0SSuyog Pawar /* First row should run without waiting */
9543*c83a76b0SSuyog Pawar offset_val = -1;
9544*c83a76b0SSuyog Pawar check_dep_pos = 0;
9545*c83a76b0SSuyog Pawar }
9546*c83a76b0SSuyog Pawar
9547*c83a76b0SSuyog Pawar /* EIID: calculate ed_blk_ctxt pointer for current row */
9548*c83a76b0SSuyog Pawar /* valid for only layer-1. not verified and used for other layers */
9549*c83a76b0SSuyog Pawar i4_ctb_row_ctr = blk_y / 4;
9550*c83a76b0SSuyog Pawar ps_ed_blk_ctxt_curr_row =
9551*c83a76b0SSuyog Pawar ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row *
9552*c83a76b0SSuyog Pawar i4_num_4x4_blocks_in_ctb_at_l1); //valid for L1 only
9553*c83a76b0SSuyog Pawar ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row);
9554*c83a76b0SSuyog Pawar
9555*c83a76b0SSuyog Pawar /* if non-encode layer then i4_ctb_x will be same as blk_x */
9556*c83a76b0SSuyog Pawar /* loop over all the units in a row */
9557*c83a76b0SSuyog Pawar for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++)
9558*c83a76b0SSuyog Pawar {
9559*c83a76b0SSuyog Pawar ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb; //EIDD
9560*c83a76b0SSuyog Pawar ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr;
9561*c83a76b0SSuyog Pawar WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4;
9562*c83a76b0SSuyog Pawar
9563*c83a76b0SSuyog Pawar /* Wait till top row block is processed */
9564*c83a76b0SSuyog Pawar /* Currently checking till top right block*/
9565*c83a76b0SSuyog Pawar
9566*c83a76b0SSuyog Pawar /* Disabled since all candidates, except for */
9567*c83a76b0SSuyog Pawar /* L and C, are projected from the coarser layer, */
9568*c83a76b0SSuyog Pawar /* only in ME_HIGH_SPEED mode */
9569*c83a76b0SSuyog Pawar if((ME_MEDIUM_SPEED > e_me_quality_presets))
9570*c83a76b0SSuyog Pawar {
9571*c83a76b0SSuyog Pawar if(i4_ctb_x < (num_sync_units_in_row - 1))
9572*c83a76b0SSuyog Pawar {
9573*c83a76b0SSuyog Pawar ihevce_dmgr_chk_row_row_sync(
9574*c83a76b0SSuyog Pawar pv_hme_dep_mngr,
9575*c83a76b0SSuyog Pawar i4_ctb_x,
9576*c83a76b0SSuyog Pawar offset_val,
9577*c83a76b0SSuyog Pawar check_dep_pos,
9578*c83a76b0SSuyog Pawar 0, /* Col Tile No. : Not supported in PreEnc*/
9579*c83a76b0SSuyog Pawar ps_ctxt->thrd_id);
9580*c83a76b0SSuyog Pawar }
9581*c83a76b0SSuyog Pawar }
9582*c83a76b0SSuyog Pawar
9583*c83a76b0SSuyog Pawar {
9584*c83a76b0SSuyog Pawar /* for non encoder layer only one block is processed */
9585*c83a76b0SSuyog Pawar num_blks_in_this_ctb = 1;
9586*c83a76b0SSuyog Pawar }
9587*c83a76b0SSuyog Pawar
9588*c83a76b0SSuyog Pawar /* EIID: derive ed_ctxt ptr for current CTB */
9589*c83a76b0SSuyog Pawar ps_ed_blk_ctxt_curr_ctb =
9590*c83a76b0SSuyog Pawar ps_ed_blk_ctxt_curr_row +
9591*c83a76b0SSuyog Pawar (i4_ctb_blk_ctr *
9592*c83a76b0SSuyog Pawar i4_num_4x4_blocks_in_ctb_at_l1); //currently valid for l1 layer only
9593*c83a76b0SSuyog Pawar ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr;
9594*c83a76b0SSuyog Pawar
9595*c83a76b0SSuyog Pawar /* loop over all the blocks in the CTB; the count will always be 1 */
9596*c83a76b0SSuyog Pawar for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
9597*c83a76b0SSuyog Pawar {
9598*c83a76b0SSuyog Pawar {
9599*c83a76b0SSuyog Pawar /* non encode layer */
9600*c83a76b0SSuyog Pawar blk_x = i4_ctb_x;
9601*c83a76b0SSuyog Pawar blk_id_in_full_ctb = 0;
9602*c83a76b0SSuyog Pawar s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0;
9603*c83a76b0SSuyog Pawar }
9604*c83a76b0SSuyog Pawar
9605*c83a76b0SSuyog Pawar /* get the current input blk point */
9606*c83a76b0SSuyog Pawar pos_x = blk_x << blk_size_shift;
9607*c83a76b0SSuyog Pawar pos_y = blk_y << blk_size_shift;
9608*c83a76b0SSuyog Pawar pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
9609*c83a76b0SSuyog Pawar
9610*c83a76b0SSuyog Pawar /*********************************************************************/
9611*c83a76b0SSuyog Pawar /* replicate the inp buffer at blk or ctb level for each ref id, */
9612*c83a76b0SSuyog Pawar /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
9613*c83a76b0SSuyog Pawar /* thereby avoiding a bloat up of memory. If we did all references */
9614*c83a76b0SSuyog Pawar /* weighted pred, we will end up with a duplicate copy of each ref */
9615*c83a76b0SSuyog Pawar /* at each layer, since we need to preserve the original reference. */
9616*c83a76b0SSuyog Pawar /* ToDo: Need to observe performance with this mechanism and compare */
9617*c83a76b0SSuyog Pawar /* with case where ref is weighted. */
9618*c83a76b0SSuyog Pawar /*********************************************************************/
9619*c83a76b0SSuyog Pawar if(blk_id_in_ctb == 0)
9620*c83a76b0SSuyog Pawar {
9621*c83a76b0SSuyog Pawar fp_get_wt_inp(
9622*c83a76b0SSuyog Pawar ps_curr_layer,
9623*c83a76b0SSuyog Pawar &ps_ctxt->s_wt_pred,
9624*c83a76b0SSuyog Pawar unit_size,
9625*c83a76b0SSuyog Pawar pos_x,
9626*c83a76b0SSuyog Pawar pos_y,
9627*c83a76b0SSuyog Pawar unit_size,
9628*c83a76b0SSuyog Pawar ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
9629*c83a76b0SSuyog Pawar ps_ctxt->i4_wt_pred_enable_flag);
9630*c83a76b0SSuyog Pawar }
9631*c83a76b0SSuyog Pawar
9632*c83a76b0SSuyog Pawar s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
9633*c83a76b0SSuyog Pawar s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
9634*c83a76b0SSuyog Pawar /* Select search results from a suitable search result in the context */
9635*c83a76b0SSuyog Pawar {
9636*c83a76b0SSuyog Pawar ps_search_results = &ps_ctxt->s_search_results_8x8;
9637*c83a76b0SSuyog Pawar }
9638*c83a76b0SSuyog Pawar
9639*c83a76b0SSuyog Pawar s_search_prms_blk.ps_search_results = ps_search_results;
9640*c83a76b0SSuyog Pawar
9641*c83a76b0SSuyog Pawar /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
9642*c83a76b0SSuyog Pawar hme_reset_search_results(
9643*c83a76b0SSuyog Pawar ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
9644*c83a76b0SSuyog Pawar
9645*c83a76b0SSuyog Pawar /* Loop across different Ref IDx */
9646*c83a76b0SSuyog Pawar for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++)
9647*c83a76b0SSuyog Pawar {
9648*c83a76b0SSuyog Pawar S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12;
9649*c83a76b0SSuyog Pawar S32 prev_blk_offset = 6;
9650*c83a76b0SSuyog Pawar S32 resultid;
9651*c83a76b0SSuyog Pawar
9652*c83a76b0SSuyog Pawar /*********************************************************************/
9653*c83a76b0SSuyog Pawar /* For every blk in the picture, the search range needs to be derived*/
9654*c83a76b0SSuyog Pawar /* Any blk can have any mv, but practical search constraints are */
9655*c83a76b0SSuyog Pawar /* imposed by the picture boundary and amt of padding. */
9656*c83a76b0SSuyog Pawar /*********************************************************************/
9657*c83a76b0SSuyog Pawar /* MV limit is different based on ref. PIC */
9658*c83a76b0SSuyog Pawar hme_derive_search_range(
9659*c83a76b0SSuyog Pawar &s_range_prms_inp,
9660*c83a76b0SSuyog Pawar &s_pic_limit_inp,
9661*c83a76b0SSuyog Pawar &as_mv_limit[i1_ref_idx],
9662*c83a76b0SSuyog Pawar pos_x,
9663*c83a76b0SSuyog Pawar pos_y,
9664*c83a76b0SSuyog Pawar blk_wd,
9665*c83a76b0SSuyog Pawar blk_ht);
9666*c83a76b0SSuyog Pawar hme_derive_search_range(
9667*c83a76b0SSuyog Pawar &s_range_prms_rec,
9668*c83a76b0SSuyog Pawar &s_pic_limit_rec,
9669*c83a76b0SSuyog Pawar &as_mv_limit[i1_ref_idx],
9670*c83a76b0SSuyog Pawar pos_x,
9671*c83a76b0SSuyog Pawar pos_y,
9672*c83a76b0SSuyog Pawar blk_wd,
9673*c83a76b0SSuyog Pawar blk_ht);
9674*c83a76b0SSuyog Pawar
9675*c83a76b0SSuyog Pawar s_search_prms_blk.i1_ref_idx = i1_ref_idx;
9676*c83a76b0SSuyog Pawar ps_candt_zeromv->i1_ref_idx = i1_ref_idx;
9677*c83a76b0SSuyog Pawar
9678*c83a76b0SSuyog Pawar i4_num_srch_cands = 1;
9679*c83a76b0SSuyog Pawar
9680*c83a76b0SSuyog Pawar if(1 != ps_refine_prms->i4_layer_id)
9681*c83a76b0SSuyog Pawar {
9682*c83a76b0SSuyog Pawar S32 x, y;
9683*c83a76b0SSuyog Pawar x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9684*c83a76b0SSuyog Pawar y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9685*c83a76b0SSuyog Pawar
9686*c83a76b0SSuyog Pawar if(ME_MEDIUM_SPEED > e_me_quality_presets)
9687*c83a76b0SSuyog Pawar {
9688*c83a76b0SSuyog Pawar hme_get_spatial_candt(
9689*c83a76b0SSuyog Pawar ps_curr_layer,
9690*c83a76b0SSuyog Pawar e_search_blk_size,
9691*c83a76b0SSuyog Pawar blk_x,
9692*c83a76b0SSuyog Pawar blk_y,
9693*c83a76b0SSuyog Pawar i1_ref_idx,
9694*c83a76b0SSuyog Pawar &as_top_neighbours[0],
9695*c83a76b0SSuyog Pawar &as_left_neighbours[0],
9696*c83a76b0SSuyog Pawar 0,
9697*c83a76b0SSuyog Pawar ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9698*c83a76b0SSuyog Pawar 0,
9699*c83a76b0SSuyog Pawar ps_refine_prms->i4_encode);
9700*c83a76b0SSuyog Pawar
9701*c83a76b0SSuyog Pawar *ps_candt_tr = as_top_neighbours[3];
9702*c83a76b0SSuyog Pawar *ps_candt_t = as_top_neighbours[1];
9703*c83a76b0SSuyog Pawar *ps_candt_tl = as_top_neighbours[0];
9704*c83a76b0SSuyog Pawar i4_num_srch_cands += 3;
9705*c83a76b0SSuyog Pawar }
9706*c83a76b0SSuyog Pawar else
9707*c83a76b0SSuyog Pawar {
9708*c83a76b0SSuyog Pawar layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9709*c83a76b0SSuyog Pawar S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9710*c83a76b0SSuyog Pawar S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9711*c83a76b0SSuyog Pawar search_node_t *ps_search_node;
9712*c83a76b0SSuyog Pawar S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9713*c83a76b0SSuyog Pawar hme_mv_t *ps_mv, *ps_mv_base;
9714*c83a76b0SSuyog Pawar S08 *pi1_ref_idx, *pi1_ref_idx_base;
9715*c83a76b0SSuyog Pawar S32 jump = 1, mvs_in_blk, mvs_in_row;
9716*c83a76b0SSuyog Pawar S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9717*c83a76b0SSuyog Pawar
9718*c83a76b0SSuyog Pawar if(i4_blk_size1 != i4_blk_size2)
9719*c83a76b0SSuyog Pawar {
9720*c83a76b0SSuyog Pawar blk_x_temp <<= 1;
9721*c83a76b0SSuyog Pawar blk_y_temp <<= 1;
9722*c83a76b0SSuyog Pawar jump = 2;
9723*c83a76b0SSuyog Pawar if((i4_blk_size1 << 2) == i4_blk_size2)
9724*c83a76b0SSuyog Pawar {
9725*c83a76b0SSuyog Pawar blk_x_temp <<= 1;
9726*c83a76b0SSuyog Pawar blk_y_temp <<= 1;
9727*c83a76b0SSuyog Pawar jump = 4;
9728*c83a76b0SSuyog Pawar }
9729*c83a76b0SSuyog Pawar }
9730*c83a76b0SSuyog Pawar
9731*c83a76b0SSuyog Pawar mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9732*c83a76b0SSuyog Pawar mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9733*c83a76b0SSuyog Pawar
9734*c83a76b0SSuyog Pawar /* Adjust the blk coord to point to top left locn */
9735*c83a76b0SSuyog Pawar blk_x_temp -= 1;
9736*c83a76b0SSuyog Pawar blk_y_temp -= 1;
9737*c83a76b0SSuyog Pawar
9738*c83a76b0SSuyog Pawar /* Pick up the mvs from the location */
9739*c83a76b0SSuyog Pawar i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9740*c83a76b0SSuyog Pawar i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9741*c83a76b0SSuyog Pawar
9742*c83a76b0SSuyog Pawar ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9743*c83a76b0SSuyog Pawar pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9744*c83a76b0SSuyog Pawar
9745*c83a76b0SSuyog Pawar ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9746*c83a76b0SSuyog Pawar pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9747*c83a76b0SSuyog Pawar
9748*c83a76b0SSuyog Pawar ps_mv_base = ps_mv;
9749*c83a76b0SSuyog Pawar pi1_ref_idx_base = pi1_ref_idx;
9750*c83a76b0SSuyog Pawar
9751*c83a76b0SSuyog Pawar ps_search_node = &as_left_neighbours[0];
9752*c83a76b0SSuyog Pawar ps_mv = ps_mv_base + mvs_in_row;
9753*c83a76b0SSuyog Pawar pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9754*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(
9755*c83a76b0SSuyog Pawar ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
9756*c83a76b0SSuyog Pawar
9757*c83a76b0SSuyog Pawar i4_num_srch_cands++;
9758*c83a76b0SSuyog Pawar }
9759*c83a76b0SSuyog Pawar }
9760*c83a76b0SSuyog Pawar else
9761*c83a76b0SSuyog Pawar {
9762*c83a76b0SSuyog Pawar S32 x, y;
9763*c83a76b0SSuyog Pawar x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9764*c83a76b0SSuyog Pawar y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9765*c83a76b0SSuyog Pawar
9766*c83a76b0SSuyog Pawar if(ME_MEDIUM_SPEED > e_me_quality_presets)
9767*c83a76b0SSuyog Pawar {
9768*c83a76b0SSuyog Pawar hme_get_spatial_candt_in_l1_me(
9769*c83a76b0SSuyog Pawar ps_curr_layer,
9770*c83a76b0SSuyog Pawar e_search_blk_size,
9771*c83a76b0SSuyog Pawar blk_x,
9772*c83a76b0SSuyog Pawar blk_y,
9773*c83a76b0SSuyog Pawar i1_ref_idx,
9774*c83a76b0SSuyog Pawar !ps_search_results->pu1_is_past[i1_ref_idx],
9775*c83a76b0SSuyog Pawar &as_top_neighbours[0],
9776*c83a76b0SSuyog Pawar &as_left_neighbours[0],
9777*c83a76b0SSuyog Pawar 0,
9778*c83a76b0SSuyog Pawar ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9779*c83a76b0SSuyog Pawar 0,
9780*c83a76b0SSuyog Pawar ps_ctxt->s_frm_prms.u1_num_active_ref_l0,
9781*c83a76b0SSuyog Pawar ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
9782*c83a76b0SSuyog Pawar
9783*c83a76b0SSuyog Pawar *ps_candt_tr = as_top_neighbours[3];
9784*c83a76b0SSuyog Pawar *ps_candt_t = as_top_neighbours[1];
9785*c83a76b0SSuyog Pawar *ps_candt_tl = as_top_neighbours[0];
9786*c83a76b0SSuyog Pawar
9787*c83a76b0SSuyog Pawar i4_num_srch_cands += 3;
9788*c83a76b0SSuyog Pawar }
9789*c83a76b0SSuyog Pawar else
9790*c83a76b0SSuyog Pawar {
9791*c83a76b0SSuyog Pawar layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9792*c83a76b0SSuyog Pawar S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9793*c83a76b0SSuyog Pawar S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9794*c83a76b0SSuyog Pawar S32 i4_mv_pos_in_implicit_array;
9795*c83a76b0SSuyog Pawar search_node_t *ps_search_node;
9796*c83a76b0SSuyog Pawar S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9797*c83a76b0SSuyog Pawar hme_mv_t *ps_mv, *ps_mv_base;
9798*c83a76b0SSuyog Pawar S08 *pi1_ref_idx, *pi1_ref_idx_base;
9799*c83a76b0SSuyog Pawar S32 jump = 1, mvs_in_blk, mvs_in_row;
9800*c83a76b0SSuyog Pawar S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9801*c83a76b0SSuyog Pawar U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx];
9802*c83a76b0SSuyog Pawar S32 i4_num_results_in_given_dir =
9803*c83a76b0SSuyog Pawar ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9804*c83a76b0SSuyog Pawar ps_ctxt->s_frm_prms.u1_num_active_ref_l1)
9805*c83a76b0SSuyog Pawar : (ps_layer_mvbank->i4_num_mvs_per_ref *
9806*c83a76b0SSuyog Pawar ps_ctxt->s_frm_prms.u1_num_active_ref_l0));
9807*c83a76b0SSuyog Pawar
9808*c83a76b0SSuyog Pawar if(i4_blk_size1 != i4_blk_size2)
9809*c83a76b0SSuyog Pawar {
9810*c83a76b0SSuyog Pawar blk_x_temp <<= 1;
9811*c83a76b0SSuyog Pawar blk_y_temp <<= 1;
9812*c83a76b0SSuyog Pawar jump = 2;
9813*c83a76b0SSuyog Pawar if((i4_blk_size1 << 2) == i4_blk_size2)
9814*c83a76b0SSuyog Pawar {
9815*c83a76b0SSuyog Pawar blk_x_temp <<= 1;
9816*c83a76b0SSuyog Pawar blk_y_temp <<= 1;
9817*c83a76b0SSuyog Pawar jump = 4;
9818*c83a76b0SSuyog Pawar }
9819*c83a76b0SSuyog Pawar }
9820*c83a76b0SSuyog Pawar
9821*c83a76b0SSuyog Pawar mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9822*c83a76b0SSuyog Pawar mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9823*c83a76b0SSuyog Pawar
9824*c83a76b0SSuyog Pawar /* Adjust the blk coord to point to top left locn */
9825*c83a76b0SSuyog Pawar blk_x_temp -= 1;
9826*c83a76b0SSuyog Pawar blk_y_temp -= 1;
9827*c83a76b0SSuyog Pawar
9828*c83a76b0SSuyog Pawar /* Pick up the mvs from the location */
9829*c83a76b0SSuyog Pawar i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9830*c83a76b0SSuyog Pawar i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9831*c83a76b0SSuyog Pawar
9832*c83a76b0SSuyog Pawar i4_offset +=
9833*c83a76b0SSuyog Pawar ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9834*c83a76b0SSuyog Pawar ps_ctxt->s_frm_prms.u1_num_active_ref_l0)
9835*c83a76b0SSuyog Pawar : 0);
9836*c83a76b0SSuyog Pawar
9837*c83a76b0SSuyog Pawar ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9838*c83a76b0SSuyog Pawar pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9839*c83a76b0SSuyog Pawar
9840*c83a76b0SSuyog Pawar ps_mv_base = ps_mv;
9841*c83a76b0SSuyog Pawar pi1_ref_idx_base = pi1_ref_idx;
9842*c83a76b0SSuyog Pawar
9843*c83a76b0SSuyog Pawar {
9844*c83a76b0SSuyog Pawar /* ps_mv and pi1_ref_idx now point to the top left locn */
9845*c83a76b0SSuyog Pawar ps_search_node = &as_left_neighbours[0];
9846*c83a76b0SSuyog Pawar ps_mv = ps_mv_base + mvs_in_row;
9847*c83a76b0SSuyog Pawar pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9848*c83a76b0SSuyog Pawar
9849*c83a76b0SSuyog Pawar i4_mv_pos_in_implicit_array =
9850*c83a76b0SSuyog Pawar hme_find_pos_of_implicitly_stored_ref_id(
9851*c83a76b0SSuyog Pawar pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir);
9852*c83a76b0SSuyog Pawar
9853*c83a76b0SSuyog Pawar if(-1 != i4_mv_pos_in_implicit_array)
9854*c83a76b0SSuyog Pawar {
9855*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(
9856*c83a76b0SSuyog Pawar ps_search_node,
9857*c83a76b0SSuyog Pawar &ps_mv[i4_mv_pos_in_implicit_array],
9858*c83a76b0SSuyog Pawar &pi1_ref_idx[i4_mv_pos_in_implicit_array],
9859*c83a76b0SSuyog Pawar i1_ref_idx,
9860*c83a76b0SSuyog Pawar shift);
9861*c83a76b0SSuyog Pawar }
9862*c83a76b0SSuyog Pawar else
9863*c83a76b0SSuyog Pawar {
9864*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 0;
9865*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
9866*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
9867*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_idx;
9868*c83a76b0SSuyog Pawar }
9869*c83a76b0SSuyog Pawar
9870*c83a76b0SSuyog Pawar i4_num_srch_cands++;
9871*c83a76b0SSuyog Pawar }
9872*c83a76b0SSuyog Pawar }
9873*c83a76b0SSuyog Pawar }
9874*c83a76b0SSuyog Pawar
9875*c83a76b0SSuyog Pawar *ps_candt_l = as_left_neighbours[0];
9876*c83a76b0SSuyog Pawar
9877*c83a76b0SSuyog Pawar /* when 16x16 is searched in an encode layer, and the prev layer */
9878*c83a76b0SSuyog Pawar /* stores results for 4x4 blks, we project 5 candts corresponding */
9879*c83a76b0SSuyog Pawar /* to (2,2), (2,14), (14,2), (14,14) and 2nd best of (2,2) */
9880*c83a76b0SSuyog Pawar /* However in other cases, only 2,2 best and 2nd best reqd */
9881*c83a76b0SSuyog Pawar resultid = 0;
9882*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt(
9883*c83a76b0SSuyog Pawar ps_candt_prj_coloc[0],
9884*c83a76b0SSuyog Pawar ps_curr_layer,
9885*c83a76b0SSuyog Pawar ps_coarse_layer,
9886*c83a76b0SSuyog Pawar pos_x + 2,
9887*c83a76b0SSuyog Pawar pos_y + 2,
9888*c83a76b0SSuyog Pawar i1_ref_idx,
9889*c83a76b0SSuyog Pawar resultid);
9890*c83a76b0SSuyog Pawar
9891*c83a76b0SSuyog Pawar i4_num_srch_cands++;
9892*c83a76b0SSuyog Pawar
9893*c83a76b0SSuyog Pawar resultid = 1;
9894*c83a76b0SSuyog Pawar if(num_results_prev_layer > 1)
9895*c83a76b0SSuyog Pawar {
9896*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt(
9897*c83a76b0SSuyog Pawar ps_candt_prj_coloc[1],
9898*c83a76b0SSuyog Pawar ps_curr_layer,
9899*c83a76b0SSuyog Pawar ps_coarse_layer,
9900*c83a76b0SSuyog Pawar pos_x + 2,
9901*c83a76b0SSuyog Pawar pos_y + 2,
9902*c83a76b0SSuyog Pawar i1_ref_idx,
9903*c83a76b0SSuyog Pawar resultid);
9904*c83a76b0SSuyog Pawar
9905*c83a76b0SSuyog Pawar i4_num_srch_cands++;
9906*c83a76b0SSuyog Pawar }
9907*c83a76b0SSuyog Pawar
9908*c83a76b0SSuyog Pawar resultid = 0;
9909*c83a76b0SSuyog Pawar
9910*c83a76b0SSuyog Pawar if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9911*c83a76b0SSuyog Pawar {
9912*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt(
9913*c83a76b0SSuyog Pawar ps_candt_prj_t[0],
9914*c83a76b0SSuyog Pawar ps_curr_layer,
9915*c83a76b0SSuyog Pawar ps_coarse_layer,
9916*c83a76b0SSuyog Pawar pos_x,
9917*c83a76b0SSuyog Pawar pos_y - prev_blk_offset,
9918*c83a76b0SSuyog Pawar i1_ref_idx,
9919*c83a76b0SSuyog Pawar resultid);
9920*c83a76b0SSuyog Pawar
9921*c83a76b0SSuyog Pawar i4_num_srch_cands++;
9922*c83a76b0SSuyog Pawar }
9923*c83a76b0SSuyog Pawar
9924*c83a76b0SSuyog Pawar {
9925*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt(
9926*c83a76b0SSuyog Pawar ps_candt_prj_br[0],
9927*c83a76b0SSuyog Pawar ps_curr_layer,
9928*c83a76b0SSuyog Pawar ps_coarse_layer,
9929*c83a76b0SSuyog Pawar pos_x + next_blk_offset,
9930*c83a76b0SSuyog Pawar pos_y + next_blk_offset,
9931*c83a76b0SSuyog Pawar i1_ref_idx,
9932*c83a76b0SSuyog Pawar resultid);
9933*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt(
9934*c83a76b0SSuyog Pawar ps_candt_prj_bl[0],
9935*c83a76b0SSuyog Pawar ps_curr_layer,
9936*c83a76b0SSuyog Pawar ps_coarse_layer,
9937*c83a76b0SSuyog Pawar pos_x - prev_blk_offset,
9938*c83a76b0SSuyog Pawar pos_y + next_blk_offset,
9939*c83a76b0SSuyog Pawar i1_ref_idx,
9940*c83a76b0SSuyog Pawar resultid);
9941*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt(
9942*c83a76b0SSuyog Pawar ps_candt_prj_r[0],
9943*c83a76b0SSuyog Pawar ps_curr_layer,
9944*c83a76b0SSuyog Pawar ps_coarse_layer,
9945*c83a76b0SSuyog Pawar pos_x + next_blk_offset,
9946*c83a76b0SSuyog Pawar pos_y,
9947*c83a76b0SSuyog Pawar i1_ref_idx,
9948*c83a76b0SSuyog Pawar resultid);
9949*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt(
9950*c83a76b0SSuyog Pawar ps_candt_prj_b[0],
9951*c83a76b0SSuyog Pawar ps_curr_layer,
9952*c83a76b0SSuyog Pawar ps_coarse_layer,
9953*c83a76b0SSuyog Pawar pos_x,
9954*c83a76b0SSuyog Pawar pos_y + next_blk_offset,
9955*c83a76b0SSuyog Pawar i1_ref_idx,
9956*c83a76b0SSuyog Pawar resultid);
9957*c83a76b0SSuyog Pawar
9958*c83a76b0SSuyog Pawar i4_num_srch_cands += 4;
9959*c83a76b0SSuyog Pawar
9960*c83a76b0SSuyog Pawar if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9961*c83a76b0SSuyog Pawar {
9962*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt(
9963*c83a76b0SSuyog Pawar ps_candt_prj_tr[0],
9964*c83a76b0SSuyog Pawar ps_curr_layer,
9965*c83a76b0SSuyog Pawar ps_coarse_layer,
9966*c83a76b0SSuyog Pawar pos_x + next_blk_offset,
9967*c83a76b0SSuyog Pawar pos_y - prev_blk_offset,
9968*c83a76b0SSuyog Pawar i1_ref_idx,
9969*c83a76b0SSuyog Pawar resultid);
9970*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt(
9971*c83a76b0SSuyog Pawar ps_candt_prj_tl[0],
9972*c83a76b0SSuyog Pawar ps_curr_layer,
9973*c83a76b0SSuyog Pawar ps_coarse_layer,
9974*c83a76b0SSuyog Pawar pos_x - prev_blk_offset,
9975*c83a76b0SSuyog Pawar pos_y - prev_blk_offset,
9976*c83a76b0SSuyog Pawar i1_ref_idx,
9977*c83a76b0SSuyog Pawar resultid);
9978*c83a76b0SSuyog Pawar
9979*c83a76b0SSuyog Pawar i4_num_srch_cands += 2;
9980*c83a76b0SSuyog Pawar }
9981*c83a76b0SSuyog Pawar }
9982*c83a76b0SSuyog Pawar if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED))
9983*c83a76b0SSuyog Pawar {
9984*c83a76b0SSuyog Pawar resultid = 1;
9985*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt(
9986*c83a76b0SSuyog Pawar ps_candt_prj_br[1],
9987*c83a76b0SSuyog Pawar ps_curr_layer,
9988*c83a76b0SSuyog Pawar ps_coarse_layer,
9989*c83a76b0SSuyog Pawar pos_x + next_blk_offset,
9990*c83a76b0SSuyog Pawar pos_y + next_blk_offset,
9991*c83a76b0SSuyog Pawar i1_ref_idx,
9992*c83a76b0SSuyog Pawar resultid);
9993*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt(
9994*c83a76b0SSuyog Pawar ps_candt_prj_bl[1],
9995*c83a76b0SSuyog Pawar ps_curr_layer,
9996*c83a76b0SSuyog Pawar ps_coarse_layer,
9997*c83a76b0SSuyog Pawar pos_x - prev_blk_offset,
9998*c83a76b0SSuyog Pawar pos_y + next_blk_offset,
9999*c83a76b0SSuyog Pawar i1_ref_idx,
10000*c83a76b0SSuyog Pawar resultid);
10001*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt(
10002*c83a76b0SSuyog Pawar ps_candt_prj_r[1],
10003*c83a76b0SSuyog Pawar ps_curr_layer,
10004*c83a76b0SSuyog Pawar ps_coarse_layer,
10005*c83a76b0SSuyog Pawar pos_x + next_blk_offset,
10006*c83a76b0SSuyog Pawar pos_y,
10007*c83a76b0SSuyog Pawar i1_ref_idx,
10008*c83a76b0SSuyog Pawar resultid);
10009*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt(
10010*c83a76b0SSuyog Pawar ps_candt_prj_b[1],
10011*c83a76b0SSuyog Pawar ps_curr_layer,
10012*c83a76b0SSuyog Pawar ps_coarse_layer,
10013*c83a76b0SSuyog Pawar pos_x,
10014*c83a76b0SSuyog Pawar pos_y + next_blk_offset,
10015*c83a76b0SSuyog Pawar i1_ref_idx,
10016*c83a76b0SSuyog Pawar resultid);
10017*c83a76b0SSuyog Pawar
10018*c83a76b0SSuyog Pawar i4_num_srch_cands += 4;
10019*c83a76b0SSuyog Pawar
10020*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt(
10021*c83a76b0SSuyog Pawar ps_candt_prj_tr[1],
10022*c83a76b0SSuyog Pawar ps_curr_layer,
10023*c83a76b0SSuyog Pawar ps_coarse_layer,
10024*c83a76b0SSuyog Pawar pos_x + next_blk_offset,
10025*c83a76b0SSuyog Pawar pos_y - prev_blk_offset,
10026*c83a76b0SSuyog Pawar i1_ref_idx,
10027*c83a76b0SSuyog Pawar resultid);
10028*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt(
10029*c83a76b0SSuyog Pawar ps_candt_prj_tl[1],
10030*c83a76b0SSuyog Pawar ps_curr_layer,
10031*c83a76b0SSuyog Pawar ps_coarse_layer,
10032*c83a76b0SSuyog Pawar pos_x - prev_blk_offset,
10033*c83a76b0SSuyog Pawar pos_y - prev_blk_offset,
10034*c83a76b0SSuyog Pawar i1_ref_idx,
10035*c83a76b0SSuyog Pawar resultid);
10036*c83a76b0SSuyog Pawar pf_hme_project_coloc_candt(
10037*c83a76b0SSuyog Pawar ps_candt_prj_t[1],
10038*c83a76b0SSuyog Pawar ps_curr_layer,
10039*c83a76b0SSuyog Pawar ps_coarse_layer,
10040*c83a76b0SSuyog Pawar pos_x,
10041*c83a76b0SSuyog Pawar pos_y - prev_blk_offset,
10042*c83a76b0SSuyog Pawar i1_ref_idx,
10043*c83a76b0SSuyog Pawar resultid);
10044*c83a76b0SSuyog Pawar
10045*c83a76b0SSuyog Pawar i4_num_srch_cands += 3;
10046*c83a76b0SSuyog Pawar }
10047*c83a76b0SSuyog Pawar
10048*c83a76b0SSuyog Pawar /* Note this block also clips the MV range for all candidates */
10049*c83a76b0SSuyog Pawar #ifdef _DEBUG
10050*c83a76b0SSuyog Pawar {
10051*c83a76b0SSuyog Pawar S32 candt;
10052*c83a76b0SSuyog Pawar range_prms_t *ps_range_prms;
10053*c83a76b0SSuyog Pawar
10054*c83a76b0SSuyog Pawar S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
10055*c83a76b0SSuyog Pawar for(candt = 0; candt < i4_num_srch_cands; candt++)
10056*c83a76b0SSuyog Pawar {
10057*c83a76b0SSuyog Pawar search_node_t *ps_search_node;
10058*c83a76b0SSuyog Pawar
10059*c83a76b0SSuyog Pawar ps_search_node =
10060*c83a76b0SSuyog Pawar s_search_prms_blk.ps_search_candts[candt].ps_search_node;
10061*c83a76b0SSuyog Pawar
10062*c83a76b0SSuyog Pawar ps_range_prms = s_search_prms_blk.aps_mv_range[0];
10063*c83a76b0SSuyog Pawar
10064*c83a76b0SSuyog Pawar if((ps_search_node->i1_ref_idx >= num_ref_valid) ||
10065*c83a76b0SSuyog Pawar (ps_search_node->i1_ref_idx < 0))
10066*c83a76b0SSuyog Pawar {
10067*c83a76b0SSuyog Pawar ASSERT(0);
10068*c83a76b0SSuyog Pawar }
10069*c83a76b0SSuyog Pawar }
10070*c83a76b0SSuyog Pawar }
10071*c83a76b0SSuyog Pawar #endif
10072*c83a76b0SSuyog Pawar
10073*c83a76b0SSuyog Pawar {
10074*c83a76b0SSuyog Pawar S32 srch_cand;
10075*c83a76b0SSuyog Pawar S32 num_unique_nodes = 0;
10076*c83a76b0SSuyog Pawar S32 num_nodes_searched = 0;
10077*c83a76b0SSuyog Pawar S32 num_best_cand = 0;
10078*c83a76b0SSuyog Pawar S08 i1_grid_enable = 0;
10079*c83a76b0SSuyog Pawar search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2];
10080*c83a76b0SSuyog Pawar /* has list of valid partition to search terminated by -1 */
10081*c83a76b0SSuyog Pawar S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
10082*c83a76b0SSuyog Pawar S32 center_x;
10083*c83a76b0SSuyog Pawar S32 center_y;
10084*c83a76b0SSuyog Pawar
10085*c83a76b0SSuyog Pawar /* indicates if the centre point of grid needs to be explicitly added for search */
10086*c83a76b0SSuyog Pawar S32 add_centre = 0;
10087*c83a76b0SSuyog Pawar
10088*c83a76b0SSuyog Pawar memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
10089*c83a76b0SSuyog Pawar center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx;
10090*c83a76b0SSuyog Pawar center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy;
10091*c83a76b0SSuyog Pawar
10092*c83a76b0SSuyog Pawar for(srch_cand = 0;
10093*c83a76b0SSuyog Pawar (srch_cand < i4_num_srch_cands) &&
10094*c83a76b0SSuyog Pawar (num_unique_nodes <= s_search_prms_blk.i4_num_init_candts);
10095*c83a76b0SSuyog Pawar srch_cand++)
10096*c83a76b0SSuyog Pawar {
10097*c83a76b0SSuyog Pawar search_node_t s_search_node_temp =
10098*c83a76b0SSuyog Pawar s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0];
10099*c83a76b0SSuyog Pawar
10100*c83a76b0SSuyog Pawar s_search_node_temp.i1_ref_idx = i1_ref_idx; //TEMP FIX;
10101*c83a76b0SSuyog Pawar
10102*c83a76b0SSuyog Pawar /* Clip the motion vectors as well here since after clipping
10103*c83a76b0SSuyog Pawar two candidates can become same and they will be removed during deduplication */
10104*c83a76b0SSuyog Pawar CLIP_MV_WITHIN_RANGE(
10105*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvx,
10106*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvy,
10107*c83a76b0SSuyog Pawar s_search_prms_blk.aps_mv_range[0],
10108*c83a76b0SSuyog Pawar ps_refine_prms->i4_num_steps_fpel_refine,
10109*c83a76b0SSuyog Pawar ps_refine_prms->i4_num_steps_hpel_refine,
10110*c83a76b0SSuyog Pawar ps_refine_prms->i4_num_steps_qpel_refine);
10111*c83a76b0SSuyog Pawar
10112*c83a76b0SSuyog Pawar /* PT_C */
10113*c83a76b0SSuyog Pawar INSERT_NEW_NODE(
10114*c83a76b0SSuyog Pawar as_unique_search_nodes,
10115*c83a76b0SSuyog Pawar num_unique_nodes,
10116*c83a76b0SSuyog Pawar s_search_node_temp,
10117*c83a76b0SSuyog Pawar 0,
10118*c83a76b0SSuyog Pawar au4_unique_node_map,
10119*c83a76b0SSuyog Pawar center_x,
10120*c83a76b0SSuyog Pawar center_y,
10121*c83a76b0SSuyog Pawar 1);
10122*c83a76b0SSuyog Pawar
10123*c83a76b0SSuyog Pawar num_nodes_searched += 1;
10124*c83a76b0SSuyog Pawar }
10125*c83a76b0SSuyog Pawar num_unique_nodes =
10126*c83a76b0SSuyog Pawar MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts);
10127*c83a76b0SSuyog Pawar
10128*c83a76b0SSuyog Pawar /* If number of candidates projected/number of candidates to be refined are more than 2,
10129*c83a76b0SSuyog Pawar then filter out and choose the best two here */
10130*c83a76b0SSuyog Pawar if(num_unique_nodes >= 2)
10131*c83a76b0SSuyog Pawar {
10132*c83a76b0SSuyog Pawar S32 num_results;
10133*c83a76b0SSuyog Pawar S32 cnt;
10134*c83a76b0SSuyog Pawar S32 *pi4_valid_part_ids;
10135*c83a76b0SSuyog Pawar s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10136*c83a76b0SSuyog Pawar s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10137*c83a76b0SSuyog Pawar pi4_valid_part_ids = &ai4_valid_part_ids[0];
10138*c83a76b0SSuyog Pawar
10139*c83a76b0SSuyog Pawar /* pi4_valid_part_ids is updated inside */
10140*c83a76b0SSuyog Pawar hme_pred_search_no_encode(
10141*c83a76b0SSuyog Pawar &s_search_prms_blk,
10142*c83a76b0SSuyog Pawar ps_curr_layer,
10143*c83a76b0SSuyog Pawar &ps_ctxt->s_wt_pred,
10144*c83a76b0SSuyog Pawar pi4_valid_part_ids,
10145*c83a76b0SSuyog Pawar 1,
10146*c83a76b0SSuyog Pawar e_me_quality_presets,
10147*c83a76b0SSuyog Pawar i1_grid_enable,
10148*c83a76b0SSuyog Pawar (ihevce_me_optimised_function_list_t *)
10149*c83a76b0SSuyog Pawar ps_ctxt->pv_me_optimised_function_list
10150*c83a76b0SSuyog Pawar
10151*c83a76b0SSuyog Pawar );
10152*c83a76b0SSuyog Pawar
10153*c83a76b0SSuyog Pawar num_best_cand = 0;
10154*c83a76b0SSuyog Pawar cnt = 0;
10155*c83a76b0SSuyog Pawar num_results = ps_search_results->u1_num_results_per_part;
10156*c83a76b0SSuyog Pawar
10157*c83a76b0SSuyog Pawar while((id = pi4_valid_part_ids[cnt++]) >= 0)
10158*c83a76b0SSuyog Pawar {
10159*c83a76b0SSuyog Pawar num_results =
10160*c83a76b0SSuyog Pawar MIN(ps_refine_prms->pu1_num_best_results[id], num_results);
10161*c83a76b0SSuyog Pawar
10162*c83a76b0SSuyog Pawar for(i = 0; i < num_results; i++)
10163*c83a76b0SSuyog Pawar {
10164*c83a76b0SSuyog Pawar search_node_t s_search_node_temp;
10165*c83a76b0SSuyog Pawar s_search_node_temp =
10166*c83a76b0SSuyog Pawar *(ps_search_results->aps_part_results[i1_ref_idx][id] + i);
10167*c83a76b0SSuyog Pawar if(s_search_node_temp.i1_ref_idx >= 0)
10168*c83a76b0SSuyog Pawar {
10169*c83a76b0SSuyog Pawar INSERT_NEW_NODE_NOMAP(
10170*c83a76b0SSuyog Pawar as_best_two_proj_node,
10171*c83a76b0SSuyog Pawar num_best_cand,
10172*c83a76b0SSuyog Pawar s_search_node_temp,
10173*c83a76b0SSuyog Pawar 0);
10174*c83a76b0SSuyog Pawar }
10175*c83a76b0SSuyog Pawar }
10176*c83a76b0SSuyog Pawar }
10177*c83a76b0SSuyog Pawar }
10178*c83a76b0SSuyog Pawar else
10179*c83a76b0SSuyog Pawar {
10180*c83a76b0SSuyog Pawar add_centre = 1;
10181*c83a76b0SSuyog Pawar num_best_cand = num_unique_nodes;
10182*c83a76b0SSuyog Pawar as_best_two_proj_node[0] = as_unique_search_nodes[0];
10183*c83a76b0SSuyog Pawar }
10184*c83a76b0SSuyog Pawar
10185*c83a76b0SSuyog Pawar num_unique_nodes = 0;
10186*c83a76b0SSuyog Pawar num_nodes_searched = 0;
10187*c83a76b0SSuyog Pawar
10188*c83a76b0SSuyog Pawar if(1 == num_best_cand)
10189*c83a76b0SSuyog Pawar {
10190*c83a76b0SSuyog Pawar search_node_t s_search_node_temp = as_best_two_proj_node[0];
10191*c83a76b0SSuyog Pawar S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx;
10192*c83a76b0SSuyog Pawar S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy;
10193*c83a76b0SSuyog Pawar S08 i1_ref_idx = s_search_node_temp.i1_ref_idx;
10194*c83a76b0SSuyog Pawar
10195*c83a76b0SSuyog Pawar i1_grid_enable = 1;
10196*c83a76b0SSuyog Pawar
10197*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10198*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10199*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10200*c83a76b0SSuyog Pawar
10201*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10202*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10203*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10204*c83a76b0SSuyog Pawar
10205*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10206*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10207*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10208*c83a76b0SSuyog Pawar
10209*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10210*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10211*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10212*c83a76b0SSuyog Pawar
10213*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10214*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10215*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10216*c83a76b0SSuyog Pawar
10217*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10218*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10219*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10220*c83a76b0SSuyog Pawar
10221*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10222*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10223*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10224*c83a76b0SSuyog Pawar
10225*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10226*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10227*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10228*c83a76b0SSuyog Pawar
10229*c83a76b0SSuyog Pawar if(add_centre)
10230*c83a76b0SSuyog Pawar {
10231*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10232*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10233*c83a76b0SSuyog Pawar as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10234*c83a76b0SSuyog Pawar }
10235*c83a76b0SSuyog Pawar }
10236*c83a76b0SSuyog Pawar else
10237*c83a76b0SSuyog Pawar {
10238*c83a76b0SSuyog Pawar /* For the candidates where refinement was required, choose the best two */
10239*c83a76b0SSuyog Pawar for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++)
10240*c83a76b0SSuyog Pawar {
10241*c83a76b0SSuyog Pawar search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand];
10242*c83a76b0SSuyog Pawar WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx;
10243*c83a76b0SSuyog Pawar WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy;
10244*c83a76b0SSuyog Pawar
10245*c83a76b0SSuyog Pawar /* Because there may not be two best unique candidates (because of clipping),
10246*c83a76b0SSuyog Pawar second best candidate can be uninitialized, ignore that */
10247*c83a76b0SSuyog Pawar if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV ||
10248*c83a76b0SSuyog Pawar s_search_node_temp.i1_ref_idx < 0)
10249*c83a76b0SSuyog Pawar {
10250*c83a76b0SSuyog Pawar num_nodes_searched++;
10251*c83a76b0SSuyog Pawar continue;
10252*c83a76b0SSuyog Pawar }
10253*c83a76b0SSuyog Pawar
10254*c83a76b0SSuyog Pawar /* PT_C */
10255*c83a76b0SSuyog Pawar /* Since the center point has already been evaluated and best results are persistent,
10256*c83a76b0SSuyog Pawar it will not be evaluated again */
10257*c83a76b0SSuyog Pawar if(add_centre) /* centre point added explicitly again if search results is not updated */
10258*c83a76b0SSuyog Pawar {
10259*c83a76b0SSuyog Pawar INSERT_NEW_NODE(
10260*c83a76b0SSuyog Pawar as_unique_search_nodes,
10261*c83a76b0SSuyog Pawar num_unique_nodes,
10262*c83a76b0SSuyog Pawar s_search_node_temp,
10263*c83a76b0SSuyog Pawar 0,
10264*c83a76b0SSuyog Pawar au4_unique_node_map,
10265*c83a76b0SSuyog Pawar center_x,
10266*c83a76b0SSuyog Pawar center_y,
10267*c83a76b0SSuyog Pawar 1);
10268*c83a76b0SSuyog Pawar }
10269*c83a76b0SSuyog Pawar
10270*c83a76b0SSuyog Pawar /* PT_L */
10271*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10272*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvy = mv_y;
10273*c83a76b0SSuyog Pawar INSERT_NEW_NODE(
10274*c83a76b0SSuyog Pawar as_unique_search_nodes,
10275*c83a76b0SSuyog Pawar num_unique_nodes,
10276*c83a76b0SSuyog Pawar s_search_node_temp,
10277*c83a76b0SSuyog Pawar 0,
10278*c83a76b0SSuyog Pawar au4_unique_node_map,
10279*c83a76b0SSuyog Pawar center_x,
10280*c83a76b0SSuyog Pawar center_y,
10281*c83a76b0SSuyog Pawar 1);
10282*c83a76b0SSuyog Pawar
10283*c83a76b0SSuyog Pawar /* PT_T */
10284*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvx = mv_x;
10285*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10286*c83a76b0SSuyog Pawar INSERT_NEW_NODE(
10287*c83a76b0SSuyog Pawar as_unique_search_nodes,
10288*c83a76b0SSuyog Pawar num_unique_nodes,
10289*c83a76b0SSuyog Pawar s_search_node_temp,
10290*c83a76b0SSuyog Pawar 0,
10291*c83a76b0SSuyog Pawar au4_unique_node_map,
10292*c83a76b0SSuyog Pawar center_x,
10293*c83a76b0SSuyog Pawar center_y,
10294*c83a76b0SSuyog Pawar 1);
10295*c83a76b0SSuyog Pawar
10296*c83a76b0SSuyog Pawar /* PT_R */
10297*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10298*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvy = mv_y;
10299*c83a76b0SSuyog Pawar INSERT_NEW_NODE(
10300*c83a76b0SSuyog Pawar as_unique_search_nodes,
10301*c83a76b0SSuyog Pawar num_unique_nodes,
10302*c83a76b0SSuyog Pawar s_search_node_temp,
10303*c83a76b0SSuyog Pawar 0,
10304*c83a76b0SSuyog Pawar au4_unique_node_map,
10305*c83a76b0SSuyog Pawar center_x,
10306*c83a76b0SSuyog Pawar center_y,
10307*c83a76b0SSuyog Pawar 1);
10308*c83a76b0SSuyog Pawar
10309*c83a76b0SSuyog Pawar /* PT_B */
10310*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvx = mv_x;
10311*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10312*c83a76b0SSuyog Pawar INSERT_NEW_NODE(
10313*c83a76b0SSuyog Pawar as_unique_search_nodes,
10314*c83a76b0SSuyog Pawar num_unique_nodes,
10315*c83a76b0SSuyog Pawar s_search_node_temp,
10316*c83a76b0SSuyog Pawar 0,
10317*c83a76b0SSuyog Pawar au4_unique_node_map,
10318*c83a76b0SSuyog Pawar center_x,
10319*c83a76b0SSuyog Pawar center_y,
10320*c83a76b0SSuyog Pawar 1);
10321*c83a76b0SSuyog Pawar
10322*c83a76b0SSuyog Pawar /* PT_TL */
10323*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10324*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10325*c83a76b0SSuyog Pawar INSERT_NEW_NODE(
10326*c83a76b0SSuyog Pawar as_unique_search_nodes,
10327*c83a76b0SSuyog Pawar num_unique_nodes,
10328*c83a76b0SSuyog Pawar s_search_node_temp,
10329*c83a76b0SSuyog Pawar 0,
10330*c83a76b0SSuyog Pawar au4_unique_node_map,
10331*c83a76b0SSuyog Pawar center_x,
10332*c83a76b0SSuyog Pawar center_y,
10333*c83a76b0SSuyog Pawar 1);
10334*c83a76b0SSuyog Pawar
10335*c83a76b0SSuyog Pawar /* PT_TR */
10336*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10337*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10338*c83a76b0SSuyog Pawar INSERT_NEW_NODE(
10339*c83a76b0SSuyog Pawar as_unique_search_nodes,
10340*c83a76b0SSuyog Pawar num_unique_nodes,
10341*c83a76b0SSuyog Pawar s_search_node_temp,
10342*c83a76b0SSuyog Pawar 0,
10343*c83a76b0SSuyog Pawar au4_unique_node_map,
10344*c83a76b0SSuyog Pawar center_x,
10345*c83a76b0SSuyog Pawar center_y,
10346*c83a76b0SSuyog Pawar 1);
10347*c83a76b0SSuyog Pawar
10348*c83a76b0SSuyog Pawar /* PT_BL */
10349*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10350*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10351*c83a76b0SSuyog Pawar INSERT_NEW_NODE(
10352*c83a76b0SSuyog Pawar as_unique_search_nodes,
10353*c83a76b0SSuyog Pawar num_unique_nodes,
10354*c83a76b0SSuyog Pawar s_search_node_temp,
10355*c83a76b0SSuyog Pawar 0,
10356*c83a76b0SSuyog Pawar au4_unique_node_map,
10357*c83a76b0SSuyog Pawar center_x,
10358*c83a76b0SSuyog Pawar center_y,
10359*c83a76b0SSuyog Pawar 1);
10360*c83a76b0SSuyog Pawar
10361*c83a76b0SSuyog Pawar /* PT_BR */
10362*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10363*c83a76b0SSuyog Pawar s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10364*c83a76b0SSuyog Pawar INSERT_NEW_NODE(
10365*c83a76b0SSuyog Pawar as_unique_search_nodes,
10366*c83a76b0SSuyog Pawar num_unique_nodes,
10367*c83a76b0SSuyog Pawar s_search_node_temp,
10368*c83a76b0SSuyog Pawar 0,
10369*c83a76b0SSuyog Pawar au4_unique_node_map,
10370*c83a76b0SSuyog Pawar center_x,
10371*c83a76b0SSuyog Pawar center_y,
10372*c83a76b0SSuyog Pawar 1);
10373*c83a76b0SSuyog Pawar }
10374*c83a76b0SSuyog Pawar }
10375*c83a76b0SSuyog Pawar
10376*c83a76b0SSuyog Pawar s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10377*c83a76b0SSuyog Pawar s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10378*c83a76b0SSuyog Pawar
10379*c83a76b0SSuyog Pawar /*****************************************************************/
10380*c83a76b0SSuyog Pawar /* Call the search algorithm, this includes: */
10381*c83a76b0SSuyog Pawar /* Pre-Search-Refinement (for coarse candts) */
10382*c83a76b0SSuyog Pawar /* Search on each candidate */
10383*c83a76b0SSuyog Pawar /* Post Search Refinement on winners/other new candidates */
10384*c83a76b0SSuyog Pawar /*****************************************************************/
10385*c83a76b0SSuyog Pawar
10386*c83a76b0SSuyog Pawar hme_pred_search_no_encode(
10387*c83a76b0SSuyog Pawar &s_search_prms_blk,
10388*c83a76b0SSuyog Pawar ps_curr_layer,
10389*c83a76b0SSuyog Pawar &ps_ctxt->s_wt_pred,
10390*c83a76b0SSuyog Pawar ai4_valid_part_ids,
10391*c83a76b0SSuyog Pawar 0,
10392*c83a76b0SSuyog Pawar e_me_quality_presets,
10393*c83a76b0SSuyog Pawar i1_grid_enable,
10394*c83a76b0SSuyog Pawar (ihevce_me_optimised_function_list_t *)
10395*c83a76b0SSuyog Pawar ps_ctxt->pv_me_optimised_function_list);
10396*c83a76b0SSuyog Pawar
10397*c83a76b0SSuyog Pawar i1_grid_enable = 0;
10398*c83a76b0SSuyog Pawar }
10399*c83a76b0SSuyog Pawar }
10400*c83a76b0SSuyog Pawar
10401*c83a76b0SSuyog Pawar /* for non encode layer update MV and end processing for block */
10402*c83a76b0SSuyog Pawar {
10403*c83a76b0SSuyog Pawar WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0;
10404*c83a76b0SSuyog Pawar search_node_t *ps_search_node;
10405*c83a76b0SSuyog Pawar /* now update the reqd results back to the layer mv bank. */
10406*c83a76b0SSuyog Pawar if(1 == ps_refine_prms->i4_layer_id)
10407*c83a76b0SSuyog Pawar {
10408*c83a76b0SSuyog Pawar hme_update_mv_bank_in_l1_me(
10409*c83a76b0SSuyog Pawar ps_search_results,
10410*c83a76b0SSuyog Pawar ps_curr_layer->ps_layer_mvbank,
10411*c83a76b0SSuyog Pawar blk_x,
10412*c83a76b0SSuyog Pawar blk_y,
10413*c83a76b0SSuyog Pawar &s_mv_update_prms);
10414*c83a76b0SSuyog Pawar }
10415*c83a76b0SSuyog Pawar else
10416*c83a76b0SSuyog Pawar {
10417*c83a76b0SSuyog Pawar hme_update_mv_bank_noencode(
10418*c83a76b0SSuyog Pawar ps_search_results,
10419*c83a76b0SSuyog Pawar ps_curr_layer->ps_layer_mvbank,
10420*c83a76b0SSuyog Pawar blk_x,
10421*c83a76b0SSuyog Pawar blk_y,
10422*c83a76b0SSuyog Pawar &s_mv_update_prms);
10423*c83a76b0SSuyog Pawar }
10424*c83a76b0SSuyog Pawar
10425*c83a76b0SSuyog Pawar /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
10426*c83a76b0SSuyog Pawar /* Only for P pictures: for P both flags are 0, while for I and B pictures they are mutually exclusive */
10427*c83a76b0SSuyog Pawar if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10428*c83a76b0SSuyog Pawar {
10429*c83a76b0SSuyog Pawar WORD32 i4_j;
10430*c83a76b0SSuyog Pawar layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank;
10431*c83a76b0SSuyog Pawar
10432*c83a76b0SSuyog Pawar //if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size)
10433*c83a76b0SSuyog Pawar /* Not considering this for Dyn. Search Update */
10434*c83a76b0SSuyog Pawar {
10435*c83a76b0SSuyog Pawar for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10436*c83a76b0SSuyog Pawar i4_ref_id++)
10437*c83a76b0SSuyog Pawar {
10438*c83a76b0SSuyog Pawar ps_search_node =
10439*c83a76b0SSuyog Pawar ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10440*c83a76b0SSuyog Pawar
10441*c83a76b0SSuyog Pawar for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
10442*c83a76b0SSuyog Pawar {
10443*c83a76b0SSuyog Pawar hme_update_dynamic_search_params(
10444*c83a76b0SSuyog Pawar &ps_ctxt->s_coarse_dyn_range_prms
10445*c83a76b0SSuyog Pawar .as_dyn_range_prms[ps_refine_prms->i4_layer_id]
10446*c83a76b0SSuyog Pawar [i4_ref_id],
10447*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy);
10448*c83a76b0SSuyog Pawar
10449*c83a76b0SSuyog Pawar ps_search_node++;
10450*c83a76b0SSuyog Pawar }
10451*c83a76b0SSuyog Pawar }
10452*c83a76b0SSuyog Pawar }
10453*c83a76b0SSuyog Pawar }
10454*c83a76b0SSuyog Pawar
10455*c83a76b0SSuyog Pawar if(1 == ps_refine_prms->i4_layer_id)
10456*c83a76b0SSuyog Pawar {
10457*c83a76b0SSuyog Pawar WORD32 wt_pred_val, log_wt_pred_val;
10458*c83a76b0SSuyog Pawar WORD32 ref_id_of_nearest_poc = 0;
10459*c83a76b0SSuyog Pawar WORD32 max_val = 0x7fffffff;
10460*c83a76b0SSuyog Pawar WORD32 max_l0_val = 0x7fffffff;
10461*c83a76b0SSuyog Pawar WORD32 max_l1_val = 0x7fffffff;
10462*c83a76b0SSuyog Pawar WORD32 cur_val;
10463*c83a76b0SSuyog Pawar WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred;
10464*c83a76b0SSuyog Pawar
10465*c83a76b0SSuyog Pawar WORD32 bestl0_sad = 0x7fffffff;
10466*c83a76b0SSuyog Pawar WORD32 bestl1_sad = 0x7fffffff;
10467*c83a76b0SSuyog Pawar search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL;
10468*c83a76b0SSuyog Pawar
10469*c83a76b0SSuyog Pawar for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10470*c83a76b0SSuyog Pawar i4_ref_id++)
10471*c83a76b0SSuyog Pawar {
10472*c83a76b0SSuyog Pawar wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id];
10473*c83a76b0SSuyog Pawar log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc;
10474*c83a76b0SSuyog Pawar
10475*c83a76b0SSuyog Pawar ps_search_node =
10476*c83a76b0SSuyog Pawar ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10477*c83a76b0SSuyog Pawar
10478*c83a76b0SSuyog Pawar i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) +
10479*c83a76b0SSuyog Pawar ((1 << log_wt_pred_val) >> 1)) >>
10480*c83a76b0SSuyog Pawar log_wt_pred_val;
10481*c83a76b0SSuyog Pawar
10482*c83a76b0SSuyog Pawar i4_local_cost_weighted_pred =
10483*c83a76b0SSuyog Pawar i4_local_weighted_sad +
10484*c83a76b0SSuyog Pawar (ps_search_node->i4_tot_cost - ps_search_node->i4_sad);
10485*c83a76b0SSuyog Pawar //the loop is redundant as the results are already sorted based on total cost
10486*c83a76b0SSuyog Pawar //for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++)
10487*c83a76b0SSuyog Pawar {
10488*c83a76b0SSuyog Pawar if(i4_local_cost_weighted_pred < min_cost)
10489*c83a76b0SSuyog Pawar {
10490*c83a76b0SSuyog Pawar min_cost = i4_local_cost_weighted_pred;
10491*c83a76b0SSuyog Pawar min_sad = i4_local_weighted_sad;
10492*c83a76b0SSuyog Pawar }
10493*c83a76b0SSuyog Pawar }
10494*c83a76b0SSuyog Pawar
10495*c83a76b0SSuyog Pawar /* For P frame, calculate the nearest poc which is either P or I frame*/
10496*c83a76b0SSuyog Pawar if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10497*c83a76b0SSuyog Pawar {
10498*c83a76b0SSuyog Pawar if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id])
10499*c83a76b0SSuyog Pawar {
10500*c83a76b0SSuyog Pawar cur_val =
10501*c83a76b0SSuyog Pawar ABS(ps_ctxt->i4_curr_poc -
10502*c83a76b0SSuyog Pawar ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]);
10503*c83a76b0SSuyog Pawar if(cur_val < max_val)
10504*c83a76b0SSuyog Pawar {
10505*c83a76b0SSuyog Pawar max_val = cur_val;
10506*c83a76b0SSuyog Pawar ref_id_of_nearest_poc = i4_ref_id;
10507*c83a76b0SSuyog Pawar }
10508*c83a76b0SSuyog Pawar }
10509*c83a76b0SSuyog Pawar }
10510*c83a76b0SSuyog Pawar }
10511*c83a76b0SSuyog Pawar /* Store the ME cost with respect to the past frame, only for P frames */
10512*c83a76b0SSuyog Pawar if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10513*c83a76b0SSuyog Pawar {
10514*c83a76b0SSuyog Pawar if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10515*c83a76b0SSuyog Pawar {
10516*c83a76b0SSuyog Pawar WORD16 i2_mvx, i2_mvy;
10517*c83a76b0SSuyog Pawar
10518*c83a76b0SSuyog Pawar WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10519*c83a76b0SSuyog Pawar WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10520*c83a76b0SSuyog Pawar WORD32 z_scan_idx =
10521*c83a76b0SSuyog Pawar gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10522*c83a76b0SSuyog Pawar WORD32 wt, log_wt;
10523*c83a76b0SSuyog Pawar
10524*c83a76b0SSuyog Pawar /*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10525*c83a76b0SSuyog Pawar <= (1 + ps_ctxt->num_b_frms));*/
10526*c83a76b0SSuyog Pawar
10527*c83a76b0SSuyog Pawar /*obtain mvx and mvy */
10528*c83a76b0SSuyog Pawar i2_mvx =
10529*c83a76b0SSuyog Pawar ps_search_results
10530*c83a76b0SSuyog Pawar ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10531*c83a76b0SSuyog Pawar ->s_mv.i2_mvx;
10532*c83a76b0SSuyog Pawar i2_mvy =
10533*c83a76b0SSuyog Pawar ps_search_results
10534*c83a76b0SSuyog Pawar ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10535*c83a76b0SSuyog Pawar ->s_mv.i2_mvy;
10536*c83a76b0SSuyog Pawar
10537*c83a76b0SSuyog Pawar /* weight and log2 shift used below to scale the SAD of the nearest-POC reference */
10538*c83a76b0SSuyog Pawar wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc];
10539*c83a76b0SSuyog Pawar log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc;
10540*c83a76b0SSuyog Pawar
10541*c83a76b0SSuyog Pawar /* register the weighted SAD and the corresponding cost of the nearest-POC reference in the l1 blk context */
10542*c83a76b0SSuyog Pawar ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] =
10543*c83a76b0SSuyog Pawar ((ps_search_results
10544*c83a76b0SSuyog Pawar ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10545*c83a76b0SSuyog Pawar ->i4_sad *
10546*c83a76b0SSuyog Pawar wt) +
10547*c83a76b0SSuyog Pawar ((1 << log_wt) >> 1)) >>
10548*c83a76b0SSuyog Pawar log_wt;
10549*c83a76b0SSuyog Pawar ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] =
10550*c83a76b0SSuyog Pawar ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] +
10551*c83a76b0SSuyog Pawar (ps_search_results
10552*c83a76b0SSuyog Pawar ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10553*c83a76b0SSuyog Pawar ->i4_tot_cost -
10554*c83a76b0SSuyog Pawar ps_search_results
10555*c83a76b0SSuyog Pawar ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10556*c83a76b0SSuyog Pawar ->i4_sad);
10557*c83a76b0SSuyog Pawar /*for complexity change detection*/
10558*c83a76b0SSuyog Pawar ps_ctxt->i4_num_blks++;
10559*c83a76b0SSuyog Pawar if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] >
10560*c83a76b0SSuyog Pawar (8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms)))
10561*c83a76b0SSuyog Pawar {
10562*c83a76b0SSuyog Pawar ps_ctxt->i4_num_blks_high_sad++;
10563*c83a76b0SSuyog Pawar }
10564*c83a76b0SSuyog Pawar }
10565*c83a76b0SSuyog Pawar }
10566*c83a76b0SSuyog Pawar }
10567*c83a76b0SSuyog Pawar
10568*c83a76b0SSuyog Pawar /* EIID: Early inter intra decisions */
10569*c83a76b0SSuyog Pawar /* tap L1 level SAD for inter intra decisions */
10570*c83a76b0SSuyog Pawar if((e_me_quality_presets >= ME_MEDIUM_SPEED) &&
10571*c83a76b0SSuyog Pawar (!ps_ctxt->s_frm_prms
10572*c83a76b0SSuyog Pawar .is_i_pic)) //for high-quality preset->disable early decisions
10573*c83a76b0SSuyog Pawar {
10574*c83a76b0SSuyog Pawar if(1 == ps_refine_prms->i4_layer_id)
10575*c83a76b0SSuyog Pawar {
10576*c83a76b0SSuyog Pawar WORD32 i4_min_sad_cost_8x8_block = min_cost;
10577*c83a76b0SSuyog Pawar ihevce_ed_blk_t *ps_curr_ed_blk_ctxt;
10578*c83a76b0SSuyog Pawar WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10579*c83a76b0SSuyog Pawar WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10580*c83a76b0SSuyog Pawar WORD32 z_scan_idx =
10581*c83a76b0SSuyog Pawar gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10582*c83a76b0SSuyog Pawar ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx;
10583*c83a76b0SSuyog Pawar
10584*c83a76b0SSuyog Pawar /*register the min cost for l1 me in blk context */
10585*c83a76b0SSuyog Pawar ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10586*c83a76b0SSuyog Pawar i4_min_sad_cost_8x8_block;
10587*c83a76b0SSuyog Pawar i4_num_comparisions++;
10588*c83a76b0SSuyog Pawar
10589*c83a76b0SSuyog Pawar /* take early inter-intra decision here */
10590*c83a76b0SSuyog Pawar ps_curr_ed_blk_ctxt->intra_or_inter = 3; /*init saying eval both */
10591*c83a76b0SSuyog Pawar #if DISABLE_INTRA_IN_BPICS
10592*c83a76b0SSuyog Pawar if((e_me_quality_presets == ME_XTREME_SPEED_25) &&
10593*c83a76b0SSuyog Pawar (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
10594*c83a76b0SSuyog Pawar {
10595*c83a76b0SSuyog Pawar ps_curr_ed_blk_ctxt->intra_or_inter =
10596*c83a76b0SSuyog Pawar 2; /*eval only inter if inter cost is less */
10597*c83a76b0SSuyog Pawar i4_num_inter_wins++;
10598*c83a76b0SSuyog Pawar }
10599*c83a76b0SSuyog Pawar else
10600*c83a76b0SSuyog Pawar #endif
10601*c83a76b0SSuyog Pawar {
10602*c83a76b0SSuyog Pawar if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] <
10603*c83a76b0SSuyog Pawar ((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] *
10604*c83a76b0SSuyog Pawar i4_threshold_multiplier) /
10605*c83a76b0SSuyog Pawar i4_threshold_divider))
10606*c83a76b0SSuyog Pawar {
10607*c83a76b0SSuyog Pawar ps_curr_ed_blk_ctxt->intra_or_inter =
10608*c83a76b0SSuyog Pawar 2; /*eval only inter if inter cost is less */
10609*c83a76b0SSuyog Pawar i4_num_inter_wins++;
10610*c83a76b0SSuyog Pawar }
10611*c83a76b0SSuyog Pawar }
10612*c83a76b0SSuyog Pawar
10613*c83a76b0SSuyog Pawar //{
10614*c83a76b0SSuyog Pawar // DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n",
10615*c83a76b0SSuyog Pawar // blk_x,blk_y,
10616*c83a76b0SSuyog Pawar // i4_ctb_blk_ctr, i4_ctb_row_ctr,
10617*c83a76b0SSuyog Pawar // ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe,
10618*c83a76b0SSuyog Pawar // i4_min_sad_cost_8x8_block
10619*c83a76b0SSuyog Pawar // );
10620*c83a76b0SSuyog Pawar //}
10621*c83a76b0SSuyog Pawar
10622*c83a76b0SSuyog Pawar } //end of layer-1
10623*c83a76b0SSuyog Pawar } //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED)
10624*c83a76b0SSuyog Pawar else
10625*c83a76b0SSuyog Pawar {
10626*c83a76b0SSuyog Pawar if(1 == ps_refine_prms->i4_layer_id)
10627*c83a76b0SSuyog Pawar {
10628*c83a76b0SSuyog Pawar WORD32 i4_min_sad_cost_8x8_block = min_cost;
10629*c83a76b0SSuyog Pawar WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10630*c83a76b0SSuyog Pawar WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10631*c83a76b0SSuyog Pawar WORD32 z_scan_idx =
10632*c83a76b0SSuyog Pawar gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10633*c83a76b0SSuyog Pawar
10634*c83a76b0SSuyog Pawar /*register the min cost for l1 me in blk context */
10635*c83a76b0SSuyog Pawar ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10636*c83a76b0SSuyog Pawar i4_min_sad_cost_8x8_block;
10637*c83a76b0SSuyog Pawar }
10638*c83a76b0SSuyog Pawar }
10639*c83a76b0SSuyog Pawar if(1 == ps_refine_prms->i4_layer_id)
10640*c83a76b0SSuyog Pawar {
10641*c83a76b0SSuyog Pawar WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10642*c83a76b0SSuyog Pawar WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10643*c83a76b0SSuyog Pawar WORD32 z_scan_idx =
10644*c83a76b0SSuyog Pawar gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10645*c83a76b0SSuyog Pawar
10646*c83a76b0SSuyog Pawar ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] =
10647*c83a76b0SSuyog Pawar min_sad;
10648*c83a76b0SSuyog Pawar
10649*c83a76b0SSuyog Pawar if(min_cost <
10650*c83a76b0SSuyog Pawar ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2])
10651*c83a76b0SSuyog Pawar {
10652*c83a76b0SSuyog Pawar ps_ctxt->i4_L1_hme_best_cost += min_cost;
10653*c83a76b0SSuyog Pawar ps_ctxt->i4_L1_hme_sad += min_sad;
10654*c83a76b0SSuyog Pawar ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad;
10655*c83a76b0SSuyog Pawar }
10656*c83a76b0SSuyog Pawar else
10657*c83a76b0SSuyog Pawar {
10658*c83a76b0SSuyog Pawar ps_ctxt->i4_L1_hme_best_cost +=
10659*c83a76b0SSuyog Pawar ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2];
10660*c83a76b0SSuyog Pawar ps_ctxt->i4_L1_hme_sad +=
10661*c83a76b0SSuyog Pawar ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10662*c83a76b0SSuyog Pawar ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] =
10663*c83a76b0SSuyog Pawar ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10664*c83a76b0SSuyog Pawar }
10665*c83a76b0SSuyog Pawar }
10666*c83a76b0SSuyog Pawar }
10667*c83a76b0SSuyog Pawar }
10668*c83a76b0SSuyog Pawar
10669*c83a76b0SSuyog Pawar /* Update the number of blocks processed in the current row */
10670*c83a76b0SSuyog Pawar if((ME_MEDIUM_SPEED > e_me_quality_presets))
10671*c83a76b0SSuyog Pawar {
10672*c83a76b0SSuyog Pawar ihevce_dmgr_set_row_row_sync(
10673*c83a76b0SSuyog Pawar pv_hme_dep_mngr,
10674*c83a76b0SSuyog Pawar (i4_ctb_x + 1),
10675*c83a76b0SSuyog Pawar blk_y,
10676*c83a76b0SSuyog Pawar 0 /* Col Tile No. : Not supported in PreEnc*/);
10677*c83a76b0SSuyog Pawar }
10678*c83a76b0SSuyog Pawar }
10679*c83a76b0SSuyog Pawar
10680*c83a76b0SSuyog Pawar /* set the output dependency after completion of row */
10681*c83a76b0SSuyog Pawar ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
10682*c83a76b0SSuyog Pawar }
10683*c83a76b0SSuyog Pawar }
10684