xref: /aosp_15_r20/external/libhevc/encoder/hme_fullpel.c (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar  *
3*c83a76b0SSuyog Pawar  * Copyright (C) 2018 The Android Open Source Project
4*c83a76b0SSuyog Pawar  *
5*c83a76b0SSuyog Pawar  * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar  * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar  * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar  *
9*c83a76b0SSuyog Pawar  * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar  *
11*c83a76b0SSuyog Pawar  * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar  * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar  * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar  * limitations under the License.
16*c83a76b0SSuyog Pawar  *
17*c83a76b0SSuyog Pawar  *****************************************************************************
18*c83a76b0SSuyog Pawar  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*c83a76b0SSuyog Pawar */
20*c83a76b0SSuyog Pawar 
/**
******************************************************************************
* @file hme_fullpel.c
*
* @brief
*    Fullpel search and refinement
*
* @author
*    Ittiam
*
******************************************************************************
*/
33*c83a76b0SSuyog Pawar 
34*c83a76b0SSuyog Pawar /*****************************************************************************/
35*c83a76b0SSuyog Pawar /* File Includes                                                             */
36*c83a76b0SSuyog Pawar /*****************************************************************************/
37*c83a76b0SSuyog Pawar /* System include files */
38*c83a76b0SSuyog Pawar #include <stdio.h>
39*c83a76b0SSuyog Pawar #include <string.h>
40*c83a76b0SSuyog Pawar #include <stdlib.h>
41*c83a76b0SSuyog Pawar #include <assert.h>
42*c83a76b0SSuyog Pawar #include <stdarg.h>
43*c83a76b0SSuyog Pawar #include <math.h>
44*c83a76b0SSuyog Pawar #include <limits.h>
45*c83a76b0SSuyog Pawar 
46*c83a76b0SSuyog Pawar /* User include files */
47*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
48*c83a76b0SSuyog Pawar #include "itt_video_api.h"
49*c83a76b0SSuyog Pawar #include "ihevce_api.h"
50*c83a76b0SSuyog Pawar 
51*c83a76b0SSuyog Pawar #include "rc_cntrl_param.h"
52*c83a76b0SSuyog Pawar #include "rc_frame_info_collector.h"
53*c83a76b0SSuyog Pawar #include "rc_look_ahead_params.h"
54*c83a76b0SSuyog Pawar 
55*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
56*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
57*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
58*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
59*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
60*c83a76b0SSuyog Pawar #include "ihevc_chroma_itrans_recon.h"
61*c83a76b0SSuyog Pawar #include "ihevc_chroma_intra_pred.h"
62*c83a76b0SSuyog Pawar #include "ihevc_intra_pred.h"
63*c83a76b0SSuyog Pawar #include "ihevc_inter_pred.h"
64*c83a76b0SSuyog Pawar #include "ihevc_mem_fns.h"
65*c83a76b0SSuyog Pawar #include "ihevc_padding.h"
66*c83a76b0SSuyog Pawar #include "ihevc_weighted_pred.h"
67*c83a76b0SSuyog Pawar #include "ihevc_sao.h"
68*c83a76b0SSuyog Pawar #include "ihevc_resi_trans.h"
69*c83a76b0SSuyog Pawar #include "ihevc_quant_iquant_ssd.h"
70*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
71*c83a76b0SSuyog Pawar 
72*c83a76b0SSuyog Pawar #include "ihevce_defs.h"
73*c83a76b0SSuyog Pawar #include "ihevce_lap_enc_structs.h"
74*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_structs.h"
75*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_funcs.h"
76*c83a76b0SSuyog Pawar #include "ihevce_me_common_defs.h"
77*c83a76b0SSuyog Pawar #include "ihevce_had_satd.h"
78*c83a76b0SSuyog Pawar #include "ihevce_error_codes.h"
79*c83a76b0SSuyog Pawar #include "ihevce_bitstream.h"
80*c83a76b0SSuyog Pawar #include "ihevce_cabac.h"
81*c83a76b0SSuyog Pawar #include "ihevce_rdoq_macros.h"
82*c83a76b0SSuyog Pawar #include "ihevce_function_selector.h"
83*c83a76b0SSuyog Pawar #include "ihevce_enc_structs.h"
84*c83a76b0SSuyog Pawar #include "ihevce_entropy_structs.h"
85*c83a76b0SSuyog Pawar #include "ihevce_cmn_utils_instr_set_router.h"
86*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_structs.h"
87*c83a76b0SSuyog Pawar #include "ihevce_bs_compute_ctb.h"
88*c83a76b0SSuyog Pawar #include "ihevce_global_tables.h"
89*c83a76b0SSuyog Pawar #include "ihevce_dep_mngr_interface.h"
90*c83a76b0SSuyog Pawar #include "hme_datatype.h"
91*c83a76b0SSuyog Pawar #include "hme_interface.h"
92*c83a76b0SSuyog Pawar #include "hme_common_defs.h"
93*c83a76b0SSuyog Pawar #include "hme_defs.h"
94*c83a76b0SSuyog Pawar #include "ihevce_me_instr_set_router.h"
95*c83a76b0SSuyog Pawar #include "hme_globals.h"
96*c83a76b0SSuyog Pawar #include "hme_utils.h"
97*c83a76b0SSuyog Pawar #include "hme_coarse.h"
98*c83a76b0SSuyog Pawar #include "hme_refine.h"
99*c83a76b0SSuyog Pawar #include "hme_err_compute.h"
100*c83a76b0SSuyog Pawar #include "hme_common_utils.h"
101*c83a76b0SSuyog Pawar #include "hme_search_algo.h"
102*c83a76b0SSuyog Pawar #include "ihevce_stasino_helpers.h"
103*c83a76b0SSuyog Pawar 
104*c83a76b0SSuyog Pawar /**
105*c83a76b0SSuyog Pawar ********************************************************************************
106*c83a76b0SSuyog Pawar *  @fn     hme_fullpel_cand_sifter
107*c83a76b0SSuyog Pawar *
108*c83a76b0SSuyog Pawar *  @brief  Given a list of search candidates and valid partition types,
109*c83a76b0SSuyog Pawar *          this function finds the two best candidates for each partition type.
110*c83a76b0SSuyog Pawar *
111*c83a76b0SSuyog Pawar *  @return None
112*c83a76b0SSuyog Pawar ********************************************************************************
113*c83a76b0SSuyog Pawar */
hme_fullpel_cand_sifter(hme_search_prms_t * ps_search_prms,layer_ctxt_t * ps_layer_ctxt,wgt_pred_ctxt_t * ps_wt_inp_prms,S32 i4_alpha_stim_multiplier,U08 u1_is_cu_noisy,ihevce_me_optimised_function_list_t * ps_me_optimised_function_list)114*c83a76b0SSuyog Pawar void hme_fullpel_cand_sifter(
115*c83a76b0SSuyog Pawar     hme_search_prms_t *ps_search_prms,
116*c83a76b0SSuyog Pawar     layer_ctxt_t *ps_layer_ctxt,
117*c83a76b0SSuyog Pawar     wgt_pred_ctxt_t *ps_wt_inp_prms,
118*c83a76b0SSuyog Pawar     S32 i4_alpha_stim_multiplier,
119*c83a76b0SSuyog Pawar     U08 u1_is_cu_noisy,
120*c83a76b0SSuyog Pawar     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
121*c83a76b0SSuyog Pawar {
122*c83a76b0SSuyog Pawar     S32 i4_i;
123*c83a76b0SSuyog Pawar     S16 i2_temp_tot_cost, i2_temp_stim_injected_cost, i2_temp_mv_cost, i2_temp_mv_x, i2_temp_mv_y,
124*c83a76b0SSuyog Pawar         i2_temp_ref_idx;
125*c83a76b0SSuyog Pawar 
126*c83a76b0SSuyog Pawar     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
127*c83a76b0SSuyog Pawar     S32 i4_temp_part_mask;
128*c83a76b0SSuyog Pawar 
129*c83a76b0SSuyog Pawar     ps_search_prms->i4_alpha_stim_multiplier = i4_alpha_stim_multiplier;
130*c83a76b0SSuyog Pawar     ps_search_prms->u1_is_cu_noisy = u1_is_cu_noisy;
131*c83a76b0SSuyog Pawar 
132*c83a76b0SSuyog Pawar     if(u1_is_cu_noisy)
133*c83a76b0SSuyog Pawar     {
134*c83a76b0SSuyog Pawar         i4_temp_part_mask = ps_search_prms->i4_part_mask;
135*c83a76b0SSuyog Pawar         ps_search_prms->i4_part_mask &= ((ENABLE_2Nx2N) | (ENABLE_NxN));
136*c83a76b0SSuyog Pawar 
137*c83a76b0SSuyog Pawar         ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
138*c83a76b0SSuyog Pawar             (ps_search_prms->i4_part_mask) & ((ENABLE_2Nx2N) | (ENABLE_NxN)),
139*c83a76b0SSuyog Pawar             &ps_fullpel_refine_ctxt->ai4_part_id[0]);
140*c83a76b0SSuyog Pawar     }
141*c83a76b0SSuyog Pawar 
142*c83a76b0SSuyog Pawar     ps_search_prms->u1_is_cu_noisy = u1_is_cu_noisy;
143*c83a76b0SSuyog Pawar 
144*c83a76b0SSuyog Pawar     hme_pred_search(
145*c83a76b0SSuyog Pawar         ps_search_prms, ps_layer_ctxt, ps_wt_inp_prms, 0, ps_me_optimised_function_list);
146*c83a76b0SSuyog Pawar 
147*c83a76b0SSuyog Pawar     if(u1_is_cu_noisy)
148*c83a76b0SSuyog Pawar     {
149*c83a76b0SSuyog Pawar         if(ps_search_prms->ps_search_results->u1_num_results_per_part == 2)
150*c83a76b0SSuyog Pawar         {
151*c83a76b0SSuyog Pawar             for(i4_i = 0; i4_i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i4_i++)
152*c83a76b0SSuyog Pawar             {
153*c83a76b0SSuyog Pawar                 if(ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] >
154*c83a76b0SSuyog Pawar                    ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i])
155*c83a76b0SSuyog Pawar                 {
156*c83a76b0SSuyog Pawar                     i2_temp_tot_cost = ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i];
157*c83a76b0SSuyog Pawar                     i2_temp_stim_injected_cost =
158*c83a76b0SSuyog Pawar                         ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i];
159*c83a76b0SSuyog Pawar                     i2_temp_mv_cost = ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i];
160*c83a76b0SSuyog Pawar                     i2_temp_mv_x = ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i];
161*c83a76b0SSuyog Pawar                     i2_temp_mv_y = ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i];
162*c83a76b0SSuyog Pawar                     i2_temp_ref_idx = ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i];
163*c83a76b0SSuyog Pawar 
164*c83a76b0SSuyog Pawar                     ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] =
165*c83a76b0SSuyog Pawar                         ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i];
166*c83a76b0SSuyog Pawar                     ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
167*c83a76b0SSuyog Pawar                         ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i];
168*c83a76b0SSuyog Pawar                     ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] =
169*c83a76b0SSuyog Pawar                         ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i];
170*c83a76b0SSuyog Pawar                     ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] =
171*c83a76b0SSuyog Pawar                         ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i];
172*c83a76b0SSuyog Pawar                     ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] =
173*c83a76b0SSuyog Pawar                         ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i];
174*c83a76b0SSuyog Pawar                     ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] =
175*c83a76b0SSuyog Pawar                         ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i];
176*c83a76b0SSuyog Pawar 
177*c83a76b0SSuyog Pawar                     ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] = i2_temp_tot_cost;
178*c83a76b0SSuyog Pawar                     ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
179*c83a76b0SSuyog Pawar                         i2_temp_stim_injected_cost;
180*c83a76b0SSuyog Pawar                     ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] = i2_temp_mv_cost;
181*c83a76b0SSuyog Pawar                     ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = i2_temp_mv_x;
182*c83a76b0SSuyog Pawar                     ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = i2_temp_mv_y;
183*c83a76b0SSuyog Pawar                     ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = i2_temp_ref_idx;
184*c83a76b0SSuyog Pawar                 }
185*c83a76b0SSuyog Pawar             }
186*c83a76b0SSuyog Pawar         }
187*c83a76b0SSuyog Pawar 
188*c83a76b0SSuyog Pawar         ps_search_prms->i4_part_mask = i4_temp_part_mask;
189*c83a76b0SSuyog Pawar 
190*c83a76b0SSuyog Pawar         ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
191*c83a76b0SSuyog Pawar             ps_search_prms->i4_part_mask, &ps_fullpel_refine_ctxt->ai4_part_id[0]);
192*c83a76b0SSuyog Pawar     }
193*c83a76b0SSuyog Pawar }
194*c83a76b0SSuyog Pawar 
/**
********************************************************************************
*  @fn     hme_add_fpel_refine_candidates_to_search_cand_array
*
*  @brief  Walks the rectangular grid offsets around one stored full-pel result
*          (mv and ref idx read from ps_fullpel_refine_ctxt at
*          [i4_fpel_search_result_id][i4_fpel_search_result_array_index]) and
*          adds the resulting points to ps_unique_search_nodes.
*
*          The zero-offset (centre) point is added only when
*          u1_add_refine_grid_center_to_search_cand_array is set.
*          When u1_do_not_check_for_duplicates is set, points are written
*          straight into the array; otherwise they go through INSERT_NEW_NODE.
*
*  @param[in,out] pi4_num_unique_nodes : running count of entries in
*                 ps_unique_search_nodes; advanced for every node stored
*
*  @return None
********************************************************************************
*/
static void hme_add_fpel_refine_candidates_to_search_cand_array(
    search_node_t *ps_unique_search_nodes,
    fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt,
    S32 *pi4_num_unique_nodes,
    U32 *pu4_unique_node_map,
    S32 i4_fpel_search_result_id,
    S32 i4_fpel_search_result_array_index,
    S32 i4_unique_node_map_center_x,
    S32 i4_unique_node_map_center_y,
    S08 i1_unique_node_map_ref_idx,
    U08 u1_add_refine_grid_center_to_search_cand_array,
    U08 u1_do_not_check_for_duplicates)
{
    search_node_t s_cand_node;
    U08 u1_hash_on_insert;
    U08 u1_pt;

    const S32 i4_center_mvx =
        ps_fullpel_refine_ctxt->i2_mv_x[i4_fpel_search_result_id][i4_fpel_search_result_array_index];
    const S32 i4_center_mvy =
        ps_fullpel_refine_ctxt->i2_mv_y[i4_fpel_search_result_id][i4_fpel_search_result_array_index];
    const S08 i1_center_ref_idx =
        ps_fullpel_refine_ctxt
            ->i2_ref_idx[i4_fpel_search_result_id][i4_fpel_search_result_array_index];

    /* Scratch node used only on the INSERT_NEW_NODE path; the ref idx is the */
    /* same for every grid point, so it is filled in once                      */
    s_cand_node.i1_ref_idx = i1_center_ref_idx;
    u1_hash_on_insert = (s_cand_node.i1_ref_idx == i1_unique_node_map_ref_idx);

    for(u1_pt = 0; u1_pt < NUM_POINTS_IN_RECTANGULAR_GRID; u1_pt++)
    {
        const S08 i1_dx = gai1_mv_offsets_from_center_in_rect_grid[u1_pt][0];
        const S08 i1_dy = gai1_mv_offsets_from_center_in_rect_grid[u1_pt][1];
        const U08 u1_is_center = !(i1_dx || i1_dy);

        /* The centre point is optional */
        if(u1_is_center && !u1_add_refine_grid_center_to_search_cand_array)
        {
            continue;
        }

        if(u1_do_not_check_for_duplicates)
        {
            /* Direct append, no look-up in the unique node map */
            search_node_t *ps_slot = &ps_unique_search_nodes[pi4_num_unique_nodes[0]];

            ps_slot->s_mv.i2_mvx = i4_center_mvx + i1_dx;
            ps_slot->s_mv.i2_mvy = i4_center_mvy + i1_dy;
            ps_slot->i1_ref_idx = i1_center_ref_idx;

            pi4_num_unique_nodes[0]++;
        }
        else
        {
            s_cand_node.s_mv.i2_mvx = i4_center_mvx + i1_dx;
            s_cand_node.s_mv.i2_mvy = i4_center_mvy + i1_dy;

            /* The last macro argument is 0 for the centre point and the     */
            /* ref-idx match flag for every other point; the meaning of that */
            /* argument and of the literal 1 is defined by INSERT_NEW_NODE   */
            if(u1_is_center)
            {
                INSERT_NEW_NODE(
                    ps_unique_search_nodes,
                    pi4_num_unique_nodes[0],
                    s_cand_node,
                    1,
                    pu4_unique_node_map,
                    i4_unique_node_map_center_x,
                    i4_unique_node_map_center_y,
                    0);
            }
            else
            {
                INSERT_NEW_NODE(
                    ps_unique_search_nodes,
                    pi4_num_unique_nodes[0],
                    s_cand_node,
                    1,
                    pu4_unique_node_map,
                    i4_unique_node_map_center_x,
                    i4_unique_node_map_center_y,
                    u1_hash_on_insert);
            }
        }
    }
}
286*c83a76b0SSuyog Pawar 
hme_fullpel_refine(refine_prms_t * ps_refine_prms,hme_search_prms_t * ps_search_prms,layer_ctxt_t * ps_layer_ctxt,wgt_pred_ctxt_t * ps_wt_inp_prms,U32 * pu4_unique_node_map,U08 u1_num_init_search_cands,U08 u1_8x8_blk_mask,S32 i4_unique_node_map_center_x,S32 i4_unique_node_map_center_y,S08 i1_unique_node_map_ref_idx,ME_QUALITY_PRESETS_T e_quality_preset,ihevce_me_optimised_function_list_t * ps_me_optimised_function_list)287*c83a76b0SSuyog Pawar void hme_fullpel_refine(
288*c83a76b0SSuyog Pawar     refine_prms_t *ps_refine_prms,
289*c83a76b0SSuyog Pawar     hme_search_prms_t *ps_search_prms,
290*c83a76b0SSuyog Pawar     layer_ctxt_t *ps_layer_ctxt,
291*c83a76b0SSuyog Pawar     wgt_pred_ctxt_t *ps_wt_inp_prms,
292*c83a76b0SSuyog Pawar     U32 *pu4_unique_node_map,
293*c83a76b0SSuyog Pawar     U08 u1_num_init_search_cands,
294*c83a76b0SSuyog Pawar     U08 u1_8x8_blk_mask,
295*c83a76b0SSuyog Pawar     S32 i4_unique_node_map_center_x,
296*c83a76b0SSuyog Pawar     S32 i4_unique_node_map_center_y,
297*c83a76b0SSuyog Pawar     S08 i1_unique_node_map_ref_idx,
298*c83a76b0SSuyog Pawar     ME_QUALITY_PRESETS_T e_quality_preset,
299*c83a76b0SSuyog Pawar     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
300*c83a76b0SSuyog Pawar {
301*c83a76b0SSuyog Pawar     S32 i, j;
302*c83a76b0SSuyog Pawar     S32 i4_num_results;
303*c83a76b0SSuyog Pawar     U08 u1_num_complete_grids = 0;
304*c83a76b0SSuyog Pawar     U08 u1_num_grids = 0;
305*c83a76b0SSuyog Pawar 
306*c83a76b0SSuyog Pawar     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
307*c83a76b0SSuyog Pawar 
308*c83a76b0SSuyog Pawar     S32 i4_num_unique_nodes = 0;
309*c83a76b0SSuyog Pawar 
310*c83a76b0SSuyog Pawar     search_node_t *ps_unique_search_nodes = ps_search_prms->ps_search_nodes;
311*c83a76b0SSuyog Pawar 
312*c83a76b0SSuyog Pawar     if(u1_num_init_search_cands >= 2)
313*c83a76b0SSuyog Pawar     {
314*c83a76b0SSuyog Pawar         S32 i4_max_num_results = (15 == u1_8x8_blk_mask)
315*c83a76b0SSuyog Pawar                                      ? ps_refine_prms->u1_max_num_fpel_refine_centers
316*c83a76b0SSuyog Pawar                                      : ((ME_XTREME_SPEED_25 == e_quality_preset)
317*c83a76b0SSuyog Pawar                                             ? MAX_NUM_CANDS_FOR_FPEL_REFINE_IN_XS25
318*c83a76b0SSuyog Pawar                                             : INT_MAX);
319*c83a76b0SSuyog Pawar 
320*c83a76b0SSuyog Pawar         for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++)
321*c83a76b0SSuyog Pawar         {
322*c83a76b0SSuyog Pawar             S32 i4_part_id;
323*c83a76b0SSuyog Pawar             S32 i4_index;
324*c83a76b0SSuyog Pawar 
325*c83a76b0SSuyog Pawar             i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i];
326*c83a76b0SSuyog Pawar             i4_index = (ps_fullpel_refine_ctxt->i4_num_valid_parts > 8) ? i4_part_id : i;
327*c83a76b0SSuyog Pawar             i4_num_results = (15 == u1_8x8_blk_mask)
328*c83a76b0SSuyog Pawar                                  ? MIN(ps_search_prms->ps_search_results->u1_num_results_per_part,
329*c83a76b0SSuyog Pawar                                        ps_refine_prms->pu1_num_best_results[i4_part_id])
330*c83a76b0SSuyog Pawar                                  : ps_search_prms->ps_search_results->u1_num_results_per_part;
331*c83a76b0SSuyog Pawar 
332*c83a76b0SSuyog Pawar             ASSERT(i4_num_results <= 2);
333*c83a76b0SSuyog Pawar 
334*c83a76b0SSuyog Pawar             for(j = 0; j < i4_num_results; j++)
335*c83a76b0SSuyog Pawar             {
336*c83a76b0SSuyog Pawar                 if((ps_fullpel_refine_ctxt->i2_ref_idx[j][i4_index] >= 0) &&
337*c83a76b0SSuyog Pawar                    (ps_fullpel_refine_ctxt->i2_mv_x[j][i4_index] != INTRA_MV))
338*c83a76b0SSuyog Pawar                 {
339*c83a76b0SSuyog Pawar                     S32 i4_num_nodes_added = i4_num_unique_nodes;
340*c83a76b0SSuyog Pawar 
341*c83a76b0SSuyog Pawar                     hme_add_fpel_refine_candidates_to_search_cand_array(
342*c83a76b0SSuyog Pawar                         ps_unique_search_nodes,
343*c83a76b0SSuyog Pawar                         ps_fullpel_refine_ctxt,
344*c83a76b0SSuyog Pawar                         &i4_num_unique_nodes,
345*c83a76b0SSuyog Pawar                         pu4_unique_node_map,
346*c83a76b0SSuyog Pawar                         j,
347*c83a76b0SSuyog Pawar                         i4_index,
348*c83a76b0SSuyog Pawar                         i4_unique_node_map_center_x,
349*c83a76b0SSuyog Pawar                         i4_unique_node_map_center_y,
350*c83a76b0SSuyog Pawar                         i1_unique_node_map_ref_idx,
351*c83a76b0SSuyog Pawar                         0,
352*c83a76b0SSuyog Pawar                         0);
353*c83a76b0SSuyog Pawar 
354*c83a76b0SSuyog Pawar                     i4_num_nodes_added = i4_num_unique_nodes - i4_num_nodes_added;
355*c83a76b0SSuyog Pawar 
356*c83a76b0SSuyog Pawar                     u1_num_complete_grids +=
357*c83a76b0SSuyog Pawar                         (i4_num_nodes_added >= (NUM_POINTS_IN_RECTANGULAR_GRID - 1));
358*c83a76b0SSuyog Pawar                     u1_num_grids += (!!i4_num_nodes_added);
359*c83a76b0SSuyog Pawar 
360*c83a76b0SSuyog Pawar                     i4_max_num_results--;
361*c83a76b0SSuyog Pawar                 }
362*c83a76b0SSuyog Pawar 
363*c83a76b0SSuyog Pawar                 if(i4_max_num_results <= 0)
364*c83a76b0SSuyog Pawar                 {
365*c83a76b0SSuyog Pawar                     break;
366*c83a76b0SSuyog Pawar                 }
367*c83a76b0SSuyog Pawar             }
368*c83a76b0SSuyog Pawar 
369*c83a76b0SSuyog Pawar             if(i4_max_num_results <= 0)
370*c83a76b0SSuyog Pawar             {
371*c83a76b0SSuyog Pawar                 break;
372*c83a76b0SSuyog Pawar             }
373*c83a76b0SSuyog Pawar         }
374*c83a76b0SSuyog Pawar     }
375*c83a76b0SSuyog Pawar     else if((1 == u1_num_init_search_cands) && (ps_refine_prms->u1_max_num_fpel_refine_centers >= 1))
376*c83a76b0SSuyog Pawar     {
377*c83a76b0SSuyog Pawar         ps_fullpel_refine_ctxt->i2_mv_x[0][0] = ps_unique_search_nodes[0].s_mv.i2_mvx;
378*c83a76b0SSuyog Pawar         ps_fullpel_refine_ctxt->i2_mv_y[0][0] = ps_unique_search_nodes[0].s_mv.i2_mvy;
379*c83a76b0SSuyog Pawar         ps_fullpel_refine_ctxt->i2_ref_idx[0][0] = ps_unique_search_nodes[0].i1_ref_idx;
380*c83a76b0SSuyog Pawar 
381*c83a76b0SSuyog Pawar         if((ps_fullpel_refine_ctxt->i2_ref_idx[0][0] >= 0) &&
382*c83a76b0SSuyog Pawar            (ps_fullpel_refine_ctxt->i2_mv_x[0][0] != INTRA_MV))
383*c83a76b0SSuyog Pawar         {
384*c83a76b0SSuyog Pawar             hme_add_fpel_refine_candidates_to_search_cand_array(
385*c83a76b0SSuyog Pawar                 ps_unique_search_nodes,
386*c83a76b0SSuyog Pawar                 ps_fullpel_refine_ctxt,
387*c83a76b0SSuyog Pawar                 &i4_num_unique_nodes,
388*c83a76b0SSuyog Pawar                 pu4_unique_node_map,
389*c83a76b0SSuyog Pawar                 0,
390*c83a76b0SSuyog Pawar                 0,
391*c83a76b0SSuyog Pawar                 i4_unique_node_map_center_x,
392*c83a76b0SSuyog Pawar                 i4_unique_node_map_center_y,
393*c83a76b0SSuyog Pawar                 i1_unique_node_map_ref_idx,
394*c83a76b0SSuyog Pawar                 1,
395*c83a76b0SSuyog Pawar                 1);
396*c83a76b0SSuyog Pawar 
397*c83a76b0SSuyog Pawar             u1_num_complete_grids++;
398*c83a76b0SSuyog Pawar         }
399*c83a76b0SSuyog Pawar     }
400*c83a76b0SSuyog Pawar 
401*c83a76b0SSuyog Pawar     if(i4_num_unique_nodes > 0)
402*c83a76b0SSuyog Pawar     {
403*c83a76b0SSuyog Pawar         ps_search_prms->i4_num_search_nodes = i4_num_unique_nodes;
404*c83a76b0SSuyog Pawar         ps_search_prms->u1_is_cu_noisy = 0;
405*c83a76b0SSuyog Pawar 
406*c83a76b0SSuyog Pawar         hme_pred_search(
407*c83a76b0SSuyog Pawar             ps_search_prms,
408*c83a76b0SSuyog Pawar             ps_layer_ctxt,
409*c83a76b0SSuyog Pawar             ps_wt_inp_prms,
410*c83a76b0SSuyog Pawar             (1 == u1_num_complete_grids) && (u1_num_grids == u1_num_complete_grids),
411*c83a76b0SSuyog Pawar             ps_me_optimised_function_list
412*c83a76b0SSuyog Pawar 
413*c83a76b0SSuyog Pawar         );
414*c83a76b0SSuyog Pawar     }
415*c83a76b0SSuyog Pawar }
416*c83a76b0SSuyog Pawar 
417*c83a76b0SSuyog Pawar /**
418*c83a76b0SSuyog Pawar ********************************************************************************
419*c83a76b0SSuyog Pawar *  @fn     hme_remove_duplicate_fpel_search_candidates
420*c83a76b0SSuyog Pawar *
421*c83a76b0SSuyog Pawar *  @brief  Function name is self-explanatory
422*c83a76b0SSuyog Pawar *
423*c83a76b0SSuyog Pawar *  @return Number of unique candidates
424*c83a76b0SSuyog Pawar ********************************************************************************
425*c83a76b0SSuyog Pawar */
hme_remove_duplicate_fpel_search_candidates(search_node_t * ps_unique_search_nodes,search_candt_t * ps_search_candts,U32 * pu4_unique_node_map,S08 * pi1_pred_dir_to_ref_idx,S32 i4_num_srch_cands,S32 i4_num_init_candts,S32 i4_refine_iter_ctr,S32 i4_num_refinement_iterations,S32 i4_num_act_ref_l0,S08 i1_unique_node_map_ref_idx,S32 i4_unique_node_map_center_x,S32 i4_unique_node_map_center_y,U08 u1_is_bidir_enabled,ME_QUALITY_PRESETS_T e_quality_preset)426*c83a76b0SSuyog Pawar S32 hme_remove_duplicate_fpel_search_candidates(
427*c83a76b0SSuyog Pawar     search_node_t *ps_unique_search_nodes,
428*c83a76b0SSuyog Pawar     search_candt_t *ps_search_candts,
429*c83a76b0SSuyog Pawar     U32 *pu4_unique_node_map,
430*c83a76b0SSuyog Pawar     S08 *pi1_pred_dir_to_ref_idx,
431*c83a76b0SSuyog Pawar     S32 i4_num_srch_cands,
432*c83a76b0SSuyog Pawar     S32 i4_num_init_candts,
433*c83a76b0SSuyog Pawar     S32 i4_refine_iter_ctr,
434*c83a76b0SSuyog Pawar     S32 i4_num_refinement_iterations,
435*c83a76b0SSuyog Pawar     S32 i4_num_act_ref_l0,
436*c83a76b0SSuyog Pawar     S08 i1_unique_node_map_ref_idx,
437*c83a76b0SSuyog Pawar     S32 i4_unique_node_map_center_x,
438*c83a76b0SSuyog Pawar     S32 i4_unique_node_map_center_y,
439*c83a76b0SSuyog Pawar     U08 u1_is_bidir_enabled,
440*c83a76b0SSuyog Pawar     ME_QUALITY_PRESETS_T e_quality_preset)
441*c83a76b0SSuyog Pawar {
442*c83a76b0SSuyog Pawar     S32 i;
443*c83a76b0SSuyog Pawar 
444*c83a76b0SSuyog Pawar     S32 i4_max_num_cands = ((!u1_is_bidir_enabled) && (i4_num_act_ref_l0 > 1))
445*c83a76b0SSuyog Pawar                                ? (i4_num_init_candts >> 1)
446*c83a76b0SSuyog Pawar                                : i4_num_init_candts;
447*c83a76b0SSuyog Pawar     S32 i4_num_unique_nodes = 0;
448*c83a76b0SSuyog Pawar 
449*c83a76b0SSuyog Pawar     for(i = 0; (i < i4_num_srch_cands) && (i4_num_unique_nodes < i4_max_num_cands); i++)
450*c83a76b0SSuyog Pawar     {
451*c83a76b0SSuyog Pawar         search_node_t *ps_cur_cand = ps_search_candts[i].ps_search_node;
452*c83a76b0SSuyog Pawar 
453*c83a76b0SSuyog Pawar         U08 u1_use_hashing = (ps_cur_cand->i1_ref_idx == i1_unique_node_map_ref_idx);
454*c83a76b0SSuyog Pawar 
455*c83a76b0SSuyog Pawar         if(i4_num_refinement_iterations > 1)
456*c83a76b0SSuyog Pawar         {
457*c83a76b0SSuyog Pawar #if !ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
458*c83a76b0SSuyog Pawar             /* Ref0 evaluated during the first iteration */
459*c83a76b0SSuyog Pawar             /* All other Ref's evaluated during the second iteration */
460*c83a76b0SSuyog Pawar             if((ps_cur_cand->i1_ref_idx != pi1_pred_dir_to_ref_idx[0]) && (i4_refine_iter_ctr == 0))
461*c83a76b0SSuyog Pawar             {
462*c83a76b0SSuyog Pawar                 continue;
463*c83a76b0SSuyog Pawar             }
464*c83a76b0SSuyog Pawar #else
465*c83a76b0SSuyog Pawar             if(e_quality_preset == ME_HIGH_QUALITY)
466*c83a76b0SSuyog Pawar             {
467*c83a76b0SSuyog Pawar                 if((ps_cur_cand->i1_ref_idx != pi1_pred_dir_to_ref_idx[0]) &&
468*c83a76b0SSuyog Pawar                    (i4_refine_iter_ctr == 0))
469*c83a76b0SSuyog Pawar                 {
470*c83a76b0SSuyog Pawar                     continue;
471*c83a76b0SSuyog Pawar                 }
472*c83a76b0SSuyog Pawar             }
473*c83a76b0SSuyog Pawar             else
474*c83a76b0SSuyog Pawar             {
475*c83a76b0SSuyog Pawar                 if(ps_cur_cand->i1_ref_idx != pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr])
476*c83a76b0SSuyog Pawar                 {
477*c83a76b0SSuyog Pawar                     continue;
478*c83a76b0SSuyog Pawar                 }
479*c83a76b0SSuyog Pawar             }
480*c83a76b0SSuyog Pawar #endif
481*c83a76b0SSuyog Pawar         }
482*c83a76b0SSuyog Pawar 
483*c83a76b0SSuyog Pawar         INSERT_UNIQUE_NODE(
484*c83a76b0SSuyog Pawar             ps_unique_search_nodes,
485*c83a76b0SSuyog Pawar             i4_num_unique_nodes,
486*c83a76b0SSuyog Pawar             ps_cur_cand[0],
487*c83a76b0SSuyog Pawar             pu4_unique_node_map,
488*c83a76b0SSuyog Pawar             i4_unique_node_map_center_x,
489*c83a76b0SSuyog Pawar             i4_unique_node_map_center_y,
490*c83a76b0SSuyog Pawar             u1_use_hashing);
491*c83a76b0SSuyog Pawar     }
492*c83a76b0SSuyog Pawar 
493*c83a76b0SSuyog Pawar     return i4_num_unique_nodes;
494*c83a76b0SSuyog Pawar }
495