1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar *
3*c83a76b0SSuyog Pawar * Copyright (C) 2018 The Android Open Source Project
4*c83a76b0SSuyog Pawar *
5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar *
9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar *
11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar * limitations under the License.
16*c83a76b0SSuyog Pawar *
17*c83a76b0SSuyog Pawar *****************************************************************************
18*c83a76b0SSuyog Pawar * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*c83a76b0SSuyog Pawar */
20*c83a76b0SSuyog Pawar
21*c83a76b0SSuyog Pawar /*****************************************************************************/
22*c83a76b0SSuyog Pawar /* File Includes */
23*c83a76b0SSuyog Pawar /*****************************************************************************/
24*c83a76b0SSuyog Pawar /* System include files */
25*c83a76b0SSuyog Pawar #include <stdio.h>
26*c83a76b0SSuyog Pawar #include <string.h>
27*c83a76b0SSuyog Pawar #include <stdlib.h>
28*c83a76b0SSuyog Pawar #include <assert.h>
29*c83a76b0SSuyog Pawar #include <stdarg.h>
30*c83a76b0SSuyog Pawar #include <math.h>
31*c83a76b0SSuyog Pawar #include <limits.h>
32*c83a76b0SSuyog Pawar
33*c83a76b0SSuyog Pawar /* User include files */
34*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
35*c83a76b0SSuyog Pawar #include "itt_video_api.h"
36*c83a76b0SSuyog Pawar #include "ihevce_api.h"
37*c83a76b0SSuyog Pawar
38*c83a76b0SSuyog Pawar #include "rc_cntrl_param.h"
39*c83a76b0SSuyog Pawar #include "rc_frame_info_collector.h"
40*c83a76b0SSuyog Pawar #include "rc_look_ahead_params.h"
41*c83a76b0SSuyog Pawar
42*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
43*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
44*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
45*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
46*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
47*c83a76b0SSuyog Pawar #include "ihevc_chroma_itrans_recon.h"
48*c83a76b0SSuyog Pawar #include "ihevc_chroma_intra_pred.h"
49*c83a76b0SSuyog Pawar #include "ihevc_intra_pred.h"
50*c83a76b0SSuyog Pawar #include "ihevc_inter_pred.h"
51*c83a76b0SSuyog Pawar #include "ihevc_mem_fns.h"
52*c83a76b0SSuyog Pawar #include "ihevc_padding.h"
53*c83a76b0SSuyog Pawar #include "ihevc_weighted_pred.h"
54*c83a76b0SSuyog Pawar #include "ihevc_sao.h"
55*c83a76b0SSuyog Pawar #include "ihevc_resi_trans.h"
56*c83a76b0SSuyog Pawar #include "ihevc_quant_iquant_ssd.h"
57*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
58*c83a76b0SSuyog Pawar
59*c83a76b0SSuyog Pawar #include "ihevce_defs.h"
60*c83a76b0SSuyog Pawar #include "ihevce_lap_enc_structs.h"
61*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_structs.h"
62*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_funcs.h"
63*c83a76b0SSuyog Pawar #include "ihevce_me_common_defs.h"
64*c83a76b0SSuyog Pawar #include "ihevce_had_satd.h"
65*c83a76b0SSuyog Pawar #include "ihevce_error_codes.h"
66*c83a76b0SSuyog Pawar #include "ihevce_bitstream.h"
67*c83a76b0SSuyog Pawar #include "ihevce_cabac.h"
68*c83a76b0SSuyog Pawar #include "ihevce_rdoq_macros.h"
69*c83a76b0SSuyog Pawar #include "ihevce_function_selector.h"
70*c83a76b0SSuyog Pawar #include "ihevce_enc_structs.h"
71*c83a76b0SSuyog Pawar #include "ihevce_entropy_structs.h"
72*c83a76b0SSuyog Pawar #include "ihevce_cmn_utils_instr_set_router.h"
73*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_structs.h"
74*c83a76b0SSuyog Pawar #include "ihevce_inter_pred.h"
75*c83a76b0SSuyog Pawar #include "ihevce_global_tables.h"
76*c83a76b0SSuyog Pawar #include "ihevce_dep_mngr_interface.h"
77*c83a76b0SSuyog Pawar #include "hme_datatype.h"
78*c83a76b0SSuyog Pawar #include "hme_interface.h"
79*c83a76b0SSuyog Pawar #include "hme_common_defs.h"
80*c83a76b0SSuyog Pawar #include "hme_defs.h"
81*c83a76b0SSuyog Pawar #include "ihevce_me_instr_set_router.h"
82*c83a76b0SSuyog Pawar #include "hme_globals.h"
83*c83a76b0SSuyog Pawar #include "hme_utils.h"
84*c83a76b0SSuyog Pawar #include "hme_coarse.h"
85*c83a76b0SSuyog Pawar #include "hme_fullpel.h"
86*c83a76b0SSuyog Pawar #include "hme_subpel.h"
87*c83a76b0SSuyog Pawar #include "hme_refine.h"
88*c83a76b0SSuyog Pawar #include "hme_err_compute.h"
89*c83a76b0SSuyog Pawar #include "hme_common_utils.h"
90*c83a76b0SSuyog Pawar #include "hme_search_algo.h"
91*c83a76b0SSuyog Pawar #include "ihevce_stasino_helpers.h"
92*c83a76b0SSuyog Pawar #include "ihevce_common_utils.h"
93*c83a76b0SSuyog Pawar
94*c83a76b0SSuyog Pawar /*****************************************************************************/
95*c83a76b0SSuyog Pawar /* Macros */
96*c83a76b0SSuyog Pawar /*****************************************************************************/
97*c83a76b0SSuyog Pawar #define UNI_SATD_SCALE 1
98*c83a76b0SSuyog Pawar
99*c83a76b0SSuyog Pawar /*****************************************************************************/
100*c83a76b0SSuyog Pawar /* Function Definitions */
101*c83a76b0SSuyog Pawar /*****************************************************************************/
/**
********************************************************************************
*  @fn     ihevce_open_loop_pred_data(
*
*  @brief  Chooses between L0, L1 and BI prediction from the available ME
*          results and generates the luma inter prediction for that choice
*
*  @param[in,out] ps_ctxt : ME frame ctxt; i4_count is incremented and
*                  s_mc_ctxt is passed on to the inter prediction routine
*
*  @param[in] ps_pu_results : per direction pu results of the block
*
*  @param[in] pu1_src : not referenced by this function
*
*  @param[out] pu1_temp_pred : buffer passed to the luma inter prediction
*
*  @param[in] stride : stride passed along with pu1_temp_pred
*
*  @param[in] src_strd : not referenced by this function
*
*  @param[in] e_part_id : part id used to check whether L0 / L1 have results
*
*  @return None
********************************************************************************
*/
void ihevce_open_loop_pred_data(
    me_frm_ctxt_t *ps_ctxt,
    inter_pu_results_t *ps_pu_results,
    U08 *pu1_src,
    U08 *pu1_temp_pred,
    S32 stride,
    S32 src_strd,
    UWORD8 e_part_id)
{
    inter_pred_me_ctxt_t *ps_mc_ctxt = &ps_ctxt->s_mc_ctxt;
    /* -1 marks "no result available in this direction" */
    S32 i4_sad_l0 = -1;
    S32 i4_sad_l1 = -1;
    S32 i4_ret;
    pu_t s_pu;

    ps_ctxt->i4_count++;

    /* Direction L0 : cost without the mv cost, geometry and mv of the pu */
    if(ps_pu_results->u1_num_results_per_part_l0[e_part_id])
    {
        pu_result_t *ps_l0 = ps_pu_results->aps_pu_results[0][PRT_2Nx2N];

        i4_sad_l0 = ps_l0->i4_tot_cost - ps_l0->i4_mv_cost;

        s_pu.b2_pred_mode = PRED_L0;
        s_pu.b4_ht = ps_l0->pu.b4_ht;
        s_pu.b4_wd = ps_l0->pu.b4_wd;
        s_pu.b4_pos_x = ps_l0->pu.b4_pos_x;
        s_pu.b4_pos_y = ps_l0->pu.b4_pos_y;
        s_pu.b1_intra_flag = 0;
        s_pu.mv.s_l0_mv.i2_mvx = ps_l0->pu.mv.s_l0_mv.i2_mvx;
        s_pu.mv.s_l0_mv.i2_mvy = ps_l0->pu.mv.s_l0_mv.i2_mvy;
        s_pu.mv.i1_l0_ref_idx = ps_l0->pu.mv.i1_l0_ref_idx;
    }

    /* Direction L1 : same as above; geometry fields are overwritten when */
    /* both directions have results                                       */
    if(ps_pu_results->u1_num_results_per_part_l1[e_part_id])
    {
        pu_result_t *ps_l1 = ps_pu_results->aps_pu_results[1][PRT_2Nx2N];

        i4_sad_l1 = ps_l1->i4_tot_cost - ps_l1->i4_mv_cost;

        s_pu.b2_pred_mode = PRED_L1;
        s_pu.b4_ht = ps_l1->pu.b4_ht;
        s_pu.b4_wd = ps_l1->pu.b4_wd;
        s_pu.b4_pos_x = ps_l1->pu.b4_pos_x;
        s_pu.b4_pos_y = ps_l1->pu.b4_pos_y;
        s_pu.b1_intra_flag = 0;
        s_pu.mv.s_l1_mv.i2_mvx = ps_l1->pu.mv.s_l1_mv.i2_mvx;
        s_pu.mv.s_l1_mv.i2_mvy = ps_l1->pu.mv.s_l1_mv.i2_mvy;
        s_pu.mv.i1_l1_ref_idx = ps_l1->pu.mv.i1_l1_ref_idx;
    }

    /* At least one direction is expected to have a result */
    ASSERT((i4_sad_l0 != -1) || (i4_sad_l1 != -1));

    /* With both directions available: BI when the two costs are within 15% */
    /* of each other (w.r.t. both costs), else the direction of lower cost  */
    if((i4_sad_l0 != -1) && (i4_sad_l1 != -1))
    {
        S32 i4_sad_diff = abs(i4_sad_l0 - i4_sad_l1);

        if((i4_sad_diff < (i4_sad_l0 * 0.15)) && (i4_sad_diff < (i4_sad_l1 * 0.15)))
        {
            s_pu.b2_pred_mode = PRED_BI;
        }
        else if(i4_sad_l0 < i4_sad_l1)
        {
            s_pu.b2_pred_mode = PRED_L0;
        }
        else
        {
            s_pu.b2_pred_mode = PRED_L1;
        }
    }

    i4_ret = ihevce_luma_inter_pred_pu(ps_mc_ctxt, &s_pu, pu1_temp_pred, stride, 1);
    if(i4_ret == -1)
    {
        ASSERT(0);
    }
}
179*c83a76b0SSuyog Pawar
180*c83a76b0SSuyog Pawar /**
181*c83a76b0SSuyog Pawar ********************************************************************************
182*c83a76b0SSuyog Pawar * @fn void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
183*c83a76b0SSuyog Pawar *
184*c83a76b0SSuyog Pawar * @brief Allocates a block of size = i4_size from working memory and returns
185*c83a76b0SSuyog Pawar *
186*c83a76b0SSuyog Pawar * @param[in,out] ps_buf_mgr: Buffer manager for wkg memory
187*c83a76b0SSuyog Pawar *
188*c83a76b0SSuyog Pawar * @param[in] i4_size : size required
189*c83a76b0SSuyog Pawar *
190*c83a76b0SSuyog Pawar * @return void pointer to allocated memory, NULL if failure
191*c83a76b0SSuyog Pawar ********************************************************************************
192*c83a76b0SSuyog Pawar */
hme_get_wkg_mem(buf_mgr_t * ps_buf_mgr,S32 i4_size)193*c83a76b0SSuyog Pawar void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
194*c83a76b0SSuyog Pawar {
195*c83a76b0SSuyog Pawar U08 *pu1_mem;
196*c83a76b0SSuyog Pawar
197*c83a76b0SSuyog Pawar if(ps_buf_mgr->i4_used + i4_size > ps_buf_mgr->i4_total)
198*c83a76b0SSuyog Pawar return NULL;
199*c83a76b0SSuyog Pawar
200*c83a76b0SSuyog Pawar pu1_mem = ps_buf_mgr->pu1_wkg_mem + ps_buf_mgr->i4_used;
201*c83a76b0SSuyog Pawar ps_buf_mgr->i4_used += i4_size;
202*c83a76b0SSuyog Pawar
203*c83a76b0SSuyog Pawar return ((void *)pu1_mem);
204*c83a76b0SSuyog Pawar }
205*c83a76b0SSuyog Pawar
206*c83a76b0SSuyog Pawar /**
207*c83a76b0SSuyog Pawar ********************************************************************************
208*c83a76b0SSuyog Pawar * @fn hme_init_histogram(
209*c83a76b0SSuyog Pawar *
210*c83a76b0SSuyog Pawar * @brief Top level entry point for Coarse ME. Runs across blocks and does the
211*c83a76b0SSuyog Pawar * needful by calling other low level routines.
212*c83a76b0SSuyog Pawar *
213*c83a76b0SSuyog Pawar * @param[in,out] ps_hist : the histogram structure
214*c83a76b0SSuyog Pawar *
215*c83a76b0SSuyog Pawar * @param[in] i4_max_mv_x : Maximum mv allowed in x direction (fpel units)
216*c83a76b0SSuyog Pawar *
217*c83a76b0SSuyog Pawar * @param[in] i4_max_mv_y : Maximum mv allowed in y direction (fpel units)
218*c83a76b0SSuyog Pawar *
219*c83a76b0SSuyog Pawar * @return None
220*c83a76b0SSuyog Pawar ********************************************************************************
221*c83a76b0SSuyog Pawar */
222*c83a76b0SSuyog Pawar
hme_init_histogram(mv_hist_t * ps_hist,S32 i4_max_mv_x,S32 i4_max_mv_y)223*c83a76b0SSuyog Pawar void hme_init_histogram(mv_hist_t *ps_hist, S32 i4_max_mv_x, S32 i4_max_mv_y)
224*c83a76b0SSuyog Pawar {
225*c83a76b0SSuyog Pawar S32 i4_num_bins, i4_num_cols, i4_num_rows;
226*c83a76b0SSuyog Pawar S32 i4_shift_x, i4_shift_y, i, i4_range, i4_val;
227*c83a76b0SSuyog Pawar
228*c83a76b0SSuyog Pawar /*************************************************************************/
229*c83a76b0SSuyog Pawar /* Evaluate the shift_x and shift_y. For this, we use the following logic*/
230*c83a76b0SSuyog Pawar /* Assuming that we use up all MAX_NUM_BINS. Then the number of bins is */
231*c83a76b0SSuyog Pawar /* given by formula ((max_mv_x * 2) >> shift_x)*((max_mv_y * 2)>>shift_y)*/
232*c83a76b0SSuyog Pawar /* or shift_x + shift_y is log ((max_mv_x * max_mv_y * 4) / MAX_NUM_BINS)*/
233*c83a76b0SSuyog Pawar /* if above quantity is negative, then we make it zero. */
234*c83a76b0SSuyog Pawar /* If result is odd, then shift_y is result >> 1, shift_x is shift_y + 1 */
235*c83a76b0SSuyog Pawar /*************************************************************************/
236*c83a76b0SSuyog Pawar i4_val = i4_max_mv_x * i4_max_mv_y * 4;
237*c83a76b0SSuyog Pawar i4_range = (hme_get_range(i4_val - 1)) + 1;
238*c83a76b0SSuyog Pawar if(i4_range > LOG_MAX_NUM_BINS)
239*c83a76b0SSuyog Pawar {
240*c83a76b0SSuyog Pawar i4_shift_y = (i4_range - LOG_MAX_NUM_BINS);
241*c83a76b0SSuyog Pawar i4_shift_x = (i4_shift_y + 1) >> 1;
242*c83a76b0SSuyog Pawar i4_shift_y >>= 1;
243*c83a76b0SSuyog Pawar }
244*c83a76b0SSuyog Pawar else
245*c83a76b0SSuyog Pawar {
246*c83a76b0SSuyog Pawar i4_shift_y = 0;
247*c83a76b0SSuyog Pawar i4_shift_x = 0;
248*c83a76b0SSuyog Pawar }
249*c83a76b0SSuyog Pawar
250*c83a76b0SSuyog Pawar /* we assume the mv range is -max_mv_x to +max_mv_x, ditto for y */
251*c83a76b0SSuyog Pawar /* So number of columns is 2*max_mv_x >> i4_shift_x. Ditto for rows */
252*c83a76b0SSuyog Pawar /* this helps us compute num bins that are active for this histo session */
253*c83a76b0SSuyog Pawar i4_num_cols = (i4_max_mv_x << 1) >> i4_shift_x;
254*c83a76b0SSuyog Pawar i4_num_rows = (i4_max_mv_y << 1) >> i4_shift_y;
255*c83a76b0SSuyog Pawar i4_num_bins = i4_num_rows * i4_num_cols;
256*c83a76b0SSuyog Pawar
257*c83a76b0SSuyog Pawar ASSERT(i4_num_bins <= MAX_NUM_BINS);
258*c83a76b0SSuyog Pawar
259*c83a76b0SSuyog Pawar ps_hist->i4_num_rows = i4_num_rows;
260*c83a76b0SSuyog Pawar ps_hist->i4_num_cols = i4_num_cols;
261*c83a76b0SSuyog Pawar ps_hist->i4_min_x = -i4_max_mv_x;
262*c83a76b0SSuyog Pawar ps_hist->i4_min_y = -i4_max_mv_y;
263*c83a76b0SSuyog Pawar ps_hist->i4_shift_x = i4_shift_x;
264*c83a76b0SSuyog Pawar ps_hist->i4_shift_y = i4_shift_y;
265*c83a76b0SSuyog Pawar ps_hist->i4_lobe1_size = 5;
266*c83a76b0SSuyog Pawar ps_hist->i4_lobe2_size = 3;
267*c83a76b0SSuyog Pawar
268*c83a76b0SSuyog Pawar ps_hist->i4_num_bins = i4_num_bins;
269*c83a76b0SSuyog Pawar
270*c83a76b0SSuyog Pawar for(i = 0; i < i4_num_bins; i++)
271*c83a76b0SSuyog Pawar {
272*c83a76b0SSuyog Pawar ps_hist->ai4_bin_count[i] = 0;
273*c83a76b0SSuyog Pawar }
274*c83a76b0SSuyog Pawar }
275*c83a76b0SSuyog Pawar
276*c83a76b0SSuyog Pawar /**
277*c83a76b0SSuyog Pawar ********************************************************************************
278*c83a76b0SSuyog Pawar * @fn hme_update_histogram(
279*c83a76b0SSuyog Pawar *
280*c83a76b0SSuyog Pawar * @brief Updates the histogram given an mv entry
281*c83a76b0SSuyog Pawar *
282*c83a76b0SSuyog Pawar * @param[in,out] ps_hist : the histogram structure
283*c83a76b0SSuyog Pawar *
284*c83a76b0SSuyog Pawar * @param[in] i4_mv_x : x component of the mv (fpel units)
285*c83a76b0SSuyog Pawar *
286*c83a76b0SSuyog Pawar * @param[in] i4_mv_y : y component of the mv (fpel units)
287*c83a76b0SSuyog Pawar *
288*c83a76b0SSuyog Pawar * @return None
289*c83a76b0SSuyog Pawar ********************************************************************************
290*c83a76b0SSuyog Pawar */
hme_update_histogram(mv_hist_t * ps_hist,S32 i4_mv_x,S32 i4_mv_y)291*c83a76b0SSuyog Pawar void hme_update_histogram(mv_hist_t *ps_hist, S32 i4_mv_x, S32 i4_mv_y)
292*c83a76b0SSuyog Pawar {
293*c83a76b0SSuyog Pawar S32 i4_bin_index, i4_col, i4_row;
294*c83a76b0SSuyog Pawar
295*c83a76b0SSuyog Pawar i4_col = (i4_mv_x - ps_hist->i4_min_x) >> ps_hist->i4_shift_x;
296*c83a76b0SSuyog Pawar i4_row = (i4_mv_y - ps_hist->i4_min_y) >> ps_hist->i4_shift_y;
297*c83a76b0SSuyog Pawar
298*c83a76b0SSuyog Pawar i4_bin_index = i4_col + (i4_row * ps_hist->i4_num_cols);
299*c83a76b0SSuyog Pawar /* Sanity Check */
300*c83a76b0SSuyog Pawar ASSERT(i4_bin_index < MAX_NUM_BINS);
301*c83a76b0SSuyog Pawar
302*c83a76b0SSuyog Pawar ps_hist->ai4_bin_count[i4_bin_index]++;
303*c83a76b0SSuyog Pawar }
304*c83a76b0SSuyog Pawar
305*c83a76b0SSuyog Pawar /**
306*c83a76b0SSuyog Pawar ********************************************************************************
307*c83a76b0SSuyog Pawar * @fn hme_get_global_mv(
308*c83a76b0SSuyog Pawar *
309*c83a76b0SSuyog Pawar * @brief returns the global mv of a previous picture. Accounts for the fact
310*c83a76b0SSuyog Pawar * that the delta poc of the previous picture may have been different
311*c83a76b0SSuyog Pawar * from delta poc of current picture. Delta poc is POC difference
312*c83a76b0SSuyog Pawar * between a picture and its reference.
313*c83a76b0SSuyog Pawar *
314*c83a76b0SSuyog Pawar * @param[out] ps_mv: mv_t structure where the motion vector is returned
315*c83a76b0SSuyog Pawar *
316*c83a76b0SSuyog Pawar * @param[in] i4_delta_poc: the delta poc for the current pic w.r.t. reference
317*c83a76b0SSuyog Pawar *
318*c83a76b0SSuyog Pawar * @return None
319*c83a76b0SSuyog Pawar ********************************************************************************
320*c83a76b0SSuyog Pawar */
hme_get_global_mv(layer_ctxt_t * ps_prev_layer,hme_mv_t * ps_mv,S32 i4_delta_poc)321*c83a76b0SSuyog Pawar void hme_get_global_mv(layer_ctxt_t *ps_prev_layer, hme_mv_t *ps_mv, S32 i4_delta_poc)
322*c83a76b0SSuyog Pawar {
323*c83a76b0SSuyog Pawar S16 i2_mv_x, i2_mv_y;
324*c83a76b0SSuyog Pawar S32 i4_delta_poc_prev;
325*c83a76b0SSuyog Pawar S32 i4_poc_prev = ps_prev_layer->i4_poc;
326*c83a76b0SSuyog Pawar S32 i4_poc_prev_ref = ps_prev_layer->ai4_ref_id_to_poc_lc[0];
327*c83a76b0SSuyog Pawar
328*c83a76b0SSuyog Pawar i4_delta_poc_prev = i4_poc_prev - i4_poc_prev_ref;
329*c83a76b0SSuyog Pawar i2_mv_x = ps_prev_layer->s_global_mv[0][GMV_THICK_LOBE].i2_mv_x;
330*c83a76b0SSuyog Pawar i2_mv_y = ps_prev_layer->s_global_mv[0][GMV_THICK_LOBE].i2_mv_y;
331*c83a76b0SSuyog Pawar
332*c83a76b0SSuyog Pawar i2_mv_x = (S16)((i2_mv_x * i4_delta_poc) / i4_delta_poc_prev);
333*c83a76b0SSuyog Pawar i2_mv_y = (S16)((i2_mv_y * i4_delta_poc) / i4_delta_poc_prev);
334*c83a76b0SSuyog Pawar
335*c83a76b0SSuyog Pawar ps_mv->i2_mv_x = i2_mv_x;
336*c83a76b0SSuyog Pawar ps_mv->i2_mv_y = i2_mv_y;
337*c83a76b0SSuyog Pawar }
338*c83a76b0SSuyog Pawar
339*c83a76b0SSuyog Pawar /**
340*c83a76b0SSuyog Pawar ********************************************************************************
341*c83a76b0SSuyog Pawar * @fn hme_calculate_global_mv(
342*c83a76b0SSuyog Pawar *
343*c83a76b0SSuyog Pawar * @brief Calculates global mv for a given histogram
344*c83a76b0SSuyog Pawar *
345*c83a76b0SSuyog Pawar * @param[in] ps_hist : the histogram structure
346*c83a76b0SSuyog Pawar *
347*c83a76b0SSuyog Pawar * @param[in] ps_mv : used to return the global mv
348*c83a76b0SSuyog Pawar *
349*c83a76b0SSuyog Pawar * @param[in] e_lobe_type : refer to GMV_MVTYPE_T
350*c83a76b0SSuyog Pawar *
351*c83a76b0SSuyog Pawar * @return None
352*c83a76b0SSuyog Pawar ********************************************************************************
353*c83a76b0SSuyog Pawar */
hme_calculate_global_mv(mv_hist_t * ps_hist,hme_mv_t * ps_mv,GMV_MVTYPE_T e_lobe_type)354*c83a76b0SSuyog Pawar void hme_calculate_global_mv(mv_hist_t *ps_hist, hme_mv_t *ps_mv, GMV_MVTYPE_T e_lobe_type)
355*c83a76b0SSuyog Pawar {
356*c83a76b0SSuyog Pawar S32 i4_offset, i4_lobe_size, i4_y, i4_x, *pi4_bin_count;
357*c83a76b0SSuyog Pawar S32 i4_max_sum = -1;
358*c83a76b0SSuyog Pawar S32 i4_max_x = 0, i4_max_y = 0;
359*c83a76b0SSuyog Pawar
360*c83a76b0SSuyog Pawar if(e_lobe_type == GMV_THICK_LOBE)
361*c83a76b0SSuyog Pawar i4_lobe_size = ps_hist->i4_lobe1_size;
362*c83a76b0SSuyog Pawar else
363*c83a76b0SSuyog Pawar i4_lobe_size = ps_hist->i4_lobe2_size;
364*c83a76b0SSuyog Pawar
365*c83a76b0SSuyog Pawar i4_offset = i4_lobe_size >> 1;
366*c83a76b0SSuyog Pawar for(i4_y = i4_offset; i4_y < ps_hist->i4_num_rows - i4_offset; i4_y++)
367*c83a76b0SSuyog Pawar {
368*c83a76b0SSuyog Pawar for(i4_x = i4_offset; i4_x < ps_hist->i4_num_cols - i4_offset; i4_x++)
369*c83a76b0SSuyog Pawar {
370*c83a76b0SSuyog Pawar S32 i4_bin_id, i4_sum;
371*c83a76b0SSuyog Pawar i4_bin_id = (i4_x - 2) + ((i4_y - 2) * ps_hist->i4_num_cols);
372*c83a76b0SSuyog Pawar
373*c83a76b0SSuyog Pawar pi4_bin_count = &ps_hist->ai4_bin_count[i4_bin_id];
374*c83a76b0SSuyog Pawar i4_sum = hme_compute_2d_sum_unsigned(
375*c83a76b0SSuyog Pawar (void *)pi4_bin_count,
376*c83a76b0SSuyog Pawar i4_lobe_size,
377*c83a76b0SSuyog Pawar i4_lobe_size,
378*c83a76b0SSuyog Pawar ps_hist->i4_num_cols,
379*c83a76b0SSuyog Pawar sizeof(U32));
380*c83a76b0SSuyog Pawar
381*c83a76b0SSuyog Pawar if(i4_sum > i4_max_sum)
382*c83a76b0SSuyog Pawar {
383*c83a76b0SSuyog Pawar i4_max_x = i4_x;
384*c83a76b0SSuyog Pawar i4_max_y = i4_y;
385*c83a76b0SSuyog Pawar i4_max_sum = i4_sum;
386*c83a76b0SSuyog Pawar }
387*c83a76b0SSuyog Pawar }
388*c83a76b0SSuyog Pawar }
389*c83a76b0SSuyog Pawar
390*c83a76b0SSuyog Pawar ps_mv->i2_mv_y = (S16)((i4_max_y << ps_hist->i4_shift_y) + ps_hist->i4_min_y);
391*c83a76b0SSuyog Pawar ps_mv->i2_mv_x = (S16)((i4_max_x << ps_hist->i4_shift_x) + ps_hist->i4_min_x);
392*c83a76b0SSuyog Pawar }
393*c83a76b0SSuyog Pawar
394*c83a76b0SSuyog Pawar /**
395*c83a76b0SSuyog Pawar ********************************************************************************
396*c83a76b0SSuyog Pawar * @fn ctb_node_t *hme_get_ctb_node(ctb_mem_mgr_t *ps_mem_mgr)
397*c83a76b0SSuyog Pawar *
398*c83a76b0SSuyog Pawar * @brief returns a new ctb node usable for creating a new ctb candidate
399*c83a76b0SSuyog Pawar *
400*c83a76b0SSuyog Pawar * @param[in] ps_mem_mgr : memory manager holding all ctb nodes
401*c83a76b0SSuyog Pawar *
402*c83a76b0SSuyog Pawar * @return NULL if no free nodes, else ptr to the new ctb node
403*c83a76b0SSuyog Pawar ********************************************************************************
404*c83a76b0SSuyog Pawar */
hme_get_ctb_node(ctb_mem_mgr_t * ps_mem_mgr)405*c83a76b0SSuyog Pawar ctb_node_t *hme_get_ctb_node(ctb_mem_mgr_t *ps_mem_mgr)
406*c83a76b0SSuyog Pawar {
407*c83a76b0SSuyog Pawar U08 *pu1_ret;
408*c83a76b0SSuyog Pawar if((ps_mem_mgr->i4_used + ps_mem_mgr->i4_size) > ps_mem_mgr->i4_tot)
409*c83a76b0SSuyog Pawar return (NULL);
410*c83a76b0SSuyog Pawar pu1_ret = ps_mem_mgr->pu1_mem + ps_mem_mgr->i4_used;
411*c83a76b0SSuyog Pawar ps_mem_mgr->i4_used += ps_mem_mgr->i4_size;
412*c83a76b0SSuyog Pawar return ((ctb_node_t *)pu1_ret);
413*c83a76b0SSuyog Pawar }
414*c83a76b0SSuyog Pawar
415*c83a76b0SSuyog Pawar /**
416*c83a76b0SSuyog Pawar ********************************************************************************
417*c83a76b0SSuyog Pawar * @fn hme_map_mvs_to_grid(mv_grid_t **pps_mv_grid,
418*c83a76b0SSuyog Pawar search_results_t *ps_search_results, S32 i4_num_ref)
419*c83a76b0SSuyog Pawar *
420*c83a76b0SSuyog Pawar * @brief For a given CU whose results are in ps_search_results, the 17x17
421*c83a76b0SSuyog Pawar * mv grid is updated for future use within the CTB
422*c83a76b0SSuyog Pawar *
423*c83a76b0SSuyog Pawar * @param[in] ps_search_results : Search results data structure
424*c83a76b0SSuyog Pawar *
425*c83a76b0SSuyog Pawar * @param[out] pps_mv_grid: The mv grid (as many as num ref)
426*c83a76b0SSuyog Pawar *
427*c83a76b0SSuyog Pawar * @param[in] i4_num_ref: nuber of search iterations to update
428*c83a76b0SSuyog Pawar *
429*c83a76b0SSuyog Pawar * @return None
430*c83a76b0SSuyog Pawar ********************************************************************************
431*c83a76b0SSuyog Pawar */
hme_map_mvs_to_grid(mv_grid_t ** pps_mv_grid,search_results_t * ps_search_results,U08 * pu1_pred_dir_searched,S32 i4_num_pred_dir)432*c83a76b0SSuyog Pawar void hme_map_mvs_to_grid(
433*c83a76b0SSuyog Pawar mv_grid_t **pps_mv_grid,
434*c83a76b0SSuyog Pawar search_results_t *ps_search_results,
435*c83a76b0SSuyog Pawar U08 *pu1_pred_dir_searched,
436*c83a76b0SSuyog Pawar S32 i4_num_pred_dir)
437*c83a76b0SSuyog Pawar {
438*c83a76b0SSuyog Pawar S32 i4_cu_start_offset;
439*c83a76b0SSuyog Pawar /*************************************************************************/
440*c83a76b0SSuyog Pawar /* Start x, y offset of CU relative to CTB. To update the mv grid which */
441*c83a76b0SSuyog Pawar /* stores 1 mv per 4x4, we convert pixel offset to 4x4 blk offset */
442*c83a76b0SSuyog Pawar /*************************************************************************/
443*c83a76b0SSuyog Pawar S32 i4_cu_offset_x = (S32)ps_search_results->u1_x_off >> 2;
444*c83a76b0SSuyog Pawar S32 i4_cu_offset_y = (S32)ps_search_results->u1_y_off >> 2;
445*c83a76b0SSuyog Pawar
446*c83a76b0SSuyog Pawar /* Controls the attribute of a given partition within CU */
447*c83a76b0SSuyog Pawar /* , i.e. start locn, size */
448*c83a76b0SSuyog Pawar part_attr_t *ps_part_attr;
449*c83a76b0SSuyog Pawar
450*c83a76b0SSuyog Pawar S32 i4_part, i4_part_id, num_parts, i4_stride;
451*c83a76b0SSuyog Pawar S16 i2_mv_x, i2_mv_y;
452*c83a76b0SSuyog Pawar S08 i1_ref_idx;
453*c83a76b0SSuyog Pawar
454*c83a76b0SSuyog Pawar /* Per partition, attributes w.r.t. CU start */
455*c83a76b0SSuyog Pawar S32 x_start, y_start, x_end, y_end, i4_x, i4_y;
456*c83a76b0SSuyog Pawar PART_TYPE_T e_part_type;
457*c83a76b0SSuyog Pawar
458*c83a76b0SSuyog Pawar /* Points to exact mv structures within the grid to be udpated */
459*c83a76b0SSuyog Pawar search_node_t *ps_grid_node, *ps_grid_node_tmp;
460*c83a76b0SSuyog Pawar
461*c83a76b0SSuyog Pawar /* points to exact mv grid (based on search iteration) to be updated */
462*c83a76b0SSuyog Pawar mv_grid_t *ps_mv_grid;
463*c83a76b0SSuyog Pawar
464*c83a76b0SSuyog Pawar search_node_t *ps_search_node;
465*c83a76b0SSuyog Pawar
466*c83a76b0SSuyog Pawar S32 shift, i, mv_shift = 2;
467*c83a76b0SSuyog Pawar /* Proportional to the size of CU, controls the number of 4x4 blks */
468*c83a76b0SSuyog Pawar /* to be updated */
469*c83a76b0SSuyog Pawar shift = ps_search_results->e_cu_size;
470*c83a76b0SSuyog Pawar ASSERT(i4_num_pred_dir <= 2);
471*c83a76b0SSuyog Pawar
472*c83a76b0SSuyog Pawar e_part_type = (PART_TYPE_T)ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;
473*c83a76b0SSuyog Pawar
474*c83a76b0SSuyog Pawar if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
475*c83a76b0SSuyog Pawar (ps_search_results->i4_part_mask & ENABLE_NxN))
476*c83a76b0SSuyog Pawar {
477*c83a76b0SSuyog Pawar e_part_type = PRT_NxN;
478*c83a76b0SSuyog Pawar }
479*c83a76b0SSuyog Pawar
480*c83a76b0SSuyog Pawar for(i = 0; i < i4_num_pred_dir; i++)
481*c83a76b0SSuyog Pawar {
482*c83a76b0SSuyog Pawar num_parts = gau1_num_parts_in_part_type[e_part_type];
483*c83a76b0SSuyog Pawar ps_mv_grid = pps_mv_grid[pu1_pred_dir_searched[i]];
484*c83a76b0SSuyog Pawar i4_stride = ps_mv_grid->i4_stride;
485*c83a76b0SSuyog Pawar
486*c83a76b0SSuyog Pawar i4_cu_start_offset =
487*c83a76b0SSuyog Pawar i4_cu_offset_x + i4_cu_offset_y * i4_stride + ps_mv_grid->i4_start_offset;
488*c83a76b0SSuyog Pawar
489*c83a76b0SSuyog Pawar /* Move to the appropriate 2d locn of CU start within Grid */
490*c83a76b0SSuyog Pawar ps_grid_node = &ps_mv_grid->as_node[i4_cu_start_offset];
491*c83a76b0SSuyog Pawar
492*c83a76b0SSuyog Pawar for(i4_part = 0; i4_part < num_parts; i4_part++)
493*c83a76b0SSuyog Pawar {
494*c83a76b0SSuyog Pawar i4_part_id = ge_part_type_to_part_id[e_part_type][i4_part];
495*c83a76b0SSuyog Pawar
496*c83a76b0SSuyog Pawar /* Pick the mvx and y and ref id corresponding to this partition */
497*c83a76b0SSuyog Pawar ps_search_node =
498*c83a76b0SSuyog Pawar ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];
499*c83a76b0SSuyog Pawar
500*c83a76b0SSuyog Pawar i2_mv_x = ps_search_node->s_mv.i2_mvx;
501*c83a76b0SSuyog Pawar i2_mv_y = ps_search_node->s_mv.i2_mvy;
502*c83a76b0SSuyog Pawar i1_ref_idx = ps_search_node->i1_ref_idx;
503*c83a76b0SSuyog Pawar
504*c83a76b0SSuyog Pawar /* Move to the appropriate location within the CU */
505*c83a76b0SSuyog Pawar ps_part_attr = &gas_part_attr_in_cu[i4_part_id];
506*c83a76b0SSuyog Pawar x_start = ps_part_attr->u1_x_start;
507*c83a76b0SSuyog Pawar x_end = x_start + ps_part_attr->u1_x_count;
508*c83a76b0SSuyog Pawar y_start = ps_part_attr->u1_y_start;
509*c83a76b0SSuyog Pawar y_end = y_start + ps_part_attr->u1_y_count;
510*c83a76b0SSuyog Pawar
511*c83a76b0SSuyog Pawar /* Convert attributes from 8x8 CU size to given CU size */
512*c83a76b0SSuyog Pawar x_start = (x_start << shift) >> mv_shift;
513*c83a76b0SSuyog Pawar x_end = (x_end << shift) >> mv_shift;
514*c83a76b0SSuyog Pawar y_start = (y_start << shift) >> mv_shift;
515*c83a76b0SSuyog Pawar y_end = (y_end << shift) >> mv_shift;
516*c83a76b0SSuyog Pawar
517*c83a76b0SSuyog Pawar ps_grid_node_tmp = ps_grid_node + y_start * i4_stride;
518*c83a76b0SSuyog Pawar
519*c83a76b0SSuyog Pawar /* Update all 4x4 blk mvs with the part mv */
520*c83a76b0SSuyog Pawar /* For e.g. we update 4 units in case of NxN for 16x16 CU */
521*c83a76b0SSuyog Pawar for(i4_y = y_start; i4_y < y_end; i4_y++)
522*c83a76b0SSuyog Pawar {
523*c83a76b0SSuyog Pawar for(i4_x = x_start; i4_x < x_end; i4_x++)
524*c83a76b0SSuyog Pawar {
525*c83a76b0SSuyog Pawar ps_grid_node_tmp[i4_x].s_mv.i2_mvx = i2_mv_x;
526*c83a76b0SSuyog Pawar ps_grid_node_tmp[i4_x].s_mv.i2_mvy = i2_mv_y;
527*c83a76b0SSuyog Pawar ps_grid_node_tmp[i4_x].i1_ref_idx = i1_ref_idx;
528*c83a76b0SSuyog Pawar ps_grid_node_tmp[i4_x].u1_subpel_done = 1;
529*c83a76b0SSuyog Pawar }
530*c83a76b0SSuyog Pawar ps_grid_node_tmp += i4_stride;
531*c83a76b0SSuyog Pawar }
532*c83a76b0SSuyog Pawar }
533*c83a76b0SSuyog Pawar }
534*c83a76b0SSuyog Pawar }
535*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_set_ctb_pred_attr(ctb_node_t *ps_parent, U08 *pu1_pred0,
*              U08 *pu1_pred1, S32 i4_stride)
*
*  @brief  Stores the two prediction buffer pointers and the stride in the
*          given node and, if the node has children, recursively does the
*          same for its four children with pointers offset to the quadrant
*          of each child
*
*  @param[in,out] ps_parent : node to update; children are updated as well
*              when ps_tl is not NULL
*
*  @param[in] pu1_pred0 : first prediction buffer for this node
*
*  @param[in] pu1_pred1 : second prediction buffer for this node
*
*  @param[in] i4_stride : stride stored in this node; children are given
*              half of this value
*
*  @return None
********************************************************************************
*/
void hme_set_ctb_pred_attr(ctb_node_t *ps_parent, U08 *pu1_pred0, U08 *pu1_pred1, S32 i4_stride)
{
    ps_parent->apu1_pred[0] = pu1_pred0;
    ps_parent->apu1_pred[1] = pu1_pred1;
    ps_parent->i4_pred_stride = i4_stride;

    /* A NULL top left child marks a node without children */
    if(ps_parent->ps_tl != NULL)
    {
        /* Horizontal distance between the top right child and this node */
        S32 blk_wd = (S32)ps_parent->ps_tr->u1_x_off;
        blk_wd -= (S32)ps_parent->u1_x_off;

        /* Top left quadrant */
        hme_set_ctb_pred_attr(ps_parent->ps_tl, pu1_pred0, pu1_pred1, i4_stride >> 1);

        /* Top right quadrant */
        hme_set_ctb_pred_attr(
            ps_parent->ps_tr, pu1_pred0 + blk_wd, pu1_pred1 + blk_wd, i4_stride >> 1);

        /* Bottom left quadrant */
        hme_set_ctb_pred_attr(
            ps_parent->ps_bl,
            pu1_pred0 + (blk_wd * i4_stride),
            pu1_pred1 + (blk_wd * i4_stride),
            i4_stride >> 1);

        /* Bottom right quadrant. The offset (blk_wd right, blk_wd down)   */
        /* belongs to the bottom right child; applying it to ps_tr would   */
        /* overwrite the top right child and leave the bottom right unset. */
        /* NOTE(review): assumes ctb_node_t names this child ps_br, in     */
        /* line with ps_tl / ps_tr / ps_bl - confirm against the struct    */
        hme_set_ctb_pred_attr(
            ps_parent->ps_br,
            pu1_pred0 + (blk_wd * (1 + i4_stride)),
            pu1_pred1 + (blk_wd * (1 + i4_stride)),
            i4_stride >> 1);
    }
}
564*c83a76b0SSuyog Pawar
565*c83a76b0SSuyog Pawar /**
566*c83a76b0SSuyog Pawar ********************************************************************************
567*c83a76b0SSuyog Pawar * @fn hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids)
568*c83a76b0SSuyog Pawar *
569*c83a76b0SSuyog Pawar * @brief Expands the part mask to a list of valid part ids terminated by -1
570*c83a76b0SSuyog Pawar *
571*c83a76b0SSuyog Pawar * @param[in] i4_part_mask : bit mask of active partitino ids
572*c83a76b0SSuyog Pawar *
573*c83a76b0SSuyog Pawar * @param[out] pi4_valid_part_ids : array, each entry has one valid part id
574*c83a76b0SSuyog Pawar * Terminated by -1 to signal end.
575*c83a76b0SSuyog Pawar *
576*c83a76b0SSuyog Pawar * @return number of partitions
577*c83a76b0SSuyog Pawar ********************************************************************************
578*c83a76b0SSuyog Pawar */
hme_create_valid_part_ids(S32 i4_part_mask,S32 * pi4_valid_part_ids)579*c83a76b0SSuyog Pawar S32 hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids)
580*c83a76b0SSuyog Pawar {
581*c83a76b0SSuyog Pawar S32 id = 0, i;
582*c83a76b0SSuyog Pawar for(i = 0; i < TOT_NUM_PARTS; i++)
583*c83a76b0SSuyog Pawar {
584*c83a76b0SSuyog Pawar if(i4_part_mask & (1 << i))
585*c83a76b0SSuyog Pawar {
586*c83a76b0SSuyog Pawar pi4_valid_part_ids[id] = i;
587*c83a76b0SSuyog Pawar id++;
588*c83a76b0SSuyog Pawar }
589*c83a76b0SSuyog Pawar }
590*c83a76b0SSuyog Pawar pi4_valid_part_ids[id] = -1;
591*c83a76b0SSuyog Pawar
592*c83a76b0SSuyog Pawar return id;
593*c83a76b0SSuyog Pawar }
594*c83a76b0SSuyog Pawar
595*c83a76b0SSuyog Pawar ctb_boundary_attrs_t *
get_ctb_attrs(S32 ctb_start_x,S32 ctb_start_y,S32 pic_wd,S32 pic_ht,me_frm_ctxt_t * ps_ctxt)596*c83a76b0SSuyog Pawar get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt)
597*c83a76b0SSuyog Pawar {
598*c83a76b0SSuyog Pawar S32 horz_crop, vert_crop;
599*c83a76b0SSuyog Pawar ctb_boundary_attrs_t *ps_attrs;
600*c83a76b0SSuyog Pawar
601*c83a76b0SSuyog Pawar horz_crop = ((ctb_start_x + 64) > pic_wd) ? 2 : 0;
602*c83a76b0SSuyog Pawar vert_crop = ((ctb_start_y + 64) > pic_ht) ? 1 : 0;
603*c83a76b0SSuyog Pawar switch(horz_crop + vert_crop)
604*c83a76b0SSuyog Pawar {
605*c83a76b0SSuyog Pawar case 0:
606*c83a76b0SSuyog Pawar ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_CENTRE];
607*c83a76b0SSuyog Pawar break;
608*c83a76b0SSuyog Pawar case 1:
609*c83a76b0SSuyog Pawar ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_BOT_PIC_BOUNDARY];
610*c83a76b0SSuyog Pawar break;
611*c83a76b0SSuyog Pawar case 2:
612*c83a76b0SSuyog Pawar ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_RT_PIC_BOUNDARY];
613*c83a76b0SSuyog Pawar break;
614*c83a76b0SSuyog Pawar case 3:
615*c83a76b0SSuyog Pawar ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_BOT_RT_PIC_BOUNDARY];
616*c83a76b0SSuyog Pawar break;
617*c83a76b0SSuyog Pawar }
618*c83a76b0SSuyog Pawar return (ps_attrs);
619*c83a76b0SSuyog Pawar }
620*c83a76b0SSuyog Pawar
621*c83a76b0SSuyog Pawar /**
622*c83a76b0SSuyog Pawar ********************************************************************************
623*c83a76b0SSuyog Pawar * @fn hevc_avg_2d(U08 *pu1_src1,
624*c83a76b0SSuyog Pawar * U08 *pu1_src2,
625*c83a76b0SSuyog Pawar * S32 i4_src1_stride,
626*c83a76b0SSuyog Pawar * S32 i4_src2_stride,
627*c83a76b0SSuyog Pawar * S32 i4_blk_wd,
628*c83a76b0SSuyog Pawar * S32 i4_blk_ht,
629*c83a76b0SSuyog Pawar * U08 *pu1_dst,
630*c83a76b0SSuyog Pawar * S32 i4_dst_stride)
631*c83a76b0SSuyog Pawar *
632*c83a76b0SSuyog Pawar *
633*c83a76b0SSuyog Pawar * @brief point wise average of two buffers into a third buffer
634*c83a76b0SSuyog Pawar *
635*c83a76b0SSuyog Pawar * @param[in] pu1_src1 : first source buffer
636*c83a76b0SSuyog Pawar *
637*c83a76b0SSuyog Pawar * @param[in] pu1_src2 : 2nd source buffer
638*c83a76b0SSuyog Pawar *
639*c83a76b0SSuyog Pawar * @param[in] i4_src1_stride : stride of source 1 buffer
640*c83a76b0SSuyog Pawar *
641*c83a76b0SSuyog Pawar * @param[in] i4_src2_stride : stride of source 2 buffer
642*c83a76b0SSuyog Pawar *
643*c83a76b0SSuyog Pawar * @param[in] i4_blk_wd : block width
644*c83a76b0SSuyog Pawar *
645*c83a76b0SSuyog Pawar * @param[in] i4_blk_ht : block height
646*c83a76b0SSuyog Pawar *
647*c83a76b0SSuyog Pawar * @param[out] pu1_dst : destination buffer
648*c83a76b0SSuyog Pawar *
649*c83a76b0SSuyog Pawar * @param[in] i4_dst_stride : stride of the destination buffer
650*c83a76b0SSuyog Pawar *
651*c83a76b0SSuyog Pawar * @return void
652*c83a76b0SSuyog Pawar ********************************************************************************
653*c83a76b0SSuyog Pawar */
hevc_avg_2d(U08 * pu1_src1,U08 * pu1_src2,S32 i4_src1_stride,S32 i4_src2_stride,S32 i4_blk_wd,S32 i4_blk_ht,U08 * pu1_dst,S32 i4_dst_stride)654*c83a76b0SSuyog Pawar void hevc_avg_2d(
655*c83a76b0SSuyog Pawar U08 *pu1_src1,
656*c83a76b0SSuyog Pawar U08 *pu1_src2,
657*c83a76b0SSuyog Pawar S32 i4_src1_stride,
658*c83a76b0SSuyog Pawar S32 i4_src2_stride,
659*c83a76b0SSuyog Pawar S32 i4_blk_wd,
660*c83a76b0SSuyog Pawar S32 i4_blk_ht,
661*c83a76b0SSuyog Pawar U08 *pu1_dst,
662*c83a76b0SSuyog Pawar S32 i4_dst_stride)
663*c83a76b0SSuyog Pawar {
664*c83a76b0SSuyog Pawar S32 i, j;
665*c83a76b0SSuyog Pawar
666*c83a76b0SSuyog Pawar for(i = 0; i < i4_blk_ht; i++)
667*c83a76b0SSuyog Pawar {
668*c83a76b0SSuyog Pawar for(j = 0; j < i4_blk_wd; j++)
669*c83a76b0SSuyog Pawar {
670*c83a76b0SSuyog Pawar pu1_dst[j] = (pu1_src1[j] + pu1_src2[j] + 1) >> 1;
671*c83a76b0SSuyog Pawar }
672*c83a76b0SSuyog Pawar pu1_src1 += i4_src1_stride;
673*c83a76b0SSuyog Pawar pu1_src2 += i4_src2_stride;
674*c83a76b0SSuyog Pawar pu1_dst += i4_dst_stride;
675*c83a76b0SSuyog Pawar }
676*c83a76b0SSuyog Pawar }
677*c83a76b0SSuyog Pawar /**
678*c83a76b0SSuyog Pawar ********************************************************************************
679*c83a76b0SSuyog Pawar * @fn hme_pick_back_search_node(search_results_t *ps_search_results,
680*c83a76b0SSuyog Pawar * search_node_t *ps_search_node_fwd,
681*c83a76b0SSuyog Pawar * S32 i4_part_idx,
682*c83a76b0SSuyog Pawar * layer_ctxt_t *ps_curr_layer)
683*c83a76b0SSuyog Pawar *
684*c83a76b0SSuyog Pawar *
685*c83a76b0SSuyog Pawar * @brief returns the search node corresponding to a ref idx in same or
686*c83a76b0SSuyog Pawar * opp direction. Preference is given to opp direction, but if that
687*c83a76b0SSuyog Pawar * does not yield results, same direction is attempted.
688*c83a76b0SSuyog Pawar *
689*c83a76b0SSuyog Pawar * @param[in] ps_search_results: search results overall
690*c83a76b0SSuyog Pawar *
691*c83a76b0SSuyog Pawar * @param[in] ps_search_node_fwd: search node corresponding to "fwd" direction
692*c83a76b0SSuyog Pawar *
693*c83a76b0SSuyog Pawar * @param[in] i4_part_idx : partition id
694*c83a76b0SSuyog Pawar *
695*c83a76b0SSuyog Pawar * @param[in] ps_curr_layer : layer context for current layer.
696*c83a76b0SSuyog Pawar *
697*c83a76b0SSuyog Pawar * @return search node corresponding to hte "other direction"
698*c83a76b0SSuyog Pawar ********************************************************************************
699*c83a76b0SSuyog Pawar */
700*c83a76b0SSuyog Pawar //#define PICK_L1_REF_SAME_DIR
hme_pick_back_search_node(search_results_t * ps_search_results,search_node_t * ps_search_node_fwd,S32 i4_part_idx,layer_ctxt_t * ps_curr_layer)701*c83a76b0SSuyog Pawar search_node_t *hme_pick_back_search_node(
702*c83a76b0SSuyog Pawar search_results_t *ps_search_results,
703*c83a76b0SSuyog Pawar search_node_t *ps_search_node_fwd,
704*c83a76b0SSuyog Pawar S32 i4_part_idx,
705*c83a76b0SSuyog Pawar layer_ctxt_t *ps_curr_layer)
706*c83a76b0SSuyog Pawar {
707*c83a76b0SSuyog Pawar S32 is_past_l0, is_past_l1, id, i, i4_poc;
708*c83a76b0SSuyog Pawar S32 *pi4_ref_id_to_poc_lc = ps_curr_layer->ai4_ref_id_to_poc_lc;
709*c83a76b0SSuyog Pawar //ref_attr_t *ps_ref_attr_lc;
710*c83a76b0SSuyog Pawar S08 i1_ref_idx_fwd;
711*c83a76b0SSuyog Pawar S16 i2_mv_x, i2_mv_y;
712*c83a76b0SSuyog Pawar search_node_t *ps_search_node;
713*c83a76b0SSuyog Pawar
714*c83a76b0SSuyog Pawar i1_ref_idx_fwd = ps_search_node_fwd->i1_ref_idx;
715*c83a76b0SSuyog Pawar i2_mv_x = ps_search_node_fwd->s_mv.i2_mvx;
716*c83a76b0SSuyog Pawar i2_mv_y = ps_search_node_fwd->s_mv.i2_mvy;
717*c83a76b0SSuyog Pawar i4_poc = ps_curr_layer->i4_poc;
718*c83a76b0SSuyog Pawar
719*c83a76b0SSuyog Pawar //ps_ref_attr_lc = &ps_curr_layer->as_ref_attr_lc[0];
720*c83a76b0SSuyog Pawar /* If the ref id already picked up maps to a past pic, then we pick */
721*c83a76b0SSuyog Pawar /* a result corresponding to future pic. If such a result is not */
722*c83a76b0SSuyog Pawar /* to be found, then we pick a result corresponding to a past pic */
723*c83a76b0SSuyog Pawar //is_past = ps_ref_attr_lc[i1_ref_idx_fwd].u1_is_past;
724*c83a76b0SSuyog Pawar is_past_l0 = (i4_poc > pi4_ref_id_to_poc_lc[i1_ref_idx_fwd]) ? 1 : 0;
725*c83a76b0SSuyog Pawar
726*c83a76b0SSuyog Pawar ASSERT(ps_search_results->u1_num_active_ref <= 2);
727*c83a76b0SSuyog Pawar
728*c83a76b0SSuyog Pawar /* pick the right iteration of search nodes to pick up */
729*c83a76b0SSuyog Pawar #ifdef PICK_L1_REF_SAME_DIR
730*c83a76b0SSuyog Pawar if(ps_search_results->u1_num_active_ref == 2)
731*c83a76b0SSuyog Pawar id = !is_past_l0;
732*c83a76b0SSuyog Pawar #else
733*c83a76b0SSuyog Pawar if(ps_search_results->u1_num_active_ref == 2)
734*c83a76b0SSuyog Pawar id = is_past_l0;
735*c83a76b0SSuyog Pawar #endif
736*c83a76b0SSuyog Pawar else
737*c83a76b0SSuyog Pawar id = 0;
738*c83a76b0SSuyog Pawar
739*c83a76b0SSuyog Pawar ps_search_node = ps_search_results->aps_part_results[id][i4_part_idx];
740*c83a76b0SSuyog Pawar
741*c83a76b0SSuyog Pawar for(i = 0; i < ps_search_results->u1_num_results_per_part; i++)
742*c83a76b0SSuyog Pawar {
743*c83a76b0SSuyog Pawar S08 i1_ref_test = ps_search_node[i].i1_ref_idx;
744*c83a76b0SSuyog Pawar is_past_l1 = (pi4_ref_id_to_poc_lc[i1_ref_test] < i4_poc) ? 1 : 0;
745*c83a76b0SSuyog Pawar //if (ps_ref_attr_lc[ps_search_node[i].i1_ref_idx].u1_is_past != is_past)
746*c83a76b0SSuyog Pawar #ifdef PICK_L1_REF_SAME_DIR
747*c83a76b0SSuyog Pawar if(is_past_l1 == is_past_l0)
748*c83a76b0SSuyog Pawar #else
749*c83a76b0SSuyog Pawar if(is_past_l1 != is_past_l0)
750*c83a76b0SSuyog Pawar #endif
751*c83a76b0SSuyog Pawar {
752*c83a76b0SSuyog Pawar /* belongs to same direction as the ref idx passed, so continue */
753*c83a76b0SSuyog Pawar return (ps_search_node + i);
754*c83a76b0SSuyog Pawar }
755*c83a76b0SSuyog Pawar }
756*c83a76b0SSuyog Pawar
757*c83a76b0SSuyog Pawar /* Unable to find best result in opp direction, so try same direction */
758*c83a76b0SSuyog Pawar /* However we need to ensure that we do not pick up same result */
759*c83a76b0SSuyog Pawar for(i = 0; i < ps_search_results->u1_num_results_per_part; i++)
760*c83a76b0SSuyog Pawar {
761*c83a76b0SSuyog Pawar if((ps_search_node->i1_ref_idx != i1_ref_idx_fwd) ||
762*c83a76b0SSuyog Pawar (ps_search_node->s_mv.i2_mvx != i2_mv_x) || (ps_search_node->s_mv.i2_mvy != i2_mv_y))
763*c83a76b0SSuyog Pawar {
764*c83a76b0SSuyog Pawar return (ps_search_node);
765*c83a76b0SSuyog Pawar }
766*c83a76b0SSuyog Pawar ps_search_node++;
767*c83a76b0SSuyog Pawar }
768*c83a76b0SSuyog Pawar
769*c83a76b0SSuyog Pawar //ASSERT(0);
770*c83a76b0SSuyog Pawar return (ps_search_results->aps_part_results[id][i4_part_idx]);
771*c83a76b0SSuyog Pawar
772*c83a76b0SSuyog Pawar //return (NULL);
773*c83a76b0SSuyog Pawar }
774*c83a76b0SSuyog Pawar
775*c83a76b0SSuyog Pawar /**
776*c83a76b0SSuyog Pawar ********************************************************************************
777*c83a76b0SSuyog Pawar * @fn hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride)
778*c83a76b0SSuyog Pawar *
779*c83a76b0SSuyog Pawar *
780*c83a76b0SSuyog Pawar * @brief Examines input 16x16 for possible edges and orientations of those,
781*c83a76b0SSuyog Pawar * and returns a bit mask of partitions that should be searched for
782*c83a76b0SSuyog Pawar *
783*c83a76b0SSuyog Pawar * @param[in] pu1_inp : input buffer
784*c83a76b0SSuyog Pawar *
785*c83a76b0SSuyog Pawar * @param[in] i4_inp_stride: input stride
786*c83a76b0SSuyog Pawar *
787*c83a76b0SSuyog Pawar * @return part mask (bit mask of active partitions to search)
788*c83a76b0SSuyog Pawar ********************************************************************************
789*c83a76b0SSuyog Pawar */
790*c83a76b0SSuyog Pawar
hme_study_input_segmentation(U08 * pu1_inp,S32 i4_inp_stride,S32 limit_active_partitions)791*c83a76b0SSuyog Pawar S32 hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride, S32 limit_active_partitions)
792*c83a76b0SSuyog Pawar {
793*c83a76b0SSuyog Pawar S32 i4_rsum[16], i4_csum[16];
794*c83a76b0SSuyog Pawar U08 *pu1_tmp, u1_tmp;
795*c83a76b0SSuyog Pawar S32 i4_max_ridx, i4_max_cidx, i4_tmp;
796*c83a76b0SSuyog Pawar S32 i, j, i4_ret;
797*c83a76b0SSuyog Pawar S32 i4_max_rp[4], i4_max_cp[4];
798*c83a76b0SSuyog Pawar S32 i4_seg_lutc[4] = { 0, ENABLE_nLx2N, ENABLE_Nx2N, ENABLE_nRx2N };
799*c83a76b0SSuyog Pawar S32 i4_seg_lutr[4] = { 0, ENABLE_2NxnU, ENABLE_2NxN, ENABLE_2NxnD };
800*c83a76b0SSuyog Pawar #define EDGE_THR (15 * 16)
801*c83a76b0SSuyog Pawar #define HI_PASS(ptr, i) (2 * (ptr[i] - ptr[i - 1]) + (ptr[i + 1] - ptr[i - 2]))
802*c83a76b0SSuyog Pawar
803*c83a76b0SSuyog Pawar if(0 == limit_active_partitions)
804*c83a76b0SSuyog Pawar {
805*c83a76b0SSuyog Pawar /*********************************************************************/
806*c83a76b0SSuyog Pawar /* In this case, we do not optimize on active partitions and search */
807*c83a76b0SSuyog Pawar /* brute force. This way, 17 partitinos would be enabled. */
808*c83a76b0SSuyog Pawar /*********************************************************************/
809*c83a76b0SSuyog Pawar return (ENABLE_ALL_PARTS);
810*c83a76b0SSuyog Pawar }
811*c83a76b0SSuyog Pawar
812*c83a76b0SSuyog Pawar /*************************************************************************/
813*c83a76b0SSuyog Pawar /* Control passes below in case we wish to optimize on active partitions.*/
814*c83a76b0SSuyog Pawar /* This is based on input characteristics, check how an edge passes along*/
815*c83a76b0SSuyog Pawar /* an input 16x16 area, if at all, and decide active partitinos. */
816*c83a76b0SSuyog Pawar /*************************************************************************/
817*c83a76b0SSuyog Pawar
818*c83a76b0SSuyog Pawar /* Initialize row and col sums */
819*c83a76b0SSuyog Pawar for(i = 0; i < 16; i++)
820*c83a76b0SSuyog Pawar {
821*c83a76b0SSuyog Pawar i4_rsum[i] = 0;
822*c83a76b0SSuyog Pawar i4_csum[i] = 0;
823*c83a76b0SSuyog Pawar }
824*c83a76b0SSuyog Pawar pu1_tmp = pu1_inp;
825*c83a76b0SSuyog Pawar for(i = 0; i < 16; i++)
826*c83a76b0SSuyog Pawar {
827*c83a76b0SSuyog Pawar for(j = 0; j < 16; j++)
828*c83a76b0SSuyog Pawar {
829*c83a76b0SSuyog Pawar u1_tmp = *pu1_tmp++;
830*c83a76b0SSuyog Pawar i4_rsum[i] += u1_tmp;
831*c83a76b0SSuyog Pawar i4_csum[j] += u1_tmp;
832*c83a76b0SSuyog Pawar }
833*c83a76b0SSuyog Pawar pu1_tmp += (i4_inp_stride - 16);
834*c83a76b0SSuyog Pawar }
835*c83a76b0SSuyog Pawar
836*c83a76b0SSuyog Pawar /* 0 is dummy; 1 is 4; 2 is 8; 3 is 12 */
837*c83a76b0SSuyog Pawar i4_max_rp[0] = 0;
838*c83a76b0SSuyog Pawar i4_max_cp[0] = 0;
839*c83a76b0SSuyog Pawar i4_max_rp[1] = 0;
840*c83a76b0SSuyog Pawar i4_max_cp[1] = 0;
841*c83a76b0SSuyog Pawar i4_max_rp[2] = 0;
842*c83a76b0SSuyog Pawar i4_max_cp[2] = 0;
843*c83a76b0SSuyog Pawar i4_max_rp[3] = 0;
844*c83a76b0SSuyog Pawar i4_max_cp[3] = 0;
845*c83a76b0SSuyog Pawar
846*c83a76b0SSuyog Pawar /* Get Max edge strength across (2,3) (3,4) (4,5) */
847*c83a76b0SSuyog Pawar for(i = 3; i < 6; i++)
848*c83a76b0SSuyog Pawar {
849*c83a76b0SSuyog Pawar /* Run [-1 -2 2 1] filter through rsum/csum */
850*c83a76b0SSuyog Pawar i4_tmp = HI_PASS(i4_rsum, i);
851*c83a76b0SSuyog Pawar if(ABS(i4_tmp) > i4_max_rp[1])
852*c83a76b0SSuyog Pawar i4_max_rp[1] = i4_tmp;
853*c83a76b0SSuyog Pawar
854*c83a76b0SSuyog Pawar i4_tmp = HI_PASS(i4_csum, i);
855*c83a76b0SSuyog Pawar if(ABS(i4_tmp) > i4_max_cp[1])
856*c83a76b0SSuyog Pawar i4_max_cp[1] = i4_tmp;
857*c83a76b0SSuyog Pawar }
858*c83a76b0SSuyog Pawar
859*c83a76b0SSuyog Pawar /* Get Max edge strength across (6,7) (7,8) (8,9) */
860*c83a76b0SSuyog Pawar for(i = 7; i < 10; i++)
861*c83a76b0SSuyog Pawar {
862*c83a76b0SSuyog Pawar /* Run [-1 -2 2 1] filter through rsum/csum */
863*c83a76b0SSuyog Pawar i4_tmp = HI_PASS(i4_rsum, i);
864*c83a76b0SSuyog Pawar if(ABS(i4_tmp) > i4_max_rp[2])
865*c83a76b0SSuyog Pawar i4_max_rp[2] = i4_tmp;
866*c83a76b0SSuyog Pawar
867*c83a76b0SSuyog Pawar i4_tmp = HI_PASS(i4_csum, i);
868*c83a76b0SSuyog Pawar if(ABS(i4_tmp) > i4_max_cp[2])
869*c83a76b0SSuyog Pawar i4_max_cp[2] = i4_tmp;
870*c83a76b0SSuyog Pawar }
871*c83a76b0SSuyog Pawar
872*c83a76b0SSuyog Pawar /* Get Max edge strength across (10,11) (11,12) (12,13) */
873*c83a76b0SSuyog Pawar for(i = 11; i < 14; i++)
874*c83a76b0SSuyog Pawar {
875*c83a76b0SSuyog Pawar /* Run [-1 -2 2 1] filter through rsum/csum */
876*c83a76b0SSuyog Pawar i4_tmp = HI_PASS(i4_rsum, i);
877*c83a76b0SSuyog Pawar if(ABS(i4_tmp) > i4_max_rp[3])
878*c83a76b0SSuyog Pawar i4_max_rp[3] = i4_tmp;
879*c83a76b0SSuyog Pawar
880*c83a76b0SSuyog Pawar i4_tmp = HI_PASS(i4_csum, i);
881*c83a76b0SSuyog Pawar if(ABS(i4_tmp) > i4_max_cp[3])
882*c83a76b0SSuyog Pawar i4_max_cp[3] = i4_tmp;
883*c83a76b0SSuyog Pawar }
884*c83a76b0SSuyog Pawar
885*c83a76b0SSuyog Pawar /* Find the maximum across the 3 and see whether the strength qualifies as edge */
886*c83a76b0SSuyog Pawar i4_max_ridx = 1;
887*c83a76b0SSuyog Pawar i4_max_cidx = 1;
888*c83a76b0SSuyog Pawar for(i = 2; i <= 3; i++)
889*c83a76b0SSuyog Pawar {
890*c83a76b0SSuyog Pawar if(i4_max_rp[i] > i4_max_rp[i4_max_ridx])
891*c83a76b0SSuyog Pawar i4_max_ridx = i;
892*c83a76b0SSuyog Pawar
893*c83a76b0SSuyog Pawar if(i4_max_cp[i] > i4_max_cp[i4_max_cidx])
894*c83a76b0SSuyog Pawar i4_max_cidx = i;
895*c83a76b0SSuyog Pawar }
896*c83a76b0SSuyog Pawar
897*c83a76b0SSuyog Pawar if(EDGE_THR > i4_max_rp[i4_max_ridx])
898*c83a76b0SSuyog Pawar {
899*c83a76b0SSuyog Pawar i4_max_ridx = 0;
900*c83a76b0SSuyog Pawar }
901*c83a76b0SSuyog Pawar
902*c83a76b0SSuyog Pawar if(EDGE_THR > i4_max_cp[i4_max_cidx])
903*c83a76b0SSuyog Pawar {
904*c83a76b0SSuyog Pawar i4_max_cidx = 0;
905*c83a76b0SSuyog Pawar }
906*c83a76b0SSuyog Pawar
907*c83a76b0SSuyog Pawar i4_ret = ENABLE_2Nx2N;
908*c83a76b0SSuyog Pawar
909*c83a76b0SSuyog Pawar /* If only vertical discontinuity, go with one of 2Nx? */
910*c83a76b0SSuyog Pawar if(0 == (i4_max_ridx + i4_max_cidx))
911*c83a76b0SSuyog Pawar {
912*c83a76b0SSuyog Pawar //num_me_parts++;
913*c83a76b0SSuyog Pawar return i4_ret;
914*c83a76b0SSuyog Pawar }
915*c83a76b0SSuyog Pawar
916*c83a76b0SSuyog Pawar if(i4_max_ridx && (i4_max_cidx == 0))
917*c83a76b0SSuyog Pawar {
918*c83a76b0SSuyog Pawar //num_me_parts += 3;
919*c83a76b0SSuyog Pawar return ((i4_ret | i4_seg_lutr[i4_max_ridx]));
920*c83a76b0SSuyog Pawar }
921*c83a76b0SSuyog Pawar
922*c83a76b0SSuyog Pawar /* If only horizontal discontinuity, go with one of ?x2N */
923*c83a76b0SSuyog Pawar if(i4_max_cidx && (i4_max_ridx == 0))
924*c83a76b0SSuyog Pawar {
925*c83a76b0SSuyog Pawar //num_me_parts += 3;
926*c83a76b0SSuyog Pawar return ((i4_ret | i4_seg_lutc[i4_max_cidx]));
927*c83a76b0SSuyog Pawar }
928*c83a76b0SSuyog Pawar
929*c83a76b0SSuyog Pawar /* If middle is dominant in both directions, go with NxN */
930*c83a76b0SSuyog Pawar if((2 == i4_max_cidx) && (2 == i4_max_ridx))
931*c83a76b0SSuyog Pawar {
932*c83a76b0SSuyog Pawar //num_me_parts += 5;
933*c83a76b0SSuyog Pawar return ((i4_ret | ENABLE_NxN));
934*c83a76b0SSuyog Pawar }
935*c83a76b0SSuyog Pawar
936*c83a76b0SSuyog Pawar /* Otherwise, conservatively, enable NxN and the 2 AMPs */
937*c83a76b0SSuyog Pawar //num_me_parts += 9;
938*c83a76b0SSuyog Pawar return (i4_ret | ENABLE_NxN | i4_seg_lutr[i4_max_ridx] | i4_seg_lutc[i4_max_cidx]);
939*c83a76b0SSuyog Pawar }
940*c83a76b0SSuyog Pawar
941*c83a76b0SSuyog Pawar /**
942*c83a76b0SSuyog Pawar ********************************************************************************
943*c83a76b0SSuyog Pawar * @fn hme_init_search_results(search_results_t *ps_search_results,
944*c83a76b0SSuyog Pawar * S32 i4_num_ref,
945*c83a76b0SSuyog Pawar * S32 i4_num_best_results,
946*c83a76b0SSuyog Pawar * S32 i4_num_results_per_part,
947*c83a76b0SSuyog Pawar * BLK_SIZE_T e_blk_size,
948*c83a76b0SSuyog Pawar * S32 i4_x_off,
949*c83a76b0SSuyog Pawar * S32 i4_y_off)
950*c83a76b0SSuyog Pawar *
951*c83a76b0SSuyog Pawar * @brief Initializes the search results structure with some key attributes
952*c83a76b0SSuyog Pawar *
953*c83a76b0SSuyog Pawar * @param[out] ps_search_results : search results structure to initialise
954*c83a76b0SSuyog Pawar *
955*c83a76b0SSuyog Pawar * @param[in] i4_num_Ref: corresponds to the number of ref ids searched
956*c83a76b0SSuyog Pawar *
957*c83a76b0SSuyog Pawar * @param[in] i4_num_best_results: Number of best results for the CU to
958*c83a76b0SSuyog Pawar * be maintained in the result structure
959*c83a76b0SSuyog Pawar *
960*c83a76b0SSuyog Pawar * @param[in] i4_num_results_per_part: Per active partition the number of best
961*c83a76b0SSuyog Pawar * results to be maintained
962*c83a76b0SSuyog Pawar *
963*c83a76b0SSuyog Pawar * @param[in] e_blk_size: blk size of the CU for which this structure used
964*c83a76b0SSuyog Pawar *
965*c83a76b0SSuyog Pawar * @param[in] i4_x_off: x offset of the top left of CU from CTB top left
966*c83a76b0SSuyog Pawar *
967*c83a76b0SSuyog Pawar * @param[in] i4_y_off: y offset of the top left of CU from CTB top left
968*c83a76b0SSuyog Pawar *
969*c83a76b0SSuyog Pawar * @param[in] pu1_is_past : points ot an array that tells whether a given ref id
970*c83a76b0SSuyog Pawar * has prominence in L0 or in L1 list (past or future )
971*c83a76b0SSuyog Pawar *
972*c83a76b0SSuyog Pawar * @return void
973*c83a76b0SSuyog Pawar ********************************************************************************
974*c83a76b0SSuyog Pawar */
hme_init_search_results(search_results_t * ps_search_results,S32 i4_num_ref,S32 i4_num_best_results,S32 i4_num_results_per_part,BLK_SIZE_T e_blk_size,S32 i4_x_off,S32 i4_y_off,U08 * pu1_is_past)975*c83a76b0SSuyog Pawar void hme_init_search_results(
976*c83a76b0SSuyog Pawar search_results_t *ps_search_results,
977*c83a76b0SSuyog Pawar S32 i4_num_ref,
978*c83a76b0SSuyog Pawar S32 i4_num_best_results,
979*c83a76b0SSuyog Pawar S32 i4_num_results_per_part,
980*c83a76b0SSuyog Pawar BLK_SIZE_T e_blk_size,
981*c83a76b0SSuyog Pawar S32 i4_x_off,
982*c83a76b0SSuyog Pawar S32 i4_y_off,
983*c83a76b0SSuyog Pawar U08 *pu1_is_past)
984*c83a76b0SSuyog Pawar {
985*c83a76b0SSuyog Pawar CU_SIZE_T e_cu_size = ge_blk_size_to_cu_size[e_blk_size];
986*c83a76b0SSuyog Pawar
987*c83a76b0SSuyog Pawar ASSERT(e_cu_size != -1);
988*c83a76b0SSuyog Pawar ps_search_results->e_cu_size = e_cu_size;
989*c83a76b0SSuyog Pawar ps_search_results->u1_x_off = (U08)i4_x_off;
990*c83a76b0SSuyog Pawar ps_search_results->u1_y_off = (U08)i4_y_off;
991*c83a76b0SSuyog Pawar ps_search_results->u1_num_active_ref = (U08)i4_num_ref;
992*c83a76b0SSuyog Pawar ps_search_results->u1_num_best_results = (U08)i4_num_best_results;
993*c83a76b0SSuyog Pawar ps_search_results->u1_num_results_per_part = (U08)i4_num_results_per_part;
994*c83a76b0SSuyog Pawar ps_search_results->pu1_is_past = pu1_is_past;
995*c83a76b0SSuyog Pawar ps_search_results->u1_split_flag = 0;
996*c83a76b0SSuyog Pawar ps_search_results->best_cu_cost = MAX_32BIT_VAL;
997*c83a76b0SSuyog Pawar }
998*c83a76b0SSuyog Pawar
999*c83a76b0SSuyog Pawar /**
1000*c83a76b0SSuyog Pawar ********************************************************************************
1001*c83a76b0SSuyog Pawar * @fn hme_reset_search_results((search_results_t *ps_search_results,
1002*c83a76b0SSuyog Pawar * S32 i4_part_mask)
1003*c83a76b0SSuyog Pawar *
1004*c83a76b0SSuyog Pawar *
1005*c83a76b0SSuyog Pawar * @brief Resets the best results to maximum values, so as to allow search
1006*c83a76b0SSuyog Pawar * for the new CU's partitions. The existing results may be from an
1007*c83a76b0SSuyog Pawar * older CU using same structure.
1008*c83a76b0SSuyog Pawar *
1009*c83a76b0SSuyog Pawar * @param[in] ps_search_results: search results structure
1010*c83a76b0SSuyog Pawar *
1011*c83a76b0SSuyog Pawar * @param[in] i4_part_mask : bit mask of active partitions
1012*c83a76b0SSuyog Pawar *
1013*c83a76b0SSuyog Pawar * @return part mask (bit mask of active partitions to search)
1014*c83a76b0SSuyog Pawar ********************************************************************************
1015*c83a76b0SSuyog Pawar */
hme_reset_search_results(search_results_t * ps_search_results,S32 i4_part_mask,S32 mv_res)1016*c83a76b0SSuyog Pawar void hme_reset_search_results(search_results_t *ps_search_results, S32 i4_part_mask, S32 mv_res)
1017*c83a76b0SSuyog Pawar {
1018*c83a76b0SSuyog Pawar S32 i4_num_ref = (S32)ps_search_results->u1_num_active_ref;
1019*c83a76b0SSuyog Pawar S08 i1_ref_idx;
1020*c83a76b0SSuyog Pawar S32 i, j;
1021*c83a76b0SSuyog Pawar search_node_t *ps_search_node;
1022*c83a76b0SSuyog Pawar
1023*c83a76b0SSuyog Pawar /* store this for future use */
1024*c83a76b0SSuyog Pawar ps_search_results->i4_part_mask = i4_part_mask;
1025*c83a76b0SSuyog Pawar
1026*c83a76b0SSuyog Pawar /* Reset the spli_flag to zero */
1027*c83a76b0SSuyog Pawar ps_search_results->u1_split_flag = 0;
1028*c83a76b0SSuyog Pawar
1029*c83a76b0SSuyog Pawar HME_SET_MVPRED_RES((&ps_search_results->as_pred_ctxt[0]), mv_res);
1030*c83a76b0SSuyog Pawar HME_SET_MVPRED_RES((&ps_search_results->as_pred_ctxt[1]), mv_res);
1031*c83a76b0SSuyog Pawar
1032*c83a76b0SSuyog Pawar for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
1033*c83a76b0SSuyog Pawar {
1034*c83a76b0SSuyog Pawar /* Reset the individual partitino results */
1035*c83a76b0SSuyog Pawar for(i = 0; i < TOT_NUM_PARTS; i++)
1036*c83a76b0SSuyog Pawar {
1037*c83a76b0SSuyog Pawar if(!(i4_part_mask & (1 << i)))
1038*c83a76b0SSuyog Pawar continue;
1039*c83a76b0SSuyog Pawar
1040*c83a76b0SSuyog Pawar ps_search_node = ps_search_results->aps_part_results[i1_ref_idx][i];
1041*c83a76b0SSuyog Pawar
1042*c83a76b0SSuyog Pawar for(j = 0; j < ps_search_results->u1_num_results_per_part; j++)
1043*c83a76b0SSuyog Pawar {
1044*c83a76b0SSuyog Pawar ps_search_node[j].s_mv.i2_mvx = 0;
1045*c83a76b0SSuyog Pawar ps_search_node[j].s_mv.i2_mvy = 0;
1046*c83a76b0SSuyog Pawar ps_search_node[j].i4_tot_cost = MAX_32BIT_VAL;
1047*c83a76b0SSuyog Pawar ps_search_node[j].i4_sad = MAX_32BIT_VAL;
1048*c83a76b0SSuyog Pawar ps_search_node[j].i4_sdi = 0;
1049*c83a76b0SSuyog Pawar ps_search_node[j].i1_ref_idx = -1;
1050*c83a76b0SSuyog Pawar ps_search_node[j].u1_subpel_done = 0;
1051*c83a76b0SSuyog Pawar ps_search_node[j].u1_is_avail = 1;
1052*c83a76b0SSuyog Pawar ps_search_node[j].i4_mv_cost = 0;
1053*c83a76b0SSuyog Pawar }
1054*c83a76b0SSuyog Pawar }
1055*c83a76b0SSuyog Pawar }
1056*c83a76b0SSuyog Pawar }
1057*c83a76b0SSuyog Pawar /**
1058*c83a76b0SSuyog Pawar ********************************************************************************
1059*c83a76b0SSuyog Pawar * @fn hme_clamp_grid_by_mvrange(search_node_t *ps_search_node,
1060*c83a76b0SSuyog Pawar * S32 i4_step,
1061*c83a76b0SSuyog Pawar * range_prms_t *ps_mvrange)
1062*c83a76b0SSuyog Pawar *
1063*c83a76b0SSuyog Pawar * @brief Given a central pt within mv range, and a grid of points surrounding
1064*c83a76b0SSuyog Pawar * this pt, this function returns a grid mask of pts within search rng
1065*c83a76b0SSuyog Pawar *
1066*c83a76b0SSuyog Pawar * @param[in] ps_search_node: the centre pt of the grid
1067*c83a76b0SSuyog Pawar *
1068*c83a76b0SSuyog Pawar * @param[in] i4_step: step size of grid
1069*c83a76b0SSuyog Pawar *
1070*c83a76b0SSuyog Pawar * @param[in] ps_mvrange: structure containing the current mv range
1071*c83a76b0SSuyog Pawar *
1072*c83a76b0SSuyog Pawar * @return bitmask of the pts in grid within search range
1073*c83a76b0SSuyog Pawar ********************************************************************************
1074*c83a76b0SSuyog Pawar */
hme_clamp_grid_by_mvrange(search_node_t * ps_search_node,S32 i4_step,range_prms_t * ps_mvrange)1075*c83a76b0SSuyog Pawar S32 hme_clamp_grid_by_mvrange(search_node_t *ps_search_node, S32 i4_step, range_prms_t *ps_mvrange)
1076*c83a76b0SSuyog Pawar {
1077*c83a76b0SSuyog Pawar S32 i4_mask = GRID_ALL_PTS_VALID;
1078*c83a76b0SSuyog Pawar if(ps_search_node->s_mv.i2_mvx + i4_step >= ps_mvrange->i2_max_x)
1079*c83a76b0SSuyog Pawar {
1080*c83a76b0SSuyog Pawar i4_mask &= (GRID_RT_3_INVALID);
1081*c83a76b0SSuyog Pawar }
1082*c83a76b0SSuyog Pawar if(ps_search_node->s_mv.i2_mvx - i4_step < ps_mvrange->i2_min_x)
1083*c83a76b0SSuyog Pawar {
1084*c83a76b0SSuyog Pawar i4_mask &= (GRID_LT_3_INVALID);
1085*c83a76b0SSuyog Pawar }
1086*c83a76b0SSuyog Pawar if(ps_search_node->s_mv.i2_mvy + i4_step >= ps_mvrange->i2_max_y)
1087*c83a76b0SSuyog Pawar {
1088*c83a76b0SSuyog Pawar i4_mask &= (GRID_BOT_3_INVALID);
1089*c83a76b0SSuyog Pawar }
1090*c83a76b0SSuyog Pawar if(ps_search_node->s_mv.i2_mvy - i4_step < ps_mvrange->i2_min_y)
1091*c83a76b0SSuyog Pawar {
1092*c83a76b0SSuyog Pawar i4_mask &= (GRID_TOP_3_INVALID);
1093*c83a76b0SSuyog Pawar }
1094*c83a76b0SSuyog Pawar return i4_mask;
1095*c83a76b0SSuyog Pawar }
1096*c83a76b0SSuyog Pawar
1097*c83a76b0SSuyog Pawar /**
1098*c83a76b0SSuyog Pawar ********************************************************************************
1099*c83a76b0SSuyog Pawar * @fn layer_ctxt_t *hme_get_past_layer_ctxt(me_ctxt_t *ps_ctxt,
1100*c83a76b0SSuyog Pawar S32 i4_layer_id)
1101*c83a76b0SSuyog Pawar *
1102*c83a76b0SSuyog Pawar * @brief returns the layer ctxt of the layer with given id from the temporally
1103*c83a76b0SSuyog Pawar * previous frame
1104*c83a76b0SSuyog Pawar *
1105*c83a76b0SSuyog Pawar * @param[in] ps_ctxt : ME context
1106*c83a76b0SSuyog Pawar *
1107*c83a76b0SSuyog Pawar * @param[in] i4_layer_id : id of layer required
1108*c83a76b0SSuyog Pawar *
1109*c83a76b0SSuyog Pawar * @return layer ctxt of given layer id in temporally previous frame
1110*c83a76b0SSuyog Pawar ********************************************************************************
1111*c83a76b0SSuyog Pawar */
hme_get_past_layer_ctxt(me_ctxt_t * ps_ctxt,me_frm_ctxt_t * ps_frm_ctxt,S32 i4_layer_id,S32 i4_num_me_frm_pllel)1112*c83a76b0SSuyog Pawar layer_ctxt_t *hme_get_past_layer_ctxt(
1113*c83a76b0SSuyog Pawar me_ctxt_t *ps_ctxt, me_frm_ctxt_t *ps_frm_ctxt, S32 i4_layer_id, S32 i4_num_me_frm_pllel)
1114*c83a76b0SSuyog Pawar {
1115*c83a76b0SSuyog Pawar S32 i4_poc = ps_frm_ctxt->ai4_ref_idx_to_poc_lc[0];
1116*c83a76b0SSuyog Pawar S32 i;
1117*c83a76b0SSuyog Pawar layers_descr_t *ps_desc;
1118*c83a76b0SSuyog Pawar
1119*c83a76b0SSuyog Pawar for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
1120*c83a76b0SSuyog Pawar {
1121*c83a76b0SSuyog Pawar ps_desc = &ps_ctxt->as_ref_descr[i];
1122*c83a76b0SSuyog Pawar if(i4_poc == ps_desc->aps_layers[i4_layer_id]->i4_poc)
1123*c83a76b0SSuyog Pawar return (ps_desc->aps_layers[i4_layer_id]);
1124*c83a76b0SSuyog Pawar }
1125*c83a76b0SSuyog Pawar return NULL;
1126*c83a76b0SSuyog Pawar }
1127*c83a76b0SSuyog Pawar
1128*c83a76b0SSuyog Pawar /**
1129*c83a76b0SSuyog Pawar ********************************************************************************
1130*c83a76b0SSuyog Pawar * @fn layer_ctxt_t *hme_coarse_get_past_layer_ctxt(me_ctxt_t *ps_ctxt,
1131*c83a76b0SSuyog Pawar S32 i4_layer_id)
1132*c83a76b0SSuyog Pawar *
1133*c83a76b0SSuyog Pawar * @brief returns the layer ctxt of the layer with given id from the temporally
1134*c83a76b0SSuyog Pawar * previous frame
1135*c83a76b0SSuyog Pawar *
1136*c83a76b0SSuyog Pawar * @param[in] ps_ctxt : ME context
1137*c83a76b0SSuyog Pawar *
1138*c83a76b0SSuyog Pawar * @param[in] i4_layer_id : id of layer required
1139*c83a76b0SSuyog Pawar *
1140*c83a76b0SSuyog Pawar * @return layer ctxt of given layer id in temporally previous frame
1141*c83a76b0SSuyog Pawar ********************************************************************************
1142*c83a76b0SSuyog Pawar */
hme_coarse_get_past_layer_ctxt(coarse_me_ctxt_t * ps_ctxt,S32 i4_layer_id)1143*c83a76b0SSuyog Pawar layer_ctxt_t *hme_coarse_get_past_layer_ctxt(coarse_me_ctxt_t *ps_ctxt, S32 i4_layer_id)
1144*c83a76b0SSuyog Pawar {
1145*c83a76b0SSuyog Pawar S32 i4_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[0];
1146*c83a76b0SSuyog Pawar S32 i;
1147*c83a76b0SSuyog Pawar layers_descr_t *ps_desc;
1148*c83a76b0SSuyog Pawar
1149*c83a76b0SSuyog Pawar for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
1150*c83a76b0SSuyog Pawar {
1151*c83a76b0SSuyog Pawar ps_desc = &ps_ctxt->as_ref_descr[i];
1152*c83a76b0SSuyog Pawar if(i4_poc == ps_desc->aps_layers[i4_layer_id]->i4_poc)
1153*c83a76b0SSuyog Pawar return (ps_desc->aps_layers[i4_layer_id]);
1154*c83a76b0SSuyog Pawar }
1155*c83a76b0SSuyog Pawar return NULL;
1156*c83a76b0SSuyog Pawar }
1157*c83a76b0SSuyog Pawar
1158*c83a76b0SSuyog Pawar /**
1159*c83a76b0SSuyog Pawar ********************************************************************************
1160*c83a76b0SSuyog Pawar * @fn void hme_init_mv_bank(layer_ctxt_t *ps_layer_ctxt,
1161*c83a76b0SSuyog Pawar BLK_SIZE_T e_blk_size,
1162*c83a76b0SSuyog Pawar S32 i4_num_ref,
1163*c83a76b0SSuyog Pawar S32 i4_num_results_per_part)
1164*c83a76b0SSuyog Pawar *
1165*c83a76b0SSuyog Pawar * @brief Given a blk size to be used for this layer, this function initialize
1166*c83a76b0SSuyog Pawar * the mv bank to make it ready to store and return results.
1167*c83a76b0SSuyog Pawar *
1168*c83a76b0SSuyog Pawar * @param[in, out] ps_layer_ctxt: pointer to layer ctxt
1169*c83a76b0SSuyog Pawar *
1170*c83a76b0SSuyog Pawar * @param[in] e_blk_size : resolution at which mvs are stored
1171*c83a76b0SSuyog Pawar *
1172*c83a76b0SSuyog Pawar * @param[in] i4_num_ref: number of reference frames corresponding to which
1173*c83a76b0SSuyog Pawar * results are stored.
1174*c83a76b0SSuyog Pawar *
1175*c83a76b0SSuyog Pawar * @param[in] e_blk_size : resolution at which mvs are stored
1176*c83a76b0SSuyog Pawar *
1177*c83a76b0SSuyog Pawar * @param[in] i4_num_results_per_part : Number of results to be stored per
1178*c83a76b0SSuyog Pawar * ref idx. So these many best results stored
1179*c83a76b0SSuyog Pawar *
1180*c83a76b0SSuyog Pawar * @return void
1181*c83a76b0SSuyog Pawar ********************************************************************************
1182*c83a76b0SSuyog Pawar */
hme_init_mv_bank(layer_ctxt_t * ps_layer_ctxt,BLK_SIZE_T e_blk_size,S32 i4_num_ref,S32 i4_num_results_per_part,U08 u1_enc)1183*c83a76b0SSuyog Pawar void hme_init_mv_bank(
1184*c83a76b0SSuyog Pawar layer_ctxt_t *ps_layer_ctxt,
1185*c83a76b0SSuyog Pawar BLK_SIZE_T e_blk_size,
1186*c83a76b0SSuyog Pawar S32 i4_num_ref,
1187*c83a76b0SSuyog Pawar S32 i4_num_results_per_part,
1188*c83a76b0SSuyog Pawar U08 u1_enc)
1189*c83a76b0SSuyog Pawar {
1190*c83a76b0SSuyog Pawar layer_mv_t *ps_mv_bank;
1191*c83a76b0SSuyog Pawar hme_mv_t *ps_mv1, *ps_mv2;
1192*c83a76b0SSuyog Pawar S08 *pi1_ref_id1, *pi1_ref_id2;
1193*c83a76b0SSuyog Pawar S32 blk_wd, mvs_in_blk, blks_in_row, mvs_in_row, blks_in_col;
1194*c83a76b0SSuyog Pawar S32 i4_i, i4_j, blk_ht;
1195*c83a76b0SSuyog Pawar
1196*c83a76b0SSuyog Pawar ps_mv_bank = ps_layer_ctxt->ps_layer_mvbank;
1197*c83a76b0SSuyog Pawar ps_mv_bank->i4_num_mvs_per_ref = i4_num_results_per_part;
1198*c83a76b0SSuyog Pawar ps_mv_bank->i4_num_ref = i4_num_ref;
1199*c83a76b0SSuyog Pawar mvs_in_blk = i4_num_ref * i4_num_results_per_part;
1200*c83a76b0SSuyog Pawar ps_mv_bank->i4_num_mvs_per_blk = mvs_in_blk;
1201*c83a76b0SSuyog Pawar
1202*c83a76b0SSuyog Pawar /*************************************************************************/
1203*c83a76b0SSuyog Pawar /* Store blk size, from blk size derive blk width and use this to compute*/
1204*c83a76b0SSuyog Pawar /* number of blocks every row. We also pad to left and top by 1, to */
1205*c83a76b0SSuyog Pawar /* support the prediction mechanism. */
1206*c83a76b0SSuyog Pawar /*************************************************************************/
1207*c83a76b0SSuyog Pawar ps_mv_bank->e_blk_size = e_blk_size;
1208*c83a76b0SSuyog Pawar blk_wd = gau1_blk_size_to_wd[e_blk_size];
1209*c83a76b0SSuyog Pawar blk_ht = gau1_blk_size_to_ht[e_blk_size];
1210*c83a76b0SSuyog Pawar
1211*c83a76b0SSuyog Pawar blks_in_row = (ps_layer_ctxt->i4_wd + (blk_wd - 1)) / blk_wd;
1212*c83a76b0SSuyog Pawar blks_in_col = (ps_layer_ctxt->i4_ht + (blk_ht - 1)) / blk_ht;
1213*c83a76b0SSuyog Pawar
1214*c83a76b0SSuyog Pawar if(u1_enc)
1215*c83a76b0SSuyog Pawar {
1216*c83a76b0SSuyog Pawar /* TODO: CTB64x64 is assumed. FIX according to actual CTB */
1217*c83a76b0SSuyog Pawar WORD32 num_ctb_cols = ((ps_layer_ctxt->i4_wd + 63) >> 6);
1218*c83a76b0SSuyog Pawar WORD32 num_ctb_rows = ((ps_layer_ctxt->i4_ht + 63) >> 6);
1219*c83a76b0SSuyog Pawar
1220*c83a76b0SSuyog Pawar blks_in_row = (num_ctb_cols << 3);
1221*c83a76b0SSuyog Pawar blks_in_col = (num_ctb_rows << 3);
1222*c83a76b0SSuyog Pawar }
1223*c83a76b0SSuyog Pawar
1224*c83a76b0SSuyog Pawar blks_in_row += 2;
1225*c83a76b0SSuyog Pawar mvs_in_row = blks_in_row * mvs_in_blk;
1226*c83a76b0SSuyog Pawar
1227*c83a76b0SSuyog Pawar ps_mv_bank->i4_num_blks_per_row = blks_in_row;
1228*c83a76b0SSuyog Pawar ps_mv_bank->i4_num_mvs_per_row = mvs_in_row;
1229*c83a76b0SSuyog Pawar
1230*c83a76b0SSuyog Pawar /* To ensure run time requirements fall within allocation time request */
1231*c83a76b0SSuyog Pawar ASSERT(ps_mv_bank->i4_num_mvs_per_row <= ps_mv_bank->max_num_mvs_per_row);
1232*c83a76b0SSuyog Pawar
1233*c83a76b0SSuyog Pawar /*************************************************************************/
1234*c83a76b0SSuyog Pawar /* Increment by one full row at top for padding and one column in left */
1235*c83a76b0SSuyog Pawar /* this gives us the actual start of mv for 0,0 blk */
1236*c83a76b0SSuyog Pawar /*************************************************************************/
1237*c83a76b0SSuyog Pawar ps_mv_bank->ps_mv = ps_mv_bank->ps_mv_base + mvs_in_row + mvs_in_blk;
1238*c83a76b0SSuyog Pawar ps_mv_bank->pi1_ref_idx = ps_mv_bank->pi1_ref_idx_base + mvs_in_row + mvs_in_blk;
1239*c83a76b0SSuyog Pawar
1240*c83a76b0SSuyog Pawar memset(ps_mv_bank->ps_mv_base, 0, mvs_in_row * sizeof(hme_mv_t));
1241*c83a76b0SSuyog Pawar memset(ps_mv_bank->pi1_ref_idx_base, -1, mvs_in_row * sizeof(U08));
1242*c83a76b0SSuyog Pawar
1243*c83a76b0SSuyog Pawar /*************************************************************************/
1244*c83a76b0SSuyog Pawar /* Initialize top row, left col and right col with zeros since these are */
1245*c83a76b0SSuyog Pawar /* used as candidates during searches. */
1246*c83a76b0SSuyog Pawar /*************************************************************************/
1247*c83a76b0SSuyog Pawar ps_mv1 = ps_mv_bank->ps_mv_base + mvs_in_row;
1248*c83a76b0SSuyog Pawar ps_mv2 = ps_mv1 + mvs_in_row - mvs_in_blk;
1249*c83a76b0SSuyog Pawar pi1_ref_id1 = ps_mv_bank->pi1_ref_idx_base + mvs_in_row;
1250*c83a76b0SSuyog Pawar pi1_ref_id2 = pi1_ref_id1 + mvs_in_row - mvs_in_blk;
1251*c83a76b0SSuyog Pawar for(i4_i = 0; i4_i < blks_in_col; i4_i++)
1252*c83a76b0SSuyog Pawar {
1253*c83a76b0SSuyog Pawar for(i4_j = 0; i4_j < mvs_in_blk; i4_j++)
1254*c83a76b0SSuyog Pawar {
1255*c83a76b0SSuyog Pawar ps_mv1[i4_j].i2_mv_x = 0;
1256*c83a76b0SSuyog Pawar ps_mv1[i4_j].i2_mv_y = 0;
1257*c83a76b0SSuyog Pawar ps_mv2[i4_j].i2_mv_x = 0;
1258*c83a76b0SSuyog Pawar ps_mv2[i4_j].i2_mv_y = 0;
1259*c83a76b0SSuyog Pawar pi1_ref_id1[i4_j] = -1;
1260*c83a76b0SSuyog Pawar pi1_ref_id2[i4_j] = -1;
1261*c83a76b0SSuyog Pawar }
1262*c83a76b0SSuyog Pawar ps_mv1 += mvs_in_row;
1263*c83a76b0SSuyog Pawar ps_mv2 += mvs_in_row;
1264*c83a76b0SSuyog Pawar pi1_ref_id1 += mvs_in_row;
1265*c83a76b0SSuyog Pawar pi1_ref_id2 += mvs_in_row;
1266*c83a76b0SSuyog Pawar }
1267*c83a76b0SSuyog Pawar }
/**
********************************************************************************
*  @fn     void hme_fill_mvbank_intra(layer_ctxt_t *ps_layer_ctxt)
*
*  @brief  Stamps the intra marker into the mv bank of the layer: on every blk
*          row, the leading entries get INTRA_MV in both mv components and a
*          ref idx of -1.
*
*  @param[in, out] ps_layer_ctxt : pointer to layer ctxt. Its ps_layer_mvbank
*                  must already carry e_blk_size, i4_num_mvs_per_row, ps_mv and
*                  pi1_ref_idx (hme_init_mv_bank fills these in)
*
*  @return void
********************************************************************************
*/
void hme_fill_mvbank_intra(layer_ctxt_t *ps_layer_ctxt)
{
    layer_mv_t *ps_mv_bank = ps_layer_ctxt->ps_layer_mvbank;
    BLK_SIZE_T e_blk_size = ps_mv_bank->e_blk_size;
    S32 blk_wd = gau1_blk_size_to_wd[e_blk_size];
    /* Height is looked up in the height table (same as hme_init_mv_bank), so */
    /* that the row count is right for blk sizes whose width and height differ */
    S32 blk_ht = gau1_blk_size_to_ht[e_blk_size];
    /* Only whole blks inside the layer are counted (truncating division) */
    S32 blks_in_row = ps_layer_ctxt->i4_wd / blk_wd;
    S32 blks_in_col = ps_layer_ctxt->i4_ht / blk_ht;
    /* ps_mv / pi1_ref_idx already address blk (0,0), past the padding */
    hme_mv_t *ps_mv = ps_mv_bank->ps_mv;
    S08 *pi1_ref_id = ps_mv_bank->pi1_ref_idx;
    S32 i, j;

    for(i = 0; i < blks_in_col; i++)
    {
        /* NOTE(review): this marks blks_in_row entries per row, not          */
        /* blks_in_row * i4_num_mvs_per_blk. When a blk holds more than one   */
        /* mv, only part of the row is marked - confirm whether intended.     */
        for(j = 0; j < blks_in_row; j++)
        {
            ps_mv[j].i2_mv_x = INTRA_MV;
            ps_mv[j].i2_mv_y = INTRA_MV;
            pi1_ref_id[j] = -1;
        }

        /* Next blk row of the bank */
        ps_mv += ps_mv_bank->i4_num_mvs_per_row;
        pi1_ref_id += ps_mv_bank->i4_num_mvs_per_row;
    }
}
1310*c83a76b0SSuyog Pawar
1311*c83a76b0SSuyog Pawar /**
1312*c83a76b0SSuyog Pawar ********************************************************************************
1313*c83a76b0SSuyog Pawar * @fn void hme_derive_search_range(range_prms_t *ps_range,
1314*c83a76b0SSuyog Pawar * range_prms_t *ps_pic_limit,
1315*c83a76b0SSuyog Pawar * range_prms_t *ps_mv_limit,
1316*c83a76b0SSuyog Pawar * S32 i4_x,
1317*c83a76b0SSuyog Pawar * S32 i4_y,
1318*c83a76b0SSuyog Pawar * S32 blk_wd,
1319*c83a76b0SSuyog Pawar * S32 blk_ht)
1320*c83a76b0SSuyog Pawar *
1321*c83a76b0SSuyog Pawar * @brief given picture limits and blk dimensions and mv search limits, obtains
1322*c83a76b0SSuyog Pawar * teh valid search range such that the blk stays within pic boundaries,
1323*c83a76b0SSuyog Pawar * where picture boundaries include padded portions of picture
1324*c83a76b0SSuyog Pawar *
1325*c83a76b0SSuyog Pawar * @param[out] ps_range: updated with actual search range
1326*c83a76b0SSuyog Pawar *
1327*c83a76b0SSuyog Pawar * @param[in] ps_pic_limit : picture boundaries
1328*c83a76b0SSuyog Pawar *
1329*c83a76b0SSuyog Pawar * @param[in] ps_mv_limit: Search range limits for the mvs
1330*c83a76b0SSuyog Pawar *
1331*c83a76b0SSuyog Pawar * @param[in] i4_x : x coordinate of the blk
1332*c83a76b0SSuyog Pawar *
1333*c83a76b0SSuyog Pawar * @param[in] i4_y : y coordinate of the blk
1334*c83a76b0SSuyog Pawar *
1335*c83a76b0SSuyog Pawar * @param[in] blk_wd : blk width
1336*c83a76b0SSuyog Pawar *
1337*c83a76b0SSuyog Pawar * @param[in] blk_ht : blk height
1338*c83a76b0SSuyog Pawar *
1339*c83a76b0SSuyog Pawar * @return void
1340*c83a76b0SSuyog Pawar ********************************************************************************
1341*c83a76b0SSuyog Pawar */
hme_derive_search_range(range_prms_t * ps_range,range_prms_t * ps_pic_limit,range_prms_t * ps_mv_limit,S32 i4_x,S32 i4_y,S32 blk_wd,S32 blk_ht)1342*c83a76b0SSuyog Pawar void hme_derive_search_range(
1343*c83a76b0SSuyog Pawar range_prms_t *ps_range,
1344*c83a76b0SSuyog Pawar range_prms_t *ps_pic_limit,
1345*c83a76b0SSuyog Pawar range_prms_t *ps_mv_limit,
1346*c83a76b0SSuyog Pawar S32 i4_x,
1347*c83a76b0SSuyog Pawar S32 i4_y,
1348*c83a76b0SSuyog Pawar S32 blk_wd,
1349*c83a76b0SSuyog Pawar S32 blk_ht)
1350*c83a76b0SSuyog Pawar {
1351*c83a76b0SSuyog Pawar ps_range->i2_max_x =
1352*c83a76b0SSuyog Pawar MIN((ps_pic_limit->i2_max_x - (S16)blk_wd - (S16)i4_x), ps_mv_limit->i2_max_x);
1353*c83a76b0SSuyog Pawar ps_range->i2_min_x = MAX((ps_pic_limit->i2_min_x - (S16)i4_x), ps_mv_limit->i2_min_x);
1354*c83a76b0SSuyog Pawar ps_range->i2_max_y =
1355*c83a76b0SSuyog Pawar MIN((ps_pic_limit->i2_max_y - (S16)blk_ht - (S16)i4_y), ps_mv_limit->i2_max_y);
1356*c83a76b0SSuyog Pawar ps_range->i2_min_y = MAX((ps_pic_limit->i2_min_y - (S16)i4_y), ps_mv_limit->i2_min_y);
1357*c83a76b0SSuyog Pawar }
1358*c83a76b0SSuyog Pawar
1359*c83a76b0SSuyog Pawar /**
1360*c83a76b0SSuyog Pawar ********************************************************************************
1361*c83a76b0SSuyog Pawar * @fn void hme_get_spatial_candt(search_node_t *ps_search_node,
1362*c83a76b0SSuyog Pawar * layer_ctxt_t *ps_curr_layer,
1363*c83a76b0SSuyog Pawar * S32 i4_blk_x,
1364*c83a76b0SSuyog Pawar * S32 i4_blk_y,
1365*c83a76b0SSuyog Pawar * S08 i1_ref_id,
1366*c83a76b0SSuyog Pawar * S32 i4_result_id)
1367*c83a76b0SSuyog Pawar *
1368*c83a76b0SSuyog Pawar * @brief obtains a candt from the same mv bank as the current one, its called
1369*c83a76b0SSuyog Pawar * spatial candt as it does not require scaling for temporal distances
1370*c83a76b0SSuyog Pawar *
1371*c83a76b0SSuyog Pawar * @param[out] ps_search_node: mv and ref id updated here of the candt
1372*c83a76b0SSuyog Pawar *
1373*c83a76b0SSuyog Pawar * @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
1374*c83a76b0SSuyog Pawar *
1375*c83a76b0SSuyog Pawar * @param[in] i4_blk_x : x coordinate of the block in mv bank
1376*c83a76b0SSuyog Pawar *
1377*c83a76b0SSuyog Pawar * @param[in] i4_blk_y : y coordinate of the block in mv bank
1378*c83a76b0SSuyog Pawar *
1379*c83a76b0SSuyog Pawar * @param[in] i1_ref_id : Corresponds to ref idx from which to pick up mv
1380*c83a76b0SSuyog Pawar * results, useful if multiple ref idx candts maintained separately.
1381*c83a76b0SSuyog Pawar *
1382*c83a76b0SSuyog Pawar * @param[in] i4_result_id : If multiple results stored per ref idx, this
1383*c83a76b0SSuyog Pawar * pts to the id of the result
1384*c83a76b0SSuyog Pawar *
1385*c83a76b0SSuyog Pawar * @param[in] tr_avail : top right availability of the block
1386*c83a76b0SSuyog Pawar *
1387*c83a76b0SSuyog Pawar * @param[in] bl_avail : bottom left availability of the block
1388*c83a76b0SSuyog Pawar *
1389*c83a76b0SSuyog Pawar * @return void
1390*c83a76b0SSuyog Pawar ********************************************************************************
1391*c83a76b0SSuyog Pawar */
hme_get_spatial_candt(layer_ctxt_t * ps_curr_layer,BLK_SIZE_T e_search_blk_size,S32 i4_blk_x,S32 i4_blk_y,S08 i1_ref_idx,search_node_t * ps_top_neighbours,search_node_t * ps_left_neighbours,S32 i4_result_id,S32 tr_avail,S32 bl_avail,S32 encode)1392*c83a76b0SSuyog Pawar void hme_get_spatial_candt(
1393*c83a76b0SSuyog Pawar layer_ctxt_t *ps_curr_layer,
1394*c83a76b0SSuyog Pawar BLK_SIZE_T e_search_blk_size,
1395*c83a76b0SSuyog Pawar S32 i4_blk_x,
1396*c83a76b0SSuyog Pawar S32 i4_blk_y,
1397*c83a76b0SSuyog Pawar S08 i1_ref_idx,
1398*c83a76b0SSuyog Pawar search_node_t *ps_top_neighbours,
1399*c83a76b0SSuyog Pawar search_node_t *ps_left_neighbours,
1400*c83a76b0SSuyog Pawar S32 i4_result_id,
1401*c83a76b0SSuyog Pawar S32 tr_avail,
1402*c83a76b0SSuyog Pawar S32 bl_avail,
1403*c83a76b0SSuyog Pawar S32 encode)
1404*c83a76b0SSuyog Pawar
1405*c83a76b0SSuyog Pawar {
1406*c83a76b0SSuyog Pawar layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
1407*c83a76b0SSuyog Pawar S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
1408*c83a76b0SSuyog Pawar S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
1409*c83a76b0SSuyog Pawar search_node_t *ps_search_node;
1410*c83a76b0SSuyog Pawar S32 i4_offset;
1411*c83a76b0SSuyog Pawar hme_mv_t *ps_mv, *ps_mv_base;
1412*c83a76b0SSuyog Pawar S08 *pi1_ref_idx, *pi1_ref_idx_base;
1413*c83a76b0SSuyog Pawar S32 jump = 1, mvs_in_blk, mvs_in_row;
1414*c83a76b0SSuyog Pawar S32 shift = (encode ? 2 : 0);
1415*c83a76b0SSuyog Pawar
1416*c83a76b0SSuyog Pawar if(i4_blk_size1 != i4_blk_size2)
1417*c83a76b0SSuyog Pawar {
1418*c83a76b0SSuyog Pawar i4_blk_x <<= 1;
1419*c83a76b0SSuyog Pawar i4_blk_y <<= 1;
1420*c83a76b0SSuyog Pawar jump = 2;
1421*c83a76b0SSuyog Pawar if((i4_blk_size1 << 2) == i4_blk_size2)
1422*c83a76b0SSuyog Pawar {
1423*c83a76b0SSuyog Pawar i4_blk_x <<= 1;
1424*c83a76b0SSuyog Pawar i4_blk_y <<= 1;
1425*c83a76b0SSuyog Pawar jump = 4;
1426*c83a76b0SSuyog Pawar }
1427*c83a76b0SSuyog Pawar }
1428*c83a76b0SSuyog Pawar
1429*c83a76b0SSuyog Pawar mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
1430*c83a76b0SSuyog Pawar mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
1431*c83a76b0SSuyog Pawar
1432*c83a76b0SSuyog Pawar /* Adjust teh blk coord to point to top left locn */
1433*c83a76b0SSuyog Pawar i4_blk_x -= 1;
1434*c83a76b0SSuyog Pawar i4_blk_y -= 1;
1435*c83a76b0SSuyog Pawar /* Pick up the mvs from the location */
1436*c83a76b0SSuyog Pawar i4_offset = (i4_blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
1437*c83a76b0SSuyog Pawar i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * i4_blk_y);
1438*c83a76b0SSuyog Pawar
1439*c83a76b0SSuyog Pawar ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
1440*c83a76b0SSuyog Pawar pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
1441*c83a76b0SSuyog Pawar
1442*c83a76b0SSuyog Pawar ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref) + i4_result_id;
1443*c83a76b0SSuyog Pawar pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref) + i4_result_id;
1444*c83a76b0SSuyog Pawar
1445*c83a76b0SSuyog Pawar ps_mv_base = ps_mv;
1446*c83a76b0SSuyog Pawar pi1_ref_idx_base = pi1_ref_idx;
1447*c83a76b0SSuyog Pawar
1448*c83a76b0SSuyog Pawar /* ps_mv and pi1_ref_idx now point to the top left locn */
1449*c83a76b0SSuyog Pawar /* Get 4 mvs as follows: */
1450*c83a76b0SSuyog Pawar ps_search_node = ps_top_neighbours;
1451*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1452*c83a76b0SSuyog Pawar
1453*c83a76b0SSuyog Pawar /* Move to top */
1454*c83a76b0SSuyog Pawar ps_search_node++;
1455*c83a76b0SSuyog Pawar ps_mv += mvs_in_blk;
1456*c83a76b0SSuyog Pawar pi1_ref_idx += mvs_in_blk;
1457*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1458*c83a76b0SSuyog Pawar
1459*c83a76b0SSuyog Pawar /* Move to t1 : relevant for 4x4 part searches or for partitions i 16x16 */
1460*c83a76b0SSuyog Pawar if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
1461*c83a76b0SSuyog Pawar {
1462*c83a76b0SSuyog Pawar ps_search_node++;
1463*c83a76b0SSuyog Pawar ps_mv += (mvs_in_blk * (jump >> 1));
1464*c83a76b0SSuyog Pawar pi1_ref_idx += (mvs_in_blk * (jump >> 1));
1465*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1466*c83a76b0SSuyog Pawar }
1467*c83a76b0SSuyog Pawar else
1468*c83a76b0SSuyog Pawar {
1469*c83a76b0SSuyog Pawar ps_search_node++;
1470*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
1471*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
1472*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_idx;
1473*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 0;
1474*c83a76b0SSuyog Pawar ps_search_node->u1_subpel_done = 0;
1475*c83a76b0SSuyog Pawar }
1476*c83a76b0SSuyog Pawar
1477*c83a76b0SSuyog Pawar /* Move to tr: this will be tr w.r.t. the blk being searched */
1478*c83a76b0SSuyog Pawar ps_search_node++;
1479*c83a76b0SSuyog Pawar if(tr_avail == 0)
1480*c83a76b0SSuyog Pawar {
1481*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
1482*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
1483*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_idx;
1484*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 0;
1485*c83a76b0SSuyog Pawar ps_search_node->u1_subpel_done = 0;
1486*c83a76b0SSuyog Pawar }
1487*c83a76b0SSuyog Pawar else
1488*c83a76b0SSuyog Pawar {
1489*c83a76b0SSuyog Pawar ps_mv = ps_mv_base + (mvs_in_blk * (1 + jump));
1490*c83a76b0SSuyog Pawar pi1_ref_idx = pi1_ref_idx_base + (mvs_in_blk * (1 + jump));
1491*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1492*c83a76b0SSuyog Pawar }
1493*c83a76b0SSuyog Pawar
1494*c83a76b0SSuyog Pawar /* Move to left */
1495*c83a76b0SSuyog Pawar ps_search_node = ps_left_neighbours;
1496*c83a76b0SSuyog Pawar ps_mv = ps_mv_base + mvs_in_row;
1497*c83a76b0SSuyog Pawar pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
1498*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1499*c83a76b0SSuyog Pawar
1500*c83a76b0SSuyog Pawar /* Move to l1 */
1501*c83a76b0SSuyog Pawar if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
1502*c83a76b0SSuyog Pawar {
1503*c83a76b0SSuyog Pawar ps_search_node++;
1504*c83a76b0SSuyog Pawar ps_mv += (mvs_in_row * (jump >> 1));
1505*c83a76b0SSuyog Pawar pi1_ref_idx += (mvs_in_row * (jump >> 1));
1506*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1507*c83a76b0SSuyog Pawar }
1508*c83a76b0SSuyog Pawar else
1509*c83a76b0SSuyog Pawar {
1510*c83a76b0SSuyog Pawar ps_search_node++;
1511*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
1512*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
1513*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_idx;
1514*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 0;
1515*c83a76b0SSuyog Pawar ps_search_node->u1_subpel_done = 0;
1516*c83a76b0SSuyog Pawar }
1517*c83a76b0SSuyog Pawar
1518*c83a76b0SSuyog Pawar /* Move to bl */
1519*c83a76b0SSuyog Pawar ps_search_node++;
1520*c83a76b0SSuyog Pawar if(bl_avail == 0)
1521*c83a76b0SSuyog Pawar {
1522*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
1523*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
1524*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_idx;
1525*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 0;
1526*c83a76b0SSuyog Pawar }
1527*c83a76b0SSuyog Pawar else
1528*c83a76b0SSuyog Pawar {
1529*c83a76b0SSuyog Pawar ps_mv = ps_mv_base + (mvs_in_row * (1 + jump));
1530*c83a76b0SSuyog Pawar pi1_ref_idx = pi1_ref_idx_base + (mvs_in_row * (1 + jump));
1531*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1532*c83a76b0SSuyog Pawar }
1533*c83a76b0SSuyog Pawar }
1534*c83a76b0SSuyog Pawar
hme_get_spatial_candt_in_l1_me(layer_ctxt_t * ps_curr_layer,BLK_SIZE_T e_search_blk_size,S32 i4_blk_x,S32 i4_blk_y,S08 i1_ref_idx,U08 u1_pred_dir,search_node_t * ps_top_neighbours,search_node_t * ps_left_neighbours,S32 i4_result_id,S32 tr_avail,S32 bl_avail,S32 i4_num_act_ref_l0,S32 i4_num_act_ref_l1)1535*c83a76b0SSuyog Pawar void hme_get_spatial_candt_in_l1_me(
1536*c83a76b0SSuyog Pawar layer_ctxt_t *ps_curr_layer,
1537*c83a76b0SSuyog Pawar BLK_SIZE_T e_search_blk_size,
1538*c83a76b0SSuyog Pawar S32 i4_blk_x,
1539*c83a76b0SSuyog Pawar S32 i4_blk_y,
1540*c83a76b0SSuyog Pawar S08 i1_ref_idx,
1541*c83a76b0SSuyog Pawar U08 u1_pred_dir,
1542*c83a76b0SSuyog Pawar search_node_t *ps_top_neighbours,
1543*c83a76b0SSuyog Pawar search_node_t *ps_left_neighbours,
1544*c83a76b0SSuyog Pawar S32 i4_result_id,
1545*c83a76b0SSuyog Pawar S32 tr_avail,
1546*c83a76b0SSuyog Pawar S32 bl_avail,
1547*c83a76b0SSuyog Pawar S32 i4_num_act_ref_l0,
1548*c83a76b0SSuyog Pawar S32 i4_num_act_ref_l1)
1549*c83a76b0SSuyog Pawar {
1550*c83a76b0SSuyog Pawar search_node_t *ps_search_node;
1551*c83a76b0SSuyog Pawar hme_mv_t *ps_mv, *ps_mv_base;
1552*c83a76b0SSuyog Pawar
1553*c83a76b0SSuyog Pawar S32 i4_offset;
1554*c83a76b0SSuyog Pawar S32 mvs_in_blk, mvs_in_row;
1555*c83a76b0SSuyog Pawar S08 *pi1_ref_idx, *pi1_ref_idx_base;
1556*c83a76b0SSuyog Pawar S32 i4_mv_pos_in_implicit_array;
1557*c83a76b0SSuyog Pawar
1558*c83a76b0SSuyog Pawar layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
1559*c83a76b0SSuyog Pawar
1560*c83a76b0SSuyog Pawar S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
1561*c83a76b0SSuyog Pawar S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
1562*c83a76b0SSuyog Pawar S32 jump = 1;
1563*c83a76b0SSuyog Pawar S32 shift = 0;
1564*c83a76b0SSuyog Pawar S32 i4_num_results_in_given_dir =
1565*c83a76b0SSuyog Pawar ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l1)
1566*c83a76b0SSuyog Pawar : (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l0));
1567*c83a76b0SSuyog Pawar
1568*c83a76b0SSuyog Pawar if(i4_blk_size1 != i4_blk_size2)
1569*c83a76b0SSuyog Pawar {
1570*c83a76b0SSuyog Pawar i4_blk_x <<= 1;
1571*c83a76b0SSuyog Pawar i4_blk_y <<= 1;
1572*c83a76b0SSuyog Pawar jump = 2;
1573*c83a76b0SSuyog Pawar if((i4_blk_size1 << 2) == i4_blk_size2)
1574*c83a76b0SSuyog Pawar {
1575*c83a76b0SSuyog Pawar i4_blk_x <<= 1;
1576*c83a76b0SSuyog Pawar i4_blk_y <<= 1;
1577*c83a76b0SSuyog Pawar jump = 4;
1578*c83a76b0SSuyog Pawar }
1579*c83a76b0SSuyog Pawar }
1580*c83a76b0SSuyog Pawar
1581*c83a76b0SSuyog Pawar mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
1582*c83a76b0SSuyog Pawar mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
1583*c83a76b0SSuyog Pawar
1584*c83a76b0SSuyog Pawar /* Adjust the blk coord to point to top left locn */
1585*c83a76b0SSuyog Pawar i4_blk_x -= 1;
1586*c83a76b0SSuyog Pawar i4_blk_y -= 1;
1587*c83a76b0SSuyog Pawar /* Pick up the mvs from the location */
1588*c83a76b0SSuyog Pawar i4_offset = (i4_blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
1589*c83a76b0SSuyog Pawar i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * i4_blk_y);
1590*c83a76b0SSuyog Pawar
1591*c83a76b0SSuyog Pawar i4_offset +=
1592*c83a76b0SSuyog Pawar ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l0) : 0);
1593*c83a76b0SSuyog Pawar
1594*c83a76b0SSuyog Pawar ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
1595*c83a76b0SSuyog Pawar pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
1596*c83a76b0SSuyog Pawar
1597*c83a76b0SSuyog Pawar ps_mv_base = ps_mv;
1598*c83a76b0SSuyog Pawar pi1_ref_idx_base = pi1_ref_idx;
1599*c83a76b0SSuyog Pawar
1600*c83a76b0SSuyog Pawar /* TL */
1601*c83a76b0SSuyog Pawar {
1602*c83a76b0SSuyog Pawar /* ps_mv and pi1_ref_idx now point to the top left locn */
1603*c83a76b0SSuyog Pawar ps_search_node = ps_top_neighbours;
1604*c83a76b0SSuyog Pawar
1605*c83a76b0SSuyog Pawar i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1606*c83a76b0SSuyog Pawar pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1607*c83a76b0SSuyog Pawar
1608*c83a76b0SSuyog Pawar if(-1 != i4_mv_pos_in_implicit_array)
1609*c83a76b0SSuyog Pawar {
1610*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(
1611*c83a76b0SSuyog Pawar ps_search_node,
1612*c83a76b0SSuyog Pawar &ps_mv[i4_mv_pos_in_implicit_array],
1613*c83a76b0SSuyog Pawar &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1614*c83a76b0SSuyog Pawar i1_ref_idx,
1615*c83a76b0SSuyog Pawar shift);
1616*c83a76b0SSuyog Pawar }
1617*c83a76b0SSuyog Pawar else
1618*c83a76b0SSuyog Pawar {
1619*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 0;
1620*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
1621*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
1622*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_idx;
1623*c83a76b0SSuyog Pawar }
1624*c83a76b0SSuyog Pawar }
1625*c83a76b0SSuyog Pawar
1626*c83a76b0SSuyog Pawar /* Move to top */
1627*c83a76b0SSuyog Pawar {
1628*c83a76b0SSuyog Pawar /* ps_mv and pi1_ref_idx now point to the top left locn */
1629*c83a76b0SSuyog Pawar ps_search_node++;
1630*c83a76b0SSuyog Pawar ps_mv += mvs_in_blk;
1631*c83a76b0SSuyog Pawar pi1_ref_idx += mvs_in_blk;
1632*c83a76b0SSuyog Pawar
1633*c83a76b0SSuyog Pawar i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1634*c83a76b0SSuyog Pawar pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1635*c83a76b0SSuyog Pawar
1636*c83a76b0SSuyog Pawar if(-1 != i4_mv_pos_in_implicit_array)
1637*c83a76b0SSuyog Pawar {
1638*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(
1639*c83a76b0SSuyog Pawar ps_search_node,
1640*c83a76b0SSuyog Pawar &ps_mv[i4_mv_pos_in_implicit_array],
1641*c83a76b0SSuyog Pawar &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1642*c83a76b0SSuyog Pawar i1_ref_idx,
1643*c83a76b0SSuyog Pawar shift);
1644*c83a76b0SSuyog Pawar }
1645*c83a76b0SSuyog Pawar else
1646*c83a76b0SSuyog Pawar {
1647*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 0;
1648*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
1649*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
1650*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_idx;
1651*c83a76b0SSuyog Pawar }
1652*c83a76b0SSuyog Pawar }
1653*c83a76b0SSuyog Pawar
1654*c83a76b0SSuyog Pawar /* Move to t1 : relevant for 4x4 part searches or for partitions i 16x16 */
1655*c83a76b0SSuyog Pawar if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
1656*c83a76b0SSuyog Pawar {
1657*c83a76b0SSuyog Pawar ps_search_node++;
1658*c83a76b0SSuyog Pawar ps_mv += (mvs_in_blk * (jump >> 1));
1659*c83a76b0SSuyog Pawar pi1_ref_idx += (mvs_in_blk * (jump >> 1));
1660*c83a76b0SSuyog Pawar
1661*c83a76b0SSuyog Pawar i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1662*c83a76b0SSuyog Pawar pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1663*c83a76b0SSuyog Pawar
1664*c83a76b0SSuyog Pawar if(-1 != i4_mv_pos_in_implicit_array)
1665*c83a76b0SSuyog Pawar {
1666*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(
1667*c83a76b0SSuyog Pawar ps_search_node,
1668*c83a76b0SSuyog Pawar &ps_mv[i4_mv_pos_in_implicit_array],
1669*c83a76b0SSuyog Pawar &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1670*c83a76b0SSuyog Pawar i1_ref_idx,
1671*c83a76b0SSuyog Pawar shift);
1672*c83a76b0SSuyog Pawar }
1673*c83a76b0SSuyog Pawar else
1674*c83a76b0SSuyog Pawar {
1675*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 0;
1676*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
1677*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
1678*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_idx;
1679*c83a76b0SSuyog Pawar }
1680*c83a76b0SSuyog Pawar }
1681*c83a76b0SSuyog Pawar else
1682*c83a76b0SSuyog Pawar {
1683*c83a76b0SSuyog Pawar ps_search_node++;
1684*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 0;
1685*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
1686*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
1687*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_idx;
1688*c83a76b0SSuyog Pawar }
1689*c83a76b0SSuyog Pawar
1690*c83a76b0SSuyog Pawar /* Move to tr: this will be tr w.r.t. the blk being searched */
1691*c83a76b0SSuyog Pawar ps_search_node++;
1692*c83a76b0SSuyog Pawar if(tr_avail == 0)
1693*c83a76b0SSuyog Pawar {
1694*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
1695*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
1696*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_idx;
1697*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 0;
1698*c83a76b0SSuyog Pawar ps_search_node->u1_subpel_done = 0;
1699*c83a76b0SSuyog Pawar }
1700*c83a76b0SSuyog Pawar else
1701*c83a76b0SSuyog Pawar {
1702*c83a76b0SSuyog Pawar /* ps_mv and pi1_ref_idx now point to the top left locn */
1703*c83a76b0SSuyog Pawar ps_mv = ps_mv_base + (mvs_in_blk * (1 + jump));
1704*c83a76b0SSuyog Pawar pi1_ref_idx = pi1_ref_idx_base + (mvs_in_blk * (1 + jump));
1705*c83a76b0SSuyog Pawar
1706*c83a76b0SSuyog Pawar i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1707*c83a76b0SSuyog Pawar pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1708*c83a76b0SSuyog Pawar
1709*c83a76b0SSuyog Pawar if(-1 != i4_mv_pos_in_implicit_array)
1710*c83a76b0SSuyog Pawar {
1711*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(
1712*c83a76b0SSuyog Pawar ps_search_node,
1713*c83a76b0SSuyog Pawar &ps_mv[i4_mv_pos_in_implicit_array],
1714*c83a76b0SSuyog Pawar &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1715*c83a76b0SSuyog Pawar i1_ref_idx,
1716*c83a76b0SSuyog Pawar shift);
1717*c83a76b0SSuyog Pawar }
1718*c83a76b0SSuyog Pawar else
1719*c83a76b0SSuyog Pawar {
1720*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 0;
1721*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
1722*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
1723*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_idx;
1724*c83a76b0SSuyog Pawar }
1725*c83a76b0SSuyog Pawar }
1726*c83a76b0SSuyog Pawar
1727*c83a76b0SSuyog Pawar /* Move to left */
1728*c83a76b0SSuyog Pawar {
1729*c83a76b0SSuyog Pawar /* ps_mv and pi1_ref_idx now point to the top left locn */
1730*c83a76b0SSuyog Pawar ps_search_node = ps_left_neighbours;
1731*c83a76b0SSuyog Pawar ps_mv = ps_mv_base + mvs_in_row;
1732*c83a76b0SSuyog Pawar pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
1733*c83a76b0SSuyog Pawar
1734*c83a76b0SSuyog Pawar i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1735*c83a76b0SSuyog Pawar pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1736*c83a76b0SSuyog Pawar
1737*c83a76b0SSuyog Pawar if(-1 != i4_mv_pos_in_implicit_array)
1738*c83a76b0SSuyog Pawar {
1739*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(
1740*c83a76b0SSuyog Pawar ps_search_node,
1741*c83a76b0SSuyog Pawar &ps_mv[i4_mv_pos_in_implicit_array],
1742*c83a76b0SSuyog Pawar &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1743*c83a76b0SSuyog Pawar i1_ref_idx,
1744*c83a76b0SSuyog Pawar shift);
1745*c83a76b0SSuyog Pawar }
1746*c83a76b0SSuyog Pawar else
1747*c83a76b0SSuyog Pawar {
1748*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 0;
1749*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
1750*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
1751*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_idx;
1752*c83a76b0SSuyog Pawar }
1753*c83a76b0SSuyog Pawar }
1754*c83a76b0SSuyog Pawar
1755*c83a76b0SSuyog Pawar /* Move to l1 */
1756*c83a76b0SSuyog Pawar if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
1757*c83a76b0SSuyog Pawar {
1758*c83a76b0SSuyog Pawar /* ps_mv and pi1_ref_idx now point to the top left locn */
1759*c83a76b0SSuyog Pawar ps_search_node++;
1760*c83a76b0SSuyog Pawar ps_mv += (mvs_in_row * (jump >> 1));
1761*c83a76b0SSuyog Pawar pi1_ref_idx += (mvs_in_row * (jump >> 1));
1762*c83a76b0SSuyog Pawar
1763*c83a76b0SSuyog Pawar i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1764*c83a76b0SSuyog Pawar pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1765*c83a76b0SSuyog Pawar
1766*c83a76b0SSuyog Pawar if(-1 != i4_mv_pos_in_implicit_array)
1767*c83a76b0SSuyog Pawar {
1768*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(
1769*c83a76b0SSuyog Pawar ps_search_node,
1770*c83a76b0SSuyog Pawar &ps_mv[i4_mv_pos_in_implicit_array],
1771*c83a76b0SSuyog Pawar &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1772*c83a76b0SSuyog Pawar i1_ref_idx,
1773*c83a76b0SSuyog Pawar shift);
1774*c83a76b0SSuyog Pawar }
1775*c83a76b0SSuyog Pawar else
1776*c83a76b0SSuyog Pawar {
1777*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 0;
1778*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
1779*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
1780*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_idx;
1781*c83a76b0SSuyog Pawar }
1782*c83a76b0SSuyog Pawar }
1783*c83a76b0SSuyog Pawar else
1784*c83a76b0SSuyog Pawar {
1785*c83a76b0SSuyog Pawar ps_search_node++;
1786*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 0;
1787*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
1788*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
1789*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_idx;
1790*c83a76b0SSuyog Pawar }
1791*c83a76b0SSuyog Pawar
1792*c83a76b0SSuyog Pawar /* Move to bl */
1793*c83a76b0SSuyog Pawar ps_search_node++;
1794*c83a76b0SSuyog Pawar if(bl_avail == 0)
1795*c83a76b0SSuyog Pawar {
1796*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
1797*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
1798*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_idx;
1799*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 0;
1800*c83a76b0SSuyog Pawar }
1801*c83a76b0SSuyog Pawar else
1802*c83a76b0SSuyog Pawar {
1803*c83a76b0SSuyog Pawar /* ps_mv and pi1_ref_idx now point to the top left locn */
1804*c83a76b0SSuyog Pawar ps_mv = ps_mv_base + (mvs_in_row * (1 + jump));
1805*c83a76b0SSuyog Pawar pi1_ref_idx = pi1_ref_idx_base + (mvs_in_row * (1 + jump));
1806*c83a76b0SSuyog Pawar
1807*c83a76b0SSuyog Pawar i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1808*c83a76b0SSuyog Pawar pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1809*c83a76b0SSuyog Pawar
1810*c83a76b0SSuyog Pawar if(-1 != i4_mv_pos_in_implicit_array)
1811*c83a76b0SSuyog Pawar {
1812*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(
1813*c83a76b0SSuyog Pawar ps_search_node,
1814*c83a76b0SSuyog Pawar &ps_mv[i4_mv_pos_in_implicit_array],
1815*c83a76b0SSuyog Pawar &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1816*c83a76b0SSuyog Pawar i1_ref_idx,
1817*c83a76b0SSuyog Pawar shift);
1818*c83a76b0SSuyog Pawar }
1819*c83a76b0SSuyog Pawar else
1820*c83a76b0SSuyog Pawar {
1821*c83a76b0SSuyog Pawar ps_search_node->u1_is_avail = 0;
1822*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvx = 0;
1823*c83a76b0SSuyog Pawar ps_search_node->s_mv.i2_mvy = 0;
1824*c83a76b0SSuyog Pawar ps_search_node->i1_ref_idx = i1_ref_idx;
1825*c83a76b0SSuyog Pawar }
1826*c83a76b0SSuyog Pawar }
1827*c83a76b0SSuyog Pawar }
1828*c83a76b0SSuyog Pawar
1829*c83a76b0SSuyog Pawar /**
1830*c83a76b0SSuyog Pawar ********************************************************************************
1831*c83a76b0SSuyog Pawar * @fn void hme_fill_ctb_neighbour_mvs(layer_ctxt_t *ps_curr_layer,
1832*c83a76b0SSuyog Pawar * S32 i4_blk_x,
1833*c83a76b0SSuyog Pawar * S32 i4_blk_y,
1834*c83a76b0SSuyog Pawar * mvgrid_t *ps_mv_grid ,
1835*c83a76b0SSuyog Pawar * S32 i1_ref_id)
1836*c83a76b0SSuyog Pawar *
1837*c83a76b0SSuyog Pawar * @brief The 18x18 MV grid for a ctb, is filled in first row and 1st col
1838*c83a76b0SSuyog Pawar * this corresponds to neighbours (TL, T, TR, L, BL)
1839*c83a76b0SSuyog Pawar *
1840*c83a76b0SSuyog Pawar * @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
1841*c83a76b0SSuyog Pawar *
1842*c83a76b0SSuyog Pawar * @param[in] blk_x : x coordinate of the block in mv bank
1843*c83a76b0SSuyog Pawar *
1844*c83a76b0SSuyog Pawar * @param[in] blk_y : y coordinate of the block in mv bank
1845*c83a76b0SSuyog Pawar *
1846*c83a76b0SSuyog Pawar * @param[in] ps_mv_grid : Grid (18x18 mvs at 4x4 level)
1847*c83a76b0SSuyog Pawar *
1848*c83a76b0SSuyog Pawar * @param[in] i1_ref_idx : Corresponds to ref idx from which to pick up mv
1849*c83a76b0SSuyog Pawar * results, useful if multiple ref idx candts maintained separately.
1850*c83a76b0SSuyog Pawar *
1851*c83a76b0SSuyog Pawar * @return void
1852*c83a76b0SSuyog Pawar ********************************************************************************
1853*c83a76b0SSuyog Pawar */
hme_fill_ctb_neighbour_mvs(layer_ctxt_t * ps_curr_layer,S32 blk_x,S32 blk_y,mv_grid_t * ps_mv_grid,U08 u1_pred_dir_ctr,U08 u1_default_ref_id,S32 i4_num_act_ref_l0)1854*c83a76b0SSuyog Pawar void hme_fill_ctb_neighbour_mvs(
1855*c83a76b0SSuyog Pawar layer_ctxt_t *ps_curr_layer,
1856*c83a76b0SSuyog Pawar S32 blk_x,
1857*c83a76b0SSuyog Pawar S32 blk_y,
1858*c83a76b0SSuyog Pawar mv_grid_t *ps_mv_grid,
1859*c83a76b0SSuyog Pawar U08 u1_pred_dir_ctr,
1860*c83a76b0SSuyog Pawar U08 u1_default_ref_id,
1861*c83a76b0SSuyog Pawar S32 i4_num_act_ref_l0)
1862*c83a76b0SSuyog Pawar {
1863*c83a76b0SSuyog Pawar search_node_t *ps_grid_node;
1864*c83a76b0SSuyog Pawar layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
1865*c83a76b0SSuyog Pawar S32 i4_offset;
1866*c83a76b0SSuyog Pawar hme_mv_t *ps_mv, *ps_mv_base;
1867*c83a76b0SSuyog Pawar S08 *pi1_ref_idx, *pi1_ref_idx_base;
1868*c83a76b0SSuyog Pawar S32 jump = 0, inc, i, mvs_in_blk, mvs_in_row;
1869*c83a76b0SSuyog Pawar
1870*c83a76b0SSuyog Pawar if(ps_layer_mvbank->e_blk_size == BLK_4x4)
1871*c83a76b0SSuyog Pawar {
1872*c83a76b0SSuyog Pawar /* searching 16x16, mvs are for 4x4 */
1873*c83a76b0SSuyog Pawar jump = 1;
1874*c83a76b0SSuyog Pawar blk_x <<= 2;
1875*c83a76b0SSuyog Pawar blk_y <<= 2;
1876*c83a76b0SSuyog Pawar }
1877*c83a76b0SSuyog Pawar else
1878*c83a76b0SSuyog Pawar {
1879*c83a76b0SSuyog Pawar /* Searching 16x16, mvs are for 8x8 */
1880*c83a76b0SSuyog Pawar blk_x <<= 1;
1881*c83a76b0SSuyog Pawar blk_y <<= 1;
1882*c83a76b0SSuyog Pawar }
1883*c83a76b0SSuyog Pawar ASSERT(ps_layer_mvbank->e_blk_size != BLK_16x16);
1884*c83a76b0SSuyog Pawar
1885*c83a76b0SSuyog Pawar mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
1886*c83a76b0SSuyog Pawar mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
1887*c83a76b0SSuyog Pawar
1888*c83a76b0SSuyog Pawar /* Adjust the blk coord to point to top left locn */
1889*c83a76b0SSuyog Pawar blk_x -= 1;
1890*c83a76b0SSuyog Pawar blk_y -= 1;
1891*c83a76b0SSuyog Pawar
1892*c83a76b0SSuyog Pawar /* Pick up the mvs from the location */
1893*c83a76b0SSuyog Pawar i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
1894*c83a76b0SSuyog Pawar i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
1895*c83a76b0SSuyog Pawar
1896*c83a76b0SSuyog Pawar i4_offset += (u1_pred_dir_ctr == 1);
1897*c83a76b0SSuyog Pawar
1898*c83a76b0SSuyog Pawar ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
1899*c83a76b0SSuyog Pawar pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
1900*c83a76b0SSuyog Pawar
1901*c83a76b0SSuyog Pawar ps_mv_base = ps_mv;
1902*c83a76b0SSuyog Pawar pi1_ref_idx_base = pi1_ref_idx;
1903*c83a76b0SSuyog Pawar
1904*c83a76b0SSuyog Pawar /* the 0, 0 entry of the grid pts to top left for the ctb */
1905*c83a76b0SSuyog Pawar ps_grid_node = &ps_mv_grid->as_node[0];
1906*c83a76b0SSuyog Pawar
1907*c83a76b0SSuyog Pawar /* Copy 18 mvs at 4x4 level including top left, 16 top mvs for ctb, 1 tr */
1908*c83a76b0SSuyog Pawar for(i = 0; i < 18; i++)
1909*c83a76b0SSuyog Pawar {
1910*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(ps_grid_node, ps_mv, pi1_ref_idx, u1_default_ref_id, 0);
1911*c83a76b0SSuyog Pawar ps_grid_node++;
1912*c83a76b0SSuyog Pawar inc = 1;
1913*c83a76b0SSuyog Pawar /* If blk size is 8x8, then every 2 grid nodes are updated with same mv */
1914*c83a76b0SSuyog Pawar if(i & 1)
1915*c83a76b0SSuyog Pawar inc = jump;
1916*c83a76b0SSuyog Pawar
1917*c83a76b0SSuyog Pawar ps_mv += (mvs_in_blk * inc);
1918*c83a76b0SSuyog Pawar pi1_ref_idx += (mvs_in_blk * inc);
1919*c83a76b0SSuyog Pawar }
1920*c83a76b0SSuyog Pawar
1921*c83a76b0SSuyog Pawar ps_mv = ps_mv_base + mvs_in_row;
1922*c83a76b0SSuyog Pawar pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
1923*c83a76b0SSuyog Pawar
1924*c83a76b0SSuyog Pawar /* now copy left 16 left mvs */
1925*c83a76b0SSuyog Pawar ps_grid_node = &ps_mv_grid->as_node[0];
1926*c83a76b0SSuyog Pawar ps_grid_node += (ps_mv_grid->i4_stride);
1927*c83a76b0SSuyog Pawar for(i = 0; i < 16; i++)
1928*c83a76b0SSuyog Pawar {
1929*c83a76b0SSuyog Pawar COPY_MV_TO_SEARCH_NODE(ps_grid_node, ps_mv, pi1_ref_idx, u1_default_ref_id, 0);
1930*c83a76b0SSuyog Pawar ps_grid_node += ps_mv_grid->i4_stride;
1931*c83a76b0SSuyog Pawar inc = 1;
1932*c83a76b0SSuyog Pawar /* If blk size is 8x8, then every 2 grid nodes are updated with same mv */
1933*c83a76b0SSuyog Pawar if(!(i & 1))
1934*c83a76b0SSuyog Pawar inc = jump;
1935*c83a76b0SSuyog Pawar
1936*c83a76b0SSuyog Pawar ps_mv += (mvs_in_row * inc);
1937*c83a76b0SSuyog Pawar pi1_ref_idx += (mvs_in_row * inc);
1938*c83a76b0SSuyog Pawar }
1939*c83a76b0SSuyog Pawar /* last one set to invalid as bottom left not yet encoded */
1940*c83a76b0SSuyog Pawar ps_grid_node->u1_is_avail = 0;
1941*c83a76b0SSuyog Pawar }
1942*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     void hme_reset_wkg_mem(buf_mgr_t *ps_buf_mgr)
*
*  @brief  Zeroes the usage counter of the buffer manager. The working memory
*          pointer and the total size are left untouched.
*
*  @param[in,out]  ps_buf_mgr : buffer manager whose i4_used field is cleared
*
*  @return void
********************************************************************************
*/
void hme_reset_wkg_mem(buf_mgr_t *ps_buf_mgr)
{
    ps_buf_mgr->i4_used = 0;
}
/**
********************************************************************************
*  @fn     void hme_init_wkg_mem(buf_mgr_t *ps_buf_mgr, U08 *pu1_mem, S32 size)
*
*  @brief  Attaches a working memory region to the buffer manager and resets
*          its usage counter
*
*  @param[out] ps_buf_mgr : buffer manager to be initialised
*
*  @param[in]  pu1_mem : start of the working memory, stored in pu1_wkg_mem
*
*  @param[in]  size : value stored in i4_total
*
*  @return void
********************************************************************************
*/
void hme_init_wkg_mem(buf_mgr_t *ps_buf_mgr, U08 *pu1_mem, S32 size)
{
    /* Nothing is consumed yet */
    hme_reset_wkg_mem(ps_buf_mgr);

    ps_buf_mgr->i4_total = size;
    ps_buf_mgr->pu1_wkg_mem = pu1_mem;
}
1953*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     void hme_init_mv_grid(mv_grid_t *ps_mv_grid)
*
*  @brief  Sets the stride and start offset of the ctb MV grid and marks the
*          16x16 nodes starting at the start offset as available
*
*  @param[out] ps_mv_grid : Grid to be initialised
*
*  @return void
********************************************************************************
*/
void hme_init_mv_grid(mv_grid_t *ps_mv_grid)
{
    S32 row, col;

    /*************************************************************************/
    /* Worst case is a 64x64 CTB, i.e. 16x16 MVs at 4x4 level. With one      */
    /* neighbour on each side this becomes an 18x18 MV grid. The border      */
    /* nodes are neighbours; the left and top edges as well as the 16x16     */
    /* center (the actual CTB MVs) are filled at run time. Only the          */
    /* availability of the center is fixed here, at init time, to available. */
    /*************************************************************************/
    ps_mv_grid->i4_stride = NUM_COLUMNS_IN_CTB_GRID;
    ps_mv_grid->i4_start_offset = ps_mv_grid->i4_stride + CTB_MV_GRID_PAD;

    for(row = 0; row < 16; row++)
    {
        search_node_t *ps_row =
            &ps_mv_grid->as_node[ps_mv_grid->i4_start_offset + (row * ps_mv_grid->i4_stride)];

        for(col = 0; col < 16; col++)
        {
            ps_row[col].u1_is_avail = 1;
        }
    }
}
/**
********************************************************************************
*  @fn     void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
*
*  @brief  Pads horizontally to the left side. For every line, the pixel at
*          the line start is replicated into the pad_wd bytes preceding it
*
*  @param[in,out]  pu1_dst : Points to the pixel to be repeated on the first
*                  line; the bytes before it are written
*
*  @param[in]  stride : stride of destination buffer
*
*  @param[in]  pad_wd : Amt of horizontal padding to be done; nothing is
*              written when it is not positive
*
*  @param[in]  pad_ht : Number of lines for which horizontal padding to be done
*
*  @return void
********************************************************************************
*/
void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
{
    U08 *pu1_line = pu1_dst;
    S32 line;

    /* A non-positive width pads nothing (and must never reach memset) */
    if(pad_wd <= 0)
    {
        return;
    }

    for(line = 0; line < pad_ht; line++)
    {
        memset(pu1_line - pad_wd, pu1_line[0], (size_t)pad_wd);
        pu1_line += stride;
    }
}
/**
********************************************************************************
*  @fn     void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
*
*  @brief  Pads horizontally to the right side. For every line, the pixel
*          pointed to is replicated into the pad_wd bytes following it
*
*  @param[in,out]  pu1_dst : Points to the pixel to be repeated on the first
*                  line; the bytes after it are written
*
*  @param[in]  stride : stride of destination buffer
*
*  @param[in]  pad_wd : Amt of horizontal padding to be done; nothing is
*              written when it is not positive
*
*  @param[in]  pad_ht : Number of lines for which horizontal padding to be done
*
*  @return void
********************************************************************************
*/
void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
{
    U08 *pu1_line = pu1_dst;
    S32 line;

    /* A non-positive width pads nothing (and must never reach memset) */
    if(pad_wd <= 0)
    {
        return;
    }

    for(line = 0; line < pad_ht; line++)
    {
        memset(pu1_line + 1, pu1_line[0], (size_t)pad_wd);
        pu1_line += stride;
    }
}
/**
********************************************************************************
*  @fn     void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
*
*  @brief  Pads vertically on the top. The line pointed to is copied into each
*          of the pad_ht lines above it
*
*  @param[in,out]  pu1_dst : Points to the line to be repeated; the lines
*                  above it are written
*
*  @param[in]  stride : stride of destination buffer
*
*  @param[in]  pad_ht : Amt of vertical padding to be done
*
*  @param[in]  pad_wd : Number of columns for which vertical padding to be done
*
*  @return void
********************************************************************************
*/
void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
{
    U08 *pu1_pad_line = pu1_dst;
    S32 lines_left = pad_ht;

    /* Walk upwards one line at a time, always copying from the source line */
    while(lines_left > 0)
    {
        pu1_pad_line -= stride;
        memcpy(pu1_pad_line, pu1_dst, pad_wd);
        lines_left--;
    }
}
/**
********************************************************************************
*  @fn     void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
*
*  @brief  Pads vertically on the bottom. The line pointed to is copied into
*          each of the pad_ht lines below it
*
*  @param[in,out]  pu1_dst : Points to the line to be repeated; the lines
*                  below it are written
*
*  @param[in]  stride : stride of destination buffer
*
*  @param[in]  pad_ht : Amt of vertical padding to be done
*
*  @param[in]  pad_wd : Number of columns for which vertical padding to be done
*
*  @return void
********************************************************************************
*/
void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
{
    U08 *pu1_pad_line = pu1_dst;
    S32 lines_left = pad_ht;

    /* Walk downwards one line at a time, always copying from the source line */
    while(lines_left > 0)
    {
        pu1_pad_line += stride;
        memcpy(pu1_pad_line, pu1_dst, pad_wd);
        lines_left--;
    }
}
2085*c83a76b0SSuyog Pawar
2086*c83a76b0SSuyog Pawar /**
2087*c83a76b0SSuyog Pawar ********************************************************************************
2088*c83a76b0SSuyog Pawar * @fn void hme_get_wt_inp(layer_ctxt_t *ps_curr_layer, S32 pos_x,
2089*c83a76b0SSuyog Pawar * S32 pos_y, S32 size)
2090*c83a76b0SSuyog Pawar *
2091*c83a76b0SSuyog Pawar * @brief Does weighting of the input in case the search needs to happen
2092*c83a76b0SSuyog Pawar * with reference frames weighted
2093*c83a76b0SSuyog Pawar *
2094*c83a76b0SSuyog Pawar * @param[in] ps_curr_layer: layer ctxt
2095*c83a76b0SSuyog Pawar *
2096*c83a76b0SSuyog Pawar * @param[in] pos_x : x coordinate of the input blk in the picture
2097*c83a76b0SSuyog Pawar *
2098*c83a76b0SSuyog Pawar * @param[in] pos_y : y coordinate of hte input blk in the picture
2099*c83a76b0SSuyog Pawar *
2100*c83a76b0SSuyog Pawar * @param[in] size : size of the input block
2101*c83a76b0SSuyog Pawar *
2102*c83a76b0SSuyog Pawar * @param[in] num_ref : Number of reference frames
2103*c83a76b0SSuyog Pawar *
2104*c83a76b0SSuyog Pawar * @return void
2105*c83a76b0SSuyog Pawar ********************************************************************************
2106*c83a76b0SSuyog Pawar */
hme_get_wt_inp(layer_ctxt_t * ps_curr_layer,wgt_pred_ctxt_t * ps_wt_inp_prms,S32 dst_stride,S32 pos_x,S32 pos_y,S32 size,S32 num_ref,U08 u1_is_wt_pred_on)2107*c83a76b0SSuyog Pawar void hme_get_wt_inp(
2108*c83a76b0SSuyog Pawar layer_ctxt_t *ps_curr_layer,
2109*c83a76b0SSuyog Pawar wgt_pred_ctxt_t *ps_wt_inp_prms,
2110*c83a76b0SSuyog Pawar S32 dst_stride,
2111*c83a76b0SSuyog Pawar S32 pos_x,
2112*c83a76b0SSuyog Pawar S32 pos_y,
2113*c83a76b0SSuyog Pawar S32 size,
2114*c83a76b0SSuyog Pawar S32 num_ref,
2115*c83a76b0SSuyog Pawar U08 u1_is_wt_pred_on)
2116*c83a76b0SSuyog Pawar {
2117*c83a76b0SSuyog Pawar S32 ref, i, j;
2118*c83a76b0SSuyog Pawar U08 *pu1_src, *pu1_dst, *pu1_src_tmp;
2119*c83a76b0SSuyog Pawar S32 log_wdc = ps_wt_inp_prms->wpred_log_wdc;
2120*c83a76b0SSuyog Pawar S32 x_count, y_count;
2121*c83a76b0SSuyog Pawar
2122*c83a76b0SSuyog Pawar /* Fixed source */
2123*c83a76b0SSuyog Pawar pu1_src = ps_curr_layer->pu1_inp;
2124*c83a76b0SSuyog Pawar
2125*c83a76b0SSuyog Pawar /* Make sure the start positions of block are inside frame limits */
2126*c83a76b0SSuyog Pawar pos_x = MIN(pos_x, ps_curr_layer->i4_wd - 1);
2127*c83a76b0SSuyog Pawar pos_y = MIN(pos_y, ps_curr_layer->i4_ht - 1);
2128*c83a76b0SSuyog Pawar
2129*c83a76b0SSuyog Pawar pu1_src += (pos_x + (pos_y * ps_curr_layer->i4_inp_stride));
2130*c83a76b0SSuyog Pawar
2131*c83a76b0SSuyog Pawar /* In case we handle imcomplete CTBs, we copy only as much as reqd */
2132*c83a76b0SSuyog Pawar /* from input buffers to prevent out of bound accesses. In this */
2133*c83a76b0SSuyog Pawar /* case, we do padding in x or y or both dirns */
2134*c83a76b0SSuyog Pawar x_count = MIN(size, (ps_curr_layer->i4_wd - pos_x));
2135*c83a76b0SSuyog Pawar y_count = MIN(size, (ps_curr_layer->i4_ht - pos_y));
2136*c83a76b0SSuyog Pawar
2137*c83a76b0SSuyog Pawar for(i = 0; i < num_ref + 1; i++)
2138*c83a76b0SSuyog Pawar {
2139*c83a76b0SSuyog Pawar ps_wt_inp_prms->apu1_wt_inp[i] = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref];
2140*c83a76b0SSuyog Pawar }
2141*c83a76b0SSuyog Pawar
2142*c83a76b0SSuyog Pawar /* Run thro all ref ids */
2143*c83a76b0SSuyog Pawar for(ref = 0; ref < num_ref + 1; ref++)
2144*c83a76b0SSuyog Pawar {
2145*c83a76b0SSuyog Pawar S32 wt, off;
2146*c83a76b0SSuyog Pawar S32 inv_wt;
2147*c83a76b0SSuyog Pawar
2148*c83a76b0SSuyog Pawar pu1_src_tmp = pu1_src;
2149*c83a76b0SSuyog Pawar
2150*c83a76b0SSuyog Pawar /* Each ref id may have differnet wt/offset. */
2151*c83a76b0SSuyog Pawar /* So we have unique inp buf for each ref id */
2152*c83a76b0SSuyog Pawar pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ref];
2153*c83a76b0SSuyog Pawar
2154*c83a76b0SSuyog Pawar if(ref == num_ref)
2155*c83a76b0SSuyog Pawar {
2156*c83a76b0SSuyog Pawar /* last ref will be non weighted input */
2157*c83a76b0SSuyog Pawar for(i = 0; i < y_count; i++)
2158*c83a76b0SSuyog Pawar {
2159*c83a76b0SSuyog Pawar for(j = 0; j < x_count; j++)
2160*c83a76b0SSuyog Pawar {
2161*c83a76b0SSuyog Pawar pu1_dst[j] = pu1_src_tmp[j];
2162*c83a76b0SSuyog Pawar }
2163*c83a76b0SSuyog Pawar pu1_src_tmp += ps_curr_layer->i4_inp_stride;
2164*c83a76b0SSuyog Pawar pu1_dst += dst_stride;
2165*c83a76b0SSuyog Pawar }
2166*c83a76b0SSuyog Pawar }
2167*c83a76b0SSuyog Pawar else
2168*c83a76b0SSuyog Pawar {
2169*c83a76b0SSuyog Pawar /* Wt and off specific to this ref id */
2170*c83a76b0SSuyog Pawar wt = ps_wt_inp_prms->a_wpred_wt[ref];
2171*c83a76b0SSuyog Pawar inv_wt = ps_wt_inp_prms->a_inv_wpred_wt[ref];
2172*c83a76b0SSuyog Pawar off = ps_wt_inp_prms->a_wpred_off[ref];
2173*c83a76b0SSuyog Pawar
2174*c83a76b0SSuyog Pawar /* Generate size*size worth of modified input samples */
2175*c83a76b0SSuyog Pawar for(i = 0; i < y_count; i++)
2176*c83a76b0SSuyog Pawar {
2177*c83a76b0SSuyog Pawar for(j = 0; j < x_count; j++)
2178*c83a76b0SSuyog Pawar {
2179*c83a76b0SSuyog Pawar S32 tmp;
2180*c83a76b0SSuyog Pawar
2181*c83a76b0SSuyog Pawar /* Since we scale input, we use inverse transform of wt pred */
2182*c83a76b0SSuyog Pawar //tmp = HME_INV_WT_PRED(pu1_src_tmp[j], wt, off, log_wdc);
2183*c83a76b0SSuyog Pawar tmp = HME_INV_WT_PRED1(pu1_src_tmp[j], inv_wt, off, log_wdc);
2184*c83a76b0SSuyog Pawar pu1_dst[j] = (U08)(HME_CLIP(tmp, 0, 255));
2185*c83a76b0SSuyog Pawar }
2186*c83a76b0SSuyog Pawar pu1_src_tmp += ps_curr_layer->i4_inp_stride;
2187*c83a76b0SSuyog Pawar pu1_dst += dst_stride;
2188*c83a76b0SSuyog Pawar }
2189*c83a76b0SSuyog Pawar }
2190*c83a76b0SSuyog Pawar
2191*c83a76b0SSuyog Pawar /* Check and do padding in right direction if need be */
2192*c83a76b0SSuyog Pawar pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ref];
2193*c83a76b0SSuyog Pawar if(x_count != size)
2194*c83a76b0SSuyog Pawar {
2195*c83a76b0SSuyog Pawar hme_pad_right(pu1_dst + x_count - 1, dst_stride, size - x_count, y_count);
2196*c83a76b0SSuyog Pawar }
2197*c83a76b0SSuyog Pawar
2198*c83a76b0SSuyog Pawar /* Check and do padding in bottom directino if need be */
2199*c83a76b0SSuyog Pawar if(y_count != size)
2200*c83a76b0SSuyog Pawar {
2201*c83a76b0SSuyog Pawar hme_pad_bot(pu1_dst + (y_count - 1) * dst_stride, dst_stride, size - y_count, size);
2202*c83a76b0SSuyog Pawar }
2203*c83a76b0SSuyog Pawar }
2204*c83a76b0SSuyog Pawar }
2205*c83a76b0SSuyog Pawar /**
2206*c83a76b0SSuyog Pawar ****************************************************************************************
2207*c83a76b0SSuyog Pawar * @fn hme_pick_best_pu_cand(pu_result_t *ps_pu_results_dst,
2208*c83a76b0SSuyog Pawar * pu_result_t *ps_pu_results_inp,
2209*c83a76b0SSuyog Pawar * UWORD8 u1_num_results_per_part,
2210*c83a76b0SSuyog Pawar * UWORD8 u1_num_best_cand)
2211*c83a76b0SSuyog Pawar *
2212*c83a76b0SSuyog Pawar * @brief Does the candidate evaluation across all the current candidates and returns
2213*c83a76b0SSuyog Pawar * the best two or one candidates across given lists
2214*c83a76b0SSuyog Pawar *
2215*c83a76b0SSuyog Pawar * @param[in] - ps_pu_results_inp : Pointer to the input candidates
2216*c83a76b0SSuyog Pawar * - u1_num_results_per_part: Number of available candidates
2217*c83a76b0SSuyog Pawar *
2218*c83a76b0SSuyog Pawar * @param[out] - ps_pu_results_dst : Pointer to best PU results
2219*c83a76b0SSuyog Pawar *
2220*c83a76b0SSuyog Pawar ****************************************************************************************
2221*c83a76b0SSuyog Pawar */
hme_pick_best_pu_cand(pu_result_t * ps_pu_results_dst,pu_result_t * ps_pu_results_list0,pu_result_t * ps_pu_results_list1,UWORD8 u1_num_results_per_part_l0,UWORD8 u1_num_results_per_part_l1,UWORD8 u1_candidate_rank)2222*c83a76b0SSuyog Pawar void hme_pick_best_pu_cand(
2223*c83a76b0SSuyog Pawar pu_result_t *ps_pu_results_dst,
2224*c83a76b0SSuyog Pawar pu_result_t *ps_pu_results_list0,
2225*c83a76b0SSuyog Pawar pu_result_t *ps_pu_results_list1,
2226*c83a76b0SSuyog Pawar UWORD8 u1_num_results_per_part_l0,
2227*c83a76b0SSuyog Pawar UWORD8 u1_num_results_per_part_l1,
2228*c83a76b0SSuyog Pawar UWORD8 u1_candidate_rank)
2229*c83a76b0SSuyog Pawar {
2230*c83a76b0SSuyog Pawar struct cand_pos_data
2231*c83a76b0SSuyog Pawar {
2232*c83a76b0SSuyog Pawar U08 u1_cand_list_id;
2233*c83a76b0SSuyog Pawar
2234*c83a76b0SSuyog Pawar U08 u1_cand_id_in_cand_list;
2235*c83a76b0SSuyog Pawar } as_cand_pos_data[MAX_NUM_RESULTS_PER_PART_LIST << 1];
2236*c83a76b0SSuyog Pawar
2237*c83a76b0SSuyog Pawar S32 ai4_costs[MAX_NUM_RESULTS_PER_PART_LIST << 1];
2238*c83a76b0SSuyog Pawar U08 i, j;
2239*c83a76b0SSuyog Pawar
2240*c83a76b0SSuyog Pawar for(i = 0; i < u1_num_results_per_part_l0; i++)
2241*c83a76b0SSuyog Pawar {
2242*c83a76b0SSuyog Pawar ai4_costs[i] = ps_pu_results_list0[i].i4_tot_cost;
2243*c83a76b0SSuyog Pawar as_cand_pos_data[i].u1_cand_id_in_cand_list = i;
2244*c83a76b0SSuyog Pawar as_cand_pos_data[i].u1_cand_list_id = 0;
2245*c83a76b0SSuyog Pawar }
2246*c83a76b0SSuyog Pawar
2247*c83a76b0SSuyog Pawar for(i = 0, j = u1_num_results_per_part_l0; i < u1_num_results_per_part_l1; i++, j++)
2248*c83a76b0SSuyog Pawar {
2249*c83a76b0SSuyog Pawar ai4_costs[j] = ps_pu_results_list1[i].i4_tot_cost;
2250*c83a76b0SSuyog Pawar as_cand_pos_data[j].u1_cand_id_in_cand_list = i;
2251*c83a76b0SSuyog Pawar as_cand_pos_data[j].u1_cand_list_id = 1;
2252*c83a76b0SSuyog Pawar }
2253*c83a76b0SSuyog Pawar
2254*c83a76b0SSuyog Pawar SORT_PRIMARY_INTTYPE_ARRAY_AND_REORDER_GENERIC_COMPANION_ARRAY(
2255*c83a76b0SSuyog Pawar ai4_costs,
2256*c83a76b0SSuyog Pawar as_cand_pos_data,
2257*c83a76b0SSuyog Pawar u1_num_results_per_part_l0 + u1_num_results_per_part_l1,
2258*c83a76b0SSuyog Pawar struct cand_pos_data);
2259*c83a76b0SSuyog Pawar
2260*c83a76b0SSuyog Pawar if(as_cand_pos_data[u1_candidate_rank].u1_cand_list_id)
2261*c83a76b0SSuyog Pawar {
2262*c83a76b0SSuyog Pawar ps_pu_results_dst[0] =
2263*c83a76b0SSuyog Pawar ps_pu_results_list1[as_cand_pos_data[u1_candidate_rank].u1_cand_id_in_cand_list];
2264*c83a76b0SSuyog Pawar }
2265*c83a76b0SSuyog Pawar else
2266*c83a76b0SSuyog Pawar {
2267*c83a76b0SSuyog Pawar ps_pu_results_dst[0] =
2268*c83a76b0SSuyog Pawar ps_pu_results_list0[as_cand_pos_data[u1_candidate_rank].u1_cand_id_in_cand_list];
2269*c83a76b0SSuyog Pawar }
2270*c83a76b0SSuyog Pawar }
2271*c83a76b0SSuyog Pawar
2272*c83a76b0SSuyog Pawar /* Returns the number of candidates */
hme_tu_recur_cand_harvester(part_type_results_t * ps_cand_container,inter_pu_results_t * ps_pu_data,inter_ctb_prms_t * ps_inter_ctb_prms,S32 i4_part_mask)2273*c83a76b0SSuyog Pawar static S32 hme_tu_recur_cand_harvester(
2274*c83a76b0SSuyog Pawar part_type_results_t *ps_cand_container,
2275*c83a76b0SSuyog Pawar inter_pu_results_t *ps_pu_data,
2276*c83a76b0SSuyog Pawar inter_ctb_prms_t *ps_inter_ctb_prms,
2277*c83a76b0SSuyog Pawar S32 i4_part_mask)
2278*c83a76b0SSuyog Pawar {
2279*c83a76b0SSuyog Pawar part_type_results_t s_cand_data;
2280*c83a76b0SSuyog Pawar
2281*c83a76b0SSuyog Pawar U08 i, j;
2282*c83a76b0SSuyog Pawar PART_ID_T e_part_id;
2283*c83a76b0SSuyog Pawar
2284*c83a76b0SSuyog Pawar S32 i4_num_cands = 0;
2285*c83a76b0SSuyog Pawar
2286*c83a76b0SSuyog Pawar /* 2Nx2N part_type decision part */
2287*c83a76b0SSuyog Pawar if(i4_part_mask & ENABLE_2Nx2N)
2288*c83a76b0SSuyog Pawar {
2289*c83a76b0SSuyog Pawar U08 u1_num_candt_to_pick;
2290*c83a76b0SSuyog Pawar
2291*c83a76b0SSuyog Pawar e_part_id = ge_part_type_to_part_id[PRT_2Nx2N][0];
2292*c83a76b0SSuyog Pawar
2293*c83a76b0SSuyog Pawar ASSERT(ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands >= 1);
2294*c83a76b0SSuyog Pawar
2295*c83a76b0SSuyog Pawar if(!ps_inter_ctb_prms->i4_bidir_enabled || (i4_part_mask == ENABLE_2Nx2N))
2296*c83a76b0SSuyog Pawar {
2297*c83a76b0SSuyog Pawar u1_num_candt_to_pick =
2298*c83a76b0SSuyog Pawar MIN(ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands,
2299*c83a76b0SSuyog Pawar ps_pu_data->u1_num_results_per_part_l0[e_part_id] +
2300*c83a76b0SSuyog Pawar ps_pu_data->u1_num_results_per_part_l1[e_part_id]);
2301*c83a76b0SSuyog Pawar }
2302*c83a76b0SSuyog Pawar else
2303*c83a76b0SSuyog Pawar {
2304*c83a76b0SSuyog Pawar u1_num_candt_to_pick =
2305*c83a76b0SSuyog Pawar MIN(1,
2306*c83a76b0SSuyog Pawar ps_pu_data->u1_num_results_per_part_l0[e_part_id] +
2307*c83a76b0SSuyog Pawar ps_pu_data->u1_num_results_per_part_l1[e_part_id]);
2308*c83a76b0SSuyog Pawar }
2309*c83a76b0SSuyog Pawar
2310*c83a76b0SSuyog Pawar if(ME_XTREME_SPEED_25 == ps_inter_ctb_prms->i1_quality_preset)
2311*c83a76b0SSuyog Pawar {
2312*c83a76b0SSuyog Pawar u1_num_candt_to_pick = MIN(u1_num_candt_to_pick, MAX_NUM_TU_RECUR_CANDS_IN_XS25);
2313*c83a76b0SSuyog Pawar }
2314*c83a76b0SSuyog Pawar
2315*c83a76b0SSuyog Pawar for(i = 0; i < u1_num_candt_to_pick; i++)
2316*c83a76b0SSuyog Pawar {
2317*c83a76b0SSuyog Pawar /* Picks the best two candidates of all the available ones */
2318*c83a76b0SSuyog Pawar hme_pick_best_pu_cand(
2319*c83a76b0SSuyog Pawar ps_cand_container[i4_num_cands].as_pu_results,
2320*c83a76b0SSuyog Pawar ps_pu_data->aps_pu_results[0][e_part_id],
2321*c83a76b0SSuyog Pawar ps_pu_data->aps_pu_results[1][e_part_id],
2322*c83a76b0SSuyog Pawar ps_pu_data->u1_num_results_per_part_l0[e_part_id],
2323*c83a76b0SSuyog Pawar ps_pu_data->u1_num_results_per_part_l1[e_part_id],
2324*c83a76b0SSuyog Pawar i);
2325*c83a76b0SSuyog Pawar
2326*c83a76b0SSuyog Pawar /* Update the other params part_type and total_cost in part_type_results */
2327*c83a76b0SSuyog Pawar ps_cand_container[i4_num_cands].u1_part_type = e_part_id;
2328*c83a76b0SSuyog Pawar ps_cand_container[i4_num_cands].i4_tot_cost =
2329*c83a76b0SSuyog Pawar ps_cand_container[i4_num_cands].as_pu_results->i4_tot_cost;
2330*c83a76b0SSuyog Pawar
2331*c83a76b0SSuyog Pawar i4_num_cands++;
2332*c83a76b0SSuyog Pawar }
2333*c83a76b0SSuyog Pawar }
2334*c83a76b0SSuyog Pawar
2335*c83a76b0SSuyog Pawar /* SMP */
2336*c83a76b0SSuyog Pawar {
2337*c83a76b0SSuyog Pawar S32 i4_total_cost;
2338*c83a76b0SSuyog Pawar
2339*c83a76b0SSuyog Pawar S32 num_part_types = PRT_Nx2N - PRT_2NxN + 1;
2340*c83a76b0SSuyog Pawar S32 start_part_type = PRT_2NxN;
2341*c83a76b0SSuyog Pawar S32 best_cost = MAX_32BIT_VAL;
2342*c83a76b0SSuyog Pawar S32 part_type_cnt = 0;
2343*c83a76b0SSuyog Pawar
2344*c83a76b0SSuyog Pawar for(j = 0; j < num_part_types; j++)
2345*c83a76b0SSuyog Pawar {
2346*c83a76b0SSuyog Pawar if(!(i4_part_mask & gai4_part_type_to_part_mask[j + start_part_type]))
2347*c83a76b0SSuyog Pawar {
2348*c83a76b0SSuyog Pawar continue;
2349*c83a76b0SSuyog Pawar }
2350*c83a76b0SSuyog Pawar
2351*c83a76b0SSuyog Pawar for(i = 0; i < gau1_num_parts_in_part_type[j + start_part_type]; i++)
2352*c83a76b0SSuyog Pawar {
2353*c83a76b0SSuyog Pawar e_part_id = ge_part_type_to_part_id[j + start_part_type][i];
2354*c83a76b0SSuyog Pawar
2355*c83a76b0SSuyog Pawar /* Pick the best candidate for the partition acroos lists */
2356*c83a76b0SSuyog Pawar hme_pick_best_pu_cand(
2357*c83a76b0SSuyog Pawar &s_cand_data.as_pu_results[i],
2358*c83a76b0SSuyog Pawar ps_pu_data->aps_pu_results[0][e_part_id],
2359*c83a76b0SSuyog Pawar ps_pu_data->aps_pu_results[1][e_part_id],
2360*c83a76b0SSuyog Pawar ps_pu_data->u1_num_results_per_part_l0[e_part_id],
2361*c83a76b0SSuyog Pawar ps_pu_data->u1_num_results_per_part_l1[e_part_id],
2362*c83a76b0SSuyog Pawar 0);
2363*c83a76b0SSuyog Pawar }
2364*c83a76b0SSuyog Pawar
2365*c83a76b0SSuyog Pawar i4_total_cost =
2366*c83a76b0SSuyog Pawar s_cand_data.as_pu_results[0].i4_tot_cost + s_cand_data.as_pu_results[1].i4_tot_cost;
2367*c83a76b0SSuyog Pawar
2368*c83a76b0SSuyog Pawar if(i4_total_cost < best_cost)
2369*c83a76b0SSuyog Pawar {
2370*c83a76b0SSuyog Pawar /* Stores the index of the best part_type in the sub-catoegory */
2371*c83a76b0SSuyog Pawar best_cost = i4_total_cost;
2372*c83a76b0SSuyog Pawar
2373*c83a76b0SSuyog Pawar ps_cand_container[i4_num_cands] = s_cand_data;
2374*c83a76b0SSuyog Pawar
2375*c83a76b0SSuyog Pawar ps_cand_container[i4_num_cands].u1_part_type = j + start_part_type;
2376*c83a76b0SSuyog Pawar ps_cand_container[i4_num_cands].i4_tot_cost = i4_total_cost;
2377*c83a76b0SSuyog Pawar }
2378*c83a76b0SSuyog Pawar
2379*c83a76b0SSuyog Pawar part_type_cnt++;
2380*c83a76b0SSuyog Pawar }
2381*c83a76b0SSuyog Pawar
2382*c83a76b0SSuyog Pawar i4_num_cands = (part_type_cnt) ? (i4_num_cands + 1) : i4_num_cands;
2383*c83a76b0SSuyog Pawar }
2384*c83a76b0SSuyog Pawar
2385*c83a76b0SSuyog Pawar /* AMP */
2386*c83a76b0SSuyog Pawar {
2387*c83a76b0SSuyog Pawar S32 i4_total_cost;
2388*c83a76b0SSuyog Pawar
2389*c83a76b0SSuyog Pawar S32 num_part_types = PRT_nRx2N - PRT_2NxnU + 1;
2390*c83a76b0SSuyog Pawar S32 start_part_type = PRT_2NxnU;
2391*c83a76b0SSuyog Pawar S32 best_cost = MAX_32BIT_VAL;
2392*c83a76b0SSuyog Pawar S32 part_type_cnt = 0;
2393*c83a76b0SSuyog Pawar
2394*c83a76b0SSuyog Pawar for(j = 0; j < num_part_types; j++)
2395*c83a76b0SSuyog Pawar {
2396*c83a76b0SSuyog Pawar if(!(i4_part_mask & gai4_part_type_to_part_mask[j + start_part_type]))
2397*c83a76b0SSuyog Pawar {
2398*c83a76b0SSuyog Pawar continue;
2399*c83a76b0SSuyog Pawar }
2400*c83a76b0SSuyog Pawar
2401*c83a76b0SSuyog Pawar for(i = 0; i < gau1_num_parts_in_part_type[j + start_part_type]; i++)
2402*c83a76b0SSuyog Pawar {
2403*c83a76b0SSuyog Pawar e_part_id = ge_part_type_to_part_id[j + start_part_type][i];
2404*c83a76b0SSuyog Pawar
2405*c83a76b0SSuyog Pawar /* Pick the best candidate for the partition acroos lists */
2406*c83a76b0SSuyog Pawar hme_pick_best_pu_cand(
2407*c83a76b0SSuyog Pawar &s_cand_data.as_pu_results[i],
2408*c83a76b0SSuyog Pawar ps_pu_data->aps_pu_results[0][e_part_id],
2409*c83a76b0SSuyog Pawar ps_pu_data->aps_pu_results[1][e_part_id],
2410*c83a76b0SSuyog Pawar ps_pu_data->u1_num_results_per_part_l0[e_part_id],
2411*c83a76b0SSuyog Pawar ps_pu_data->u1_num_results_per_part_l1[e_part_id],
2412*c83a76b0SSuyog Pawar 0);
2413*c83a76b0SSuyog Pawar }
2414*c83a76b0SSuyog Pawar
2415*c83a76b0SSuyog Pawar i4_total_cost =
2416*c83a76b0SSuyog Pawar s_cand_data.as_pu_results[0].i4_tot_cost + s_cand_data.as_pu_results[1].i4_tot_cost;
2417*c83a76b0SSuyog Pawar
2418*c83a76b0SSuyog Pawar if(i4_total_cost < best_cost)
2419*c83a76b0SSuyog Pawar {
2420*c83a76b0SSuyog Pawar /* Stores the index of the best part_type in the sub-catoegory */
2421*c83a76b0SSuyog Pawar best_cost = i4_total_cost;
2422*c83a76b0SSuyog Pawar
2423*c83a76b0SSuyog Pawar ps_cand_container[i4_num_cands] = s_cand_data;
2424*c83a76b0SSuyog Pawar
2425*c83a76b0SSuyog Pawar ps_cand_container[i4_num_cands].u1_part_type = j + start_part_type;
2426*c83a76b0SSuyog Pawar ps_cand_container[i4_num_cands].i4_tot_cost = i4_total_cost;
2427*c83a76b0SSuyog Pawar }
2428*c83a76b0SSuyog Pawar
2429*c83a76b0SSuyog Pawar part_type_cnt++;
2430*c83a76b0SSuyog Pawar }
2431*c83a76b0SSuyog Pawar
2432*c83a76b0SSuyog Pawar i4_num_cands = (part_type_cnt) ? (i4_num_cands + 1) : i4_num_cands;
2433*c83a76b0SSuyog Pawar }
2434*c83a76b0SSuyog Pawar
2435*c83a76b0SSuyog Pawar return i4_num_cands;
2436*c83a76b0SSuyog Pawar }
2437*c83a76b0SSuyog Pawar
2438*c83a76b0SSuyog Pawar /**
2439*c83a76b0SSuyog Pawar *****************************************************************************
2440*c83a76b0SSuyog Pawar * @fn hme_decide_part_types(search_results_t *ps_search_results)
2441*c83a76b0SSuyog Pawar *
*  @brief  Does uni/bi evaluation across various partition types,
*          decides best inter partition types for the CU, compares
*          intra cost and decides the best K results for the CU
*
*          This is called post subpel refinement for 16x16s, 8x8s and
*          for post merge evaluation for 32x32, 64x64 CUs
2448*c83a76b0SSuyog Pawar *
2449*c83a76b0SSuyog Pawar * @param[in,out] ps_search_results : Search results data structure
*              - In : 2 lists of up to 2 mvs & refids, active partition mask
2451*c83a76b0SSuyog Pawar * - Out: Best results for final rdo evaluation of the cu
2452*c83a76b0SSuyog Pawar *
2453*c83a76b0SSuyog Pawar * @param[in] ps_subpel_prms : Sub pel params data structure
2454*c83a76b0SSuyog Pawar *
2455*c83a76b0SSuyog Pawar *
2456*c83a76b0SSuyog Pawar * @par Description
2457*c83a76b0SSuyog Pawar * --------------------------------------------------------------------------------
2458*c83a76b0SSuyog Pawar * Flow:
2459*c83a76b0SSuyog Pawar * for each category (SMP,AMP,2Nx2N based on part mask)
2460*c83a76b0SSuyog Pawar * {
2461*c83a76b0SSuyog Pawar * for each part_type
2462*c83a76b0SSuyog Pawar * {
2463*c83a76b0SSuyog Pawar * for each part
2464*c83a76b0SSuyog Pawar * pick best candidate from each list
2465*c83a76b0SSuyog Pawar * combine uni part type
2466*c83a76b0SSuyog Pawar * update best results for part type
2467*c83a76b0SSuyog Pawar * }
2468*c83a76b0SSuyog Pawar * pick the best part type for given category (for SMP & AMP)
2469*c83a76b0SSuyog Pawar * }
2470*c83a76b0SSuyog Pawar * ||
2471*c83a76b0SSuyog Pawar * ||
2472*c83a76b0SSuyog Pawar * \/
2473*c83a76b0SSuyog Pawar * Bi-Pred evaluation:
*      for up to 4 best part types
2475*c83a76b0SSuyog Pawar * {
2476*c83a76b0SSuyog Pawar * for each part
2477*c83a76b0SSuyog Pawar * {
2478*c83a76b0SSuyog Pawar * compute fixed size had for all uni and remember coeffs
2479*c83a76b0SSuyog Pawar * compute bisatd
*              compare uni vs bi and give up to two results
2481*c83a76b0SSuyog Pawar * also gives the pt level pred buffer
2482*c83a76b0SSuyog Pawar * }
2483*c83a76b0SSuyog Pawar * }
2484*c83a76b0SSuyog Pawar * ||
2485*c83a76b0SSuyog Pawar * ||
2486*c83a76b0SSuyog Pawar * \/
2487*c83a76b0SSuyog Pawar * select X candidates for tu recursion as per the Note below
2488*c83a76b0SSuyog Pawar * tu_rec_on_part_type (reuse transform coeffs)
2489*c83a76b0SSuyog Pawar * ||
2490*c83a76b0SSuyog Pawar * ||
2491*c83a76b0SSuyog Pawar * \/
2492*c83a76b0SSuyog Pawar * insert intra nodes at appropriate result id
2493*c83a76b0SSuyog Pawar * ||
2494*c83a76b0SSuyog Pawar * ||
2495*c83a76b0SSuyog Pawar * \/
*      populate y best results for rdo based on preset
2497*c83a76b0SSuyog Pawar *
2498*c83a76b0SSuyog Pawar * Note :
2499*c83a76b0SSuyog Pawar * number of TU rec for P pics : 2 2nx2n + 1 smp + 1 amp for ms or 9 for hq
2500*c83a76b0SSuyog Pawar * number of TU rec for B pics : 1 2nx2n + 1 smp + 1 amp for ms or 2 uni 2nx2n + 1 smp + 1 amp for ms or 9 for hq
2501*c83a76b0SSuyog Pawar * --------------------------------------------------------------------------------
2502*c83a76b0SSuyog Pawar *
2503*c83a76b0SSuyog Pawar * @return None
2504*c83a76b0SSuyog Pawar ********************************************************************************
2505*c83a76b0SSuyog Pawar */
hme_decide_part_types(inter_cu_results_t * ps_cu_results,inter_pu_results_t * ps_pu_results,inter_ctb_prms_t * ps_inter_ctb_prms,me_frm_ctxt_t * ps_ctxt,ihevce_cmn_opt_func_t * ps_cmn_utils_optimised_function_list,ihevce_me_optimised_function_list_t * ps_me_optimised_function_list)2506*c83a76b0SSuyog Pawar void hme_decide_part_types(
2507*c83a76b0SSuyog Pawar inter_cu_results_t *ps_cu_results,
2508*c83a76b0SSuyog Pawar inter_pu_results_t *ps_pu_results,
2509*c83a76b0SSuyog Pawar inter_ctb_prms_t *ps_inter_ctb_prms,
2510*c83a76b0SSuyog Pawar me_frm_ctxt_t *ps_ctxt,
2511*c83a76b0SSuyog Pawar ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
2512*c83a76b0SSuyog Pawar ihevce_me_optimised_function_list_t *ps_me_optimised_function_list
2513*c83a76b0SSuyog Pawar
2514*c83a76b0SSuyog Pawar )
2515*c83a76b0SSuyog Pawar {
2516*c83a76b0SSuyog Pawar S32 i, j;
2517*c83a76b0SSuyog Pawar S32 i4_part_mask;
2518*c83a76b0SSuyog Pawar ULWORD64 au8_pred_sigmaXSquare[NUM_BEST_ME_OUTPUTS][NUM_INTER_PU_PARTS];
2519*c83a76b0SSuyog Pawar ULWORD64 au8_pred_sigmaX[NUM_BEST_ME_OUTPUTS][NUM_INTER_PU_PARTS];
2520*c83a76b0SSuyog Pawar S32 i4_noise_term;
2521*c83a76b0SSuyog Pawar WORD32 e_part_id;
2522*c83a76b0SSuyog Pawar
2523*c83a76b0SSuyog Pawar PF_SAD_FXN_TU_REC apf_err_compute[4];
2524*c83a76b0SSuyog Pawar
2525*c83a76b0SSuyog Pawar part_type_results_t as_part_type_results[NUM_BEST_ME_OUTPUTS];
2526*c83a76b0SSuyog Pawar part_type_results_t *ps_part_type_results;
2527*c83a76b0SSuyog Pawar
2528*c83a76b0SSuyog Pawar S32 num_best_cand = 0;
2529*c83a76b0SSuyog Pawar const S32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
2530*c83a76b0SSuyog Pawar
2531*c83a76b0SSuyog Pawar i4_part_mask = ps_cu_results->i4_part_mask;
2532*c83a76b0SSuyog Pawar
2533*c83a76b0SSuyog Pawar num_best_cand = hme_tu_recur_cand_harvester(
2534*c83a76b0SSuyog Pawar as_part_type_results, ps_pu_results, ps_inter_ctb_prms, i4_part_mask);
2535*c83a76b0SSuyog Pawar
2536*c83a76b0SSuyog Pawar /* Partition ID for the current PU */
2537*c83a76b0SSuyog Pawar e_part_id = (UWORD8)ge_part_type_to_part_id[PRT_2Nx2N][0];
2538*c83a76b0SSuyog Pawar
2539*c83a76b0SSuyog Pawar ps_part_type_results = as_part_type_results;
2540*c83a76b0SSuyog Pawar for(i = 0; i < num_best_cand; i++)
2541*c83a76b0SSuyog Pawar {
2542*c83a76b0SSuyog Pawar hme_compute_pred_and_evaluate_bi(
2543*c83a76b0SSuyog Pawar ps_cu_results,
2544*c83a76b0SSuyog Pawar ps_pu_results,
2545*c83a76b0SSuyog Pawar ps_inter_ctb_prms,
2546*c83a76b0SSuyog Pawar &(ps_part_type_results[i]),
2547*c83a76b0SSuyog Pawar au8_pred_sigmaXSquare[i],
2548*c83a76b0SSuyog Pawar au8_pred_sigmaX[i],
2549*c83a76b0SSuyog Pawar ps_cmn_utils_optimised_function_list,
2550*c83a76b0SSuyog Pawar ps_me_optimised_function_list
2551*c83a76b0SSuyog Pawar
2552*c83a76b0SSuyog Pawar );
2553*c83a76b0SSuyog Pawar }
2554*c83a76b0SSuyog Pawar /* Perform TU_REC on the best candidates selected */
2555*c83a76b0SSuyog Pawar {
2556*c83a76b0SSuyog Pawar WORD32 i4_sad_grid;
2557*c83a76b0SSuyog Pawar WORD32 ai4_tu_split_flag[4];
2558*c83a76b0SSuyog Pawar WORD32 ai4_tu_early_cbf[4];
2559*c83a76b0SSuyog Pawar
2560*c83a76b0SSuyog Pawar WORD32 best_cost[NUM_BEST_ME_OUTPUTS];
2561*c83a76b0SSuyog Pawar WORD32 ai4_final_idx[NUM_BEST_ME_OUTPUTS];
2562*c83a76b0SSuyog Pawar WORD16 i2_wght;
2563*c83a76b0SSuyog Pawar WORD32 i4_satd;
2564*c83a76b0SSuyog Pawar
2565*c83a76b0SSuyog Pawar err_prms_t s_err_prms;
2566*c83a76b0SSuyog Pawar err_prms_t *ps_err_prms = &s_err_prms;
2567*c83a76b0SSuyog Pawar
2568*c83a76b0SSuyog Pawar /* Default cost and final idx initialization */
2569*c83a76b0SSuyog Pawar for(i = 0; i < num_best_cand; i++)
2570*c83a76b0SSuyog Pawar {
2571*c83a76b0SSuyog Pawar best_cost[i] = MAX_32BIT_VAL;
2572*c83a76b0SSuyog Pawar ai4_final_idx[i] = -1;
2573*c83a76b0SSuyog Pawar }
2574*c83a76b0SSuyog Pawar
2575*c83a76b0SSuyog Pawar /* Assign the stad function to the err_compute function pointer :
2576*c83a76b0SSuyog Pawar Implemented only for 32x32 and 64x64, hence 16x16 and 8x8 are kept NULL */
2577*c83a76b0SSuyog Pawar apf_err_compute[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;
2578*c83a76b0SSuyog Pawar apf_err_compute[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
2579*c83a76b0SSuyog Pawar apf_err_compute[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
2580*c83a76b0SSuyog Pawar apf_err_compute[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;
2581*c83a76b0SSuyog Pawar
2582*c83a76b0SSuyog Pawar ps_err_prms->pi4_sad_grid = &i4_sad_grid;
2583*c83a76b0SSuyog Pawar ps_err_prms->pi4_tu_split_flags = ai4_tu_split_flag;
2584*c83a76b0SSuyog Pawar ps_err_prms->u1_max_tr_depth = ps_inter_ctb_prms->u1_max_tr_depth;
2585*c83a76b0SSuyog Pawar ps_err_prms->pi4_tu_early_cbf = ai4_tu_early_cbf;
2586*c83a76b0SSuyog Pawar ps_err_prms->i4_grid_mask = 1;
2587*c83a76b0SSuyog Pawar ps_err_prms->pu1_wkg_mem = ps_inter_ctb_prms->pu1_wkg_mem;
2588*c83a76b0SSuyog Pawar ps_err_prms->u1_max_tr_size = 32;
2589*c83a76b0SSuyog Pawar
2590*c83a76b0SSuyog Pawar if(ps_inter_ctb_prms->u1_is_cu_noisy)
2591*c83a76b0SSuyog Pawar {
2592*c83a76b0SSuyog Pawar ps_err_prms->u1_max_tr_size = MAX_TU_SIZE_WHEN_NOISY;
2593*c83a76b0SSuyog Pawar }
2594*c83a76b0SSuyog Pawar
2595*c83a76b0SSuyog Pawar /* TU_REC for the best candidates, as mentioned in NOTE above (except candidates that
2596*c83a76b0SSuyog Pawar are disabled by Part_mask */
2597*c83a76b0SSuyog Pawar for(i = 0; i < num_best_cand; i++)
2598*c83a76b0SSuyog Pawar {
2599*c83a76b0SSuyog Pawar part_type_results_t *ps_best_results;
2600*c83a76b0SSuyog Pawar pu_result_t *ps_pu_result;
2601*c83a76b0SSuyog Pawar WORD32 part_type_cost;
2602*c83a76b0SSuyog Pawar WORD32 cand_idx;
2603*c83a76b0SSuyog Pawar
2604*c83a76b0SSuyog Pawar WORD32 pred_dir;
2605*c83a76b0SSuyog Pawar S32 i4_inp_off;
2606*c83a76b0SSuyog Pawar
2607*c83a76b0SSuyog Pawar S32 lambda;
2608*c83a76b0SSuyog Pawar U08 lambda_qshift;
2609*c83a76b0SSuyog Pawar U08 *apu1_inp[MAX_NUM_INTER_PARTS];
2610*c83a76b0SSuyog Pawar S16 ai2_wt[MAX_NUM_INTER_PARTS];
2611*c83a76b0SSuyog Pawar S32 ai4_inv_wt[MAX_NUM_INTER_PARTS];
2612*c83a76b0SSuyog Pawar S32 ai4_inv_wt_shift_val[MAX_NUM_INTER_PARTS];
2613*c83a76b0SSuyog Pawar
2614*c83a76b0SSuyog Pawar WORD32 part_type = ps_part_type_results[i].u1_part_type;
2615*c83a76b0SSuyog Pawar WORD32 e_cu_size = ps_cu_results->u1_cu_size;
2616*c83a76b0SSuyog Pawar WORD32 e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
2617*c83a76b0SSuyog Pawar U08 u1_num_parts = gau1_num_parts_in_part_type[part_type];
2618*c83a76b0SSuyog Pawar U08 u1_inp_buf_idx = UCHAR_MAX;
2619*c83a76b0SSuyog Pawar
2620*c83a76b0SSuyog Pawar ps_err_prms->i4_part_mask = i4_part_mask;
2621*c83a76b0SSuyog Pawar ps_err_prms->i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
2622*c83a76b0SSuyog Pawar ps_err_prms->i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
2623*c83a76b0SSuyog Pawar ps_err_prms->pu1_ref = ps_part_type_results[i].pu1_pred;
2624*c83a76b0SSuyog Pawar ps_err_prms->i4_ref_stride = ps_part_type_results[i].i4_pred_stride;
2625*c83a76b0SSuyog Pawar
2626*c83a76b0SSuyog Pawar /* Current offset for the present part type */
2627*c83a76b0SSuyog Pawar i4_inp_off = ps_cu_results->i4_inp_offset;
2628*c83a76b0SSuyog Pawar
2629*c83a76b0SSuyog Pawar ps_best_results = &(ps_part_type_results[i]);
2630*c83a76b0SSuyog Pawar
2631*c83a76b0SSuyog Pawar part_type_cost = 0;
2632*c83a76b0SSuyog Pawar lambda = ps_inter_ctb_prms->i4_lamda;
2633*c83a76b0SSuyog Pawar lambda_qshift = ps_inter_ctb_prms->u1_lamda_qshift;
2634*c83a76b0SSuyog Pawar
2635*c83a76b0SSuyog Pawar for(j = 0; j < u1_num_parts; j++)
2636*c83a76b0SSuyog Pawar {
2637*c83a76b0SSuyog Pawar ps_pu_result = &(ps_best_results->as_pu_results[j]);
2638*c83a76b0SSuyog Pawar
2639*c83a76b0SSuyog Pawar pred_dir = ps_pu_result->pu.b2_pred_mode;
2640*c83a76b0SSuyog Pawar
2641*c83a76b0SSuyog Pawar if(PRED_L0 == pred_dir)
2642*c83a76b0SSuyog Pawar {
2643*c83a76b0SSuyog Pawar apu1_inp[j] =
2644*c83a76b0SSuyog Pawar ps_inter_ctb_prms->apu1_wt_inp[PRED_L0][ps_pu_result->pu.mv.i1_l0_ref_idx] +
2645*c83a76b0SSuyog Pawar i4_inp_off;
2646*c83a76b0SSuyog Pawar ai2_wt[j] =
2647*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pps_rec_list_l0[ps_pu_result->pu.mv.i1_l0_ref_idx]
2648*c83a76b0SSuyog Pawar ->s_weight_offset.i2_luma_weight;
2649*c83a76b0SSuyog Pawar ai4_inv_wt[j] =
2650*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pi4_inv_wt
2651*c83a76b0SSuyog Pawar [ps_inter_ctb_prms->pi1_past_list[ps_pu_result->pu.mv.i1_l0_ref_idx]];
2652*c83a76b0SSuyog Pawar ai4_inv_wt_shift_val[j] =
2653*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pi4_inv_wt_shift_val
2654*c83a76b0SSuyog Pawar [ps_inter_ctb_prms->pi1_past_list[ps_pu_result->pu.mv.i1_l0_ref_idx]];
2655*c83a76b0SSuyog Pawar }
2656*c83a76b0SSuyog Pawar else if(PRED_L1 == pred_dir)
2657*c83a76b0SSuyog Pawar {
2658*c83a76b0SSuyog Pawar apu1_inp[j] =
2659*c83a76b0SSuyog Pawar ps_inter_ctb_prms->apu1_wt_inp[PRED_L1][ps_pu_result->pu.mv.i1_l1_ref_idx] +
2660*c83a76b0SSuyog Pawar i4_inp_off;
2661*c83a76b0SSuyog Pawar ai2_wt[j] =
2662*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pps_rec_list_l1[ps_pu_result->pu.mv.i1_l1_ref_idx]
2663*c83a76b0SSuyog Pawar ->s_weight_offset.i2_luma_weight;
2664*c83a76b0SSuyog Pawar ai4_inv_wt[j] =
2665*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pi4_inv_wt
2666*c83a76b0SSuyog Pawar [ps_inter_ctb_prms->pi1_future_list[ps_pu_result->pu.mv.i1_l1_ref_idx]];
2667*c83a76b0SSuyog Pawar ai4_inv_wt_shift_val[j] =
2668*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pi4_inv_wt_shift_val
2669*c83a76b0SSuyog Pawar [ps_inter_ctb_prms->pi1_future_list[ps_pu_result->pu.mv.i1_l1_ref_idx]];
2670*c83a76b0SSuyog Pawar }
2671*c83a76b0SSuyog Pawar else if(PRED_BI == pred_dir)
2672*c83a76b0SSuyog Pawar {
2673*c83a76b0SSuyog Pawar apu1_inp[j] = ps_inter_ctb_prms->pu1_non_wt_inp + i4_inp_off;
2674*c83a76b0SSuyog Pawar ai2_wt[j] = 1 << ps_inter_ctb_prms->wpred_log_wdc;
2675*c83a76b0SSuyog Pawar ai4_inv_wt[j] = i4_default_src_wt;
2676*c83a76b0SSuyog Pawar ai4_inv_wt_shift_val[j] = 0;
2677*c83a76b0SSuyog Pawar }
2678*c83a76b0SSuyog Pawar else
2679*c83a76b0SSuyog Pawar {
2680*c83a76b0SSuyog Pawar ASSERT(0);
2681*c83a76b0SSuyog Pawar }
2682*c83a76b0SSuyog Pawar
2683*c83a76b0SSuyog Pawar part_type_cost += ps_pu_result->i4_mv_cost;
2684*c83a76b0SSuyog Pawar }
2685*c83a76b0SSuyog Pawar
2686*c83a76b0SSuyog Pawar if((u1_num_parts == 1) || (ai2_wt[0] == ai2_wt[1]))
2687*c83a76b0SSuyog Pawar {
2688*c83a76b0SSuyog Pawar ps_err_prms->pu1_inp = apu1_inp[0];
2689*c83a76b0SSuyog Pawar ps_err_prms->i4_inp_stride = ps_inter_ctb_prms->i4_inp_stride;
2690*c83a76b0SSuyog Pawar i2_wght = ai2_wt[0];
2691*c83a76b0SSuyog Pawar }
2692*c83a76b0SSuyog Pawar else
2693*c83a76b0SSuyog Pawar {
2694*c83a76b0SSuyog Pawar if(1 != ihevce_get_free_pred_buf_indices(
2695*c83a76b0SSuyog Pawar &u1_inp_buf_idx,
2696*c83a76b0SSuyog Pawar &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator,
2697*c83a76b0SSuyog Pawar 1))
2698*c83a76b0SSuyog Pawar {
2699*c83a76b0SSuyog Pawar ASSERT(0);
2700*c83a76b0SSuyog Pawar }
2701*c83a76b0SSuyog Pawar else
2702*c83a76b0SSuyog Pawar {
2703*c83a76b0SSuyog Pawar U08 *pu1_dst =
2704*c83a76b0SSuyog Pawar ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[u1_inp_buf_idx];
2705*c83a76b0SSuyog Pawar U08 *pu1_src = apu1_inp[0];
2706*c83a76b0SSuyog Pawar U08 u1_pu1_wd = (ps_part_type_results[i].as_pu_results[0].pu.b4_wd + 1) << 2;
2707*c83a76b0SSuyog Pawar U08 u1_pu1_ht = (ps_part_type_results[i].as_pu_results[0].pu.b4_ht + 1) << 2;
2708*c83a76b0SSuyog Pawar U08 u1_pu2_wd = (ps_part_type_results[i].as_pu_results[1].pu.b4_wd + 1) << 2;
2709*c83a76b0SSuyog Pawar U08 u1_pu2_ht = (ps_part_type_results[i].as_pu_results[1].pu.b4_ht + 1) << 2;
2710*c83a76b0SSuyog Pawar
2711*c83a76b0SSuyog Pawar ps_cmn_utils_optimised_function_list->pf_copy_2d(
2712*c83a76b0SSuyog Pawar pu1_dst,
2713*c83a76b0SSuyog Pawar MAX_CU_SIZE,
2714*c83a76b0SSuyog Pawar pu1_src,
2715*c83a76b0SSuyog Pawar ps_inter_ctb_prms->i4_inp_stride,
2716*c83a76b0SSuyog Pawar u1_pu1_wd,
2717*c83a76b0SSuyog Pawar u1_pu1_ht);
2718*c83a76b0SSuyog Pawar
2719*c83a76b0SSuyog Pawar pu1_dst +=
2720*c83a76b0SSuyog Pawar (gai1_is_part_vertical[ge_part_type_to_part_id[part_type][0]]
2721*c83a76b0SSuyog Pawar ? u1_pu1_ht * MAX_CU_SIZE
2722*c83a76b0SSuyog Pawar : u1_pu1_wd);
2723*c83a76b0SSuyog Pawar pu1_src =
2724*c83a76b0SSuyog Pawar apu1_inp[1] + (gai1_is_part_vertical[ge_part_type_to_part_id[part_type][0]]
2725*c83a76b0SSuyog Pawar ? u1_pu1_ht * ps_inter_ctb_prms->i4_inp_stride
2726*c83a76b0SSuyog Pawar : u1_pu1_wd);
2727*c83a76b0SSuyog Pawar
2728*c83a76b0SSuyog Pawar ps_cmn_utils_optimised_function_list->pf_copy_2d(
2729*c83a76b0SSuyog Pawar pu1_dst,
2730*c83a76b0SSuyog Pawar MAX_CU_SIZE,
2731*c83a76b0SSuyog Pawar pu1_src,
2732*c83a76b0SSuyog Pawar ps_inter_ctb_prms->i4_inp_stride,
2733*c83a76b0SSuyog Pawar u1_pu2_wd,
2734*c83a76b0SSuyog Pawar u1_pu2_ht);
2735*c83a76b0SSuyog Pawar
2736*c83a76b0SSuyog Pawar ps_err_prms->pu1_inp =
2737*c83a76b0SSuyog Pawar ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[u1_inp_buf_idx];
2738*c83a76b0SSuyog Pawar ps_err_prms->i4_inp_stride = MAX_CU_SIZE;
2739*c83a76b0SSuyog Pawar i2_wght = ai2_wt[1];
2740*c83a76b0SSuyog Pawar }
2741*c83a76b0SSuyog Pawar }
2742*c83a76b0SSuyog Pawar
2743*c83a76b0SSuyog Pawar #if !DISABLE_TU_RECURSION
2744*c83a76b0SSuyog Pawar i4_satd = apf_err_compute[e_cu_size](
2745*c83a76b0SSuyog Pawar ps_err_prms,
2746*c83a76b0SSuyog Pawar lambda,
2747*c83a76b0SSuyog Pawar lambda_qshift,
2748*c83a76b0SSuyog Pawar ps_inter_ctb_prms->i4_qstep_ls8,
2749*c83a76b0SSuyog Pawar ps_ctxt->ps_func_selector);
2750*c83a76b0SSuyog Pawar #else
2751*c83a76b0SSuyog Pawar ps_err_prms->pi4_sad_grid = &i4_satd;
2752*c83a76b0SSuyog Pawar
2753*c83a76b0SSuyog Pawar pf_err_compute(ps_err_prms);
2754*c83a76b0SSuyog Pawar
2755*c83a76b0SSuyog Pawar if((part_type == PRT_2Nx2N) || (e_cu_size != CU_64x64))
2756*c83a76b0SSuyog Pawar {
2757*c83a76b0SSuyog Pawar ai4_tu_split_flag[0] = 1;
2758*c83a76b0SSuyog Pawar ai4_tu_split_flag[1] = 1;
2759*c83a76b0SSuyog Pawar ai4_tu_split_flag[2] = 1;
2760*c83a76b0SSuyog Pawar ai4_tu_split_flag[3] = 1;
2761*c83a76b0SSuyog Pawar
2762*c83a76b0SSuyog Pawar ps_err_prms->i4_tu_split_cost = 0;
2763*c83a76b0SSuyog Pawar }
2764*c83a76b0SSuyog Pawar else
2765*c83a76b0SSuyog Pawar {
2766*c83a76b0SSuyog Pawar ai4_tu_split_flag[0] = 1;
2767*c83a76b0SSuyog Pawar ai4_tu_split_flag[1] = 1;
2768*c83a76b0SSuyog Pawar ai4_tu_split_flag[2] = 1;
2769*c83a76b0SSuyog Pawar ai4_tu_split_flag[3] = 1;
2770*c83a76b0SSuyog Pawar
2771*c83a76b0SSuyog Pawar ps_err_prms->i4_tu_split_cost = 0;
2772*c83a76b0SSuyog Pawar }
2773*c83a76b0SSuyog Pawar #endif
2774*c83a76b0SSuyog Pawar
2775*c83a76b0SSuyog Pawar #if UNI_SATD_SCALE
2776*c83a76b0SSuyog Pawar i4_satd = (i4_satd * i2_wght) >> ps_inter_ctb_prms->wpred_log_wdc;
2777*c83a76b0SSuyog Pawar #endif
2778*c83a76b0SSuyog Pawar
2779*c83a76b0SSuyog Pawar if(ps_inter_ctb_prms->u1_is_cu_noisy && ps_inter_ctb_prms->i4_alpha_stim_multiplier)
2780*c83a76b0SSuyog Pawar {
2781*c83a76b0SSuyog Pawar ULWORD64 u8_temp_var, u8_temp_var1, u8_pred_sigmaSquaredX;
2782*c83a76b0SSuyog Pawar ULWORD64 u8_src_variance, u8_pred_variance;
2783*c83a76b0SSuyog Pawar unsigned long u4_shift_val;
2784*c83a76b0SSuyog Pawar S32 i4_bits_req;
2785*c83a76b0SSuyog Pawar S32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT;
2786*c83a76b0SSuyog Pawar
2787*c83a76b0SSuyog Pawar if(1 == u1_num_parts)
2788*c83a76b0SSuyog Pawar {
2789*c83a76b0SSuyog Pawar u8_pred_sigmaSquaredX = au8_pred_sigmaX[i][0] * au8_pred_sigmaX[i][0];
2790*c83a76b0SSuyog Pawar u8_pred_variance = au8_pred_sigmaXSquare[i][0] - u8_pred_sigmaSquaredX;
2791*c83a76b0SSuyog Pawar
2792*c83a76b0SSuyog Pawar if(e_cu_size == CU_8x8)
2793*c83a76b0SSuyog Pawar {
2794*c83a76b0SSuyog Pawar PART_ID_T e_part_id = (PART_ID_T)(
2795*c83a76b0SSuyog Pawar (PART_ID_NxN_TL) + (ps_cu_results->u1_x_off & 1) +
2796*c83a76b0SSuyog Pawar ((ps_cu_results->u1_y_off & 1) << 1));
2797*c83a76b0SSuyog Pawar
2798*c83a76b0SSuyog Pawar u4_shift_val = ihevce_calc_stim_injected_variance(
2799*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pu8_part_src_sigmaX,
2800*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
2801*c83a76b0SSuyog Pawar &u8_src_variance,
2802*c83a76b0SSuyog Pawar ai4_inv_wt[0],
2803*c83a76b0SSuyog Pawar ai4_inv_wt_shift_val[0],
2804*c83a76b0SSuyog Pawar ps_inter_ctb_prms->wpred_log_wdc,
2805*c83a76b0SSuyog Pawar e_part_id);
2806*c83a76b0SSuyog Pawar }
2807*c83a76b0SSuyog Pawar else
2808*c83a76b0SSuyog Pawar {
2809*c83a76b0SSuyog Pawar u4_shift_val = ihevce_calc_stim_injected_variance(
2810*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pu8_part_src_sigmaX,
2811*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
2812*c83a76b0SSuyog Pawar &u8_src_variance,
2813*c83a76b0SSuyog Pawar ai4_inv_wt[0],
2814*c83a76b0SSuyog Pawar ai4_inv_wt_shift_val[0],
2815*c83a76b0SSuyog Pawar ps_inter_ctb_prms->wpred_log_wdc,
2816*c83a76b0SSuyog Pawar e_part_id);
2817*c83a76b0SSuyog Pawar }
2818*c83a76b0SSuyog Pawar
2819*c83a76b0SSuyog Pawar u8_pred_variance = u8_pred_variance >> u4_shift_val;
2820*c83a76b0SSuyog Pawar
2821*c83a76b0SSuyog Pawar GETRANGE64(i4_bits_req, u8_pred_variance);
2822*c83a76b0SSuyog Pawar
2823*c83a76b0SSuyog Pawar if(i4_bits_req > 27)
2824*c83a76b0SSuyog Pawar {
2825*c83a76b0SSuyog Pawar u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27);
2826*c83a76b0SSuyog Pawar u8_src_variance = u8_src_variance >> (i4_bits_req - 27);
2827*c83a76b0SSuyog Pawar }
2828*c83a76b0SSuyog Pawar
2829*c83a76b0SSuyog Pawar if(u8_src_variance == u8_pred_variance)
2830*c83a76b0SSuyog Pawar {
2831*c83a76b0SSuyog Pawar u8_temp_var = (1 << STIM_Q_FORMAT);
2832*c83a76b0SSuyog Pawar }
2833*c83a76b0SSuyog Pawar else
2834*c83a76b0SSuyog Pawar {
2835*c83a76b0SSuyog Pawar u8_temp_var = (2 * u8_src_variance * u8_pred_variance);
2836*c83a76b0SSuyog Pawar u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT));
2837*c83a76b0SSuyog Pawar u8_temp_var1 = (u8_src_variance * u8_src_variance) +
2838*c83a76b0SSuyog Pawar (u8_pred_variance * u8_pred_variance);
2839*c83a76b0SSuyog Pawar u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
2840*c83a76b0SSuyog Pawar u8_temp_var = (u8_temp_var / u8_temp_var1);
2841*c83a76b0SSuyog Pawar }
2842*c83a76b0SSuyog Pawar
2843*c83a76b0SSuyog Pawar i4_noise_term = (UWORD32)u8_temp_var;
2844*c83a76b0SSuyog Pawar
2845*c83a76b0SSuyog Pawar ASSERT(i4_noise_term >= 0);
2846*c83a76b0SSuyog Pawar
2847*c83a76b0SSuyog Pawar i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier;
2848*c83a76b0SSuyog Pawar
2849*c83a76b0SSuyog Pawar u8_temp_var = i4_satd;
2850*c83a76b0SSuyog Pawar u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term));
2851*c83a76b0SSuyog Pawar u8_temp_var += (1 << ((i4_q_level)-1));
2852*c83a76b0SSuyog Pawar i4_satd = (UWORD32)(u8_temp_var >> (i4_q_level));
2853*c83a76b0SSuyog Pawar }
2854*c83a76b0SSuyog Pawar else /*if(e_cu_size <= CU_16x16)*/
2855*c83a76b0SSuyog Pawar {
2856*c83a76b0SSuyog Pawar unsigned long temp_shift_val;
2857*c83a76b0SSuyog Pawar PART_ID_T ae_part_id[MAX_NUM_INTER_PARTS] = {
2858*c83a76b0SSuyog Pawar ge_part_type_to_part_id[part_type][0], ge_part_type_to_part_id[part_type][1]
2859*c83a76b0SSuyog Pawar };
2860*c83a76b0SSuyog Pawar
2861*c83a76b0SSuyog Pawar u4_shift_val = ihevce_calc_variance_for_diff_weights(
2862*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pu8_part_src_sigmaX,
2863*c83a76b0SSuyog Pawar ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
2864*c83a76b0SSuyog Pawar &u8_src_variance,
2865*c83a76b0SSuyog Pawar ai4_inv_wt,
2866*c83a76b0SSuyog Pawar ai4_inv_wt_shift_val,
2867*c83a76b0SSuyog Pawar ps_best_results->as_pu_results,
2868*c83a76b0SSuyog Pawar ps_inter_ctb_prms->wpred_log_wdc,
2869*c83a76b0SSuyog Pawar ae_part_id,
2870*c83a76b0SSuyog Pawar gau1_blk_size_to_wd[e_blk_size],
2871*c83a76b0SSuyog Pawar u1_num_parts,
2872*c83a76b0SSuyog Pawar 1);
2873*c83a76b0SSuyog Pawar
2874*c83a76b0SSuyog Pawar temp_shift_val = u4_shift_val;
2875*c83a76b0SSuyog Pawar
2876*c83a76b0SSuyog Pawar u4_shift_val = ihevce_calc_variance_for_diff_weights(
2877*c83a76b0SSuyog Pawar au8_pred_sigmaX[i],
2878*c83a76b0SSuyog Pawar au8_pred_sigmaXSquare[i],
2879*c83a76b0SSuyog Pawar &u8_pred_variance,
2880*c83a76b0SSuyog Pawar ai4_inv_wt,
2881*c83a76b0SSuyog Pawar ai4_inv_wt_shift_val,
2882*c83a76b0SSuyog Pawar ps_best_results->as_pu_results,
2883*c83a76b0SSuyog Pawar 0,
2884*c83a76b0SSuyog Pawar ae_part_id,
2885*c83a76b0SSuyog Pawar gau1_blk_size_to_wd[e_blk_size],
2886*c83a76b0SSuyog Pawar u1_num_parts,
2887*c83a76b0SSuyog Pawar 0);
2888*c83a76b0SSuyog Pawar
2889*c83a76b0SSuyog Pawar u8_pred_variance = u8_pred_variance >> temp_shift_val;
2890*c83a76b0SSuyog Pawar
2891*c83a76b0SSuyog Pawar GETRANGE64(i4_bits_req, u8_pred_variance);
2892*c83a76b0SSuyog Pawar
2893*c83a76b0SSuyog Pawar if(i4_bits_req > 27)
2894*c83a76b0SSuyog Pawar {
2895*c83a76b0SSuyog Pawar u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27);
2896*c83a76b0SSuyog Pawar u8_src_variance = u8_src_variance >> (i4_bits_req - 27);
2897*c83a76b0SSuyog Pawar }
2898*c83a76b0SSuyog Pawar
2899*c83a76b0SSuyog Pawar if(u8_src_variance == u8_pred_variance)
2900*c83a76b0SSuyog Pawar {
2901*c83a76b0SSuyog Pawar u8_temp_var = (1 << STIM_Q_FORMAT);
2902*c83a76b0SSuyog Pawar }
2903*c83a76b0SSuyog Pawar else
2904*c83a76b0SSuyog Pawar {
2905*c83a76b0SSuyog Pawar u8_temp_var = (2 * u8_src_variance * u8_pred_variance);
2906*c83a76b0SSuyog Pawar u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT));
2907*c83a76b0SSuyog Pawar u8_temp_var1 = (u8_src_variance * u8_src_variance) +
2908*c83a76b0SSuyog Pawar (u8_pred_variance * u8_pred_variance);
2909*c83a76b0SSuyog Pawar u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
2910*c83a76b0SSuyog Pawar u8_temp_var = (u8_temp_var / u8_temp_var1);
2911*c83a76b0SSuyog Pawar }
2912*c83a76b0SSuyog Pawar
2913*c83a76b0SSuyog Pawar i4_noise_term = (UWORD32)u8_temp_var;
2914*c83a76b0SSuyog Pawar
2915*c83a76b0SSuyog Pawar ASSERT(i4_noise_term >= 0);
2916*c83a76b0SSuyog Pawar ASSERT(i4_noise_term <= (1 << (STIM_Q_FORMAT + ALPHA_Q_FORMAT)));
2917*c83a76b0SSuyog Pawar
2918*c83a76b0SSuyog Pawar i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier;
2919*c83a76b0SSuyog Pawar
2920*c83a76b0SSuyog Pawar u8_temp_var = i4_satd;
2921*c83a76b0SSuyog Pawar u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term));
2922*c83a76b0SSuyog Pawar u8_temp_var += (1 << ((i4_q_level)-1));
2923*c83a76b0SSuyog Pawar i4_satd = (UWORD32)(u8_temp_var >> (i4_q_level));
2924*c83a76b0SSuyog Pawar
2925*c83a76b0SSuyog Pawar ASSERT(i4_satd >= 0);
2926*c83a76b0SSuyog Pawar }
2927*c83a76b0SSuyog Pawar }
2928*c83a76b0SSuyog Pawar
2929*c83a76b0SSuyog Pawar if(u1_inp_buf_idx != UCHAR_MAX)
2930*c83a76b0SSuyog Pawar {
2931*c83a76b0SSuyog Pawar ihevce_set_pred_buf_as_free(
2932*c83a76b0SSuyog Pawar &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator,
2933*c83a76b0SSuyog Pawar u1_inp_buf_idx);
2934*c83a76b0SSuyog Pawar }
2935*c83a76b0SSuyog Pawar
2936*c83a76b0SSuyog Pawar part_type_cost += i4_satd;
2937*c83a76b0SSuyog Pawar
2938*c83a76b0SSuyog Pawar /*Update the best results with the new results */
2939*c83a76b0SSuyog Pawar ps_best_results->i4_tot_cost = part_type_cost;
2940*c83a76b0SSuyog Pawar
2941*c83a76b0SSuyog Pawar ps_best_results->i4_tu_split_cost = ps_err_prms->i4_tu_split_cost;
2942*c83a76b0SSuyog Pawar
2943*c83a76b0SSuyog Pawar ASSERT(ai4_tu_split_flag[0] >= 0);
2944*c83a76b0SSuyog Pawar if(e_cu_size == CU_64x64)
2945*c83a76b0SSuyog Pawar {
2946*c83a76b0SSuyog Pawar ps_best_results->ai4_tu_split_flag[0] = ai4_tu_split_flag[0];
2947*c83a76b0SSuyog Pawar ps_best_results->ai4_tu_split_flag[1] = ai4_tu_split_flag[1];
2948*c83a76b0SSuyog Pawar ps_best_results->ai4_tu_split_flag[2] = ai4_tu_split_flag[2];
2949*c83a76b0SSuyog Pawar ps_best_results->ai4_tu_split_flag[3] = ai4_tu_split_flag[3];
2950*c83a76b0SSuyog Pawar
2951*c83a76b0SSuyog Pawar /* Update the TU early cbf flags into the best results structure */
2952*c83a76b0SSuyog Pawar ps_best_results->ai4_tu_early_cbf[0] = ai4_tu_early_cbf[0];
2953*c83a76b0SSuyog Pawar ps_best_results->ai4_tu_early_cbf[1] = ai4_tu_early_cbf[1];
2954*c83a76b0SSuyog Pawar ps_best_results->ai4_tu_early_cbf[2] = ai4_tu_early_cbf[2];
2955*c83a76b0SSuyog Pawar ps_best_results->ai4_tu_early_cbf[3] = ai4_tu_early_cbf[3];
2956*c83a76b0SSuyog Pawar }
2957*c83a76b0SSuyog Pawar else
2958*c83a76b0SSuyog Pawar {
2959*c83a76b0SSuyog Pawar ps_best_results->ai4_tu_split_flag[0] = ai4_tu_split_flag[0];
2960*c83a76b0SSuyog Pawar ps_best_results->ai4_tu_early_cbf[0] = ai4_tu_early_cbf[0];
2961*c83a76b0SSuyog Pawar }
2962*c83a76b0SSuyog Pawar
2963*c83a76b0SSuyog Pawar if(part_type_cost < best_cost[num_best_cand - 1])
2964*c83a76b0SSuyog Pawar {
2965*c83a76b0SSuyog Pawar /* Push and sort current part type if it is one of the num_best_cand */
2966*c83a76b0SSuyog Pawar for(cand_idx = 0; cand_idx < i; cand_idx++)
2967*c83a76b0SSuyog Pawar {
2968*c83a76b0SSuyog Pawar if(part_type_cost <= best_cost[cand_idx])
2969*c83a76b0SSuyog Pawar {
2970*c83a76b0SSuyog Pawar memmove(
2971*c83a76b0SSuyog Pawar &ai4_final_idx[cand_idx + 1],
2972*c83a76b0SSuyog Pawar &ai4_final_idx[cand_idx],
2973*c83a76b0SSuyog Pawar sizeof(WORD32) * (i - cand_idx));
2974*c83a76b0SSuyog Pawar memmove(
2975*c83a76b0SSuyog Pawar &best_cost[cand_idx + 1],
2976*c83a76b0SSuyog Pawar &best_cost[cand_idx],
2977*c83a76b0SSuyog Pawar sizeof(WORD32) * (i - cand_idx));
2978*c83a76b0SSuyog Pawar break;
2979*c83a76b0SSuyog Pawar }
2980*c83a76b0SSuyog Pawar }
2981*c83a76b0SSuyog Pawar
2982*c83a76b0SSuyog Pawar ai4_final_idx[cand_idx] = i;
2983*c83a76b0SSuyog Pawar best_cost[cand_idx] = part_type_cost;
2984*c83a76b0SSuyog Pawar }
2985*c83a76b0SSuyog Pawar }
2986*c83a76b0SSuyog Pawar
2987*c83a76b0SSuyog Pawar ps_cu_results->u1_num_best_results = num_best_cand;
2988*c83a76b0SSuyog Pawar
2989*c83a76b0SSuyog Pawar for(i = 0; i < num_best_cand; i++)
2990*c83a76b0SSuyog Pawar {
2991*c83a76b0SSuyog Pawar ASSERT(ai4_final_idx[i] < num_best_cand);
2992*c83a76b0SSuyog Pawar
2993*c83a76b0SSuyog Pawar if(ai4_final_idx[i] != -1)
2994*c83a76b0SSuyog Pawar {
2995*c83a76b0SSuyog Pawar memcpy(
2996*c83a76b0SSuyog Pawar &(ps_cu_results->ps_best_results[i]),
2997*c83a76b0SSuyog Pawar &(ps_part_type_results[ai4_final_idx[i]]),
2998*c83a76b0SSuyog Pawar sizeof(part_type_results_t));
2999*c83a76b0SSuyog Pawar }
3000*c83a76b0SSuyog Pawar }
3001*c83a76b0SSuyog Pawar }
3002*c83a76b0SSuyog Pawar
3003*c83a76b0SSuyog Pawar for(i = 0; i < (MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS)-2; i++)
3004*c83a76b0SSuyog Pawar {
3005*c83a76b0SSuyog Pawar ihevce_set_pred_buf_as_free(
3006*c83a76b0SSuyog Pawar &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator, i);
3007*c83a76b0SSuyog Pawar }
3008*c83a76b0SSuyog Pawar }
3009*c83a76b0SSuyog Pawar
/**
**************************************************************************************************
*  @fn     void hme_populate_pus(me_ctxt_t *ps_thrd_ctxt, me_frm_ctxt_t *ps_ctxt,
*              hme_subpel_prms_t *ps_subpel_prms, search_results_t *ps_search_results,
*              inter_cu_results_t *ps_cu_results, inter_pu_results_t *ps_pu_results,
*              pu_result_t *ps_pu_result, inter_ctb_prms_t *ps_inter_ctb_prms,
*              wgt_pred_ctxt_t *ps_wt_prms, layer_ctxt_t *ps_curr_layer,
*              U08 *pu1_pred_dir_searched, WORD32 i4_num_active_ref)
*
*  @brief  Populates the inter_cu_results and inter_pu_results structures with the
*          results available after the subpel refinement
*
*          This is called post subpel refinement for 16x16s, 8x8s and
*          for post merge evaluation for 32x32, 64x64 CUs
*
*  @param[in,out]  ps_search_results     : Search results data structure
*                  ps_cu_results         : cu_results data structure
*                  ps_pu_results         : Per-partition PU list pointers and result counts
*                  ps_pu_result          : Pointer to the memory for storing PU's
*                  ps_inter_ctb_prms     : Receives the pred buffer setup, noise/lambda
*                                          parameters and input buffer pointers
*                  pu1_pred_dir_searched : Prediction direction for each of the
*                                          i4_num_active_ref entries that are walked
*
****************************************************************************************************
*/
void hme_populate_pus(
    me_ctxt_t *ps_thrd_ctxt,
    me_frm_ctxt_t *ps_ctxt,
    hme_subpel_prms_t *ps_subpel_prms,
    search_results_t *ps_search_results,
    inter_cu_results_t *ps_cu_results,
    inter_pu_results_t *ps_pu_results,
    pu_result_t *ps_pu_result,
    inter_ctb_prms_t *ps_inter_ctb_prms,
    wgt_pred_ctxt_t *ps_wt_prms,
    layer_ctxt_t *ps_curr_layer,
    U08 *pu1_pred_dir_searched,
    WORD32 i4_num_active_ref)
{
    WORD32 buf_idx;
    WORD32 part_type;
    WORD32 part_idx;
    WORD32 dir_idx;
    WORD32 res_idx;
    WORD32 i4_non_wt_inp_idx;
    const WORD32 i4_enabled_parts = ps_search_results->i4_part_mask;
    const UWORD8 u1_cu_size_id = ps_search_results->e_cu_size;

    /* Prediction buffer manager setup: the buffers are laid out back to back */
    /* at the start of the working memory owned by ps_ctxt->s_buf_mgr         */
    hme_get_wkg_mem(&ps_ctxt->s_buf_mgr, MAX_WKG_MEM_SIZE_PER_THREAD);

    /* Begin with every usage bit set and clear one bit per buffer set up below */
    ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator = UINT_MAX;

    buf_idx = 0;
    while(buf_idx < MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS - 2)
    {
        ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[buf_idx] =
            ps_ctxt->s_buf_mgr.pu1_wkg_mem + buf_idx * INTERP_OUT_BUF_SIZE;
        ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator &= ~(1 << buf_idx);
        buf_idx++;
    }

    /* Whatever follows the last prediction buffer is handed over as scratch memory */
    ps_inter_ctb_prms->pu1_wkg_mem =
        ps_ctxt->s_buf_mgr.pu1_wkg_mem + buf_idx * INTERP_OUT_BUF_SIZE;

    /* Noise and lambda parameters consumed through ps_inter_ctb_prms */
    ps_inter_ctb_prms->i4_lamda = ps_search_results->as_pred_ctxt[0].lambda;
    ps_inter_ctb_prms->u1_is_cu_noisy = ps_subpel_prms->u1_is_cu_noisy;
    ps_inter_ctb_prms->i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;

    /* CU level parameters are copied straight from the search results */
    ps_cu_results->u1_x_off = ps_search_results->u1_x_off;
    ps_cu_results->u1_y_off = ps_search_results->u1_y_off;
    ps_cu_results->u1_cu_size = ps_search_results->e_cu_size;
    ps_cu_results->i4_part_mask = ps_search_results->i4_part_mask;
    ps_cu_results->u1_num_best_results = ps_search_results->u1_num_best_results;

    /* The unweighted input pointer is read from the slot just past all active references */
    i4_non_wt_inp_idx =
        ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;

    /* Visit every partition of every partition type enabled in the part mask */
    for(part_type = 0; part_type < MAX_PART_TYPES; part_type++)
    {
        if(!(i4_enabled_parts & gai4_part_type_to_part_mask[part_type]))
        {
            continue;
        }

        for(part_idx = 0; part_idx < gau1_num_parts_in_part_type[part_type]; part_idx++)
        {
            /* Partition ID and geometry of the current PU */
            const UWORD8 u1_part_id = (UWORD8)ge_part_type_to_part_id[part_type][part_idx];
            part_attr_t *ps_attr = &gas_part_attr_in_cu[u1_part_id];

            /* Each partition owns one fixed-size slice of ps_pu_result per list; */
            /* the L1 slices start TOT_NUM_PARTS slices after the L0 ones          */
            pu_result_t *ps_l0_list =
                ps_pu_result + (u1_part_id * MAX_NUM_RESULTS_PER_PART_LIST);
            pu_result_t *ps_l1_list =
                ps_pu_result + ((u1_part_id + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST);

            WORD32 i4_num_l0 = 0;
            WORD32 i4_num_l1 = 0;

            ps_pu_results->aps_pu_results[0][u1_part_id] = ps_l0_list;
            ps_pu_results->aps_pu_results[1][u1_part_id] = ps_l1_list;

            for(dir_idx = 0; dir_idx < i4_num_active_ref; dir_idx++)
            {
                const U08 u1_dir = pu1_pred_dir_searched[dir_idx];

                for(res_idx = 0; res_idx < ps_search_results->u1_num_results_per_part;
                    res_idx++)
                {
                    search_node_t *ps_node =
                        &ps_search_results->aps_part_results[u1_dir][u1_part_id][res_idx];
                    pu_result_t *ps_pu;
                    WORD32 i4_lc_ref_idx;

                    /* Only subpel refined nodes are candidates; stop at the first other node */
                    if(!ps_node->u1_subpel_done)
                    {
                        break;
                    }

                    i4_lc_ref_idx = ps_node->i1_ref_idx;

                    ASSERT(i4_lc_ref_idx >= 0);

                    if(u1_dir)
                    {
                        /* Non-zero direction: append to the L1 list of this partition */
                        ps_pu = ps_l1_list + i4_num_l1;

                        ASSERT(
                            ps_ctxt->a_ref_idx_lc_to_l1[i4_lc_ref_idx] <
                            ps_inter_ctb_prms->u1_num_active_ref_l1);

                        ps_pu->pu.mv.i1_l1_ref_idx = ps_ctxt->a_ref_idx_lc_to_l1[i4_lc_ref_idx];
                        ps_pu->pu.mv.s_l1_mv = ps_node->s_mv;
                        ps_pu->pu.mv.i1_l0_ref_idx = -1;
                        ps_pu->pu.b2_pred_mode = PRED_L1;

                        /* Input buffer pointer for this reference, indexed by its L1 ref idx */
                        ps_inter_ctb_prms->apu1_wt_inp[1][ps_pu->pu.mv.i1_l1_ref_idx] =
                            ps_wt_prms->apu1_wt_inp[i4_lc_ref_idx];

                        i4_num_l1++;
                    }
                    else
                    {
                        /* Direction 0: append to the L0 list of this partition */
                        ps_pu = ps_l0_list + i4_num_l0;

                        ASSERT(
                            ps_ctxt->a_ref_idx_lc_to_l0[i4_lc_ref_idx] <
                            ps_inter_ctb_prms->u1_num_active_ref_l0);

                        ps_pu->pu.mv.i1_l0_ref_idx = ps_ctxt->a_ref_idx_lc_to_l0[i4_lc_ref_idx];
                        ps_pu->pu.mv.s_l0_mv = ps_node->s_mv;
                        ps_pu->pu.mv.i1_l1_ref_idx = -1;
                        ps_pu->pu.b2_pred_mode = PRED_L0;

                        /* Input buffer pointer for this reference, indexed by its L0 ref idx */
                        ps_inter_ctb_prms->apu1_wt_inp[0][ps_pu->pu.mv.i1_l0_ref_idx] =
                            ps_wt_prms->apu1_wt_inp[i4_lc_ref_idx];

                        i4_num_l0++;
                    }

                    ps_pu->i4_sdi = ps_node->i4_sdi;
                    ps_pu->i4_mv_cost = ps_node->i4_mv_cost;

#if UNI_SATD_SCALE
                    /* The stored SAD/SATD is scaled by the weight, so rescale it with */
                    /* rounding before adding the MV cost                              */
                    ps_pu->i4_tot_cost =
                        ((ps_node->i4_sad * ps_ctxt->s_wt_pred.a_wpred_wt[ps_node->i1_ref_idx] +
                          (1 << (ps_inter_ctb_prms->wpred_log_wdc - 1))) >>
                         ps_inter_ctb_prms->wpred_log_wdc) +
                        ps_node->i4_mv_cost;
#endif

                    /* Position and packed size (minus one) in units of 4 pixels */
                    ps_pu->pu.b4_pos_x =
                        (((ps_attr->u1_x_start << u1_cu_size_id) + ps_cu_results->u1_x_off) >> 2);
                    ps_pu->pu.b4_pos_y =
                        (((ps_attr->u1_y_start << u1_cu_size_id) + ps_cu_results->u1_y_off) >> 2);
                    ps_pu->pu.b4_wd = ((ps_attr->u1_x_count << u1_cu_size_id) >> 2) - 1;
                    ps_pu->pu.b4_ht = ((ps_attr->u1_y_count << u1_cu_size_id) >> 2) - 1;

                    ps_pu->pu.b1_intra_flag = 0;

                    /* Unweighted input; only written once a valid candidate has been seen */
                    ps_inter_ctb_prms->pu1_non_wt_inp =
                        ps_wt_prms->apu1_wt_inp[i4_non_wt_inp_idx];
                }
            }

            /* Publish how many candidates were stored in each list for this partition */
            ps_pu_results->u1_num_results_per_part_l0[u1_part_id] = i4_num_l0;
            ps_pu_results->u1_num_results_per_part_l1[u1_part_id] = i4_num_l1;
        }
    }
}
3211*c83a76b0SSuyog Pawar
/**
*********************************************************************************************************
*  @fn     void hme_populate_pus_8x8_cu(me_ctxt_t *ps_thrd_ctxt, me_frm_ctxt_t *ps_ctxt,
*              hme_subpel_prms_t *ps_subpel_prms, search_results_t *ps_search_results,
*              inter_cu_results_t *ps_cu_results, inter_pu_results_t *ps_pu_results,
*              pu_result_t *ps_pu_result, inter_ctb_prms_t *ps_inter_ctb_prms,
*              U08 *pu1_pred_dir_searched, WORD32 i4_num_active_ref, U08 u1_blk_8x8_mask)
*
*  @brief  Populates the inter_cu_results and inter_pu_results structures of the 8x8 CUs
*          from the NxN partition results available after the subpel refinement
*
*          One ps_cu_results / ps_pu_results entry is consumed per 8x8 block; only
*          the blocks whose bit is set in u1_blk_8x8_mask are populated
*
*  @param[in,out]  ps_search_results     : Search results data structure
*                  ps_cu_results         : cu_results data structure (one entry per 8x8 block)
*                  ps_pu_results         : Pointer for the PU's (one entry per 8x8 block)
*                  ps_pu_result          : Pointer to the memory for storing PU's
*                  pu1_pred_dir_searched : Prediction direction for each of the
*                                          i4_num_active_ref entries that are walked
*                  u1_blk_8x8_mask       : Bit i set => 8x8 block i is populated
*
*********************************************************************************************************
*/
void hme_populate_pus_8x8_cu(
    me_ctxt_t *ps_thrd_ctxt,
    me_frm_ctxt_t *ps_ctxt,
    hme_subpel_prms_t *ps_subpel_prms,
    search_results_t *ps_search_results,
    inter_cu_results_t *ps_cu_results,
    inter_pu_results_t *ps_pu_results,
    pu_result_t *ps_pu_result,
    inter_ctb_prms_t *ps_inter_ctb_prms,
    U08 *pu1_pred_dir_searched,
    WORD32 i4_num_active_ref,
    U08 u1_blk_8x8_mask)
{
    WORD32 blk_idx;
    WORD32 dir_idx;
    WORD32 res_idx;

    /* Only 2Nx2N is advertised for 8x8 CUs; 4x8 and 8x4 are not supported here yet */
    const WORD32 i4_cu_part_mask = ENABLE_2Nx2N;

    /* Offset of the block that holds the four 8x8 CUs */
    const WORD32 i4_base_x = ps_search_results->u1_x_off;
    const WORD32 i4_base_y = ps_search_results->u1_y_off;

    /* One cu_results / pu_results entry is consumed per 8x8 block, masked or not */
    for(blk_idx = 0; blk_idx < 4; blk_idx++, ps_cu_results++, ps_pu_results++)
    {
        UWORD8 u1_x_pos, u1_y_pos;
        WORD32 i4_num_l0 = 0;
        WORD32 i4_num_l1 = 0;
        pu_result_t *ps_l0_list;
        pu_result_t *ps_l1_list;
        pu_result_t *ps_pu;

        /* Blocks not selected by the mask are left untouched */
        if(!(u1_blk_8x8_mask & (1 << blk_idx)))
        {
            continue;
        }

        /* CU level parameters: blocks are in raster order, 8 pixels apart */
        ps_cu_results->u1_cu_size = CU_8x8;
        ps_cu_results->i4_part_mask = i4_cu_part_mask;
        ps_cu_results->u1_num_best_results = ps_search_results->u1_num_best_results;
        ps_cu_results->u1_x_off = i4_base_x + (blk_idx & 1) * 8;
        ps_cu_results->u1_y_off = i4_base_y + (blk_idx >> 1) * 8;
        ps_cu_results->i4_inp_offset = ps_cu_results->u1_x_off + (ps_cu_results->u1_y_off * 64);

        ps_cu_results->ps_best_results[0].i4_tu_split_cost = 0;
        ps_cu_results->ps_best_results[0].i4_tot_cost = MAX_32BIT_VAL;

        /* PU position in units of 4 pixels */
        u1_x_pos = ps_cu_results->u1_x_off >> 2;
        u1_y_pos = ps_cu_results->u1_y_off >> 2;

        if(!(ps_search_results->i4_part_mask & ENABLE_NxN))
        {
            /* NxN is not enabled in the search results: report no best results */
            /* and give the first PU of the first best result the maximum cost  */
            ps_pu = &ps_cu_results->ps_best_results[0].as_pu_results[0];

            ps_cu_results->u1_num_best_results = 0;
            ps_cu_results->i4_part_mask = 0;

            ps_pu->i4_tot_cost = MAX_32BIT_VAL;

            ps_pu->pu.b4_pos_x = u1_x_pos;
            ps_pu->pu.b4_pos_y = u1_y_pos;
            ps_pu->pu.b4_wd = 1;
            ps_pu->pu.b4_ht = 1;

            continue;
        }

        /* Each 8x8 block owns one fixed-size slice of ps_pu_result per list; */
        /* the L1 slices start TOT_NUM_PARTS slices after the L0 ones          */
        ps_l0_list = ps_pu_result + (blk_idx * MAX_NUM_RESULTS_PER_PART_LIST);
        ps_l1_list = ps_pu_result + ((blk_idx + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST);

        ps_pu_results->aps_pu_results[0][0] = ps_l0_list;
        ps_pu_results->aps_pu_results[1][0] = ps_l1_list;

        for(dir_idx = 0; dir_idx < i4_num_active_ref; dir_idx++)
        {
            const U08 u1_dir = pu1_pred_dir_searched[dir_idx];

            /* First NxN result node of this 8x8 block for the current direction */
            search_node_t *ps_node =
                ps_search_results->aps_part_results[u1_dir][PART_ID_NxN_TL + blk_idx];

            for(res_idx = 0; res_idx < ps_search_results->u1_num_results_per_part;
                res_idx++, ps_node++)
            {
                WORD32 i4_lc_ref_idx;

                if(!ps_node->u1_is_avail && !ps_node->u1_subpel_done)
                {
                    /* NxN was not evaluated at the 16x16 level: give the next free */
                    /* slot of both lists the maximum cost so that the 8x8 drops    */
                    /* out as a candidate during evaluation                          */
                    (ps_l0_list + i4_num_l0)->i4_tot_cost = MAX_32BIT_VAL;
                    (ps_l1_list + i4_num_l1)->i4_tot_cost = MAX_32BIT_VAL;

                    break;
                }

                i4_lc_ref_idx = ps_node->i1_ref_idx;

                ASSERT(i4_lc_ref_idx >= 0);

                if(u1_dir)
                {
                    /* Non-zero direction: append to the L1 list */
                    ps_pu = ps_l1_list + i4_num_l1;

                    ASSERT(
                        ps_ctxt->a_ref_idx_lc_to_l1[i4_lc_ref_idx] <
                        ps_inter_ctb_prms->u1_num_active_ref_l1);

                    ps_pu->pu.mv.i1_l1_ref_idx = ps_ctxt->a_ref_idx_lc_to_l1[i4_lc_ref_idx];
                    ps_pu->pu.mv.s_l1_mv = ps_node->s_mv;
                    ps_pu->pu.mv.i1_l0_ref_idx = -1;
                    ps_pu->pu.b2_pred_mode = PRED_L1;

                    i4_num_l1++;
                }
                else
                {
                    /* Direction 0: append to the L0 list */
                    ps_pu = ps_l0_list + i4_num_l0;

                    ASSERT(
                        ps_ctxt->a_ref_idx_lc_to_l0[i4_lc_ref_idx] <
                        ps_inter_ctb_prms->u1_num_active_ref_l0);

                    ps_pu->pu.mv.i1_l0_ref_idx = ps_ctxt->a_ref_idx_lc_to_l0[i4_lc_ref_idx];
                    ps_pu->pu.mv.s_l0_mv = ps_node->s_mv;
                    ps_pu->pu.mv.i1_l1_ref_idx = -1;
                    ps_pu->pu.b2_pred_mode = PRED_L0;

                    i4_num_l0++;
                }

                ps_pu->i4_sdi = ps_node->i4_sdi;
                ps_pu->i4_mv_cost = ps_node->i4_mv_cost;

#if UNI_SATD_SCALE
                /* The stored SAD/SATD is scaled by the weight, so rescale it with */
                /* rounding before adding the MV cost                              */
                ps_pu->i4_tot_cost =
                    ((ps_node->i4_sad * ps_ctxt->s_wt_pred.a_wpred_wt[ps_node->i1_ref_idx] +
                      (1 << (ps_inter_ctb_prms->wpred_log_wdc - 1))) >>
                     ps_inter_ctb_prms->wpred_log_wdc) +
                    ps_node->i4_mv_cost;
#endif

                /* Same packed size and position encoding as the NxN-disabled path above */
                ps_pu->pu.b4_pos_x = u1_x_pos;
                ps_pu->pu.b4_pos_y = u1_y_pos;
                ps_pu->pu.b4_wd = 1;
                ps_pu->pu.b4_ht = 1;
                ps_pu->pu.b1_intra_flag = 0;
            }
        }

        /* Publish how many candidates were stored in each list for this 8x8 CU */
        ps_pu_results->u1_num_results_per_part_l0[0] = i4_num_l0;
        ps_pu_results->u1_num_results_per_part_l1[0] = i4_num_l1;
    }
}
3403*c83a76b0SSuyog Pawar
3404*c83a76b0SSuyog Pawar /**
3405*c83a76b0SSuyog Pawar ********************************************************************************
3406*c83a76b0SSuyog Pawar * @fn hme_insert_intra_nodes_post_bipred
3407*c83a76b0SSuyog Pawar *
3408*c83a76b0SSuyog Pawar * @brief Compares intra costs (populated by IPE) with the best inter costs
3409*c83a76b0SSuyog Pawar * (populated after evaluating bi-pred) and updates the best results
3410*c83a76b0SSuyog Pawar * if intra cost is better
3411*c83a76b0SSuyog Pawar *
3412*c83a76b0SSuyog Pawar * @param[in,out] ps_cu_results [inout] : Best results structure of CU
3413*c83a76b0SSuyog Pawar * ps_cur_ipe_ctb [in] : intra results for the current CTB
3414*c83a76b0SSuyog Pawar * i4_frm_qstep [in] : current frame quantizer(qscale)*
3415*c83a76b0SSuyog Pawar *
3416*c83a76b0SSuyog Pawar * @return None
3417*c83a76b0SSuyog Pawar ********************************************************************************
3418*c83a76b0SSuyog Pawar */
hme_insert_intra_nodes_post_bipred(inter_cu_results_t * ps_cu_results,ipe_l0_ctb_analyse_for_me_t * ps_cur_ipe_ctb,WORD32 i4_frm_qstep)3419*c83a76b0SSuyog Pawar void hme_insert_intra_nodes_post_bipred(
3420*c83a76b0SSuyog Pawar inter_cu_results_t *ps_cu_results,
3421*c83a76b0SSuyog Pawar ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
3422*c83a76b0SSuyog Pawar WORD32 i4_frm_qstep)
3423*c83a76b0SSuyog Pawar {
3424*c83a76b0SSuyog Pawar WORD32 i;
3425*c83a76b0SSuyog Pawar WORD32 num_results;
3426*c83a76b0SSuyog Pawar WORD32 cu_size = ps_cu_results->u1_cu_size;
3427*c83a76b0SSuyog Pawar UWORD8 u1_x_off = ps_cu_results->u1_x_off;
3428*c83a76b0SSuyog Pawar UWORD8 u1_y_off = ps_cu_results->u1_y_off;
3429*c83a76b0SSuyog Pawar
3430*c83a76b0SSuyog Pawar /* Id of the 32x32 block, 16x16 block in a CTB */
3431*c83a76b0SSuyog Pawar WORD32 i4_32x32_id = (u1_y_off >> 5) * 2 + (u1_x_off >> 5);
3432*c83a76b0SSuyog Pawar WORD32 i4_16x16_id = ((u1_y_off >> 4) & 0x1) * 2 + ((u1_x_off >> 4) & 0x1);
3433*c83a76b0SSuyog Pawar
3434*c83a76b0SSuyog Pawar /* Flags to indicate if intra64/intra32/intra16 cusize are invalid as per IPE decision */
3435*c83a76b0SSuyog Pawar WORD32 disable_intra64 = 0;
3436*c83a76b0SSuyog Pawar WORD32 disable_intra32 = 0;
3437*c83a76b0SSuyog Pawar WORD32 disable_intra16 = 0;
3438*c83a76b0SSuyog Pawar
3439*c83a76b0SSuyog Pawar S32 i4_intra_2nx2n_cost;
3440*c83a76b0SSuyog Pawar
3441*c83a76b0SSuyog Pawar /* ME final results for this CU (post seeding of best uni/bi pred results) */
3442*c83a76b0SSuyog Pawar part_type_results_t *ps_best_result;
3443*c83a76b0SSuyog Pawar
3444*c83a76b0SSuyog Pawar i4_frm_qstep *= !L0ME_IN_OPENLOOP_MODE;
3445*c83a76b0SSuyog Pawar
3446*c83a76b0SSuyog Pawar /*If inter candidates are enabled then enter the for loop to update the intra candidate */
3447*c83a76b0SSuyog Pawar
3448*c83a76b0SSuyog Pawar if((ps_cu_results->u1_num_best_results == 0) && (CU_8x8 == ps_cu_results->u1_cu_size))
3449*c83a76b0SSuyog Pawar {
3450*c83a76b0SSuyog Pawar ps_cu_results->u1_num_best_results = 1;
3451*c83a76b0SSuyog Pawar }
3452*c83a76b0SSuyog Pawar
3453*c83a76b0SSuyog Pawar num_results = ps_cu_results->u1_num_best_results;
3454*c83a76b0SSuyog Pawar
3455*c83a76b0SSuyog Pawar ps_best_result = &ps_cu_results->ps_best_results[0];
3456*c83a76b0SSuyog Pawar
3457*c83a76b0SSuyog Pawar /* Disable intra16/32/64 flags based on split flags recommended by IPE */
3458*c83a76b0SSuyog Pawar if(ps_cur_ipe_ctb->u1_split_flag)
3459*c83a76b0SSuyog Pawar {
3460*c83a76b0SSuyog Pawar disable_intra64 = 1;
3461*c83a76b0SSuyog Pawar if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
3462*c83a76b0SSuyog Pawar {
3463*c83a76b0SSuyog Pawar disable_intra32 = 1;
3464*c83a76b0SSuyog Pawar
3465*c83a76b0SSuyog Pawar if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
3466*c83a76b0SSuyog Pawar .as_intra16_analyse[i4_16x16_id]
3467*c83a76b0SSuyog Pawar .b1_split_flag)
3468*c83a76b0SSuyog Pawar {
3469*c83a76b0SSuyog Pawar disable_intra16 = 1;
3470*c83a76b0SSuyog Pawar }
3471*c83a76b0SSuyog Pawar }
3472*c83a76b0SSuyog Pawar }
3473*c83a76b0SSuyog Pawar
3474*c83a76b0SSuyog Pawar /* Derive the intra cost based on current cu size and offset */
3475*c83a76b0SSuyog Pawar switch(cu_size)
3476*c83a76b0SSuyog Pawar {
3477*c83a76b0SSuyog Pawar case CU_8x8:
3478*c83a76b0SSuyog Pawar {
3479*c83a76b0SSuyog Pawar i4_intra_2nx2n_cost = ps_cur_ipe_ctb->ai4_best8x8_intra_cost[u1_y_off + (u1_x_off >> 3)];
3480*c83a76b0SSuyog Pawar
3481*c83a76b0SSuyog Pawar /* Accounting for coding noise in the open loop IPE cost */
3482*c83a76b0SSuyog Pawar i4_intra_2nx2n_cost +=
3483*c83a76b0SSuyog Pawar ((i4_frm_qstep * 16) >> 2) /*+ ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;
3484*c83a76b0SSuyog Pawar
3485*c83a76b0SSuyog Pawar break;
3486*c83a76b0SSuyog Pawar }
3487*c83a76b0SSuyog Pawar
3488*c83a76b0SSuyog Pawar case CU_16x16:
3489*c83a76b0SSuyog Pawar {
3490*c83a76b0SSuyog Pawar i4_intra_2nx2n_cost =
3491*c83a76b0SSuyog Pawar ps_cur_ipe_ctb->ai4_best16x16_intra_cost[(u1_y_off >> 4) * 4 + (u1_x_off >> 4)];
3492*c83a76b0SSuyog Pawar
3493*c83a76b0SSuyog Pawar /* Accounting for coding noise in the open loop IPE cost */
3494*c83a76b0SSuyog Pawar i4_intra_2nx2n_cost +=
3495*c83a76b0SSuyog Pawar ((i4_frm_qstep * 16)); /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */
3496*c83a76b0SSuyog Pawar
3497*c83a76b0SSuyog Pawar if(disable_intra16)
3498*c83a76b0SSuyog Pawar {
3499*c83a76b0SSuyog Pawar /* Disable intra 2Nx2N (intra 16) as IPE suggested best mode as 8x8 */
3500*c83a76b0SSuyog Pawar i4_intra_2nx2n_cost = MAX_32BIT_VAL;
3501*c83a76b0SSuyog Pawar }
3502*c83a76b0SSuyog Pawar break;
3503*c83a76b0SSuyog Pawar }
3504*c83a76b0SSuyog Pawar
3505*c83a76b0SSuyog Pawar case CU_32x32:
3506*c83a76b0SSuyog Pawar {
3507*c83a76b0SSuyog Pawar i4_intra_2nx2n_cost =
3508*c83a76b0SSuyog Pawar ps_cur_ipe_ctb->ai4_best32x32_intra_cost[(u1_y_off >> 5) * 2 + (u1_x_off >> 5)];
3509*c83a76b0SSuyog Pawar
3510*c83a76b0SSuyog Pawar /* Accounting for coding noise in the open loop IPE cost */
3511*c83a76b0SSuyog Pawar i4_intra_2nx2n_cost +=
3512*c83a76b0SSuyog Pawar (i4_frm_qstep * 16 * 4) /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;
3513*c83a76b0SSuyog Pawar
3514*c83a76b0SSuyog Pawar if(disable_intra32)
3515*c83a76b0SSuyog Pawar {
3516*c83a76b0SSuyog Pawar /* Disable intra 2Nx2N (intra 32) as IPE suggested best mode as 16x16 or 8x8 */
3517*c83a76b0SSuyog Pawar i4_intra_2nx2n_cost = MAX_32BIT_VAL;
3518*c83a76b0SSuyog Pawar }
3519*c83a76b0SSuyog Pawar break;
3520*c83a76b0SSuyog Pawar }
3521*c83a76b0SSuyog Pawar
3522*c83a76b0SSuyog Pawar case CU_64x64:
3523*c83a76b0SSuyog Pawar {
3524*c83a76b0SSuyog Pawar i4_intra_2nx2n_cost = ps_cur_ipe_ctb->i4_best64x64_intra_cost;
3525*c83a76b0SSuyog Pawar
3526*c83a76b0SSuyog Pawar /* Accounting for coding noise in the open loop IPE cost */
3527*c83a76b0SSuyog Pawar i4_intra_2nx2n_cost +=
3528*c83a76b0SSuyog Pawar (i4_frm_qstep * 16 * 16) /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;
3529*c83a76b0SSuyog Pawar
3530*c83a76b0SSuyog Pawar if(disable_intra64)
3531*c83a76b0SSuyog Pawar {
3532*c83a76b0SSuyog Pawar /* Disable intra 2Nx2N (intra 64) as IPE suggested best mode as 32x32 /16x16 / 8x8 */
3533*c83a76b0SSuyog Pawar i4_intra_2nx2n_cost = MAX_32BIT_VAL;
3534*c83a76b0SSuyog Pawar }
3535*c83a76b0SSuyog Pawar break;
3536*c83a76b0SSuyog Pawar }
3537*c83a76b0SSuyog Pawar
3538*c83a76b0SSuyog Pawar default:
3539*c83a76b0SSuyog Pawar ASSERT(0);
3540*c83a76b0SSuyog Pawar }
3541*c83a76b0SSuyog Pawar
3542*c83a76b0SSuyog Pawar {
3543*c83a76b0SSuyog Pawar /*****************************************************************/
3544*c83a76b0SSuyog Pawar /* Intra / Inter cost comparison for 2Nx2N : cu size 8/16/32/64 */
3545*c83a76b0SSuyog Pawar /* Identify where the current result isto be placed. Basically */
3546*c83a76b0SSuyog Pawar /* find the node which has cost just higher than node under test */
3547*c83a76b0SSuyog Pawar /*****************************************************************/
3548*c83a76b0SSuyog Pawar for(i = 0; i < num_results; i++)
3549*c83a76b0SSuyog Pawar {
3550*c83a76b0SSuyog Pawar /* Subtrqact the tu_spli_flag_cost from total_inter_cost for fair comparision */
3551*c83a76b0SSuyog Pawar WORD32 inter_cost = ps_best_result[i].i4_tot_cost - ps_best_result[i].i4_tu_split_cost;
3552*c83a76b0SSuyog Pawar
3553*c83a76b0SSuyog Pawar if(i4_intra_2nx2n_cost < inter_cost)
3554*c83a76b0SSuyog Pawar {
3555*c83a76b0SSuyog Pawar if(i < (num_results - 1))
3556*c83a76b0SSuyog Pawar {
3557*c83a76b0SSuyog Pawar memmove(
3558*c83a76b0SSuyog Pawar ps_best_result + i + 1,
3559*c83a76b0SSuyog Pawar ps_best_result + i,
3560*c83a76b0SSuyog Pawar sizeof(ps_best_result[0]) * (num_results - 1 - i));
3561*c83a76b0SSuyog Pawar }
3562*c83a76b0SSuyog Pawar
3563*c83a76b0SSuyog Pawar /* Insert the intra node result */
3564*c83a76b0SSuyog Pawar ps_best_result[i].u1_part_type = PRT_2Nx2N;
3565*c83a76b0SSuyog Pawar ps_best_result[i].i4_tot_cost = i4_intra_2nx2n_cost;
3566*c83a76b0SSuyog Pawar ps_best_result[i].ai4_tu_split_flag[0] = 0;
3567*c83a76b0SSuyog Pawar ps_best_result[i].ai4_tu_split_flag[1] = 0;
3568*c83a76b0SSuyog Pawar ps_best_result[i].ai4_tu_split_flag[2] = 0;
3569*c83a76b0SSuyog Pawar ps_best_result[i].ai4_tu_split_flag[3] = 0;
3570*c83a76b0SSuyog Pawar
3571*c83a76b0SSuyog Pawar /* Populate intra flag, cost and default mvs, refidx for intra pu */
3572*c83a76b0SSuyog Pawar ps_best_result[i].as_pu_results[0].i4_tot_cost = i4_intra_2nx2n_cost;
3573*c83a76b0SSuyog Pawar //ps_best_result[i].as_pu_results[0].i4_sad = i4_intra_2nx2n_cost;
3574*c83a76b0SSuyog Pawar ps_best_result[i].as_pu_results[0].i4_mv_cost = 0;
3575*c83a76b0SSuyog Pawar ps_best_result[i].as_pu_results[0].pu.b1_intra_flag = 1;
3576*c83a76b0SSuyog Pawar ps_best_result[i].as_pu_results[0].pu.mv.i1_l0_ref_idx = -1;
3577*c83a76b0SSuyog Pawar ps_best_result[i].as_pu_results[0].pu.mv.i1_l1_ref_idx = -1;
3578*c83a76b0SSuyog Pawar ps_best_result[i].as_pu_results[0].pu.mv.s_l0_mv.i2_mvx = INTRA_MV;
3579*c83a76b0SSuyog Pawar ps_best_result[i].as_pu_results[0].pu.mv.s_l0_mv.i2_mvy = INTRA_MV;
3580*c83a76b0SSuyog Pawar ps_best_result[i].as_pu_results[0].pu.mv.s_l1_mv.i2_mvx = INTRA_MV;
3581*c83a76b0SSuyog Pawar ps_best_result[i].as_pu_results[0].pu.mv.s_l1_mv.i2_mvy = INTRA_MV;
3582*c83a76b0SSuyog Pawar
3583*c83a76b0SSuyog Pawar break;
3584*c83a76b0SSuyog Pawar }
3585*c83a76b0SSuyog Pawar }
3586*c83a76b0SSuyog Pawar }
3587*c83a76b0SSuyog Pawar }
3588*c83a76b0SSuyog Pawar
hme_recompute_lambda_from_min_8x8_act_in_ctb(me_frm_ctxt_t * ps_ctxt,ipe_l0_ctb_analyse_for_me_t * ps_cur_ipe_ctb)3589*c83a76b0SSuyog Pawar S32 hme_recompute_lambda_from_min_8x8_act_in_ctb(
3590*c83a76b0SSuyog Pawar me_frm_ctxt_t *ps_ctxt, ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb)
3591*c83a76b0SSuyog Pawar {
3592*c83a76b0SSuyog Pawar double lambda;
3593*c83a76b0SSuyog Pawar double lambda_modifier;
3594*c83a76b0SSuyog Pawar WORD32 i4_cu_qp;
3595*c83a76b0SSuyog Pawar frm_lambda_ctxt_t *ps_frm_lambda_ctxt;
3596*c83a76b0SSuyog Pawar //ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
3597*c83a76b0SSuyog Pawar WORD32 i4_frame_qp;
3598*c83a76b0SSuyog Pawar rc_quant_t *ps_rc_quant_ctxt;
3599*c83a76b0SSuyog Pawar WORD32 i4_is_bpic;
3600*c83a76b0SSuyog Pawar
3601*c83a76b0SSuyog Pawar ps_frm_lambda_ctxt = &ps_ctxt->s_frm_lambda_ctxt;
3602*c83a76b0SSuyog Pawar //ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base;
3603*c83a76b0SSuyog Pawar i4_frame_qp = ps_ctxt->s_frm_prms.i4_frame_qp;
3604*c83a76b0SSuyog Pawar ps_rc_quant_ctxt = ps_ctxt->ps_rc_quant_ctxt;
3605*c83a76b0SSuyog Pawar i4_is_bpic = ps_ctxt->s_frm_prms.bidir_enabled;
3606*c83a76b0SSuyog Pawar
3607*c83a76b0SSuyog Pawar i4_cu_qp = ps_rc_quant_ctxt->pi4_qp_to_qscale[i4_frame_qp + ps_rc_quant_ctxt->i1_qp_offset];
3608*c83a76b0SSuyog Pawar
3609*c83a76b0SSuyog Pawar {
3610*c83a76b0SSuyog Pawar if(ps_ctxt->i4_l0me_qp_mod)
3611*c83a76b0SSuyog Pawar {
3612*c83a76b0SSuyog Pawar #if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
3613*c83a76b0SSuyog Pawar #if LAMDA_BASED_ON_QUANT
3614*c83a76b0SSuyog Pawar WORD32 i4_activity = ps_cur_ipe_ctb->i4_64x64_act_factor[2][0];
3615*c83a76b0SSuyog Pawar #else
3616*c83a76b0SSuyog Pawar WORD32 i4_activity = ps_cur_ipe_ctb->i4_64x64_act_factor[3][0];
3617*c83a76b0SSuyog Pawar #endif
3618*c83a76b0SSuyog Pawar i4_cu_qp = (((i4_cu_qp)*i4_activity) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
3619*c83a76b0SSuyog Pawar QP_LEVEL_MOD_ACT_FACTOR;
3620*c83a76b0SSuyog Pawar
3621*c83a76b0SSuyog Pawar #endif
3622*c83a76b0SSuyog Pawar }
3623*c83a76b0SSuyog Pawar if(i4_cu_qp > ps_rc_quant_ctxt->i2_max_qscale)
3624*c83a76b0SSuyog Pawar i4_cu_qp = ps_rc_quant_ctxt->i2_max_qscale;
3625*c83a76b0SSuyog Pawar else if(i4_cu_qp < ps_rc_quant_ctxt->i2_min_qscale)
3626*c83a76b0SSuyog Pawar i4_cu_qp = ps_rc_quant_ctxt->i2_min_qscale;
3627*c83a76b0SSuyog Pawar
3628*c83a76b0SSuyog Pawar i4_cu_qp = ps_rc_quant_ctxt->pi4_qscale_to_qp[i4_cu_qp];
3629*c83a76b0SSuyog Pawar }
3630*c83a76b0SSuyog Pawar
3631*c83a76b0SSuyog Pawar if(i4_cu_qp > ps_rc_quant_ctxt->i2_max_qp)
3632*c83a76b0SSuyog Pawar i4_cu_qp = ps_rc_quant_ctxt->i2_max_qp;
3633*c83a76b0SSuyog Pawar else if(i4_cu_qp < ps_rc_quant_ctxt->i2_min_qp)
3634*c83a76b0SSuyog Pawar i4_cu_qp = ps_rc_quant_ctxt->i2_min_qp;
3635*c83a76b0SSuyog Pawar
3636*c83a76b0SSuyog Pawar lambda = pow(2.0, (((double)(i4_cu_qp - 12)) / 3));
3637*c83a76b0SSuyog Pawar
3638*c83a76b0SSuyog Pawar lambda_modifier = ps_frm_lambda_ctxt->lambda_modifier;
3639*c83a76b0SSuyog Pawar
3640*c83a76b0SSuyog Pawar if(i4_is_bpic)
3641*c83a76b0SSuyog Pawar {
3642*c83a76b0SSuyog Pawar lambda_modifier = lambda_modifier * CLIP3((((double)(i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
3643*c83a76b0SSuyog Pawar }
3644*c83a76b0SSuyog Pawar if(ps_ctxt->i4_use_const_lamda_modifier)
3645*c83a76b0SSuyog Pawar {
3646*c83a76b0SSuyog Pawar if(ps_ctxt->s_frm_prms.is_i_pic)
3647*c83a76b0SSuyog Pawar {
3648*c83a76b0SSuyog Pawar lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
3649*c83a76b0SSuyog Pawar }
3650*c83a76b0SSuyog Pawar else
3651*c83a76b0SSuyog Pawar {
3652*c83a76b0SSuyog Pawar lambda_modifier = CONST_LAMDA_MOD_VAL;
3653*c83a76b0SSuyog Pawar }
3654*c83a76b0SSuyog Pawar }
3655*c83a76b0SSuyog Pawar lambda *= lambda_modifier;
3656*c83a76b0SSuyog Pawar
3657*c83a76b0SSuyog Pawar return ((WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)));
3658*c83a76b0SSuyog Pawar }
3659*c83a76b0SSuyog Pawar
3660*c83a76b0SSuyog Pawar /**
3661*c83a76b0SSuyog Pawar ********************************************************************************
3662*c83a76b0SSuyog Pawar * @fn hme_update_dynamic_search_params
3663*c83a76b0SSuyog Pawar *
3664*c83a76b0SSuyog Pawar * @brief Update the Dynamic search params based on the current MVs
3665*c83a76b0SSuyog Pawar *
3666*c83a76b0SSuyog Pawar * @param[in,out] ps_dyn_range_prms [inout] : Dyn. Range Param str.
3667*c83a76b0SSuyog Pawar * i2_mvy [in] : current MV y comp.
3668*c83a76b0SSuyog Pawar *
3669*c83a76b0SSuyog Pawar * @return None
3670*c83a76b0SSuyog Pawar ********************************************************************************
3671*c83a76b0SSuyog Pawar */
hme_update_dynamic_search_params(dyn_range_prms_t * ps_dyn_range_prms,WORD16 i2_mvy)3672*c83a76b0SSuyog Pawar void hme_update_dynamic_search_params(dyn_range_prms_t *ps_dyn_range_prms, WORD16 i2_mvy)
3673*c83a76b0SSuyog Pawar {
3674*c83a76b0SSuyog Pawar /* If MV is up large, update i2_dyn_max_y */
3675*c83a76b0SSuyog Pawar if(i2_mvy > ps_dyn_range_prms->i2_dyn_max_y)
3676*c83a76b0SSuyog Pawar ps_dyn_range_prms->i2_dyn_max_y = i2_mvy;
3677*c83a76b0SSuyog Pawar /* If MV is down large, update i2_dyn_min_y */
3678*c83a76b0SSuyog Pawar if(i2_mvy < ps_dyn_range_prms->i2_dyn_min_y)
3679*c83a76b0SSuyog Pawar ps_dyn_range_prms->i2_dyn_min_y = i2_mvy;
3680*c83a76b0SSuyog Pawar }
3681*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_add_new_node_to_a_sorted_array
*
*  @brief  Inserts a node pointer into an array of node pointers kept in
*          ascending order of cost. The new node is placed before the first
*          entry whose cost is strictly greater; later entries move down by one
*
*  @param[in]      ps_result_node         : node to insert
*  @param[in,out]  pps_sorted_array       : sorted array of node pointers; must
*                                           have room for one more entry
*  @param[in,out]  pu1_shifts             : optional per-entry shifts (may be
*                                           NULL). When given, costs are
*                                           compared after a rounded right
*                                           shift by the entry's own shift, and
*                                           this array is kept in step with
*                                           pps_sorted_array
*  @param[in]      u4_num_results_updated : number of entries already present
*  @param[in]      u1_shift               : shift belonging to the new node
*
*  @return None
********************************************************************************
*/
void hme_add_new_node_to_a_sorted_array(
    search_node_t *ps_result_node,
    search_node_t **pps_sorted_array,
    U08 *pu1_shifts,
    U32 u4_num_results_updated,
    U08 u1_shift)
{
    U32 u4_insert_pos = 0;

    /* Step 1: locate the slot in front of the first costlier entry */
    if(NULL == pu1_shifts)
    {
        S32 i4_new_cost = ps_result_node->i4_tot_cost;

        while((u4_insert_pos < u4_num_results_updated) &&
              !(i4_new_cost < pps_sorted_array[u4_insert_pos]->i4_tot_cost))
        {
            u4_insert_pos++;
        }
    }
    else
    {
        S32 i4_new_cost;

        if(0 == u1_shift)
        {
            i4_new_cost = ps_result_node->i4_tot_cost;
        }
        else
        {
            i4_new_cost = (ps_result_node->i4_tot_cost + (1 << (u1_shift - 1))) >> u1_shift;
        }

        for(; u4_insert_pos < u4_num_results_updated; u4_insert_pos++)
        {
            U08 u1_entry_shift = pu1_shifts[u4_insert_pos];
            S32 i4_entry_cost;

            if(0 == u1_entry_shift)
            {
                i4_entry_cost = pps_sorted_array[u4_insert_pos]->i4_tot_cost;
            }
            else
            {
                i4_entry_cost = (pps_sorted_array[u4_insert_pos]->i4_tot_cost +
                                 (1 << (u1_entry_shift - 1))) >>
                                u1_entry_shift;
            }

            if(i4_new_cost < i4_entry_cost)
            {
                break;
            }
        }
    }

    /* Step 2: open up the slot; nothing moves when appending at the end */
    if(u4_insert_pos < u4_num_results_updated)
    {
        U32 u4_num_to_move = u4_num_results_updated - u4_insert_pos;

        memmove(
            &pps_sorted_array[u4_insert_pos + 1],
            &pps_sorted_array[u4_insert_pos],
            u4_num_to_move * sizeof(search_node_t *));

        if(NULL != pu1_shifts)
        {
            memmove(
                &pu1_shifts[u4_insert_pos + 1],
                &pu1_shifts[u4_insert_pos],
                u4_num_to_move * sizeof(U08));
        }
    }

    /* Step 3: store the new entry */
    if(NULL != pu1_shifts)
    {
        pu1_shifts[u4_insert_pos] = u1_shift;
    }

    pps_sorted_array[u4_insert_pos] = ps_result_node;
}
3739*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_find_pos_of_implicitly_stored_ref_id
*
*  @brief  Scans an array of reference indices and returns the position of the
*          (i4_result_id + 1)-th entry equal to i1_ref_idx
*
*  @param[in]  pi1_ref_idx    : array of reference indices to scan
*  @param[in]  i1_ref_idx     : reference index to look for
*  @param[in]  i4_result_id   : number of matching entries to skip before
*                               reporting a position (0 => first match)
*  @param[in]  i4_num_results : number of entries of pi1_ref_idx to scan
*
*  @return Position of the requested match, or -1 if there are not enough
*          matching entries
********************************************************************************
*/
S32 hme_find_pos_of_implicitly_stored_ref_id(
    S08 *pi1_ref_idx, S08 i1_ref_idx, S32 i4_result_id, S32 i4_num_results)
{
    S32 i4_pos;
    S32 i4_matches_to_skip = i4_result_id;

    for(i4_pos = 0; i4_pos < i4_num_results; i4_pos++)
    {
        if(pi1_ref_idx[i4_pos] != i1_ref_idx)
        {
            continue;
        }

        if(0 == i4_matches_to_skip)
        {
            return i4_pos;
        }

        /* A match, but an earlier one than requested */
        i4_matches_to_skip--;
    }

    return -1;
}
3762*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_search_node_populator
*
*  @brief  Fills a search node from a source MV and a reference index, and
*          marks the node as available with sub-pel refinement not yet done
*
*  @param[out] ps_search_node        : node to fill; its ps_mv member must
*                                      already point to writable MV storage
*  @param[in]  ps_mv                 : source MV
*  @param[in]  i1_ref_idx            : reference index stored in the node
*  @param[in]  i1_mv_magnitude_shift : shift applied to both MV components via
*                                      SHL_NEG (presumably left shift when
*                                      positive, right shift when negative -
*                                      confirm against the macro definition)
*
*  @return None
********************************************************************************
*/
static __inline void hme_search_node_populator(
    search_node_t *ps_search_node, hme_mv_t *ps_mv, S08 i1_ref_idx, S08 i1_mv_magnitude_shift)
{
    WORD16 i2_src_comp;

    /* Horizontal component */
    i2_src_comp = (WORD16)ps_mv->i2_mv_x;
    ps_search_node->ps_mv->i2_mvx = SHL_NEG(i2_src_comp, i1_mv_magnitude_shift);

    /* Vertical component */
    i2_src_comp = (WORD16)ps_mv->i2_mv_y;
    ps_search_node->ps_mv->i2_mvy = SHL_NEG(i2_src_comp, i1_mv_magnitude_shift);

    /* Book-keeping fields of the node */
    ps_search_node->i1_ref_idx = i1_ref_idx;
    ps_search_node->u1_is_avail = 1;
    ps_search_node->u1_subpel_done = 0;
}
3772*c83a76b0SSuyog Pawar
hme_populate_search_candidates(fpel_srch_cand_init_data_t * ps_ctxt)3773*c83a76b0SSuyog Pawar S32 hme_populate_search_candidates(fpel_srch_cand_init_data_t *ps_ctxt)
3774*c83a76b0SSuyog Pawar {
3775*c83a76b0SSuyog Pawar hme_mv_t *ps_mv;
3776*c83a76b0SSuyog Pawar
3777*c83a76b0SSuyog Pawar S32 wd_c, ht_c, wd_p, ht_p;
3778*c83a76b0SSuyog Pawar S32 blksize_p, blksize_c;
3779*c83a76b0SSuyog Pawar S32 i;
3780*c83a76b0SSuyog Pawar S08 *pi1_ref_idx;
3781*c83a76b0SSuyog Pawar /* Cache for storing offsets */
3782*c83a76b0SSuyog Pawar S32 ai4_cand_offsets[NUM_SEARCH_CAND_LOCATIONS];
3783*c83a76b0SSuyog Pawar
3784*c83a76b0SSuyog Pawar layer_ctxt_t *ps_curr_layer = ps_ctxt->ps_curr_layer;
3785*c83a76b0SSuyog Pawar layer_ctxt_t *ps_coarse_layer = ps_ctxt->ps_coarse_layer;
3786*c83a76b0SSuyog Pawar layer_mv_t *ps_coarse_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
3787*c83a76b0SSuyog Pawar layer_mv_t *ps_curr_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
3788*c83a76b0SSuyog Pawar search_candt_t *ps_search_cands = ps_ctxt->ps_search_cands;
3789*c83a76b0SSuyog Pawar hme_mv_t s_zero_mv = { 0 };
3790*c83a76b0SSuyog Pawar
3791*c83a76b0SSuyog Pawar S32 i4_pos_x = ps_ctxt->i4_pos_x;
3792*c83a76b0SSuyog Pawar S32 i4_pos_y = ps_ctxt->i4_pos_y;
3793*c83a76b0SSuyog Pawar S32 i4_num_act_ref_l0 = ps_ctxt->i4_num_act_ref_l0;
3794*c83a76b0SSuyog Pawar S32 i4_num_act_ref_l1 = ps_ctxt->i4_num_act_ref_l1;
3795*c83a76b0SSuyog Pawar U08 u1_pred_dir = ps_ctxt->u1_pred_dir;
3796*c83a76b0SSuyog Pawar U08 u1_pred_dir_ctr = ps_ctxt->u1_pred_dir_ctr;
3797*c83a76b0SSuyog Pawar U08 u1_num_results_in_curr_mvbank = ps_ctxt->u1_num_results_in_mvbank;
3798*c83a76b0SSuyog Pawar U08 u1_num_results_in_coarse_mvbank =
3799*c83a76b0SSuyog Pawar (u1_pred_dir == 0) ? (i4_num_act_ref_l0 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref)
3800*c83a76b0SSuyog Pawar : (i4_num_act_ref_l1 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref);
3801*c83a76b0SSuyog Pawar S32 i4_init_offset_projected =
3802*c83a76b0SSuyog Pawar (u1_pred_dir == 1) ? (i4_num_act_ref_l0 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref) : 0;
3803*c83a76b0SSuyog Pawar S32 i4_init_offset_spatial =
3804*c83a76b0SSuyog Pawar (u1_pred_dir_ctr == 1)
3805*c83a76b0SSuyog Pawar ? (ps_curr_layer_mvbank->i4_num_mvs_per_ref * u1_num_results_in_curr_mvbank)
3806*c83a76b0SSuyog Pawar : 0;
3807*c83a76b0SSuyog Pawar U08 u1_search_candidate_list_index = ps_ctxt->u1_search_candidate_list_index;
3808*c83a76b0SSuyog Pawar U08 u1_max_num_search_cands =
3809*c83a76b0SSuyog Pawar gau1_max_num_search_cands_in_l0_me[u1_search_candidate_list_index];
3810*c83a76b0SSuyog Pawar S32 i4_num_srch_cands = MIN(u1_max_num_search_cands, ps_ctxt->i4_max_num_init_cands << 1);
3811*c83a76b0SSuyog Pawar U16 u2_is_offset_available = 0;
3812*c83a76b0SSuyog Pawar U08 u1_search_blk_to_spatial_mvbank_blk_size_factor = 1;
3813*c83a76b0SSuyog Pawar
3814*c83a76b0SSuyog Pawar /* Width and ht of current and prev layers */
3815*c83a76b0SSuyog Pawar wd_c = ps_curr_layer->i4_wd;
3816*c83a76b0SSuyog Pawar ht_c = ps_curr_layer->i4_ht;
3817*c83a76b0SSuyog Pawar wd_p = ps_coarse_layer->i4_wd;
3818*c83a76b0SSuyog Pawar ht_p = ps_coarse_layer->i4_ht;
3819*c83a76b0SSuyog Pawar
3820*c83a76b0SSuyog Pawar blksize_p = gau1_blk_size_to_wd_shift[ps_coarse_layer_mvbank->e_blk_size];
3821*c83a76b0SSuyog Pawar blksize_c = gau1_blk_size_to_wd_shift[ps_curr_layer_mvbank->e_blk_size];
3822*c83a76b0SSuyog Pawar
3823*c83a76b0SSuyog Pawar /* ASSERT for valid sizes */
3824*c83a76b0SSuyog Pawar ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
3825*c83a76b0SSuyog Pawar
3826*c83a76b0SSuyog Pawar {
3827*c83a76b0SSuyog Pawar S32 x = i4_pos_x >> 4;
3828*c83a76b0SSuyog Pawar S32 y = i4_pos_y >> 4;
3829*c83a76b0SSuyog Pawar
3830*c83a76b0SSuyog Pawar if(blksize_c != gau1_blk_size_to_wd_shift[ps_ctxt->e_search_blk_size])
3831*c83a76b0SSuyog Pawar {
3832*c83a76b0SSuyog Pawar x *= 2;
3833*c83a76b0SSuyog Pawar y *= 2;
3834*c83a76b0SSuyog Pawar
3835*c83a76b0SSuyog Pawar u1_search_blk_to_spatial_mvbank_blk_size_factor = 2;
3836*c83a76b0SSuyog Pawar }
3837*c83a76b0SSuyog Pawar
3838*c83a76b0SSuyog Pawar i4_init_offset_spatial += (x + y * ps_curr_layer_mvbank->i4_num_blks_per_row) *
3839*c83a76b0SSuyog Pawar ps_curr_layer_mvbank->i4_num_mvs_per_blk;
3840*c83a76b0SSuyog Pawar }
3841*c83a76b0SSuyog Pawar
3842*c83a76b0SSuyog Pawar for(i = 0; i < i4_num_srch_cands; i++)
3843*c83a76b0SSuyog Pawar {
3844*c83a76b0SSuyog Pawar SEARCH_CANDIDATE_TYPE_T e_search_cand_type =
3845*c83a76b0SSuyog Pawar gae_search_cand_priority_to_search_cand_type_map_in_l0_me[u1_search_candidate_list_index]
3846*c83a76b0SSuyog Pawar [i];
3847*c83a76b0SSuyog Pawar SEARCH_CAND_LOCATIONS_T e_search_cand_loc =
3848*c83a76b0SSuyog Pawar gae_search_cand_type_to_location_map[e_search_cand_type];
3849*c83a76b0SSuyog Pawar S08 i1_result_id = MIN(
3850*c83a76b0SSuyog Pawar gai1_search_cand_type_to_result_id_map[e_search_cand_type],
3851*c83a76b0SSuyog Pawar (e_search_cand_loc < 0 ? 0
3852*c83a76b0SSuyog Pawar : ps_ctxt->pu1_num_fpel_search_cands[e_search_cand_loc] - 1));
3853*c83a76b0SSuyog Pawar U08 u1_is_spatial_cand = (1 == gau1_search_cand_type_to_spatiality_map[e_search_cand_type]);
3854*c83a76b0SSuyog Pawar U08 u1_is_proj_cand = (0 == gau1_search_cand_type_to_spatiality_map[e_search_cand_type]);
3855*c83a76b0SSuyog Pawar U08 u1_is_zeroMV_cand = (ZERO_MV == e_search_cand_type) ||
3856*c83a76b0SSuyog Pawar (ZERO_MV_ALTREF == e_search_cand_type);
3857*c83a76b0SSuyog Pawar
3858*c83a76b0SSuyog Pawar /* When spatial candidates are available, use them, else use the projected candidates */
3859*c83a76b0SSuyog Pawar /* This is required since some blocks will never have certain spatial candidates, and in order */
3860*c83a76b0SSuyog Pawar /* to accomodate such instances in 'gae_search_cand_priority_to_search_cand_type_map_in_l0_me' list, */
3861*c83a76b0SSuyog Pawar /* all candidates apart from the 'LEFT' have been marked as projected */
3862*c83a76b0SSuyog Pawar if(((e_search_cand_loc == TOPLEFT) || (e_search_cand_loc == TOP) ||
3863*c83a76b0SSuyog Pawar (e_search_cand_loc == TOPRIGHT)) &&
3864*c83a76b0SSuyog Pawar (i1_result_id < u1_num_results_in_curr_mvbank) && u1_is_proj_cand)
3865*c83a76b0SSuyog Pawar {
3866*c83a76b0SSuyog Pawar if(e_search_cand_loc == TOPLEFT)
3867*c83a76b0SSuyog Pawar {
3868*c83a76b0SSuyog Pawar u1_is_spatial_cand = ps_ctxt->u1_is_topLeft_available ||
3869*c83a76b0SSuyog Pawar !ps_ctxt->u1_is_left_available;
3870*c83a76b0SSuyog Pawar }
3871*c83a76b0SSuyog Pawar else if(e_search_cand_loc == TOPRIGHT)
3872*c83a76b0SSuyog Pawar {
3873*c83a76b0SSuyog Pawar u1_is_spatial_cand = ps_ctxt->u1_is_topRight_available;
3874*c83a76b0SSuyog Pawar }
3875*c83a76b0SSuyog Pawar else
3876*c83a76b0SSuyog Pawar {
3877*c83a76b0SSuyog Pawar u1_is_spatial_cand = ps_ctxt->u1_is_top_available;
3878*c83a76b0SSuyog Pawar }
3879*c83a76b0SSuyog Pawar
3880*c83a76b0SSuyog Pawar u1_is_proj_cand = !u1_is_spatial_cand;
3881*c83a76b0SSuyog Pawar }
3882*c83a76b0SSuyog Pawar
3883*c83a76b0SSuyog Pawar switch(u1_is_zeroMV_cand + (u1_is_spatial_cand << 1) + (u1_is_proj_cand << 2))
3884*c83a76b0SSuyog Pawar {
3885*c83a76b0SSuyog Pawar case 1:
3886*c83a76b0SSuyog Pawar {
3887*c83a76b0SSuyog Pawar hme_search_node_populator(
3888*c83a76b0SSuyog Pawar ps_search_cands[i].ps_search_node,
3889*c83a76b0SSuyog Pawar &s_zero_mv,
3890*c83a76b0SSuyog Pawar (ZERO_MV == e_search_cand_type) ? ps_ctxt->i1_default_ref_id
3891*c83a76b0SSuyog Pawar : ps_ctxt->i1_alt_default_ref_id,
3892*c83a76b0SSuyog Pawar 0);
3893*c83a76b0SSuyog Pawar
3894*c83a76b0SSuyog Pawar break;
3895*c83a76b0SSuyog Pawar }
3896*c83a76b0SSuyog Pawar case 2:
3897*c83a76b0SSuyog Pawar {
3898*c83a76b0SSuyog Pawar S08 i1_mv_magnitude_shift = 0;
3899*c83a76b0SSuyog Pawar
3900*c83a76b0SSuyog Pawar S32 i4_offset = i4_init_offset_spatial;
3901*c83a76b0SSuyog Pawar
3902*c83a76b0SSuyog Pawar i1_result_id = MIN(i1_result_id, u1_num_results_in_curr_mvbank - 1);
3903*c83a76b0SSuyog Pawar i4_offset += i1_result_id;
3904*c83a76b0SSuyog Pawar
3905*c83a76b0SSuyog Pawar switch(e_search_cand_loc)
3906*c83a76b0SSuyog Pawar {
3907*c83a76b0SSuyog Pawar case LEFT:
3908*c83a76b0SSuyog Pawar {
3909*c83a76b0SSuyog Pawar if(ps_ctxt->u1_is_left_available)
3910*c83a76b0SSuyog Pawar {
3911*c83a76b0SSuyog Pawar i1_mv_magnitude_shift = -2;
3912*c83a76b0SSuyog Pawar
3913*c83a76b0SSuyog Pawar i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_blk;
3914*c83a76b0SSuyog Pawar
3915*c83a76b0SSuyog Pawar ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
3916*c83a76b0SSuyog Pawar pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
3917*c83a76b0SSuyog Pawar }
3918*c83a76b0SSuyog Pawar else
3919*c83a76b0SSuyog Pawar {
3920*c83a76b0SSuyog Pawar i1_mv_magnitude_shift = 0;
3921*c83a76b0SSuyog Pawar
3922*c83a76b0SSuyog Pawar ps_mv = &s_zero_mv;
3923*c83a76b0SSuyog Pawar pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
3924*c83a76b0SSuyog Pawar }
3925*c83a76b0SSuyog Pawar
3926*c83a76b0SSuyog Pawar break;
3927*c83a76b0SSuyog Pawar }
3928*c83a76b0SSuyog Pawar case TOPLEFT:
3929*c83a76b0SSuyog Pawar {
3930*c83a76b0SSuyog Pawar if(ps_ctxt->u1_is_topLeft_available)
3931*c83a76b0SSuyog Pawar {
3932*c83a76b0SSuyog Pawar i1_mv_magnitude_shift = -2;
3933*c83a76b0SSuyog Pawar
3934*c83a76b0SSuyog Pawar i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_blk;
3935*c83a76b0SSuyog Pawar i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;
3936*c83a76b0SSuyog Pawar
3937*c83a76b0SSuyog Pawar ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
3938*c83a76b0SSuyog Pawar pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
3939*c83a76b0SSuyog Pawar }
3940*c83a76b0SSuyog Pawar else
3941*c83a76b0SSuyog Pawar {
3942*c83a76b0SSuyog Pawar i1_mv_magnitude_shift = 0;
3943*c83a76b0SSuyog Pawar
3944*c83a76b0SSuyog Pawar ps_mv = &s_zero_mv;
3945*c83a76b0SSuyog Pawar pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
3946*c83a76b0SSuyog Pawar }
3947*c83a76b0SSuyog Pawar
3948*c83a76b0SSuyog Pawar break;
3949*c83a76b0SSuyog Pawar }
3950*c83a76b0SSuyog Pawar case TOP:
3951*c83a76b0SSuyog Pawar {
3952*c83a76b0SSuyog Pawar if(ps_ctxt->u1_is_top_available)
3953*c83a76b0SSuyog Pawar {
3954*c83a76b0SSuyog Pawar i1_mv_magnitude_shift = -2;
3955*c83a76b0SSuyog Pawar
3956*c83a76b0SSuyog Pawar i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;
3957*c83a76b0SSuyog Pawar
3958*c83a76b0SSuyog Pawar ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
3959*c83a76b0SSuyog Pawar pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
3960*c83a76b0SSuyog Pawar }
3961*c83a76b0SSuyog Pawar else
3962*c83a76b0SSuyog Pawar {
3963*c83a76b0SSuyog Pawar i1_mv_magnitude_shift = 0;
3964*c83a76b0SSuyog Pawar
3965*c83a76b0SSuyog Pawar ps_mv = &s_zero_mv;
3966*c83a76b0SSuyog Pawar pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
3967*c83a76b0SSuyog Pawar }
3968*c83a76b0SSuyog Pawar
3969*c83a76b0SSuyog Pawar break;
3970*c83a76b0SSuyog Pawar }
3971*c83a76b0SSuyog Pawar case TOPRIGHT:
3972*c83a76b0SSuyog Pawar {
3973*c83a76b0SSuyog Pawar if(ps_ctxt->u1_is_topRight_available)
3974*c83a76b0SSuyog Pawar {
3975*c83a76b0SSuyog Pawar i1_mv_magnitude_shift = -2;
3976*c83a76b0SSuyog Pawar
3977*c83a76b0SSuyog Pawar i4_offset += ps_curr_layer_mvbank->i4_num_mvs_per_blk *
3978*c83a76b0SSuyog Pawar u1_search_blk_to_spatial_mvbank_blk_size_factor;
3979*c83a76b0SSuyog Pawar i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;
3980*c83a76b0SSuyog Pawar
3981*c83a76b0SSuyog Pawar ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
3982*c83a76b0SSuyog Pawar pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
3983*c83a76b0SSuyog Pawar }
3984*c83a76b0SSuyog Pawar else
3985*c83a76b0SSuyog Pawar {
3986*c83a76b0SSuyog Pawar i1_mv_magnitude_shift = 0;
3987*c83a76b0SSuyog Pawar ps_mv = &s_zero_mv;
3988*c83a76b0SSuyog Pawar pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
3989*c83a76b0SSuyog Pawar }
3990*c83a76b0SSuyog Pawar
3991*c83a76b0SSuyog Pawar break;
3992*c83a76b0SSuyog Pawar }
3993*c83a76b0SSuyog Pawar default:
3994*c83a76b0SSuyog Pawar {
3995*c83a76b0SSuyog Pawar /* AiyAiyYo!! */
3996*c83a76b0SSuyog Pawar ASSERT(0);
3997*c83a76b0SSuyog Pawar }
3998*c83a76b0SSuyog Pawar }
3999*c83a76b0SSuyog Pawar
4000*c83a76b0SSuyog Pawar hme_search_node_populator(
4001*c83a76b0SSuyog Pawar ps_search_cands[i].ps_search_node, ps_mv, pi1_ref_idx[0], i1_mv_magnitude_shift);
4002*c83a76b0SSuyog Pawar
4003*c83a76b0SSuyog Pawar break;
4004*c83a76b0SSuyog Pawar }
4005*c83a76b0SSuyog Pawar case 4:
4006*c83a76b0SSuyog Pawar {
4007*c83a76b0SSuyog Pawar ASSERT(ILLUSORY_CANDIDATE != e_search_cand_type);
4008*c83a76b0SSuyog Pawar ASSERT(ILLUSORY_LOCATION != e_search_cand_loc);
4009*c83a76b0SSuyog Pawar
4010*c83a76b0SSuyog Pawar i1_result_id = MIN(i1_result_id, u1_num_results_in_coarse_mvbank - 1);
4011*c83a76b0SSuyog Pawar
4012*c83a76b0SSuyog Pawar if(!(u2_is_offset_available & (1 << e_search_cand_loc)))
4013*c83a76b0SSuyog Pawar {
4014*c83a76b0SSuyog Pawar S32 x, y;
4015*c83a76b0SSuyog Pawar
4016*c83a76b0SSuyog Pawar x = i4_pos_x + gai4_search_cand_location_to_x_offset_map[e_search_cand_loc];
4017*c83a76b0SSuyog Pawar y = i4_pos_y + gai4_search_cand_location_to_y_offset_map[e_search_cand_loc];
4018*c83a76b0SSuyog Pawar
4019*c83a76b0SSuyog Pawar /* Safety check to avoid uninitialized access across temporal layers */
4020*c83a76b0SSuyog Pawar x = CLIP3(x, 0, (wd_c - blksize_p));
4021*c83a76b0SSuyog Pawar y = CLIP3(y, 0, (ht_c - blksize_p));
4022*c83a76b0SSuyog Pawar
4023*c83a76b0SSuyog Pawar /* Project the positions to prev layer */
4024*c83a76b0SSuyog Pawar x = x >> blksize_p;
4025*c83a76b0SSuyog Pawar y = y >> blksize_p;
4026*c83a76b0SSuyog Pawar
4027*c83a76b0SSuyog Pawar ai4_cand_offsets[e_search_cand_loc] =
4028*c83a76b0SSuyog Pawar (x * ps_coarse_layer_mvbank->i4_num_mvs_per_blk);
4029*c83a76b0SSuyog Pawar ai4_cand_offsets[e_search_cand_loc] +=
4030*c83a76b0SSuyog Pawar (y * ps_coarse_layer_mvbank->i4_num_mvs_per_row);
4031*c83a76b0SSuyog Pawar ai4_cand_offsets[e_search_cand_loc] += i4_init_offset_projected;
4032*c83a76b0SSuyog Pawar
4033*c83a76b0SSuyog Pawar u2_is_offset_available |= (1 << e_search_cand_loc);
4034*c83a76b0SSuyog Pawar }
4035*c83a76b0SSuyog Pawar
4036*c83a76b0SSuyog Pawar ps_mv =
4037*c83a76b0SSuyog Pawar ps_coarse_layer_mvbank->ps_mv + ai4_cand_offsets[e_search_cand_loc] + i1_result_id;
4038*c83a76b0SSuyog Pawar pi1_ref_idx = ps_coarse_layer_mvbank->pi1_ref_idx +
4039*c83a76b0SSuyog Pawar ai4_cand_offsets[e_search_cand_loc] + i1_result_id;
4040*c83a76b0SSuyog Pawar
4041*c83a76b0SSuyog Pawar hme_search_node_populator(ps_search_cands[i].ps_search_node, ps_mv, pi1_ref_idx[0], 1);
4042*c83a76b0SSuyog Pawar
4043*c83a76b0SSuyog Pawar break;
4044*c83a76b0SSuyog Pawar }
4045*c83a76b0SSuyog Pawar default:
4046*c83a76b0SSuyog Pawar {
4047*c83a76b0SSuyog Pawar /* NoNoNoNoNooooooooNO! */
4048*c83a76b0SSuyog Pawar ASSERT(0);
4049*c83a76b0SSuyog Pawar }
4050*c83a76b0SSuyog Pawar }
4051*c83a76b0SSuyog Pawar
4052*c83a76b0SSuyog Pawar ASSERT(ps_search_cands[i].ps_search_node->i1_ref_idx >= 0);
4053*c83a76b0SSuyog Pawar ASSERT(
4054*c83a76b0SSuyog Pawar !u1_pred_dir
4055*c83a76b0SSuyog Pawar ? (ps_ctxt->pi4_ref_id_lc_to_l0_map[ps_search_cands[i].ps_search_node->i1_ref_idx] <
4056*c83a76b0SSuyog Pawar i4_num_act_ref_l0)
4057*c83a76b0SSuyog Pawar : (ps_ctxt->pi4_ref_id_lc_to_l1_map[ps_search_cands[i].ps_search_node->i1_ref_idx] <
4058*c83a76b0SSuyog Pawar ps_ctxt->i4_num_act_ref_l1));
4059*c83a76b0SSuyog Pawar }
4060*c83a76b0SSuyog Pawar
4061*c83a76b0SSuyog Pawar return i4_num_srch_cands;
4062*c83a76b0SSuyog Pawar }
4063*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_mv_clipper
*
*  @brief  Walks the search candidate list and clips the MV of each candidate
*          against the MV range entry selected by that candidate's ref idx
*
*  @param[in,out] ps_search_prms_blk : search params; supplies the candidate
*                 list (ps_search_candts) and the per reference MV ranges
*                 (aps_mv_range). The MVs hanging off the candidates are
*                 modified in place
*
*  @param[in]  i4_num_srch_cands : number of entries of the candidate list
*              that are processed
*
*  @param[in]  i1_check_for_mult_refs : not referenced by this function
*
*  @param[in]  u1_fpel_refine_extent : forwarded to CLIP_MV_WITHIN_RANGE
*
*  @param[in]  u1_hpel_refine_extent : forwarded to CLIP_MV_WITHIN_RANGE
*
*  @param[in]  u1_qpel_refine_extent : forwarded to CLIP_MV_WITHIN_RANGE
*
*  @return None
********************************************************************************
*/
void hme_mv_clipper(
    hme_search_prms_t *ps_search_prms_blk,
    S32 i4_num_srch_cands,
    S08 i1_check_for_mult_refs,
    U08 u1_fpel_refine_extent,
    U08 u1_hpel_refine_extent,
    U08 u1_qpel_refine_extent)
{
    S32 i4_cand_idx = 0;

    while(i4_cand_idx < i4_num_srch_cands)
    {
        search_node_t *ps_node = ps_search_prms_blk->ps_search_candts[i4_cand_idx].ps_search_node;

        /* The range to clip against is picked by the candidate's own ref idx */
        range_prms_t *ps_mv_limits = ps_search_prms_blk->aps_mv_range[ps_node->i1_ref_idx];

        /* Clipping is done at this stage because two candidates may end up */
        /* identical once clipped; such duplicates are meant to be dropped  */
        /* by the de-duplication that follows                               */
        CLIP_MV_WITHIN_RANGE(
            ps_node->ps_mv->i2_mvx,
            ps_node->ps_mv->i2_mvy,
            ps_mv_limits,
            u1_fpel_refine_extent,
            u1_hpel_refine_extent,
            u1_qpel_refine_extent);

        i4_cand_idx++;
    }
}
4093*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_init_pred_buf_info
*
*  @brief  Requests one free pred buffer from the buffer manager and fills
*          row 0 of 'ps_info' so that it describes that buffer
*
*  @param[out] ps_info : ps_info[0][0] is always filled; ps_info[0][1] is
*              filled only when e_part_type is not PRT_2Nx2N
*
*  @param[in,out] ps_buf_mngr : pred buffer manager; its usage indicator is
*                 handed to ihevce_get_free_pred_buf_indices and its
*                 apu1_pred_bufs array supplies the buffer pointer
*
*  @param[in]  u1_pu1_wd : width of the first partition; used as the column
*              offset of the second partition
*
*  @param[in]  u1_pu1_ht : height of the first partition; used as the row
*              offset of the second partition
*
*  @param[in]  e_part_type : partition type
*
*  @return None. If exactly one buffer could not be obtained, ASSERT(0) is
*          hit and 'ps_info' is left untouched
********************************************************************************
*/
void hme_init_pred_buf_info(
    hme_pred_buf_info_t (*ps_info)[MAX_NUM_INTER_PARTS],
    hme_pred_buf_mngr_t *ps_buf_mngr,
    U08 u1_pu1_wd,
    U08 u1_pu1_ht,
    PART_TYPE_T e_part_type)
{
    U08 u1_buf_id;
    hme_pred_buf_info_t *ps_first_part;
    hme_pred_buf_info_t *ps_second_part;

    if(1 !=
       ihevce_get_free_pred_buf_indices(&u1_buf_id, &ps_buf_mngr->u4_pred_buf_usage_indicator, 1))
    {
        ASSERT(0);
        return;
    }

    /* First partition starts at the beginning of the buffer */
    ps_first_part = &ps_info[0][0];
    ps_first_part->i4_pred_stride = MAX_CU_SIZE;
    ps_first_part->pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_buf_id];
    ps_first_part->u1_pred_buf_array_id = u1_buf_id;

    if(PRT_2Nx2N == e_part_type)
    {
        return;
    }

    /* Second partition shares the same buffer and stride */
    ps_second_part = &ps_info[0][1];
    ps_second_part->i4_pred_stride = MAX_CU_SIZE;

    if(gai1_is_part_vertical[ge_part_type_to_part_id[e_part_type][0]])
    {
        /* Placed 'u1_pu1_ht' rows below the first partition */
        ps_second_part->pu1_pred =
            ps_buf_mngr->apu1_pred_bufs[u1_buf_id] + u1_pu1_ht * ps_second_part->i4_pred_stride;
    }
    else
    {
        /* Placed 'u1_pu1_wd' columns to the right of the first partition */
        ps_second_part->pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_buf_id] + u1_pu1_wd;
    }

    ps_second_part->u1_pred_buf_array_id = u1_buf_id;
}
4125*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_debrief_bipred_eval
*
*  @brief  Decides which pred buffer represents the given part type result,
*          stitches the two partitions into that single buffer where needed
*          and releases the pred buffers that are no longer required
*
*  @param[in,out] ps_part_type_result : part type result. The pred modes and
*                 sizes of its PUs are read; 'pu1_pred' and 'i4_pred_stride'
*                 are written
*
*  @param[in]  ps_pred_buf_info : pred buffer descriptors. Row 2 is consulted
*              for a PU whose pred mode is PRED_BI, row 0 otherwise; the
*              column is the PU index. A 'u1_pred_buf_array_id' equal to
*              UCHAR_MAX is handled as "data is not held in one of the
*              allocated buffers" (NOTE(review): inferred from the handling
*              below - confirm with the code that fills these descriptors)
*
*  @param[in,out] ps_pred_buf_mngr : pred buffer manager; its usage indicator
*                 is updated through ihevce_set_pred_buf_as_free
*
*  @param[in]  pu1_allocated_pred_buf_array_indixes : the three buffer ids
*              (entries 0, 1 and 2) that were allocated for this evaluation
*
*  @param[in]  ps_cmn_utils_optimised_function_list : provides pf_copy_2d
*
*  @return None
*
*  @remarks In every non 2Nx2N case the second PU is located at an offset of
*           'u1_pu1_ht' rows (when u1_is_part_vertical is set) or 'u1_pu1_wd'
*           columns (otherwise) from the first PU inside the chosen buffer
********************************************************************************
*/
void hme_debrief_bipred_eval(
    part_type_results_t *ps_part_type_result,
    hme_pred_buf_info_t (*ps_pred_buf_info)[MAX_NUM_INTER_PARTS],
    hme_pred_buf_mngr_t *ps_pred_buf_mngr,
    U08 *pu1_allocated_pred_buf_array_indixes,
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
{
    PART_TYPE_T e_part_type = (PART_TYPE_T)ps_part_type_result->u1_part_type;

    U32 *pu4_pred_buf_usage_indicator = &ps_pred_buf_mngr->u4_pred_buf_usage_indicator;
    U08 u1_is_part_vertical = gai1_is_part_vertical[ge_part_type_to_part_id[e_part_type][0]];

    /* Part type 0 : only the first PU result is looked at */
    /* NOTE(review): presumably PRT_2Nx2N - confirm against the enum */
    if(0 == ps_part_type_result->u1_part_type)
    {
        if(ps_part_type_result->as_pu_results->pu.b2_pred_mode == PRED_BI)
        {
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);

            /* Bi pred buffer is retained; buffers 0 and 1 are released */
            ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
            ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
        }
        else
        {
            /* Row 0 pred is retained; buffers 2 and 1 are released */
            ps_part_type_result->pu1_pred = ps_pred_buf_info[0][0].pu1_pred;
            ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);

            /* Retained pred is not tagged with a buffer id, hence */
            /* buffer 0 is released as well */
            if(UCHAR_MAX == ps_pred_buf_info[0][0].u1_pred_buf_array_id)
            {
                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
            }
        }
    }
    else
    {
        U08 *pu1_src_pred;
        U08 *pu1_dst_pred;
        S32 i4_src_pred_stride;
        S32 i4_dst_pred_stride;

        /* PU dimensions in pixels, derived from the b4_wd / b4_ht fields */
        U08 u1_pu1_wd = (ps_part_type_result->as_pu_results[0].pu.b4_wd + 1) << 2;
        U08 u1_pu1_ht = (ps_part_type_result->as_pu_results[0].pu.b4_ht + 1) << 2;
        U08 u1_pu2_wd = (ps_part_type_result->as_pu_results[1].pu.b4_wd + 1) << 2;
        U08 u1_pu2_ht = (ps_part_type_result->as_pu_results[1].pu.b4_ht + 1) << 2;

        /* Bit 0 : PU1 is PRED_BI, Bit 1 : PU2 is PRED_BI */
        U08 u1_condition_for_switch =
            (ps_part_type_result->as_pu_results[0].pu.b2_pred_mode == PRED_BI) |
            ((ps_part_type_result->as_pu_results[1].pu.b2_pred_mode == PRED_BI) << 1);

        switch(u1_condition_for_switch)
        {
        case 0:
        {
            /* Neither PU is PRED_BI : buffer 0 is the result buffer */
            ps_part_type_result->pu1_pred =
                ps_pred_buf_mngr->apu1_pred_bufs[pu1_allocated_pred_buf_array_indixes[0]];
            ps_part_type_result->i4_pred_stride = MAX_CU_SIZE;

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);

            /* PU1 pred not tagged with a buffer id : copy it into the result buffer */
            if(UCHAR_MAX == ps_pred_buf_info[0][0].u1_pred_buf_array_id)
            {
                pu1_src_pred = ps_pred_buf_info[0][0].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred;
                i4_src_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu1_wd,
                    u1_pu1_ht);
            }

            /* PU2 pred not tagged with a buffer id : copy it to PU2's */
            /* position inside the result buffer */
            if(UCHAR_MAX == ps_pred_buf_info[0][1].u1_pred_buf_array_id)
            {
                pu1_src_pred = ps_pred_buf_info[0][1].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred +
                               (u1_is_part_vertical
                                    ? u1_pu1_ht * ps_part_type_result->i4_pred_stride
                                    : u1_pu1_wd);
                i4_src_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu2_wd,
                    u1_pu2_ht);
            }

            break;
        }
        case 1:
        {
            /* Only PU1 is PRED_BI */
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);

            /* Copy PU1 pred into PU2's pred buf */
            if(((u1_pu1_ht < u1_pu2_ht) || (u1_pu1_wd < u1_pu2_wd)) &&
               (UCHAR_MAX != ps_pred_buf_info[0][1].u1_pred_buf_array_id))
            {
                /* Step back from PU2's position to get the buffer origin */
                ps_part_type_result->pu1_pred =
                    ps_pred_buf_info[0][1].pu1_pred -
                    (u1_is_part_vertical ? u1_pu1_ht * ps_pred_buf_info[0][1].i4_pred_stride
                                         : u1_pu1_wd);
                ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;

                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);

                pu1_src_pred = ps_pred_buf_info[2][0].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred;
                i4_src_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu1_wd,
                    u1_pu1_ht);
            }
            else
            {
                /* Copy PU2 pred into PU1's (bi pred) buf */
                ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
                ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;

                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);

                /* PU2 has to land at PU2's position of the result buffer. */
                /* Writing it at the buffer origin would overwrite the bi  */
                /* pred of PU1 and leave PU2's region unfilled             */
                pu1_src_pred = ps_pred_buf_info[0][1].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred +
                               (u1_is_part_vertical
                                    ? u1_pu1_ht * ps_part_type_result->i4_pred_stride
                                    : u1_pu1_wd);
                i4_src_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu2_wd,
                    u1_pu2_ht);
            }

            break;
        }
        case 2:
        {
            /* Only PU2 is PRED_BI */
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][1].u1_pred_buf_array_id);

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);

            /* Copy PU2 pred into PU1's pred buf */
            if(((u1_pu1_ht > u1_pu2_ht) || (u1_pu1_wd > u1_pu2_wd)) &&
               (UCHAR_MAX != ps_pred_buf_info[0][0].u1_pred_buf_array_id))
            {
                ps_part_type_result->pu1_pred = ps_pred_buf_info[0][0].pu1_pred;
                ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;

                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);

                pu1_src_pred = ps_pred_buf_info[2][1].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred +
                               (u1_is_part_vertical
                                    ? u1_pu1_ht * ps_part_type_result->i4_pred_stride
                                    : u1_pu1_wd);
                i4_src_pred_stride = ps_pred_buf_info[2][1].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu2_wd,
                    u1_pu2_ht);
            }
            else
            {
                /* Copy PU1 pred into PU2's (bi pred) buf; step back from */
                /* PU2's position to get the buffer origin */
                ps_part_type_result->pu1_pred =
                    ps_pred_buf_info[2][1].pu1_pred -
                    (u1_is_part_vertical ? u1_pu1_ht * ps_pred_buf_info[2][1].i4_pred_stride
                                         : u1_pu1_wd);
                ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][1].i4_pred_stride;

                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);

                pu1_src_pred = ps_pred_buf_info[0][0].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred;
                i4_src_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu1_wd,
                    u1_pu1_ht);
            }

            break;
        }
        case 3:
        {
            /* Both PUs are PRED_BI and are expected in the same buffer */
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][1].u1_pred_buf_array_id);
            ASSERT(
                ps_pred_buf_info[2][1].u1_pred_buf_array_id ==
                ps_pred_buf_info[2][0].u1_pred_buf_array_id);

            ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
            ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);

            /* Buffer 1 is not part of the result either. Every other path */
            /* that retains the bi pred buffer releases it as well, so it  */
            /* is released here too in order not to leak it                */
            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);

            break;
        }
        }
    }
}
4376*c83a76b0SSuyog Pawar
hme_decide_search_candidate_priority_in_l1_and_l2_me(SEARCH_CANDIDATE_TYPE_T e_cand_type,ME_QUALITY_PRESETS_T e_quality_preset)4377*c83a76b0SSuyog Pawar U08 hme_decide_search_candidate_priority_in_l1_and_l2_me(
4378*c83a76b0SSuyog Pawar SEARCH_CANDIDATE_TYPE_T e_cand_type, ME_QUALITY_PRESETS_T e_quality_preset)
4379*c83a76b0SSuyog Pawar {
4380*c83a76b0SSuyog Pawar U08 u1_priority_val =
4381*c83a76b0SSuyog Pawar gau1_search_cand_priority_in_l1_and_l2_me[e_quality_preset >= ME_MEDIUM_SPEED][e_cand_type];
4382*c83a76b0SSuyog Pawar
4383*c83a76b0SSuyog Pawar if(UCHAR_MAX == u1_priority_val)
4384*c83a76b0SSuyog Pawar {
4385*c83a76b0SSuyog Pawar ASSERT(0);
4386*c83a76b0SSuyog Pawar }
4387*c83a76b0SSuyog Pawar
4388*c83a76b0SSuyog Pawar ASSERT(u1_priority_val <= MAX_INIT_CANDTS);
4389*c83a76b0SSuyog Pawar
4390*c83a76b0SSuyog Pawar return u1_priority_val;
4391*c83a76b0SSuyog Pawar }
4392*c83a76b0SSuyog Pawar
/**
********************************************************************************
*  @fn     hme_decide_search_candidate_priority_in_l0_me
*
*  @brief  Looks up the priority of a search candidate type in
*          gau1_search_cand_priority_in_l0_me
*
*  @param[in]  e_cand_type : candidate type; column of the table
*
*  @param[in]  u1_index : row of the table
*
*  @return Table entry for the given row and column. An entry equal to
*          UCHAR_MAX, or one larger than MAX_INIT_CANDTS, trips an ASSERT
********************************************************************************
*/
U08 hme_decide_search_candidate_priority_in_l0_me(SEARCH_CANDIDATE_TYPE_T e_cand_type, U08 u1_index)
{
    U08 u1_priority_val;

    u1_priority_val = gau1_search_cand_priority_in_l0_me[u1_index][e_cand_type];

    /* NOTE(review): UCHAR_MAX presumably flags a candidate type that has */
    /* no priority assigned in this row - confirm against the table       */
    if(u1_priority_val == UCHAR_MAX)
    {
        ASSERT(0);
    }

    ASSERT(u1_priority_val <= MAX_INIT_CANDTS);

    return u1_priority_val;
}
4406*c83a76b0SSuyog Pawar
hme_search_cand_data_init(S32 * pi4_id_Z,S32 * pi4_id_coloc,S32 * pi4_num_coloc_cands,U08 * pu1_search_candidate_list_index,S32 i4_num_act_ref_l0,S32 i4_num_act_ref_l1,U08 u1_is_bidir_enabled,U08 u1_4x4_blk_in_l1me)4407*c83a76b0SSuyog Pawar void hme_search_cand_data_init(
4408*c83a76b0SSuyog Pawar S32 *pi4_id_Z,
4409*c83a76b0SSuyog Pawar S32 *pi4_id_coloc,
4410*c83a76b0SSuyog Pawar S32 *pi4_num_coloc_cands,
4411*c83a76b0SSuyog Pawar U08 *pu1_search_candidate_list_index,
4412*c83a76b0SSuyog Pawar S32 i4_num_act_ref_l0,
4413*c83a76b0SSuyog Pawar S32 i4_num_act_ref_l1,
4414*c83a76b0SSuyog Pawar U08 u1_is_bidir_enabled,
4415*c83a76b0SSuyog Pawar U08 u1_4x4_blk_in_l1me)
4416*c83a76b0SSuyog Pawar {
4417*c83a76b0SSuyog Pawar S32 i, j;
4418*c83a76b0SSuyog Pawar S32 i4_num_coloc_cands;
4419*c83a76b0SSuyog Pawar
4420*c83a76b0SSuyog Pawar U08 u1_search_candidate_list_index;
4421*c83a76b0SSuyog Pawar
4422*c83a76b0SSuyog Pawar if(!u1_is_bidir_enabled && !u1_4x4_blk_in_l1me)
4423*c83a76b0SSuyog Pawar {
4424*c83a76b0SSuyog Pawar S32 i;
4425*c83a76b0SSuyog Pawar
4426*c83a76b0SSuyog Pawar u1_search_candidate_list_index = (i4_num_act_ref_l0 - 1) * 2;
4427*c83a76b0SSuyog Pawar i4_num_coloc_cands = i4_num_act_ref_l0 * 2;
4428*c83a76b0SSuyog Pawar
4429*c83a76b0SSuyog Pawar switch(i4_num_act_ref_l0)
4430*c83a76b0SSuyog Pawar {
4431*c83a76b0SSuyog Pawar case 1:
4432*c83a76b0SSuyog Pawar {
4433*c83a76b0SSuyog Pawar for(i = 0; i < 2; i++)
4434*c83a76b0SSuyog Pawar {
4435*c83a76b0SSuyog Pawar pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4436*c83a76b0SSuyog Pawar (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4437*c83a76b0SSuyog Pawar u1_search_candidate_list_index);
4438*c83a76b0SSuyog Pawar }
4439*c83a76b0SSuyog Pawar
4440*c83a76b0SSuyog Pawar break;
4441*c83a76b0SSuyog Pawar }
4442*c83a76b0SSuyog Pawar case 2:
4443*c83a76b0SSuyog Pawar {
4444*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
4445*c83a76b0SSuyog Pawar {
4446*c83a76b0SSuyog Pawar pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4447*c83a76b0SSuyog Pawar (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4448*c83a76b0SSuyog Pawar u1_search_candidate_list_index);
4449*c83a76b0SSuyog Pawar }
4450*c83a76b0SSuyog Pawar
4451*c83a76b0SSuyog Pawar break;
4452*c83a76b0SSuyog Pawar }
4453*c83a76b0SSuyog Pawar case 3:
4454*c83a76b0SSuyog Pawar {
4455*c83a76b0SSuyog Pawar for(i = 0; i < 6; i++)
4456*c83a76b0SSuyog Pawar {
4457*c83a76b0SSuyog Pawar pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4458*c83a76b0SSuyog Pawar (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4459*c83a76b0SSuyog Pawar u1_search_candidate_list_index);
4460*c83a76b0SSuyog Pawar }
4461*c83a76b0SSuyog Pawar
4462*c83a76b0SSuyog Pawar break;
4463*c83a76b0SSuyog Pawar }
4464*c83a76b0SSuyog Pawar case 4:
4465*c83a76b0SSuyog Pawar {
4466*c83a76b0SSuyog Pawar for(i = 0; i < 8; i++)
4467*c83a76b0SSuyog Pawar {
4468*c83a76b0SSuyog Pawar pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4469*c83a76b0SSuyog Pawar (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4470*c83a76b0SSuyog Pawar u1_search_candidate_list_index);
4471*c83a76b0SSuyog Pawar }
4472*c83a76b0SSuyog Pawar
4473*c83a76b0SSuyog Pawar break;
4474*c83a76b0SSuyog Pawar }
4475*c83a76b0SSuyog Pawar default:
4476*c83a76b0SSuyog Pawar {
4477*c83a76b0SSuyog Pawar ASSERT(0);
4478*c83a76b0SSuyog Pawar }
4479*c83a76b0SSuyog Pawar }
4480*c83a76b0SSuyog Pawar
4481*c83a76b0SSuyog Pawar *pi4_num_coloc_cands = i4_num_coloc_cands;
4482*c83a76b0SSuyog Pawar *pu1_search_candidate_list_index = u1_search_candidate_list_index;
4483*c83a76b0SSuyog Pawar }
4484*c83a76b0SSuyog Pawar else if(!u1_is_bidir_enabled && u1_4x4_blk_in_l1me)
4485*c83a76b0SSuyog Pawar {
4486*c83a76b0SSuyog Pawar S32 i;
4487*c83a76b0SSuyog Pawar
4488*c83a76b0SSuyog Pawar i4_num_coloc_cands = i4_num_act_ref_l0 * 2;
4489*c83a76b0SSuyog Pawar u1_search_candidate_list_index = (i4_num_act_ref_l0 - 1) * 2 + 1;
4490*c83a76b0SSuyog Pawar
4491*c83a76b0SSuyog Pawar switch(i4_num_act_ref_l0)
4492*c83a76b0SSuyog Pawar {
4493*c83a76b0SSuyog Pawar case 1:
4494*c83a76b0SSuyog Pawar {
4495*c83a76b0SSuyog Pawar for(i = 0; i < 2; i++)
4496*c83a76b0SSuyog Pawar {
4497*c83a76b0SSuyog Pawar pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4498*c83a76b0SSuyog Pawar (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4499*c83a76b0SSuyog Pawar u1_search_candidate_list_index);
4500*c83a76b0SSuyog Pawar }
4501*c83a76b0SSuyog Pawar
4502*c83a76b0SSuyog Pawar pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4503*c83a76b0SSuyog Pawar PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
4504*c83a76b0SSuyog Pawar
4505*c83a76b0SSuyog Pawar pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
4506*c83a76b0SSuyog Pawar PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
4507*c83a76b0SSuyog Pawar
4508*c83a76b0SSuyog Pawar pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
4509*c83a76b0SSuyog Pawar PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
4510*c83a76b0SSuyog Pawar
4511*c83a76b0SSuyog Pawar i4_num_coloc_cands += 3;
4512*c83a76b0SSuyog Pawar
4513*c83a76b0SSuyog Pawar break;
4514*c83a76b0SSuyog Pawar }
4515*c83a76b0SSuyog Pawar case 2:
4516*c83a76b0SSuyog Pawar {
4517*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
4518*c83a76b0SSuyog Pawar {
4519*c83a76b0SSuyog Pawar pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4520*c83a76b0SSuyog Pawar (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4521*c83a76b0SSuyog Pawar u1_search_candidate_list_index);
4522*c83a76b0SSuyog Pawar }
4523*c83a76b0SSuyog Pawar
4524*c83a76b0SSuyog Pawar pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4525*c83a76b0SSuyog Pawar PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
4526*c83a76b0SSuyog Pawar
4527*c83a76b0SSuyog Pawar pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
4528*c83a76b0SSuyog Pawar PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
4529*c83a76b0SSuyog Pawar
4530*c83a76b0SSuyog Pawar pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
4531*c83a76b0SSuyog Pawar PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
4532*c83a76b0SSuyog Pawar
4533*c83a76b0SSuyog Pawar pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me(
4534*c83a76b0SSuyog Pawar PROJECTED_COLOC_TR1, u1_search_candidate_list_index);
4535*c83a76b0SSuyog Pawar
4536*c83a76b0SSuyog Pawar pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me(
4537*c83a76b0SSuyog Pawar PROJECTED_COLOC_BL1, u1_search_candidate_list_index);
4538*c83a76b0SSuyog Pawar
4539*c83a76b0SSuyog Pawar pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me(
4540*c83a76b0SSuyog Pawar PROJECTED_COLOC_BR1, u1_search_candidate_list_index);
4541*c83a76b0SSuyog Pawar
4542*c83a76b0SSuyog Pawar i4_num_coloc_cands += 6;
4543*c83a76b0SSuyog Pawar
4544*c83a76b0SSuyog Pawar break;
4545*c83a76b0SSuyog Pawar }
4546*c83a76b0SSuyog Pawar case 3:
4547*c83a76b0SSuyog Pawar {
4548*c83a76b0SSuyog Pawar for(i = 0; i < 6; i++)
4549*c83a76b0SSuyog Pawar {
4550*c83a76b0SSuyog Pawar pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4551*c83a76b0SSuyog Pawar (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4552*c83a76b0SSuyog Pawar u1_search_candidate_list_index);
4553*c83a76b0SSuyog Pawar }
4554*c83a76b0SSuyog Pawar
4555*c83a76b0SSuyog Pawar pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4556*c83a76b0SSuyog Pawar PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
4557*c83a76b0SSuyog Pawar
4558*c83a76b0SSuyog Pawar pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
4559*c83a76b0SSuyog Pawar PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
4560*c83a76b0SSuyog Pawar
4561*c83a76b0SSuyog Pawar pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
4562*c83a76b0SSuyog Pawar PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
4563*c83a76b0SSuyog Pawar
4564*c83a76b0SSuyog Pawar pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me(
4565*c83a76b0SSuyog Pawar PROJECTED_COLOC_TR1, u1_search_candidate_list_index);
4566*c83a76b0SSuyog Pawar
4567*c83a76b0SSuyog Pawar pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me(
4568*c83a76b0SSuyog Pawar PROJECTED_COLOC_BL1, u1_search_candidate_list_index);
4569*c83a76b0SSuyog Pawar
4570*c83a76b0SSuyog Pawar pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me(
4571*c83a76b0SSuyog Pawar PROJECTED_COLOC_BR1, u1_search_candidate_list_index);
4572*c83a76b0SSuyog Pawar
4573*c83a76b0SSuyog Pawar i4_num_coloc_cands += 6;
4574*c83a76b0SSuyog Pawar
4575*c83a76b0SSuyog Pawar break;
4576*c83a76b0SSuyog Pawar }
4577*c83a76b0SSuyog Pawar case 4:
4578*c83a76b0SSuyog Pawar {
4579*c83a76b0SSuyog Pawar for(i = 0; i < 8; i++)
4580*c83a76b0SSuyog Pawar {
4581*c83a76b0SSuyog Pawar pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4582*c83a76b0SSuyog Pawar (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4583*c83a76b0SSuyog Pawar u1_search_candidate_list_index);
4584*c83a76b0SSuyog Pawar }
4585*c83a76b0SSuyog Pawar
4586*c83a76b0SSuyog Pawar pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4587*c83a76b0SSuyog Pawar PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
4588*c83a76b0SSuyog Pawar
4589*c83a76b0SSuyog Pawar pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
4590*c83a76b0SSuyog Pawar PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
4591*c83a76b0SSuyog Pawar
4592*c83a76b0SSuyog Pawar pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
4593*c83a76b0SSuyog Pawar PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
4594*c83a76b0SSuyog Pawar
4595*c83a76b0SSuyog Pawar pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me(
4596*c83a76b0SSuyog Pawar PROJECTED_COLOC_TR1, u1_search_candidate_list_index);
4597*c83a76b0SSuyog Pawar
4598*c83a76b0SSuyog Pawar pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me(
4599*c83a76b0SSuyog Pawar PROJECTED_COLOC_BL1, u1_search_candidate_list_index);
4600*c83a76b0SSuyog Pawar
4601*c83a76b0SSuyog Pawar pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me(
4602*c83a76b0SSuyog Pawar PROJECTED_COLOC_BR1, u1_search_candidate_list_index);
4603*c83a76b0SSuyog Pawar
4604*c83a76b0SSuyog Pawar i4_num_coloc_cands += 6;
4605*c83a76b0SSuyog Pawar
4606*c83a76b0SSuyog Pawar break;
4607*c83a76b0SSuyog Pawar }
4608*c83a76b0SSuyog Pawar default:
4609*c83a76b0SSuyog Pawar {
4610*c83a76b0SSuyog Pawar ASSERT(0);
4611*c83a76b0SSuyog Pawar }
4612*c83a76b0SSuyog Pawar }
4613*c83a76b0SSuyog Pawar
4614*c83a76b0SSuyog Pawar *pi4_num_coloc_cands = i4_num_coloc_cands;
4615*c83a76b0SSuyog Pawar *pu1_search_candidate_list_index = u1_search_candidate_list_index;
4616*c83a76b0SSuyog Pawar }
4617*c83a76b0SSuyog Pawar else
4618*c83a76b0SSuyog Pawar {
4619*c83a76b0SSuyog Pawar /* The variable 'u1_search_candidate_list_index' is hardcoded */
4620*c83a76b0SSuyog Pawar /* to 10 and 11 respectively. But, these values are not returned */
4621*c83a76b0SSuyog Pawar /* by this function since the actual values are dependent on */
4622*c83a76b0SSuyog Pawar /* the number of refs in L0 and L1 respectively */
4623*c83a76b0SSuyog Pawar /* Hence, the actual return values are being recomputed */
4624*c83a76b0SSuyog Pawar /* in the latter part of this block */
4625*c83a76b0SSuyog Pawar
4626*c83a76b0SSuyog Pawar if(!u1_4x4_blk_in_l1me)
4627*c83a76b0SSuyog Pawar {
4628*c83a76b0SSuyog Pawar u1_search_candidate_list_index = 10;
4629*c83a76b0SSuyog Pawar
4630*c83a76b0SSuyog Pawar i4_num_coloc_cands = 2 + (2 * ((i4_num_act_ref_l0 > 1) || (i4_num_act_ref_l1 > 1)));
4631*c83a76b0SSuyog Pawar
4632*c83a76b0SSuyog Pawar for(i = 0; i < i4_num_coloc_cands; i++)
4633*c83a76b0SSuyog Pawar {
4634*c83a76b0SSuyog Pawar pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4635*c83a76b0SSuyog Pawar (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4636*c83a76b0SSuyog Pawar u1_search_candidate_list_index);
4637*c83a76b0SSuyog Pawar }
4638*c83a76b0SSuyog Pawar }
4639*c83a76b0SSuyog Pawar else
4640*c83a76b0SSuyog Pawar {
4641*c83a76b0SSuyog Pawar u1_search_candidate_list_index = 11;
4642*c83a76b0SSuyog Pawar
4643*c83a76b0SSuyog Pawar i4_num_coloc_cands = 2 + (2 * ((i4_num_act_ref_l0 > 1) || (i4_num_act_ref_l1 > 1)));
4644*c83a76b0SSuyog Pawar
4645*c83a76b0SSuyog Pawar for(i = 0; i < i4_num_coloc_cands; i++)
4646*c83a76b0SSuyog Pawar {
4647*c83a76b0SSuyog Pawar pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4648*c83a76b0SSuyog Pawar (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4649*c83a76b0SSuyog Pawar u1_search_candidate_list_index);
4650*c83a76b0SSuyog Pawar }
4651*c83a76b0SSuyog Pawar
4652*c83a76b0SSuyog Pawar pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4653*c83a76b0SSuyog Pawar PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
4654*c83a76b0SSuyog Pawar
4655*c83a76b0SSuyog Pawar pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
4656*c83a76b0SSuyog Pawar PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
4657*c83a76b0SSuyog Pawar
4658*c83a76b0SSuyog Pawar pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
4659*c83a76b0SSuyog Pawar PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
4660*c83a76b0SSuyog Pawar }
4661*c83a76b0SSuyog Pawar
4662*c83a76b0SSuyog Pawar for(j = 0; j < 2; j++)
4663*c83a76b0SSuyog Pawar {
4664*c83a76b0SSuyog Pawar if(0 == j)
4665*c83a76b0SSuyog Pawar {
4666*c83a76b0SSuyog Pawar pu1_search_candidate_list_index[j] =
4667*c83a76b0SSuyog Pawar 8 + ((i4_num_act_ref_l0 > 1) * 2) + u1_4x4_blk_in_l1me;
4668*c83a76b0SSuyog Pawar pi4_num_coloc_cands[j] =
4669*c83a76b0SSuyog Pawar (u1_4x4_blk_in_l1me * 3) + 2 + ((i4_num_act_ref_l0 > 1) * 2);
4670*c83a76b0SSuyog Pawar }
4671*c83a76b0SSuyog Pawar else
4672*c83a76b0SSuyog Pawar {
4673*c83a76b0SSuyog Pawar pu1_search_candidate_list_index[j] =
4674*c83a76b0SSuyog Pawar 8 + ((i4_num_act_ref_l1 > 1) * 2) + u1_4x4_blk_in_l1me;
4675*c83a76b0SSuyog Pawar pi4_num_coloc_cands[j] =
4676*c83a76b0SSuyog Pawar (u1_4x4_blk_in_l1me * 3) + 2 + ((i4_num_act_ref_l1 > 1) * 2);
4677*c83a76b0SSuyog Pawar }
4678*c83a76b0SSuyog Pawar }
4679*c83a76b0SSuyog Pawar }
4680*c83a76b0SSuyog Pawar
4681*c83a76b0SSuyog Pawar if(i4_num_act_ref_l0 || i4_num_act_ref_l1)
4682*c83a76b0SSuyog Pawar {
4683*c83a76b0SSuyog Pawar pi4_id_Z[0] = hme_decide_search_candidate_priority_in_l0_me(
4684*c83a76b0SSuyog Pawar (SEARCH_CANDIDATE_TYPE_T)ZERO_MV, pu1_search_candidate_list_index[0]);
4685*c83a76b0SSuyog Pawar }
4686*c83a76b0SSuyog Pawar
4687*c83a76b0SSuyog Pawar if((i4_num_act_ref_l0 > 1) && !u1_is_bidir_enabled)
4688*c83a76b0SSuyog Pawar {
4689*c83a76b0SSuyog Pawar pi4_id_Z[1] = hme_decide_search_candidate_priority_in_l0_me(
4690*c83a76b0SSuyog Pawar (SEARCH_CANDIDATE_TYPE_T)ZERO_MV_ALTREF, pu1_search_candidate_list_index[0]);
4691*c83a76b0SSuyog Pawar }
4692*c83a76b0SSuyog Pawar }
4693*c83a76b0SSuyog Pawar
4694*c83a76b0SSuyog Pawar static U08
hme_determine_base_block_size(S32 * pi4_valid_part_array,S32 i4_num_valid_parts,U08 u1_cu_size)4695*c83a76b0SSuyog Pawar hme_determine_base_block_size(S32 *pi4_valid_part_array, S32 i4_num_valid_parts, U08 u1_cu_size)
4696*c83a76b0SSuyog Pawar {
4697*c83a76b0SSuyog Pawar ASSERT(i4_num_valid_parts > 0);
4698*c83a76b0SSuyog Pawar
4699*c83a76b0SSuyog Pawar if(1 == i4_num_valid_parts)
4700*c83a76b0SSuyog Pawar {
4701*c83a76b0SSuyog Pawar ASSERT(pi4_valid_part_array[i4_num_valid_parts - 1] == PART_ID_2Nx2N);
4702*c83a76b0SSuyog Pawar
4703*c83a76b0SSuyog Pawar return u1_cu_size;
4704*c83a76b0SSuyog Pawar }
4705*c83a76b0SSuyog Pawar else
4706*c83a76b0SSuyog Pawar {
4707*c83a76b0SSuyog Pawar if(pi4_valid_part_array[i4_num_valid_parts - 1] <= PART_ID_NxN_BR)
4708*c83a76b0SSuyog Pawar {
4709*c83a76b0SSuyog Pawar return u1_cu_size / 2;
4710*c83a76b0SSuyog Pawar }
4711*c83a76b0SSuyog Pawar else if(pi4_valid_part_array[i4_num_valid_parts - 1] <= PART_ID_nRx2N_R)
4712*c83a76b0SSuyog Pawar {
4713*c83a76b0SSuyog Pawar return u1_cu_size / 4;
4714*c83a76b0SSuyog Pawar }
4715*c83a76b0SSuyog Pawar }
4716*c83a76b0SSuyog Pawar
4717*c83a76b0SSuyog Pawar return u1_cu_size / 4;
4718*c83a76b0SSuyog Pawar }
4719*c83a76b0SSuyog Pawar
hme_compute_variance_of_pu_from_base_blocks(ULWORD64 * pu8_SigmaX,ULWORD64 * pu8_SigmaXSquared,U08 u1_cu_size,U08 u1_base_block_size,S32 i4_part_id)4720*c83a76b0SSuyog Pawar static U32 hme_compute_variance_of_pu_from_base_blocks(
4721*c83a76b0SSuyog Pawar ULWORD64 *pu8_SigmaX,
4722*c83a76b0SSuyog Pawar ULWORD64 *pu8_SigmaXSquared,
4723*c83a76b0SSuyog Pawar U08 u1_cu_size,
4724*c83a76b0SSuyog Pawar U08 u1_base_block_size,
4725*c83a76b0SSuyog Pawar S32 i4_part_id)
4726*c83a76b0SSuyog Pawar {
4727*c83a76b0SSuyog Pawar U08 i, j;
4728*c83a76b0SSuyog Pawar ULWORD64 u8_final_variance;
4729*c83a76b0SSuyog Pawar
4730*c83a76b0SSuyog Pawar U08 u1_part_dimension_multiplier = (u1_cu_size >> 4);
4731*c83a76b0SSuyog Pawar S32 i4_part_wd = gai1_part_wd_and_ht[i4_part_id][0] * u1_part_dimension_multiplier;
4732*c83a76b0SSuyog Pawar S32 i4_part_ht = gai1_part_wd_and_ht[i4_part_id][1] * u1_part_dimension_multiplier;
4733*c83a76b0SSuyog Pawar U08 u1_num_base_blocks_in_pu_row = i4_part_wd / u1_base_block_size;
4734*c83a76b0SSuyog Pawar U08 u1_num_base_blocks_in_pu_column = i4_part_ht / u1_base_block_size;
4735*c83a76b0SSuyog Pawar U08 u1_num_base_blocks_in_cu_row = u1_cu_size / u1_base_block_size;
4736*c83a76b0SSuyog Pawar U08 u1_num_base_blocks = (u1_num_base_blocks_in_pu_row * u1_num_base_blocks_in_pu_column);
4737*c83a76b0SSuyog Pawar U32 u4_num_pixels_in_base_block = u1_base_block_size * u1_base_block_size;
4738*c83a76b0SSuyog Pawar ULWORD64 u8_final_SigmaXSquared = 0;
4739*c83a76b0SSuyog Pawar ULWORD64 u8_final_SigmaX = 0;
4740*c83a76b0SSuyog Pawar
4741*c83a76b0SSuyog Pawar if(ge_part_id_to_part_type[i4_part_id] != PRT_NxN)
4742*c83a76b0SSuyog Pawar {
4743*c83a76b0SSuyog Pawar U08 u1_column_start_index = gau1_part_id_to_part_num[i4_part_id]
4744*c83a76b0SSuyog Pawar ? (gai1_is_part_vertical[i4_part_id]
4745*c83a76b0SSuyog Pawar ? 0
4746*c83a76b0SSuyog Pawar : (u1_cu_size - i4_part_wd) / u1_base_block_size)
4747*c83a76b0SSuyog Pawar : 0;
4748*c83a76b0SSuyog Pawar U08 u1_row_start_index = gau1_part_id_to_part_num[i4_part_id]
4749*c83a76b0SSuyog Pawar ? (gai1_is_part_vertical[i4_part_id]
4750*c83a76b0SSuyog Pawar ? (u1_cu_size - i4_part_ht) / u1_base_block_size
4751*c83a76b0SSuyog Pawar : 0)
4752*c83a76b0SSuyog Pawar : 0;
4753*c83a76b0SSuyog Pawar U08 u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
4754*c83a76b0SSuyog Pawar U08 u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;
4755*c83a76b0SSuyog Pawar
4756*c83a76b0SSuyog Pawar for(i = u1_row_start_index; i < u1_row_end_index; i++)
4757*c83a76b0SSuyog Pawar {
4758*c83a76b0SSuyog Pawar for(j = u1_column_start_index; j < u1_column_end_index; j++)
4759*c83a76b0SSuyog Pawar {
4760*c83a76b0SSuyog Pawar u8_final_SigmaXSquared += pu8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row];
4761*c83a76b0SSuyog Pawar u8_final_SigmaX += pu8_SigmaX[j + i * u1_num_base_blocks_in_cu_row];
4762*c83a76b0SSuyog Pawar }
4763*c83a76b0SSuyog Pawar }
4764*c83a76b0SSuyog Pawar
4765*c83a76b0SSuyog Pawar u8_final_variance =
4766*c83a76b0SSuyog Pawar u1_num_base_blocks * u4_num_pixels_in_base_block * u8_final_SigmaXSquared;
4767*c83a76b0SSuyog Pawar u8_final_variance -= u8_final_SigmaX * u8_final_SigmaX;
4768*c83a76b0SSuyog Pawar u8_final_variance +=
4769*c83a76b0SSuyog Pawar ((u1_num_base_blocks * u4_num_pixels_in_base_block) *
4770*c83a76b0SSuyog Pawar (u1_num_base_blocks * u4_num_pixels_in_base_block) / 2);
4771*c83a76b0SSuyog Pawar u8_final_variance /= (u1_num_base_blocks * u4_num_pixels_in_base_block) *
4772*c83a76b0SSuyog Pawar (u1_num_base_blocks * u4_num_pixels_in_base_block);
4773*c83a76b0SSuyog Pawar
4774*c83a76b0SSuyog Pawar ASSERT(u8_final_variance <= UINT_MAX);
4775*c83a76b0SSuyog Pawar }
4776*c83a76b0SSuyog Pawar else
4777*c83a76b0SSuyog Pawar {
4778*c83a76b0SSuyog Pawar U08 u1_row_start_index;
4779*c83a76b0SSuyog Pawar U08 u1_column_start_index;
4780*c83a76b0SSuyog Pawar U08 u1_row_end_index;
4781*c83a76b0SSuyog Pawar U08 u1_column_end_index;
4782*c83a76b0SSuyog Pawar
4783*c83a76b0SSuyog Pawar switch(gau1_part_id_to_part_num[i4_part_id])
4784*c83a76b0SSuyog Pawar {
4785*c83a76b0SSuyog Pawar case 0:
4786*c83a76b0SSuyog Pawar {
4787*c83a76b0SSuyog Pawar u1_row_start_index = 0;
4788*c83a76b0SSuyog Pawar u1_column_start_index = 0;
4789*c83a76b0SSuyog Pawar
4790*c83a76b0SSuyog Pawar break;
4791*c83a76b0SSuyog Pawar }
4792*c83a76b0SSuyog Pawar case 1:
4793*c83a76b0SSuyog Pawar {
4794*c83a76b0SSuyog Pawar u1_row_start_index = 0;
4795*c83a76b0SSuyog Pawar u1_column_start_index = u1_num_base_blocks_in_pu_row;
4796*c83a76b0SSuyog Pawar
4797*c83a76b0SSuyog Pawar break;
4798*c83a76b0SSuyog Pawar }
4799*c83a76b0SSuyog Pawar case 2:
4800*c83a76b0SSuyog Pawar {
4801*c83a76b0SSuyog Pawar u1_row_start_index = u1_num_base_blocks_in_pu_column;
4802*c83a76b0SSuyog Pawar u1_column_start_index = 0;
4803*c83a76b0SSuyog Pawar
4804*c83a76b0SSuyog Pawar break;
4805*c83a76b0SSuyog Pawar }
4806*c83a76b0SSuyog Pawar case 3:
4807*c83a76b0SSuyog Pawar {
4808*c83a76b0SSuyog Pawar u1_row_start_index = u1_num_base_blocks_in_pu_column;
4809*c83a76b0SSuyog Pawar u1_column_start_index = u1_num_base_blocks_in_pu_row;
4810*c83a76b0SSuyog Pawar
4811*c83a76b0SSuyog Pawar break;
4812*c83a76b0SSuyog Pawar }
4813*c83a76b0SSuyog Pawar }
4814*c83a76b0SSuyog Pawar
4815*c83a76b0SSuyog Pawar u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
4816*c83a76b0SSuyog Pawar u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;
4817*c83a76b0SSuyog Pawar
4818*c83a76b0SSuyog Pawar for(i = u1_row_start_index; i < u1_row_end_index; i++)
4819*c83a76b0SSuyog Pawar {
4820*c83a76b0SSuyog Pawar for(j = u1_column_start_index; j < u1_column_end_index; j++)
4821*c83a76b0SSuyog Pawar {
4822*c83a76b0SSuyog Pawar u8_final_SigmaXSquared += pu8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row];
4823*c83a76b0SSuyog Pawar u8_final_SigmaX += pu8_SigmaX[j + i * u1_num_base_blocks_in_cu_row];
4824*c83a76b0SSuyog Pawar }
4825*c83a76b0SSuyog Pawar }
4826*c83a76b0SSuyog Pawar
4827*c83a76b0SSuyog Pawar u8_final_variance =
4828*c83a76b0SSuyog Pawar u1_num_base_blocks * u4_num_pixels_in_base_block * u8_final_SigmaXSquared;
4829*c83a76b0SSuyog Pawar u8_final_variance -= u8_final_SigmaX * u8_final_SigmaX;
4830*c83a76b0SSuyog Pawar u8_final_variance +=
4831*c83a76b0SSuyog Pawar ((u1_num_base_blocks * u4_num_pixels_in_base_block) *
4832*c83a76b0SSuyog Pawar (u1_num_base_blocks * u4_num_pixels_in_base_block) / 2);
4833*c83a76b0SSuyog Pawar u8_final_variance /= (u1_num_base_blocks * u4_num_pixels_in_base_block) *
4834*c83a76b0SSuyog Pawar (u1_num_base_blocks * u4_num_pixels_in_base_block);
4835*c83a76b0SSuyog Pawar
4836*c83a76b0SSuyog Pawar ASSERT(u8_final_variance <= UINT_MAX);
4837*c83a76b0SSuyog Pawar }
4838*c83a76b0SSuyog Pawar
4839*c83a76b0SSuyog Pawar return u8_final_variance;
4840*c83a76b0SSuyog Pawar }
4841*c83a76b0SSuyog Pawar
/**
 * Computes the variance of every valid partition of a CU.
 *
 * The CU is split into square base blocks (size chosen by
 * hme_determine_base_block_size); the pixel sum and sum of squared pixels
 * of each base block are gathered once, and each partition's variance is
 * then derived from those sums.
 *
 * @param pu1_data              top-left pixel of the CU
 * @param i4_data_stride        distance between vertically adjacent pixels
 * @param pi4_valid_part_array  ids of the partitions to evaluate
 * @param pu4_variance          output, indexed by partition id
 * @param i4_num_valid_parts    number of entries in pi4_valid_part_array
 * @param u1_cu_size            CU edge length in pixels
 */
void hme_compute_variance_for_all_parts(
    U08 *pu1_data,
    S32 i4_data_stride,
    S32 *pi4_valid_part_array,
    U32 *pu4_variance,
    S32 i4_num_valid_parts,
    U08 u1_cu_size)
{
    /* At most 4x4 base blocks per CU (asserted below) */
    ULWORD64 au8_blk_sum[16];
    ULWORD64 au8_blk_sum_sq[16];
    U08 u1_blk_row, u1_blk_col, u1_y, u1_x;
    U08 u1_part_idx;
    U08 u1_blk_size;
    U08 u1_blks_per_side;

    u1_blk_size =
        hme_determine_base_block_size(pi4_valid_part_array, i4_num_valid_parts, u1_cu_size);

    u1_blks_per_side = u1_cu_size / u1_blk_size;

    ASSERT(u1_blks_per_side <= 4);

    /* Pass 1: per-base-block sum and sum of squares */
    for(u1_blk_row = 0; u1_blk_row < u1_blks_per_side; u1_blk_row++)
    {
        for(u1_blk_col = 0; u1_blk_col < u1_blks_per_side; u1_blk_col++)
        {
            ULWORD64 u8_sum = 0;
            ULWORD64 u8_sum_sq = 0;
            U08 *pu1_blk =
                pu1_data + (u1_blk_size * u1_blk_col) + (u1_blk_size * u1_blk_row * i4_data_stride);

            for(u1_y = 0; u1_y < u1_blk_size; u1_y++)
            {
                for(u1_x = 0; u1_x < u1_blk_size; u1_x++)
                {
                    U32 u4_pel = pu1_blk[u1_x + u1_y * i4_data_stride];

                    u8_sum += u4_pel;
                    u8_sum_sq += u4_pel * u4_pel;
                }
            }

            au8_blk_sum[u1_blk_col + u1_blk_row * u1_blks_per_side] = u8_sum;
            au8_blk_sum_sq[u1_blk_col + u1_blk_row * u1_blks_per_side] = u8_sum_sq;
        }
    }

    /* Pass 2: combine the base-block sums into one variance per partition */
    for(u1_part_idx = 0; u1_part_idx < i4_num_valid_parts; u1_part_idx++)
    {
        S32 i4_part_id = pi4_valid_part_array[u1_part_idx];

        pu4_variance[i4_part_id] = hme_compute_variance_of_pu_from_base_blocks(
            au8_blk_sum, au8_blk_sum_sq, u1_cu_size, u1_blk_size, i4_part_id);
    }
}
4894*c83a76b0SSuyog Pawar
/**
 * Combines per-base-block sums into the totals for one partition (PU).
 *
 * On return, for N = number of pixels in the partition:
 *   pu8_final_sigmaX[i4_part_id]         = sum of pixels over the partition
 *   pu8_final_sigmaX_Squared[i4_part_id] = N * (sum of squared pixels)
 *
 * @param pu4_SigmaX                per-base-block pixel sums, row-major
 * @param pu4_SigmaXSquared         per-base-block sums of squared pixels
 * @param pu8_final_sigmaX          output array, indexed by partition id
 * @param pu8_final_sigmaX_Squared  output array, indexed by partition id
 * @param u1_cu_size                CU edge length in pixels
 * @param u1_base_block_size        base block edge length (must be > 0)
 * @param i4_part_id                partition id to evaluate
 * @param u1_base_blk_array_stride  entries per row in the two input arrays
 */
void hme_compute_final_sigma_of_pu_from_base_blocks(
    U32 *pu4_SigmaX,
    U32 *pu4_SigmaXSquared,
    ULWORD64 *pu8_final_sigmaX,
    ULWORD64 *pu8_final_sigmaX_Squared,
    U08 u1_cu_size,
    U08 u1_base_block_size,
    S32 i4_part_id,
    U08 u1_base_blk_array_stride)
{
    U08 i, j;
    U08 u1_part_dimension_multiplier;
    S32 i4_part_wd;
    S32 i4_part_ht;
    U08 u1_num_base_blocks_in_pu_row;
    U08 u1_num_base_blocks_in_pu_column;
    U16 u2_num_base_blocks;
    U32 u4_num_pixels_in_base_block;
    U32 u4_N;
    ULWORD64 u8_sigmaX = 0;
    ULWORD64 u8_sigmaX_squared = 0;

    /* Zero start indices describe the top-left partition and are also the */
    /* defined fallback when an unexpected partition number is encountered */
    U08 u1_row_start_index = 0;
    U08 u1_column_start_index = 0;
    U08 u1_row_end_index;
    U08 u1_column_end_index;

    /* The base block size is used as a divisor below */
    ASSERT(u1_base_block_size > 0);

    /* The dimension table is scaled by (cu_size / 16) to get pixels */
    u1_part_dimension_multiplier = (u1_cu_size >> 4);
    i4_part_wd = gai1_part_wd_and_ht[i4_part_id][0] * u1_part_dimension_multiplier;
    i4_part_ht = gai1_part_wd_and_ht[i4_part_id][1] * u1_part_dimension_multiplier;

    u1_num_base_blocks_in_pu_row = i4_part_wd / u1_base_block_size;
    u1_num_base_blocks_in_pu_column = i4_part_ht / u1_base_block_size;
    u2_num_base_blocks = (u1_num_base_blocks_in_pu_row * u1_num_base_blocks_in_pu_column);
    u4_num_pixels_in_base_block = u1_base_block_size * u1_base_block_size;
    u4_N = (u2_num_base_blocks * u4_num_pixels_in_base_block);

    if(ge_part_id_to_part_type[i4_part_id] != PRT_NxN)
    {
        /* Only the second partition (part num != 0) is displaced, and */
        /* only along one axis, selected by gai1_is_part_vertical      */
        if(gau1_part_id_to_part_num[i4_part_id])
        {
            if(gai1_is_part_vertical[i4_part_id])
            {
                u1_row_start_index = (u1_cu_size - i4_part_ht) / u1_base_block_size;
            }
            else
            {
                u1_column_start_index = (u1_cu_size - i4_part_wd) / u1_base_block_size;
            }
        }
    }
    else
    {
        /* NxN: part num selects the quadrant (0 TL, 1 TR, 2 BL, 3 BR) */
        switch(gau1_part_id_to_part_num[i4_part_id])
        {
        case 0:
        {
            break;
        }
        case 1:
        {
            u1_column_start_index = u1_num_base_blocks_in_pu_row;

            break;
        }
        case 2:
        {
            u1_row_start_index = u1_num_base_blocks_in_pu_column;

            break;
        }
        case 3:
        {
            u1_row_start_index = u1_num_base_blocks_in_pu_column;
            u1_column_start_index = u1_num_base_blocks_in_pu_row;

            break;
        }
        default:
        {
            ASSERT(0);

            break;
        }
        }
    }

    u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
    u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;

    /* Gather the sums of every base block covered by the partition */
    for(i = u1_row_start_index; i < u1_row_end_index; i++)
    {
        for(j = u1_column_start_index; j < u1_column_end_index; j++)
        {
            u8_sigmaX_squared += pu4_SigmaXSquared[j + i * u1_base_blk_array_stride];
            u8_sigmaX += pu4_SigmaX[j + i * u1_base_blk_array_stride];
        }
    }

    /* The squared sum is handed back pre-multiplied by the pixel count */
    pu8_final_sigmaX[i4_part_id] = u8_sigmaX;
    pu8_final_sigmaX_Squared[i4_part_id] = u8_sigmaX_squared * u4_N;
}
5009*c83a76b0SSuyog Pawar
/**
 * Rescales the SAD of every valid partition by a factor derived from how
 * similar the source and prediction variances are.
 *
 * For each partition, a term
 *      2 * round(src_var * ref_var * 2^STIM_Q_FORMAT / (src_var^2 + ref_var^2))
 * is computed (2^STIM_Q_FORMAT when both variances are equal), multiplied by
 * i4_alpha_stim_multiplier, and the SAD is replaced by
 *      round(SAD * (2^Q - term) / 2^Q),  Q = STIM_Q_FORMAT + ALPHA_Q_FORMAT.
 * When i4_alpha_stim_multiplier is zero the term is zero.
 *
 * @param pu1_pred                  prediction block, top-left pixel
 * @param i4_pred_stride            stride of the prediction buffer
 * @param pi4_valid_part_array      ids of the partitions to process
 * @param pu8_src_sigmaX            source sums, forwarded to
 *                                  ihevce_calc_stim_injected_variance
 * @param pu8_src_sigmaXSquared     source squared sums, forwarded likewise
 * @param pi4_sad_array             in/out SADs, indexed by partition id
 * @param i4_alpha_stim_multiplier  weight of the similarity term; 0 disables it
 * @param i4_inv_wt                 forwarded to ihevce_calc_stim_injected_variance
 * @param i4_inv_wt_shift_val       forwarded likewise
 * @param i4_num_valid_parts        number of entries in pi4_valid_part_array
 * @param i4_wpred_log_wdc          forwarded likewise
 * @param u1_cu_size                CU edge length in pixels (asserted >= 16)
 */
void hme_compute_stim_injected_distortion_for_all_parts(
    U08 *pu1_pred,
    S32 i4_pred_stride,
    S32 *pi4_valid_part_array,
    ULWORD64 *pu8_src_sigmaX,
    ULWORD64 *pu8_src_sigmaXSquared,
    S32 *pi4_sad_array,
    S32 i4_alpha_stim_multiplier,
    S32 i4_inv_wt,
    S32 i4_inv_wt_shift_val,
    S32 i4_num_valid_parts,
    S32 i4_wpred_log_wdc,
    U08 u1_cu_size)
{
    U32 au4_blk_sum[16], au4_blk_sum_sq[16];
    ULWORD64 au8_ref_sum[17], au8_ref_sum_sq[17];
    U16 u2_idx;
    U08 u1_blk_size;
    U08 u1_blks_per_side;

    WORD32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT;

    u1_blk_size =
        hme_determine_base_block_size(pi4_valid_part_array, i4_num_valid_parts, u1_cu_size);

    ASSERT(u1_cu_size >= 16);

    u1_blks_per_side = u1_cu_size / u1_blk_size;

    /* Per-base-block sums of the prediction, stored as 32-bit values */
    hme_compute_sigmaX_and_sigmaXSquared(
        pu1_pred,
        i4_pred_stride,
        au4_blk_sum,
        au4_blk_sum_sq,
        u1_blk_size,
        u1_blk_size,
        u1_cu_size,
        u1_cu_size,
        1,
        u1_blks_per_side);

    for(u2_idx = 0; u2_idx < i4_num_valid_parts; u2_idx++)
    {
        S32 i4_part = pi4_valid_part_array[u2_idx];
        S32 i4_noise_term = 0;
        ULWORD64 u8_dist;

        if(i4_alpha_stim_multiplier)
        {
            unsigned long u4_shift_val;
            S32 i4_bits_req;
            ULWORD64 u8_src_var, u8_ref_var, u8_similarity;

            /* Prediction totals for this partition: sumX and N * sumX2 */
            hme_compute_final_sigma_of_pu_from_base_blocks(
                au4_blk_sum,
                au4_blk_sum_sq,
                au8_ref_sum,
                au8_ref_sum_sq,
                u1_cu_size,
                u1_blk_size,
                i4_part,
                u1_blks_per_side);

            /* N * sumX2 - sumX^2 */
            u8_ref_var = au8_ref_sum_sq[i4_part] - (au8_ref_sum[i4_part] * au8_ref_sum[i4_part]);

            /* Fills u8_src_var; the returned shift is applied to u8_ref_var too */
            u4_shift_val = ihevce_calc_stim_injected_variance(
                pu8_src_sigmaX,
                pu8_src_sigmaXSquared,
                &u8_src_var,
                i4_inv_wt,
                i4_inv_wt_shift_val,
                i4_wpred_log_wdc,
                i4_part);

            u8_ref_var = u8_ref_var >> u4_shift_val;

            GETRANGE64(i4_bits_req, u8_ref_var);

            /* Cap the prediction variance at 27 bits, scaling both alike */
            if(i4_bits_req > 27)
            {
                u8_ref_var = u8_ref_var >> (i4_bits_req - 27);
                u8_src_var = u8_src_var >> (i4_bits_req - 27);
            }

            if(u8_src_var == u8_ref_var)
            {
                /* Identical variances (including both zero): maximum value, */
                /* and the division below is skipped                         */
                u8_similarity = (1 << STIM_Q_FORMAT);
            }
            else
            {
                ULWORD64 u8_numer = (u8_src_var * u8_ref_var * (1 << STIM_Q_FORMAT));
                ULWORD64 u8_denom = (u8_src_var * u8_src_var) + (u8_ref_var * u8_ref_var);

                u8_similarity = (u8_numer + (u8_denom / 2));
                u8_similarity = (u8_similarity / u8_denom);
                u8_similarity = (2 * u8_similarity);
            }

            i4_noise_term = (UWORD32)u8_similarity;

            ASSERT(i4_noise_term >= 0);

            i4_noise_term *= i4_alpha_stim_multiplier;
        }

        /* SAD * (2^Q - term), rounded and brought back out of Q format */
        u8_dist = pi4_sad_array[i4_part];
        u8_dist *= ((1 << (i4_q_level)) - (i4_noise_term));
        u8_dist += (1 << ((i4_q_level)-1));
        pi4_sad_array[i4_part] = (UWORD32)(u8_dist >> (i4_q_level));
    }
}
5125*c83a76b0SSuyog Pawar
/**
 * Computes, for every base block of a pixel block, the sum of its pixels
 * and the sum of its squared pixels.
 *
 * Results for base block (row, col) are written at index
 * col + row * u1_array_stride of the two output arrays.
 *
 * @param pu1_data          top-left pixel of the block
 * @param i4_buf_stride     stride of the pixel buffer
 * @param pv_sigmaX         output sums; treated as U32* when
 *                          u1_is_sigma_pointer_size_32_bit is non-zero,
 *                          as ULWORD64* otherwise
 * @param pv_sigmaXSquared  output squared sums, same element type rule
 * @param u1_base_blk_wd    base block width in pixels
 * @param u1_base_blk_ht    base block height in pixels
 * @param u1_blk_wd         block width in pixels
 * @param u1_blk_ht         block height in pixels
 * @param u1_is_sigma_pointer_size_32_bit  selects the output element type
 * @param u1_array_stride   entries per row in the output arrays
 *
 * In the 64-bit case the squared sum is stored multiplied by
 * u1_blk_wd * u1_blk_ht; in the 32-bit case it is stored unscaled.
 */
void hme_compute_sigmaX_and_sigmaXSquared(
    U08 *pu1_data,
    S32 i4_buf_stride,
    void *pv_sigmaX,
    void *pv_sigmaXSquared,
    U08 u1_base_blk_wd,
    U08 u1_base_blk_ht,
    U08 u1_blk_wd,
    U08 u1_blk_ht,
    U08 u1_is_sigma_pointer_size_32_bit,
    U08 u1_array_stride)
{
    U08 u1_row, u1_col, u1_y, u1_x;
    U08 u1_blks_across = u1_blk_wd / u1_base_blk_wd;
    U08 u1_blks_down = u1_blk_ht / u1_base_blk_ht;

    for(u1_row = 0; u1_row < u1_blks_down; u1_row++)
    {
        for(u1_col = 0; u1_col < u1_blks_across; u1_col++)
        {
            /* Accumulate in 64 bits; narrowing at store time yields the */
            /* same value as accumulating directly in 32 bits            */
            ULWORD64 u8_sum = 0;
            ULWORD64 u8_sum_sq = 0;
            S32 i4_out_idx = u1_col + u1_row * u1_array_stride;
            U08 *pu1_buf =
                pu1_data + (u1_base_blk_wd * u1_col) + (u1_base_blk_ht * u1_row * i4_buf_stride);

            for(u1_y = 0; u1_y < u1_base_blk_ht; u1_y++)
            {
                for(u1_x = 0; u1_x < u1_base_blk_wd; u1_x++)
                {
                    U32 u4_pel = pu1_buf[u1_x + u1_y * i4_buf_stride];

                    u8_sum += u4_pel;
                    u8_sum_sq += (u4_pel * u4_pel);
                }
            }

            if(u1_is_sigma_pointer_size_32_bit)
            {
                ((U32 *)pv_sigmaX)[i4_out_idx] = (U32)u8_sum;
                ((U32 *)pv_sigmaXSquared)[i4_out_idx] = (U32)u8_sum_sq;
            }
            else
            {
                ((ULWORD64 *)pv_sigmaX)[i4_out_idx] = u8_sum;
                ((ULWORD64 *)pv_sigmaXSquared)[i4_out_idx] = u8_sum_sq * u1_blk_wd * u1_blk_ht;
            }
        }
    }
}
5210*c83a76b0SSuyog Pawar
#if TEMPORAL_NOISE_DETECT
/**
 * Decides whether one 16x16 block is temporally noisy.
 *
 * For each prediction direction examined, the best motion vector stored in
 * the coarse layer MV bank at the block position is fetched, doubled, and
 * used to address a 16x16 block in the current layer's reconstructed
 * reference list. The variance of (input - reference) is compared against
 *   (TEMPORAL_VARIANCE_FACTOR * source variance) >> Q_TEMPORAL_VARIANCE_FACTOR.
 * The block is flagged noisy only when every direction exceeds that
 * threshold; the first direction that does not exceed it clears the flags
 * and stops the search.
 *
 * @param had_block_size            Only checked by assert to be 8, 16 or 32.
 * @param ctb_width                 Not read by this function.
 * @param ctb_height                Not read by this function.
 * @param ps_ctb_noise_params       au4_variance_src_16x16[i4_16x16_index] is
 *                                  read; four entries of au1_is_8x8Blk_noisy
 *                                  are written.
 * @param s_proj_srch_cand_init_data Supplies ps_curr_layer and ps_coarse_layer.
 * @param s_search_prms_blk         Supplies i4_inp_stride.
 * @param ps_ctxt                   Supplies s_wt_pred (weighted input planes).
 * @param num_pred_dir              Number of directions to examine. The
 *                                  per-direction variance array holds two
 *                                  entries, so this must not exceed 2.
 * @param i4_num_act_ref_l0         Number of active L0 references; also the
 *                                  number of L0 entries skipped in the MV
 *                                  bank when reading the L1 candidate.
 * @param i4_num_act_ref_l1         Not read by this function.
 * @param i4_cu_x_off               Horizontal offset of the block.
 * @param i4_cu_y_off               Vertical offset of the block.
 * @param ps_wt_inp_prms            Not read by this function; the weighted
 *                                  input context is always taken from
 *                                  ps_ctxt->s_wt_pred.
 * @param input_stride              Not read by this function.
 * @param index_8x8_block           Index of the first of the four 8x8 flags
 *                                  updated for this block.
 * @param num_horz_blocks           Not read by this function.
 * @param num_8x8_in_ctb_row        Distance between vertically adjacent 8x8
 *                                  flags in au1_is_8x8Blk_noisy.
 * @param i4_16x16_index            Index of the 16x16 block; the input offset
 *                                  is derived assuming four such blocks per
 *                                  row.
 *
 * @return 1 when the block is flagged noisy, 0 otherwise.
 */
WORD32 ihevce_16x16block_temporal_noise_detect(
    WORD32 had_block_size,
    WORD32 ctb_width,
    WORD32 ctb_height,
    ihevce_ctb_noise_params *ps_ctb_noise_params,
    fpel_srch_cand_init_data_t *s_proj_srch_cand_init_data,
    hme_search_prms_t *s_search_prms_blk,
    me_frm_ctxt_t *ps_ctxt,
    WORD32 num_pred_dir,
    WORD32 i4_num_act_ref_l0,
    WORD32 i4_num_act_ref_l1,
    WORD32 i4_cu_x_off,
    WORD32 i4_cu_y_off,
    wgt_pred_ctxt_t *ps_wt_inp_prms,
    WORD32 input_stride,
    WORD32 index_8x8_block,
    WORD32 num_horz_blocks,
    WORD32 num_8x8_in_ctb_row,
    WORD32 i4_16x16_index)
{
    WORD32 noise_detected = 0;

    /* Residue of the 16x16 block and its statistics, one variance per direction */
    WORD16 pi2_residue_16x16[256];
    WORD32 mean_16x16;
    UWORD32 variance_16x16[2];

    layer_ctxt_t *ps_curr_layer = s_proj_srch_cand_init_data->ps_curr_layer;
    layer_ctxt_t *ps_coarse_layer = s_proj_srch_cand_init_data->ps_coarse_layer;
    layer_mv_t *ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;

    /* Weighted input always comes from the frame context, never from the */
    /* ps_wt_inp_prms argument                                            */
    wgt_pred_ctxt_t *ps_wt_pred = &ps_ctxt->s_wt_pred;

    /* Width and height of the current layer */
    S32 wd_c = ps_curr_layer->i4_wd;
    S32 ht_c = ps_curr_layer->i4_ht;

    S32 blksize_p = gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
    S32 blk_x, blk_y, i4_offset;

    /* Position used for the MV bank lookup */
    S32 posx = i4_cu_x_off + 2;
    S32 posy = i4_cu_y_off + 2;

    S32 i4_inp_stride = s_search_prms_blk->i4_inp_stride;
    S32 i4_inp_off;

    /* Reference planes: an array of pointers, one per reference id */
    S32 i4_ref_stride = ps_curr_layer->i4_rec_stride;
    U08 **ppu1_ref = ps_curr_layer->ppu1_list_rec_fxfy;
    S32 i4_ref_offset = (i4_ref_stride * i4_cu_y_off) + i4_cu_x_off;

    /* Attributes of the candidate picked per direction */
    search_node_t as_search_node[2];
    U08 u1_default_ref_id = 0;
    U08 u1_pred_dir;

    /* throw errors in case of unsupported arguments */
    /* assumption: size is 8 or 16 or 32 */
    assert((had_block_size == 8) || (had_block_size == 16) || (had_block_size == 32));

    /* ASSERT for valid sizes */
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));

    /* Location of this 16x16 block inside the input buffer */
    i4_inp_off = (i4_16x16_index % 4) * 16;
    i4_inp_off += (i4_16x16_index / 4) * 16 * i4_inp_stride;

    for(u1_pred_dir = 0; u1_pred_dir < num_pred_dir; u1_pred_dir++)
    {
        WORD32 actual_pred_dir = 0;
        WORD32 dim = 16;
        WORD32 row, col;
        search_node_t *ps_node;
        hme_mv_t *ps_mv;
        S08 *pi1_ref_idx;
        UWORD8 *pu1_inp_blk;
        UWORD8 *pu1_ref_blk;

        /* Direction 0 maps to L1 when there is no active L0 reference; */
        /* direction 1 always maps to L1                                */
        if((u1_pred_dir == 1) || ((u1_pred_dir == 0) && (i4_num_act_ref_l0 == 0)))
        {
            actual_pred_dir = 1;
        }
        ps_node = &as_search_node[actual_pred_dir];

        /* Safety check to avoid uninitialized access across temporal layers */
        posx = CLIP3(posx, 0, (wd_c - blksize_p));
        posy = CLIP3(posy, 0, (ht_c - blksize_p));

        /* Project the positions to prev layer */
        blk_x = posx >> blksize_p;
        blk_y = posy >> blksize_p;

        /* Pick up the mvs from the location */
        i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
        i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);

        ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
        pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

        if(actual_pred_dir == 1)
        {
            /* Skip the L0 entries stored ahead of the L1 entries */
            S32 i4_l0_entries = i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref;

            ps_mv += i4_l0_entries;
            pi1_ref_idx += i4_l0_entries;
        }

        /* Best candidate of this direction, MV doubled. Multiplication is  */
        /* used instead of "<< 1" because left-shifting a negative signed   */
        /* value is undefined behaviour in C.                               */
        ps_node->s_mv.i2_mvx = ps_mv[0].i2_mv_x * 2;
        ps_node->s_mv.i2_mvy = ps_mv[0].i2_mv_y * 2;
        ps_node->i1_ref_idx = pi1_ref_idx[0];

        /* Invalid or intra candidate: fall back to zero MV on default ref */
        if((ps_node->i1_ref_idx < 0) || (ps_node->s_mv.i2_mvx == INTRA_MV))
        {
            ps_node->i1_ref_idx = u1_default_ref_id;
            ps_node->s_mv.i2_mvx = 0;
            ps_node->s_mv.i2_mvy = 0;
        }

        /* Motion displaced block in the reference picture */
        pu1_ref_blk = ppu1_ref[ps_node->i1_ref_idx] + i4_ref_offset;
        pu1_ref_blk += ps_node->s_mv.i2_mvx;
        pu1_ref_blk += ps_node->s_mv.i2_mvy * i4_ref_stride;

        /* Source block, taken from the weighted input of the same ref id */
        pu1_inp_blk = ps_wt_pred->apu1_wt_inp[ps_node->i1_ref_idx] + i4_inp_off;

        /* Residue = source - reference */
        for(row = 0; row < dim; row++)
        {
            UWORD8 *pu1_inp_row = pu1_inp_blk + row * i4_inp_stride;
            UWORD8 *pu1_ref_row = pu1_ref_blk + row * i4_ref_stride;

            for(col = 0; col < dim; col++)
            {
                pi2_residue_16x16[row * dim + col] = (WORD16)(pu1_inp_row[col] - pu1_ref_row[col]);
            }
        }

        ihevce_calc_variance_signed(
            pi2_residue_16x16, 16, &mean_16x16, &variance_16x16[u1_pred_dir], 16, 16);

        /* Compare the residue variance with the scaled source variance */
        if(!(variance_16x16[u1_pred_dir] >
             ((TEMPORAL_VARIANCE_FACTOR *
               ps_ctb_noise_params->au4_variance_src_16x16[i4_16x16_index]) >>
              Q_TEMPORAL_VARIANCE_FACTOR)))
        {
            /* One direction says "not noise": clear the flags and do not */
            /* look at the remaining directions                           */
            noise_detected = 0;
            ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block] = 0;
            ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + 1] = 0;
            ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row] = 0;
            ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row + 1] =
                0;
            break;
        }

        /* Flag the block only once the last direction also indicates noise */
        if(u1_pred_dir == num_pred_dir - 1)
        {
            ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block] = 1;
            ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + 1] = 1;
            ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row] = 1;
            ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row + 1] =
                1;
            noise_detected = 1;
        }
    }

    return (noise_detected);
}
#endif
5505*c83a76b0SSuyog Pawar
/**
 * Builds one quarter-pel block by averaging two source blocks and records the
 * result in slot i4_buf_id of the caller's output tables.
 *
 * i4_mv_x and i4_mv_y are motion vectors in QPEL units, relative to the (0,0)
 * point of the reference block colocated with the input block. The fractional
 * parts (mv & 3) select an entry of gas_qpel_inp_buf_cfg, which names the two
 * source buffers and their x/y offsets; the integer parts (mv >> 2) give the
 * motion displaced position inside those buffers.
 *
 *      A j E k B
 *      l m n o p
 *      F q G r H
 *      s t u v w
 *      C x I y D
 *
 * In the diagram A, B, C, D are full-pel points at offsets (0,0), (1,0), (0,1)
 * and (1,1) of the fxfy buffer (id 0). E and I are hxfy points (id 1) at
 * offsets (0,0) and (0,1). F and H are fxhy points (id 2) at offsets (0,0) and
 * (1,0). G is the hxhy point (id 3) at offset (0,0). All offsets are taken
 * w.r.t. the motion displaced point of the respective buffer.
 *
 * Example: point v has fractional components (3, 3) and is the average of H
 * and I, so its table entry gives
 *      source 1 (H): offset (1, 0), buf id 2
 *      source 2 (I): offset (0, 1), buf id 1
 *
 * NOTE: for points that are themselves fxfy/hxfy/fxhy/hxhy, buf id 1 will
 * be -1.
 *
 * @param ps_prms           Reference buffers, strides, block size and the
 *                          interpolation output buffers.
 * @param i4_mv_x           Horizontal MV in QPEL units.
 * @param i4_mv_y           Vertical MV in QPEL units.
 * @param i4_buf_id         Output slot: selects apu1_interp_out[] and the
 *                          entry written in ppu1_final / pi4_final_stride.
 * @param ppu1_final        Receives the pointer to the averaged block.
 * @param pi4_final_stride  Receives the stride of the averaged block.
 */
void hme_qpel_interp_avg_1pt(
    interp_prms_t *ps_prms,
    S32 i4_mv_x,
    S32 i4_mv_y,
    S32 i4_buf_id,
    U08 **ppu1_final,
    S32 *pi4_final_stride)
{
    /* Fractional (QPEL) part of the MV picks the table entry */
    S32 i4_frac_x = i4_mv_x & 3;
    S32 i4_frac_y = i4_mv_y & 3;
    const qpel_input_buf_cfg_t *ps_cfg = &gas_qpel_inp_buf_cfg[i4_frac_y][i4_frac_x];

    /* Integer part of the MV gives the motion displaced position */
    S32 i4_fpel_off = (i4_mv_x >> 2) + (i4_mv_y >> 2) * ps_prms->i4_ref_stride;

    /* Displacement of each source block inside its buffer */
    S32 i4_disp1 =
        ps_cfg->i1_buf_xoff1 + i4_fpel_off + (ps_cfg->i1_buf_yoff1 * ps_prms->i4_ref_stride);
    S32 i4_disp2 =
        ps_cfg->i1_buf_xoff2 + i4_fpel_off + (ps_cfg->i1_buf_yoff2 * ps_prms->i4_ref_stride);

    U08 *pu1_first = ps_prms->ppu1_ref[ps_cfg->i1_buf_id1] + i4_disp1;
    U08 *pu1_second = ps_prms->ppu1_ref[ps_cfg->i1_buf_id2] + i4_disp2;
    U08 *pu1_out = ps_prms->apu1_interp_out[i4_buf_id];

    hevc_avg_2d(
        pu1_first,
        pu1_second,
        ps_prms->i4_ref_stride,
        ps_prms->i4_ref_stride,
        ps_prms->i4_blk_wd,
        ps_prms->i4_blk_ht,
        pu1_out,
        ps_prms->i4_out_stride);

    /* Publish the result in the requested slot */
    ppu1_final[i4_buf_id] = pu1_out;
    pi4_final_stride[i4_buf_id] = ps_prms->i4_out_stride;
}
5573*c83a76b0SSuyog Pawar
/**
 * Interpolates the two quarter-pel points at vertical distance one from
 * (i4_mv_x, i4_mv_y): the point at i4_mv_y + 1 goes to output slot 3 and the
 * point at i4_mv_y - 1 goes to output slot 1.
 *
 * @param ps_prms           Interpolation parameters, forwarded unchanged.
 * @param i4_mv_x           Horizontal MV in QPEL units.
 * @param i4_mv_y           Vertical MV in QPEL units of the centre point.
 * @param ppu1_final        Entries 3 and 1 receive the block pointers.
 * @param pi4_final_stride  Entries 3 and 1 receive the block strides.
 */
void hme_qpel_interp_avg_2pt_vert_with_reuse(
    interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride)
{
    S32 i4_mv_y_plus = i4_mv_y + 1;
    S32 i4_mv_y_minus = i4_mv_y - 1;

    /* (x, y + 1) -> slot 3 */
    hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x, i4_mv_y_plus, 3, ppu1_final, pi4_final_stride);

    /* (x, y - 1) -> slot 1 */
    hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x, i4_mv_y_minus, 1, ppu1_final, pi4_final_stride);
}
5581*c83a76b0SSuyog Pawar
/**
 * Interpolates the two quarter-pel points at horizontal distance one from
 * (i4_mv_x, i4_mv_y): the point at i4_mv_x + 1 goes to output slot 2 and the
 * point at i4_mv_x - 1 goes to output slot 0.
 *
 * @param ps_prms           Interpolation parameters, forwarded unchanged.
 * @param i4_mv_x           Horizontal MV in QPEL units of the centre point.
 * @param i4_mv_y           Vertical MV in QPEL units.
 * @param ppu1_final        Entries 2 and 0 receive the block pointers.
 * @param pi4_final_stride  Entries 2 and 0 receive the block strides.
 */
void hme_qpel_interp_avg_2pt_horz_with_reuse(
    interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride)
{
    S32 i4_mv_x_plus = i4_mv_x + 1;
    S32 i4_mv_x_minus = i4_mv_x - 1;

    /* (x + 1, y) -> slot 2 */
    hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x_plus, i4_mv_y, 2, ppu1_final, pi4_final_stride);

    /* (x - 1, y) -> slot 0 */
    hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x_minus, i4_mv_y, 0, ppu1_final, pi4_final_stride);
}
5589*c83a76b0SSuyog Pawar
/**
 * Fills the per-reference MV search range and reports the vertical MV limit
 * that applies to the previously encoded frame.
 *
 * When bidir_enabled is 1 (B/b pictures) the vertical range of each reference
 * is scaled by its POC distance from the current picture, using a per-POC
 * step of (i2_max_mv_y + 2) >> 2 (P to P distance assumed to be 4), and is
 * capped at the layer's i2_max_mv_y. Otherwise (P pictures) every active L0
 * reference gets the layer's full range.
 *
 * @param ps_ctxt                    Frame context: bidir_enabled, current and
 *                                   previous POC, ref idx to POC table and the
 *                                   active reference counts.
 * @param ps_curr_layer              Supplies i2_max_mv_x and i2_max_mv_y.
 * @param ps_mv_limit                Output array, one entry per reference.
 * @param pi2_prev_enc_frm_max_mv_y  Output: entry 0 receives the vertical
 *                                   limit of the reference whose POC equals
 *                                   the previous POC (0 when no reference
 *                                   matches) for B/b pictures, or the layer's
 *                                   i2_max_mv_y for P pictures.
 * @param u1_num_act_ref_pics        Number of references processed in the
 *                                   B/b picture case.
 */
void hme_set_mv_limit_using_dvsr_data(
    me_frm_ctxt_t *ps_ctxt,
    layer_ctxt_t *ps_curr_layer,
    range_prms_t *ps_mv_limit,
    S16 *pi2_prev_enc_frm_max_mv_y,
    U08 u1_num_act_ref_pics)
{
    WORD32 ref_ctr;

    /* Only for B/b pic. */
    if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
    {
        WORD16 i2_mv_y_per_poc;
        WORD32 cur_poc = ps_ctxt->i4_curr_poc;
        /* POC of the previously encoded frame */
        WORD32 prev_poc = ps_ctxt->i4_prev_poc;

        pi2_prev_enc_frm_max_mv_y[0] = 0;

        /* Get abs MAX for symmetric search */
        i2_mv_y_per_poc = ps_curr_layer->i2_max_mv_y;
        /* Assuming P to P distance as 4 */
        i2_mv_y_per_poc = (i2_mv_y_per_poc + 2) >> 2;

        for(ref_ctr = 0; ref_ctr < u1_num_act_ref_pics; ref_ctr++)
        {
            WORD32 ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
            WORD32 abs_poc_diff = ABS((cur_poc - ref_poc));
            WORD32 i4_max_mv_y;
            WORD16 i2_max_mv_y;

            /* Max MV based on POC distance. The product is clamped while   */
            /* still 32 bits wide; narrowing it to 16 bits before the clamp */
            /* could wrap to a negative value for large POC distances and   */
            /* invert the search range.                                     */
            i4_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
            i4_max_mv_y = MIN(i4_max_mv_y, ps_curr_layer->i2_max_mv_y);
            i2_max_mv_y = (WORD16)i4_max_mv_y;

            ps_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
            ps_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
            ps_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
            ps_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;

            /* Find the MAX MV for the prev. encoded frame to optimize */
            /* the reverse dependency of ME on Enc.Loop                */
            if(ref_poc == prev_poc)
            {
                /* TO DO : Same thing for horz. search also */
                pi2_prev_enc_frm_max_mv_y[0] = i2_max_mv_y;
            }
        }
    }
    else
    {
        ASSERT(0 == ps_ctxt->s_frm_prms.u1_num_active_ref_l1);

        /* Set the Config. File Params for P pic. */
        for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
        {
            ps_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
            ps_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
            ps_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
            ps_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
        }

        /* For P PIC., go with Config. File Params */
        pi2_prev_enc_frm_max_mv_y[0] = ps_curr_layer->i2_max_mv_y;
    }
}
5661*c83a76b0SSuyog Pawar
/**
 * Chooses the partition mask to be searched for a block.
 *
 *  - u1_blk_8x8_mask other than 15 (all four low bits set): ENABLE_NxN.
 *  - Otherwise, when input segmentation is to be used, the mask returned by
 *    hme_study_input_segmentation(), with ENABLE_AMP removed for
 *    ME_XTREME_SPEED and both ENABLE_AMP and ENABLE_SMP removed for
 *    ME_XTREME_SPEED_25.
 *  - Otherwise: ENABLE_2Nx2N.
 *
 * Input segmentation is used for every preset except ME_XTREME_SPEED_25; for
 * that preset it is used only when the DISABLE_8X8CUS_IN_*_IN_P6 switch that
 * matches the picture type (P, reference B, non-reference B) is zero.
 *
 * @param pu1_inp                     Input block, forwarded to the
 *                                    segmentation study.
 * @param i4_inp_stride               Stride of pu1_inp.
 * @param u1_limit_active_partitions  Forwarded to the segmentation study.
 * @param u1_is_bPic                  Non-zero for a B picture.
 * @param u1_is_refPic                Non-zero for a reference picture.
 * @param u1_blk_8x8_mask             8x8 block mask of the block.
 * @param e_me_quality_preset         ME quality preset.
 *
 * @return Partition mask for the block.
 */
S32 hme_part_mask_populator(
    U08 *pu1_inp,
    S32 i4_inp_stride,
    U08 u1_limit_active_partitions,
    U08 u1_is_bPic,
    U08 u1_is_refPic,
    U08 u1_blk_8x8_mask,
    ME_QUALITY_PRESETS_T e_me_quality_preset)
{
    U08 u1_use_inp_segmentation;
    S32 i4_mask;

    if(15 != u1_blk_8x8_mask)
    {
        return ENABLE_NxN;
    }

    /* Every preset other than ME_XTREME_SPEED_25 studies the input */
    u1_use_inp_segmentation = (ME_XTREME_SPEED_25 != e_me_quality_preset);

    if(!u1_use_inp_segmentation)
    {
        /* ME_XTREME_SPEED_25: decided by the switch of the picture type */
        if(!u1_is_bPic)
        {
            u1_use_inp_segmentation = !DISABLE_8X8CUS_IN_PPICS_IN_P6;
        }
        else if(u1_is_refPic)
        {
            u1_use_inp_segmentation = !DISABLE_8X8CUS_IN_REFBPICS_IN_P6;
        }
        else
        {
            u1_use_inp_segmentation = !DISABLE_8X8CUS_IN_NREFBPICS_IN_P6;
        }
    }

    if(!u1_use_inp_segmentation)
    {
        return ENABLE_2Nx2N;
    }

    i4_mask = hme_study_input_segmentation(pu1_inp, i4_inp_stride, u1_limit_active_partitions);

    /* The fastest presets drop partition types from the studied mask */
    if(e_me_quality_preset == ME_XTREME_SPEED)
    {
        i4_mask &= ~ENABLE_AMP;
    }

    if(e_me_quality_preset == ME_XTREME_SPEED_25)
    {
        i4_mask &= ~ENABLE_AMP;
        i4_mask &= ~ENABLE_SMP;
    }

    return i4_mask;
}
5708