xref: /aosp_15_r20/external/libhevc/encoder/ihevce_enc_loop_pass.c (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar  *
3*c83a76b0SSuyog Pawar  * Copyright (C) 2018 The Android Open Source Project
4*c83a76b0SSuyog Pawar  *
5*c83a76b0SSuyog Pawar  * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar  * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar  * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar  *
9*c83a76b0SSuyog Pawar  * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar  *
11*c83a76b0SSuyog Pawar  * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar  * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar  * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar  * limitations under the License.
16*c83a76b0SSuyog Pawar  *
17*c83a76b0SSuyog Pawar  *****************************************************************************
18*c83a76b0SSuyog Pawar  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*c83a76b0SSuyog Pawar */
20*c83a76b0SSuyog Pawar 
21*c83a76b0SSuyog Pawar /*!
22*c83a76b0SSuyog Pawar ******************************************************************************
23*c83a76b0SSuyog Pawar * \file ihevce_enc_loop_pass.c
24*c83a76b0SSuyog Pawar *
25*c83a76b0SSuyog Pawar * \brief
26*c83a76b0SSuyog Pawar *    This file contains Encoder normative loop pass related functions
27*c83a76b0SSuyog Pawar *
28*c83a76b0SSuyog Pawar * \date
29*c83a76b0SSuyog Pawar *    18/09/2012
30*c83a76b0SSuyog Pawar *
31*c83a76b0SSuyog Pawar * \author
32*c83a76b0SSuyog Pawar *    Ittiam
33*c83a76b0SSuyog Pawar *
34*c83a76b0SSuyog Pawar *
35*c83a76b0SSuyog Pawar * List of Functions
36*c83a76b0SSuyog Pawar *
37*c83a76b0SSuyog Pawar *
38*c83a76b0SSuyog Pawar ******************************************************************************
39*c83a76b0SSuyog Pawar */
40*c83a76b0SSuyog Pawar 
41*c83a76b0SSuyog Pawar /*****************************************************************************/
42*c83a76b0SSuyog Pawar /* File Includes                                                             */
43*c83a76b0SSuyog Pawar /*****************************************************************************/
44*c83a76b0SSuyog Pawar /* System include files */
45*c83a76b0SSuyog Pawar #include <stdio.h>
46*c83a76b0SSuyog Pawar #include <string.h>
47*c83a76b0SSuyog Pawar #include <stdlib.h>
48*c83a76b0SSuyog Pawar #include <assert.h>
49*c83a76b0SSuyog Pawar #include <stdarg.h>
50*c83a76b0SSuyog Pawar #include <math.h>
51*c83a76b0SSuyog Pawar #include <limits.h>
52*c83a76b0SSuyog Pawar 
53*c83a76b0SSuyog Pawar /* User include files */
54*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
55*c83a76b0SSuyog Pawar #include "itt_video_api.h"
56*c83a76b0SSuyog Pawar #include "ihevce_api.h"
57*c83a76b0SSuyog Pawar 
58*c83a76b0SSuyog Pawar #include "rc_cntrl_param.h"
59*c83a76b0SSuyog Pawar #include "rc_frame_info_collector.h"
60*c83a76b0SSuyog Pawar #include "rc_look_ahead_params.h"
61*c83a76b0SSuyog Pawar 
62*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
63*c83a76b0SSuyog Pawar #include "ihevc_macros.h"
64*c83a76b0SSuyog Pawar #include "ihevc_debug.h"
65*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
66*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
67*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
68*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
69*c83a76b0SSuyog Pawar #include "ihevc_chroma_itrans_recon.h"
70*c83a76b0SSuyog Pawar #include "ihevc_chroma_intra_pred.h"
71*c83a76b0SSuyog Pawar #include "ihevc_intra_pred.h"
72*c83a76b0SSuyog Pawar #include "ihevc_inter_pred.h"
73*c83a76b0SSuyog Pawar #include "ihevc_mem_fns.h"
74*c83a76b0SSuyog Pawar #include "ihevc_padding.h"
75*c83a76b0SSuyog Pawar #include "ihevc_weighted_pred.h"
76*c83a76b0SSuyog Pawar #include "ihevc_sao.h"
77*c83a76b0SSuyog Pawar #include "ihevc_resi_trans.h"
78*c83a76b0SSuyog Pawar #include "ihevc_quant_iquant_ssd.h"
79*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
80*c83a76b0SSuyog Pawar #include "ihevc_common_tables.h"
81*c83a76b0SSuyog Pawar #include "ihevc_quant_tables.h"
82*c83a76b0SSuyog Pawar 
83*c83a76b0SSuyog Pawar #include "ihevce_defs.h"
84*c83a76b0SSuyog Pawar #include "ihevce_hle_interface.h"
85*c83a76b0SSuyog Pawar #include "ihevce_lap_enc_structs.h"
86*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_structs.h"
87*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_funcs.h"
88*c83a76b0SSuyog Pawar #include "ihevce_me_common_defs.h"
89*c83a76b0SSuyog Pawar #include "ihevce_had_satd.h"
90*c83a76b0SSuyog Pawar #include "ihevce_error_codes.h"
91*c83a76b0SSuyog Pawar #include "ihevce_bitstream.h"
92*c83a76b0SSuyog Pawar #include "ihevce_cabac.h"
93*c83a76b0SSuyog Pawar #include "ihevce_rdoq_macros.h"
94*c83a76b0SSuyog Pawar #include "ihevce_function_selector.h"
95*c83a76b0SSuyog Pawar #include "ihevce_enc_structs.h"
96*c83a76b0SSuyog Pawar #include "ihevce_entropy_structs.h"
97*c83a76b0SSuyog Pawar #include "ihevce_cmn_utils_instr_set_router.h"
98*c83a76b0SSuyog Pawar #include "ihevce_ipe_instr_set_router.h"
99*c83a76b0SSuyog Pawar #include "ihevce_decomp_pre_intra_structs.h"
100*c83a76b0SSuyog Pawar #include "ihevce_decomp_pre_intra_pass.h"
101*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_structs.h"
102*c83a76b0SSuyog Pawar #include "ihevce_nbr_avail.h"
103*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_utils.h"
104*c83a76b0SSuyog Pawar #include "ihevce_sub_pic_rc.h"
105*c83a76b0SSuyog Pawar #include "ihevce_global_tables.h"
106*c83a76b0SSuyog Pawar #include "ihevce_bs_compute_ctb.h"
107*c83a76b0SSuyog Pawar #include "ihevce_cabac_rdo.h"
108*c83a76b0SSuyog Pawar #include "ihevce_deblk.h"
109*c83a76b0SSuyog Pawar #include "ihevce_frame_process.h"
110*c83a76b0SSuyog Pawar #include "ihevce_rc_enc_structs.h"
111*c83a76b0SSuyog Pawar #include "hme_datatype.h"
112*c83a76b0SSuyog Pawar #include "hme_interface.h"
113*c83a76b0SSuyog Pawar #include "hme_common_defs.h"
114*c83a76b0SSuyog Pawar #include "hme_defs.h"
115*c83a76b0SSuyog Pawar #include "ihevce_me_instr_set_router.h"
116*c83a76b0SSuyog Pawar #include "ihevce_enc_subpel_gen.h"
117*c83a76b0SSuyog Pawar #include "ihevce_inter_pred.h"
118*c83a76b0SSuyog Pawar #include "ihevce_mv_pred.h"
119*c83a76b0SSuyog Pawar #include "ihevce_mv_pred_merge.h"
120*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_inter_mode_sifter.h"
121*c83a76b0SSuyog Pawar #include "ihevce_enc_cu_recursion.h"
122*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_pass.h"
123*c83a76b0SSuyog Pawar #include "ihevce_common_utils.h"
124*c83a76b0SSuyog Pawar #include "ihevce_dep_mngr_interface.h"
125*c83a76b0SSuyog Pawar #include "ihevce_sao.h"
126*c83a76b0SSuyog Pawar #include "ihevce_tile_interface.h"
127*c83a76b0SSuyog Pawar #include "ihevce_profile.h"
128*c83a76b0SSuyog Pawar 
129*c83a76b0SSuyog Pawar #include "cast_types.h"
130*c83a76b0SSuyog Pawar #include "osal.h"
131*c83a76b0SSuyog Pawar #include "osal_defaults.h"
132*c83a76b0SSuyog Pawar 
133*c83a76b0SSuyog Pawar /*****************************************************************************/
134*c83a76b0SSuyog Pawar /* Globals                                                                   */
135*c83a76b0SSuyog Pawar /*****************************************************************************/
136*c83a76b0SSuyog Pawar extern PART_ID_T ge_part_type_to_part_id[MAX_PART_TYPES][MAX_NUM_PARTS];
137*c83a76b0SSuyog Pawar 
138*c83a76b0SSuyog Pawar extern UWORD8 gau1_num_parts_in_part_type[MAX_PART_TYPES];
139*c83a76b0SSuyog Pawar 
140*c83a76b0SSuyog Pawar /*****************************************************************************/
141*c83a76b0SSuyog Pawar /* Constant Macros                                                           */
142*c83a76b0SSuyog Pawar /*****************************************************************************/
143*c83a76b0SSuyog Pawar #define UPDATE_QP_AT_CTB 6
144*c83a76b0SSuyog Pawar #define INTRAPRED_SIMD_LEFT_PADDING 16
145*c83a76b0SSuyog Pawar #define INTRAPRED_SIMD_RIGHT_PADDING 8
146*c83a76b0SSuyog Pawar 
147*c83a76b0SSuyog Pawar /*****************************************************************************/
148*c83a76b0SSuyog Pawar /* Function Definitions                                                      */
149*c83a76b0SSuyog Pawar /*****************************************************************************/
150*c83a76b0SSuyog Pawar 
151*c83a76b0SSuyog Pawar /*!
152*c83a76b0SSuyog Pawar ******************************************************************************
153*c83a76b0SSuyog Pawar * \if Function name : ihevce_enc_loop_ctb_left_copy \endif
154*c83a76b0SSuyog Pawar *
155*c83a76b0SSuyog Pawar * \brief
156*c83a76b0SSuyog Pawar *    This function copies the right-most column data of the CTB to the context buffers
157*c83a76b0SSuyog Pawar *
158*c83a76b0SSuyog Pawar * \date
159*c83a76b0SSuyog Pawar *    18/09/2012
160*c83a76b0SSuyog Pawar *
161*c83a76b0SSuyog Pawar * \author
162*c83a76b0SSuyog Pawar *    Ittiam
163*c83a76b0SSuyog Pawar *
164*c83a76b0SSuyog Pawar * \return
165*c83a76b0SSuyog Pawar *
166*c83a76b0SSuyog Pawar * List of Functions
167*c83a76b0SSuyog Pawar *
168*c83a76b0SSuyog Pawar *
169*c83a76b0SSuyog Pawar ******************************************************************************
170*c83a76b0SSuyog Pawar */
void ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms)
{
    /* ------------------------------------------------------------------ */
    /* Save the right-most column of the current CTB (luma pels,          */
    /* interleaved chroma pels and 4x4 neighbour entries) into the        */
    /* "left" buffers held in the context.                                */
    /* ------------------------------------------------------------------ */

    const WORD32 i4_ctb_size = ps_cu_prms->i4_ctb_size;
    const WORD32 i4_num_4x4 = i4_ctb_size >> 2;
    const UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);

    /* chroma column is half the luma height unless the format is 4:2:2 */
    const WORD32 i4_chroma_rows = u1_is_422 ? i4_ctb_size : (i4_ctb_size >> 1);

    UWORD8 *pu1_dst_luma = (UWORD8 *)ps_ctxt->pv_left_luma_data;
    UWORD8 *pu1_dst_chroma = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
    UWORD8 *pu1_src;
    WORD32 i4_row;

    /* luma: last pel of every recon row */
    pu1_src = ps_cu_prms->pu1_luma_recon + (i4_ctb_size - 1);

    for(i4_row = 0; i4_row < i4_ctb_size; i4_row++)
    {
        pu1_dst_luma[i4_row] = pu1_src[i4_row * ps_cu_prms->i4_luma_recon_stride];
    }

    /* chroma: last two bytes (one interleaved pair) of every recon row */
    pu1_src = ps_cu_prms->pu1_chrm_recon + (i4_ctb_size - 2);

    for(i4_row = 0; i4_row < i4_chroma_rows; i4_row++)
    {
        const WORD32 i4_src_off = i4_row * ps_cu_prms->i4_chrm_recon_stride;

        pu1_dst_chroma[(2 * i4_row)] = pu1_src[i4_src_off];
        pu1_dst_chroma[(2 * i4_row) + 1] = pu1_src[i4_src_off + 1];
    }

    /* 4x4 neighbour info: last entry of every row of the CTB level array, */
    /* whose row stride equals the number of 4x4 units in a CTB row        */
    for(i4_row = 0; i4_row < i4_num_4x4; i4_row++)
    {
        ps_ctxt->as_left_col_nbr[i4_row] =
            ps_ctxt->as_ctb_nbr_arr[(i4_row * i4_num_4x4) + (i4_num_4x4 - 1)];
    }
}
230*c83a76b0SSuyog Pawar 
231*c83a76b0SSuyog Pawar /*!
232*c83a76b0SSuyog Pawar ******************************************************************************
233*c83a76b0SSuyog Pawar * \if Function name : ihevce_mark_all_modes_to_evaluate \endif
234*c83a76b0SSuyog Pawar *
235*c83a76b0SSuyog Pawar * \brief
236*c83a76b0SSuyog Pawar *   Mark all modes for inter/intra for evaluation. This function will be
237*c83a76b0SSuyog Pawar *   called by ref instance
238*c83a76b0SSuyog Pawar *
239*c83a76b0SSuyog Pawar * \param[in] pv_ctxt : pointer to enc_loop module
240*c83a76b0SSuyog Pawar * \param[in] ps_cu_analyse : pointer to cu analyse
241*c83a76b0SSuyog Pawar *
242*c83a76b0SSuyog Pawar * \return
243*c83a76b0SSuyog Pawar *    None
244*c83a76b0SSuyog Pawar *
245*c83a76b0SSuyog Pawar * \author
246*c83a76b0SSuyog Pawar *  Ittiam
247*c83a76b0SSuyog Pawar *
248*c83a76b0SSuyog Pawar *****************************************************************************
249*c83a76b0SSuyog Pawar */
void ihevce_mark_all_modes_to_evaluate(void *pv_ctxt, cu_analyse_t *ps_cu_analyse)
{
    UWORD8 u1_cand;
    WORD32 i4_pu;

    /* context handle is not needed here */
    (void)pv_ctxt;

    /* flag every inter candidate slot for evaluation */
    u1_cand = 0;
    while(u1_cand < MAX_INTER_CU_CANDIDATES)
    {
        ps_cu_analyse->as_cu_inter_cand[u1_cand].b1_eval_mark = 1;
        u1_cand++;
    }

    /* intra marks are touched only when intra rdopt candidates exist */
    if(0 == ps_cu_analyse->u1_num_intra_rdopt_cands)
    {
        return;
    }

    /* flag every intra candidate slot: 2Nx2N (TU = CU and TU = CU/2) and NxN */
    u1_cand = 0;
    while(u1_cand < MAX_INTRA_CU_CANDIDATES + 1)
    {
        ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[u1_cand] = 1;
        ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[u1_cand] = 1;

        for(i4_pu = 0; i4_pu < NUM_PU_PARTS; i4_pu++)
        {
            ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[i4_pu][u1_cand] = 1;
        }

        u1_cand++;
    }
}
277*c83a76b0SSuyog Pawar 
278*c83a76b0SSuyog Pawar /*!
279*c83a76b0SSuyog Pawar ******************************************************************************
280*c83a76b0SSuyog Pawar * \if Function name : ihevce_cu_mode_decide \endif
281*c83a76b0SSuyog Pawar *
282*c83a76b0SSuyog Pawar * \brief
283*c83a76b0SSuyog Pawar *    Coding Unit mode decide function. Performs RD opt and decides the best mode
284*c83a76b0SSuyog Pawar *
285*c83a76b0SSuyog Pawar * \param[in] ps_ctxt : pointer to enc_loop module
286*c83a76b0SSuyog Pawar * \param[in] ps_cu_prms  : pointer to coding unit params (position, buffer pointers)
287*c83a76b0SSuyog Pawar * \param[in] ps_cu_analyse : pointer to cu analyse
288*c83a76b0SSuyog Pawar * \param[out] ps_final_mode_state : pointer to cu final mode state
289*c83a76b0SSuyog Pawar * \param[out] pu1_ecd_data : pointer to store coeff data for ECD
290*c83a76b0SSuyog Pawar * \param[out] ps_col_pu : colocated pu buffer pointer
291*c83a76b0SSuyog Pawar * \param[out] pu1_col_pu_map : colocated pu map buffer pointer
292*c83a76b0SSuyog Pawar * \param[in] col_start_pu_idx : pu index start value
293*c83a76b0SSuyog Pawar *
294*c83a76b0SSuyog Pawar * \return
295*c83a76b0SSuyog Pawar *    None
296*c83a76b0SSuyog Pawar *
297*c83a76b0SSuyog Pawar *
298*c83a76b0SSuyog Pawar * \author
299*c83a76b0SSuyog Pawar *  Ittiam
300*c83a76b0SSuyog Pawar *
301*c83a76b0SSuyog Pawar *****************************************************************************
302*c83a76b0SSuyog Pawar */
ihevce_cu_mode_decide(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,cu_analyse_t * ps_cu_analyse,final_mode_state_t * ps_final_mode_state,UWORD8 * pu1_ecd_data,pu_col_mv_t * ps_col_pu,UWORD8 * pu1_col_pu_map,WORD32 col_start_pu_idx)303*c83a76b0SSuyog Pawar LWORD64 ihevce_cu_mode_decide(
304*c83a76b0SSuyog Pawar     ihevce_enc_loop_ctxt_t *ps_ctxt,
305*c83a76b0SSuyog Pawar     enc_loop_cu_prms_t *ps_cu_prms,
306*c83a76b0SSuyog Pawar     cu_analyse_t *ps_cu_analyse,
307*c83a76b0SSuyog Pawar     final_mode_state_t *ps_final_mode_state,
308*c83a76b0SSuyog Pawar     UWORD8 *pu1_ecd_data,
309*c83a76b0SSuyog Pawar     pu_col_mv_t *ps_col_pu,
310*c83a76b0SSuyog Pawar     UWORD8 *pu1_col_pu_map,
311*c83a76b0SSuyog Pawar     WORD32 col_start_pu_idx)
312*c83a76b0SSuyog Pawar {
313*c83a76b0SSuyog Pawar     enc_loop_chrm_cu_buf_prms_t s_chrm_cu_buf_prms;
314*c83a76b0SSuyog Pawar     cu_nbr_prms_t s_cu_nbr_prms;
315*c83a76b0SSuyog Pawar     inter_cu_mode_info_t s_inter_cu_mode_info;
316*c83a76b0SSuyog Pawar     cu_inter_cand_t *ps_best_inter_cand = NULL;
317*c83a76b0SSuyog Pawar     UWORD8 *pu1_cu_top;
318*c83a76b0SSuyog Pawar     UWORD8 *pu1_cu_top_left;
319*c83a76b0SSuyog Pawar     UWORD8 *pu1_cu_left;
320*c83a76b0SSuyog Pawar     UWORD8 *pu1_final_recon = NULL;
321*c83a76b0SSuyog Pawar     UWORD8 *pu1_curr_src = NULL;
322*c83a76b0SSuyog Pawar     void *pv_curr_src = NULL;
323*c83a76b0SSuyog Pawar     void *pv_cu_left = NULL;
324*c83a76b0SSuyog Pawar     void *pv_cu_top = NULL;
325*c83a76b0SSuyog Pawar     void *pv_cu_top_left = NULL;
326*c83a76b0SSuyog Pawar 
327*c83a76b0SSuyog Pawar     WORD32 cu_left_stride = 0;
328*c83a76b0SSuyog Pawar     WORD32 ctr;
329*c83a76b0SSuyog Pawar     WORD32 rd_opt_best_idx;
330*c83a76b0SSuyog Pawar     LWORD64 rd_opt_least_cost;
331*c83a76b0SSuyog Pawar     WORD32 rd_opt_curr_idx;
332*c83a76b0SSuyog Pawar     WORD32 num_4x4_in_ctb;
333*c83a76b0SSuyog Pawar     WORD32 nbr_4x4_left_strd = 0;
334*c83a76b0SSuyog Pawar 
335*c83a76b0SSuyog Pawar     nbr_4x4_t *ps_topleft_nbr_4x4;
336*c83a76b0SSuyog Pawar     nbr_4x4_t *ps_left_nbr_4x4 = NULL;
337*c83a76b0SSuyog Pawar     nbr_4x4_t *ps_top_nbr_4x4 = NULL;
338*c83a76b0SSuyog Pawar     nbr_4x4_t *ps_curr_nbr_4x4;
339*c83a76b0SSuyog Pawar     WORD32 enable_intra_eval_flag;
340*c83a76b0SSuyog Pawar     WORD32 i4_best_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1;
341*c83a76b0SSuyog Pawar     WORD32 curr_cu_pos_in_row;
342*c83a76b0SSuyog Pawar     WORD32 cu_top_right_offset;
343*c83a76b0SSuyog Pawar     WORD32 cu_top_right_dep_pos;
344*c83a76b0SSuyog Pawar     WORD32 i4_ctb_x_off, i4_ctb_y_off;
345*c83a76b0SSuyog Pawar 
346*c83a76b0SSuyog Pawar     UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
347*c83a76b0SSuyog Pawar     (void)ps_final_mode_state;
348*c83a76b0SSuyog Pawar     /* default init */
349*c83a76b0SSuyog Pawar     rd_opt_least_cost = MAX_COST_64;
350*c83a76b0SSuyog Pawar     ps_ctxt->as_cu_prms[0].i8_best_rdopt_cost = MAX_COST_64;
351*c83a76b0SSuyog Pawar     ps_ctxt->as_cu_prms[1].i8_best_rdopt_cost = MAX_COST_64;
352*c83a76b0SSuyog Pawar 
353*c83a76b0SSuyog Pawar     /* Zero cbf tool is enabled by default for all presets */
354*c83a76b0SSuyog Pawar     ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
355*c83a76b0SSuyog Pawar 
356*c83a76b0SSuyog Pawar     rd_opt_best_idx = 1;
357*c83a76b0SSuyog Pawar     rd_opt_curr_idx = 0;
358*c83a76b0SSuyog Pawar     enable_intra_eval_flag = 1;
359*c83a76b0SSuyog Pawar 
360*c83a76b0SSuyog Pawar     /* CU params in enc ctxt*/
361*c83a76b0SSuyog Pawar     ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
362*c83a76b0SSuyog Pawar     ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
363*c83a76b0SSuyog Pawar     ps_ctxt->ps_enc_out_ctxt->u1_cu_size = ps_cu_analyse->u1_cu_size;
364*c83a76b0SSuyog Pawar 
365*c83a76b0SSuyog Pawar     num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
366*c83a76b0SSuyog Pawar     ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
367*c83a76b0SSuyog Pawar     ps_curr_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
368*c83a76b0SSuyog Pawar     ps_curr_nbr_4x4 += ((ps_cu_analyse->b3_cu_pos_y << 1) * num_4x4_in_ctb);
369*c83a76b0SSuyog Pawar 
370*c83a76b0SSuyog Pawar     /* CB and Cr are pixel interleaved */
371*c83a76b0SSuyog Pawar     s_chrm_cu_buf_prms.i4_chrm_recon_stride = ps_cu_prms->i4_chrm_recon_stride;
372*c83a76b0SSuyog Pawar 
373*c83a76b0SSuyog Pawar     s_chrm_cu_buf_prms.i4_chrm_src_stride = ps_cu_prms->i4_chrm_src_stride;
374*c83a76b0SSuyog Pawar 
375*c83a76b0SSuyog Pawar     if(!ps_ctxt->u1_is_input_data_hbd)
376*c83a76b0SSuyog Pawar     {
377*c83a76b0SSuyog Pawar         /* --------------------------------------- */
378*c83a76b0SSuyog Pawar         /* ----- Luma Pointers Derivation -------- */
379*c83a76b0SSuyog Pawar         /* --------------------------------------- */
380*c83a76b0SSuyog Pawar 
381*c83a76b0SSuyog Pawar         /* based on CU position derive the pointers */
382*c83a76b0SSuyog Pawar         pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
383*c83a76b0SSuyog Pawar 
384*c83a76b0SSuyog Pawar         pu1_curr_src = ps_cu_prms->pu1_luma_src + (ps_cu_analyse->b3_cu_pos_x << 3);
385*c83a76b0SSuyog Pawar 
386*c83a76b0SSuyog Pawar         pu1_final_recon += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
387*c83a76b0SSuyog Pawar 
388*c83a76b0SSuyog Pawar         pu1_curr_src += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_src_stride);
389*c83a76b0SSuyog Pawar 
390*c83a76b0SSuyog Pawar         pv_curr_src = pu1_curr_src;
391*c83a76b0SSuyog Pawar 
392*c83a76b0SSuyog Pawar         /* CU left */
393*c83a76b0SSuyog Pawar         if(0 == ps_cu_analyse->b3_cu_pos_x)
394*c83a76b0SSuyog Pawar         {
395*c83a76b0SSuyog Pawar             /* CTB boundary */
396*c83a76b0SSuyog Pawar             pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
397*c83a76b0SSuyog Pawar             pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << 3);
398*c83a76b0SSuyog Pawar             cu_left_stride = 1;
399*c83a76b0SSuyog Pawar 
400*c83a76b0SSuyog Pawar             ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0];
401*c83a76b0SSuyog Pawar             ps_left_nbr_4x4 += ps_cu_analyse->b3_cu_pos_y << 1;
402*c83a76b0SSuyog Pawar             nbr_4x4_left_strd = 1;
403*c83a76b0SSuyog Pawar         }
404*c83a76b0SSuyog Pawar         else
405*c83a76b0SSuyog Pawar         {
406*c83a76b0SSuyog Pawar             /* inside CTB */
407*c83a76b0SSuyog Pawar             pu1_cu_left = pu1_final_recon - 1;
408*c83a76b0SSuyog Pawar             cu_left_stride = ps_cu_prms->i4_luma_recon_stride;
409*c83a76b0SSuyog Pawar 
410*c83a76b0SSuyog Pawar             ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1;
411*c83a76b0SSuyog Pawar             nbr_4x4_left_strd = num_4x4_in_ctb;
412*c83a76b0SSuyog Pawar         }
413*c83a76b0SSuyog Pawar 
414*c83a76b0SSuyog Pawar         pv_cu_left = pu1_cu_left;
415*c83a76b0SSuyog Pawar 
416*c83a76b0SSuyog Pawar         /* CU top */
417*c83a76b0SSuyog Pawar         if(0 == ps_cu_analyse->b3_cu_pos_y)
418*c83a76b0SSuyog Pawar         {
419*c83a76b0SSuyog Pawar             /* CTB boundary */
420*c83a76b0SSuyog Pawar             pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_luma;
421*c83a76b0SSuyog Pawar             pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
422*c83a76b0SSuyog Pawar             pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
423*c83a76b0SSuyog Pawar 
424*c83a76b0SSuyog Pawar             ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr;
425*c83a76b0SSuyog Pawar             ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2));
426*c83a76b0SSuyog Pawar             ps_top_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
427*c83a76b0SSuyog Pawar         }
428*c83a76b0SSuyog Pawar         else
429*c83a76b0SSuyog Pawar         {
430*c83a76b0SSuyog Pawar             /* inside CTB */
431*c83a76b0SSuyog Pawar             pu1_cu_top = pu1_final_recon - ps_cu_prms->i4_luma_recon_stride;
432*c83a76b0SSuyog Pawar 
433*c83a76b0SSuyog Pawar             ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb;
434*c83a76b0SSuyog Pawar         }
435*c83a76b0SSuyog Pawar 
436*c83a76b0SSuyog Pawar         pv_cu_top = pu1_cu_top;
437*c83a76b0SSuyog Pawar 
438*c83a76b0SSuyog Pawar         /* CU top left */
439*c83a76b0SSuyog Pawar         if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
440*c83a76b0SSuyog Pawar         {
441*c83a76b0SSuyog Pawar             /* left ctb boundary but not first row */
442*c83a76b0SSuyog Pawar             pu1_cu_top_left = pu1_cu_left - 1; /* stride is 1 */
443*c83a76b0SSuyog Pawar             ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - 1; /* stride is 1 */
444*c83a76b0SSuyog Pawar         }
445*c83a76b0SSuyog Pawar         else
446*c83a76b0SSuyog Pawar         {
447*c83a76b0SSuyog Pawar             /* rest all cases topleft is top -1 */
448*c83a76b0SSuyog Pawar             pu1_cu_top_left = pu1_cu_top - 1;
449*c83a76b0SSuyog Pawar             ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
450*c83a76b0SSuyog Pawar         }
451*c83a76b0SSuyog Pawar 
452*c83a76b0SSuyog Pawar         pv_cu_top_left = pu1_cu_top_left;
453*c83a76b0SSuyog Pawar 
454*c83a76b0SSuyog Pawar         /* Store the CU nbr information in the ctxt for final reconstruction fun. */
455*c83a76b0SSuyog Pawar         s_cu_nbr_prms.nbr_4x4_left_strd = nbr_4x4_left_strd;
456*c83a76b0SSuyog Pawar         s_cu_nbr_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
457*c83a76b0SSuyog Pawar         s_cu_nbr_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
458*c83a76b0SSuyog Pawar         s_cu_nbr_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
459*c83a76b0SSuyog Pawar         s_cu_nbr_prms.pu1_cu_left = pu1_cu_left;
460*c83a76b0SSuyog Pawar         s_cu_nbr_prms.pu1_cu_top = pu1_cu_top;
461*c83a76b0SSuyog Pawar         s_cu_nbr_prms.pu1_cu_top_left = pu1_cu_top_left;
462*c83a76b0SSuyog Pawar         s_cu_nbr_prms.cu_left_stride = cu_left_stride;
463*c83a76b0SSuyog Pawar 
464*c83a76b0SSuyog Pawar         /* ------------------------------------------------------------ */
465*c83a76b0SSuyog Pawar         /* -- Initialize the number of neigbour skip cu count for rdo --*/
466*c83a76b0SSuyog Pawar         /* ------------------------------------------------------------ */
467*c83a76b0SSuyog Pawar         {
468*c83a76b0SSuyog Pawar             nbr_avail_flags_t s_nbr;
469*c83a76b0SSuyog Pawar             WORD32 i4_num_nbr_skip_cus = 0;
470*c83a76b0SSuyog Pawar 
471*c83a76b0SSuyog Pawar             /* get the neighbour availability flags for current cu  */
472*c83a76b0SSuyog Pawar             ihevce_get_nbr_intra(
473*c83a76b0SSuyog Pawar                 &s_nbr,
474*c83a76b0SSuyog Pawar                 ps_ctxt->pu1_ctb_nbr_map,
475*c83a76b0SSuyog Pawar                 ps_ctxt->i4_nbr_map_strd,
476*c83a76b0SSuyog Pawar                 (ps_cu_analyse->b3_cu_pos_x << 1),
477*c83a76b0SSuyog Pawar                 (ps_cu_analyse->b3_cu_pos_y << 1),
478*c83a76b0SSuyog Pawar                 (ps_cu_analyse->u1_cu_size >> 2));
479*c83a76b0SSuyog Pawar             if(s_nbr.u1_top_avail)
480*c83a76b0SSuyog Pawar             {
481*c83a76b0SSuyog Pawar                 i4_num_nbr_skip_cus += ps_top_nbr_4x4->b1_skip_flag;
482*c83a76b0SSuyog Pawar             }
483*c83a76b0SSuyog Pawar 
484*c83a76b0SSuyog Pawar             if(s_nbr.u1_left_avail)
485*c83a76b0SSuyog Pawar             {
486*c83a76b0SSuyog Pawar                 i4_num_nbr_skip_cus += ps_left_nbr_4x4->b1_skip_flag;
487*c83a76b0SSuyog Pawar             }
488*c83a76b0SSuyog Pawar             ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0].i4_num_nbr_skip_cus =
489*c83a76b0SSuyog Pawar                 i4_num_nbr_skip_cus;
490*c83a76b0SSuyog Pawar             ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1].i4_num_nbr_skip_cus =
491*c83a76b0SSuyog Pawar                 i4_num_nbr_skip_cus;
492*c83a76b0SSuyog Pawar         }
493*c83a76b0SSuyog Pawar 
494*c83a76b0SSuyog Pawar         /* --------------------------------------- */
495*c83a76b0SSuyog Pawar         /* --- Chroma Pointers Derivation -------- */
496*c83a76b0SSuyog Pawar         /* --------------------------------------- */
497*c83a76b0SSuyog Pawar 
498*c83a76b0SSuyog Pawar         /* based on CU position derive the pointers */
499*c83a76b0SSuyog Pawar         s_chrm_cu_buf_prms.pu1_final_recon =
500*c83a76b0SSuyog Pawar             ps_cu_prms->pu1_chrm_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
501*c83a76b0SSuyog Pawar 
502*c83a76b0SSuyog Pawar         s_chrm_cu_buf_prms.pu1_curr_src =
503*c83a76b0SSuyog Pawar             ps_cu_prms->pu1_chrm_src + (ps_cu_analyse->b3_cu_pos_x << 3);
504*c83a76b0SSuyog Pawar 
505*c83a76b0SSuyog Pawar         s_chrm_cu_buf_prms.pu1_final_recon +=
506*c83a76b0SSuyog Pawar             ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_recon_stride);
507*c83a76b0SSuyog Pawar 
508*c83a76b0SSuyog Pawar         s_chrm_cu_buf_prms.pu1_curr_src +=
509*c83a76b0SSuyog Pawar             ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_src_stride);
510*c83a76b0SSuyog Pawar 
511*c83a76b0SSuyog Pawar         /* CU left */
512*c83a76b0SSuyog Pawar         if(0 == ps_cu_analyse->b3_cu_pos_x)
513*c83a76b0SSuyog Pawar         {
514*c83a76b0SSuyog Pawar             /* CTB boundary */
515*c83a76b0SSuyog Pawar             s_chrm_cu_buf_prms.pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
516*c83a76b0SSuyog Pawar             s_chrm_cu_buf_prms.pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 3));
517*c83a76b0SSuyog Pawar             s_chrm_cu_buf_prms.i4_cu_left_stride = 2;
518*c83a76b0SSuyog Pawar         }
519*c83a76b0SSuyog Pawar         else
520*c83a76b0SSuyog Pawar         {
521*c83a76b0SSuyog Pawar             /* inside CTB */
522*c83a76b0SSuyog Pawar             s_chrm_cu_buf_prms.pu1_cu_left = s_chrm_cu_buf_prms.pu1_final_recon - 2;
523*c83a76b0SSuyog Pawar             s_chrm_cu_buf_prms.i4_cu_left_stride = ps_cu_prms->i4_chrm_recon_stride;
524*c83a76b0SSuyog Pawar         }
525*c83a76b0SSuyog Pawar 
526*c83a76b0SSuyog Pawar         /* CU top */
527*c83a76b0SSuyog Pawar         if(0 == ps_cu_analyse->b3_cu_pos_y)
528*c83a76b0SSuyog Pawar         {
529*c83a76b0SSuyog Pawar             /* CTB boundary */
530*c83a76b0SSuyog Pawar             s_chrm_cu_buf_prms.pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_chroma;
531*c83a76b0SSuyog Pawar             s_chrm_cu_buf_prms.pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
532*c83a76b0SSuyog Pawar             s_chrm_cu_buf_prms.pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
533*c83a76b0SSuyog Pawar         }
534*c83a76b0SSuyog Pawar         else
535*c83a76b0SSuyog Pawar         {
536*c83a76b0SSuyog Pawar             /* inside CTB */
537*c83a76b0SSuyog Pawar             s_chrm_cu_buf_prms.pu1_cu_top =
538*c83a76b0SSuyog Pawar                 s_chrm_cu_buf_prms.pu1_final_recon - ps_cu_prms->i4_chrm_recon_stride;
539*c83a76b0SSuyog Pawar         }
540*c83a76b0SSuyog Pawar 
541*c83a76b0SSuyog Pawar         /* CU top left */
542*c83a76b0SSuyog Pawar         if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
543*c83a76b0SSuyog Pawar         {
544*c83a76b0SSuyog Pawar             /* left ctb boundary but not first row */
545*c83a76b0SSuyog Pawar             s_chrm_cu_buf_prms.pu1_cu_top_left =
546*c83a76b0SSuyog Pawar                 s_chrm_cu_buf_prms.pu1_cu_left - 2; /* stride is 1 (2 pixels) */
547*c83a76b0SSuyog Pawar         }
548*c83a76b0SSuyog Pawar         else
549*c83a76b0SSuyog Pawar         {
550*c83a76b0SSuyog Pawar             /* rest all cases topleft is top -2 */
551*c83a76b0SSuyog Pawar             s_chrm_cu_buf_prms.pu1_cu_top_left = s_chrm_cu_buf_prms.pu1_cu_top - 2;
552*c83a76b0SSuyog Pawar         }
553*c83a76b0SSuyog Pawar     }
554*c83a76b0SSuyog Pawar 
555*c83a76b0SSuyog Pawar     /* Set Variables for Dep. Checking and Setting */
556*c83a76b0SSuyog Pawar     i4_ctb_x_off = (ps_cu_prms->i4_ctb_pos << 6);
557*c83a76b0SSuyog Pawar 
558*c83a76b0SSuyog Pawar     i4_ctb_y_off = ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y;
559*c83a76b0SSuyog Pawar     ps_ctxt->i4_satd_buf_idx = rd_opt_curr_idx;
560*c83a76b0SSuyog Pawar 
561*c83a76b0SSuyog Pawar     /* Set the pred pointer count for ME/intra to 0 to start */
562*c83a76b0SSuyog Pawar     ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count = 0;
563*c83a76b0SSuyog Pawar 
564*c83a76b0SSuyog Pawar     ASSERT(
565*c83a76b0SSuyog Pawar         (ps_cu_analyse->u1_num_inter_cands > 0) || (ps_cu_analyse->u1_num_intra_rdopt_cands > 0));
566*c83a76b0SSuyog Pawar 
567*c83a76b0SSuyog Pawar     ASSERT(ps_cu_analyse->u1_num_inter_cands <= MAX_INTER_CU_CANDIDATES);
568*c83a76b0SSuyog Pawar     s_inter_cu_mode_info.u1_num_inter_cands = 0;
569*c83a76b0SSuyog Pawar     s_inter_cu_mode_info.u1_idx_of_worst_cost_in_cost_array = 0;
570*c83a76b0SSuyog Pawar     s_inter_cu_mode_info.u1_idx_of_worst_cost_in_pred_buf_array = 0;
571*c83a76b0SSuyog Pawar 
572*c83a76b0SSuyog Pawar     ps_ctxt->s_cu_inter_merge_skip.u1_num_merge_cands = 0;
573*c83a76b0SSuyog Pawar     ps_ctxt->s_cu_inter_merge_skip.u1_num_skip_cands = 0;
574*c83a76b0SSuyog Pawar     ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type0_cands = 0;
575*c83a76b0SSuyog Pawar     ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type1_cands = 0;
576*c83a76b0SSuyog Pawar     ps_ctxt->s_pred_buf_data.i4_pred_stride = ps_cu_analyse->u1_cu_size;
577*c83a76b0SSuyog Pawar     if(0 != ps_cu_analyse->u1_num_inter_cands)
578*c83a76b0SSuyog Pawar     {
579*c83a76b0SSuyog Pawar         ihevce_inter_cand_sifter_prms_t s_prms;
580*c83a76b0SSuyog Pawar 
581*c83a76b0SSuyog Pawar         UWORD8 u1_enable_top_row_sync;
582*c83a76b0SSuyog Pawar 
583*c83a76b0SSuyog Pawar         if(ps_ctxt->u1_disable_intra_eval)
584*c83a76b0SSuyog Pawar         {
585*c83a76b0SSuyog Pawar             u1_enable_top_row_sync = !DISABLE_TOP_SYNC;
586*c83a76b0SSuyog Pawar         }
587*c83a76b0SSuyog Pawar         else
588*c83a76b0SSuyog Pawar         {
589*c83a76b0SSuyog Pawar             u1_enable_top_row_sync = 1;
590*c83a76b0SSuyog Pawar         }
591*c83a76b0SSuyog Pawar 
592*c83a76b0SSuyog Pawar         if((!ps_ctxt->u1_use_top_at_ctb_boundary) && u1_enable_top_row_sync)
593*c83a76b0SSuyog Pawar         {
594*c83a76b0SSuyog Pawar             /* Wait till top data is ready          */
595*c83a76b0SSuyog Pawar             /* Currently checking till top right CU */
596*c83a76b0SSuyog Pawar             curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
597*c83a76b0SSuyog Pawar 
598*c83a76b0SSuyog Pawar             if(i4_ctb_y_off == 0)
599*c83a76b0SSuyog Pawar             {
600*c83a76b0SSuyog Pawar                 /* No wait for 1st row */
601*c83a76b0SSuyog Pawar                 cu_top_right_offset = -(MAX_CTB_SIZE);
602*c83a76b0SSuyog Pawar                 {
603*c83a76b0SSuyog Pawar                     ihevce_tile_params_t *ps_col_tile_params =
604*c83a76b0SSuyog Pawar                         ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
605*c83a76b0SSuyog Pawar                          ps_ctxt->i4_tile_col_idx);
606*c83a76b0SSuyog Pawar                     /* No wait for 1st row */
607*c83a76b0SSuyog Pawar                     cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
608*c83a76b0SSuyog Pawar                 }
609*c83a76b0SSuyog Pawar                 cu_top_right_dep_pos = 0;
610*c83a76b0SSuyog Pawar             }
611*c83a76b0SSuyog Pawar             else
612*c83a76b0SSuyog Pawar             {
613*c83a76b0SSuyog Pawar                 cu_top_right_offset = (ps_cu_analyse->u1_cu_size) + 4;
614*c83a76b0SSuyog Pawar                 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
615*c83a76b0SSuyog Pawar             }
616*c83a76b0SSuyog Pawar 
617*c83a76b0SSuyog Pawar             if(0 == ps_cu_analyse->b3_cu_pos_y)
618*c83a76b0SSuyog Pawar             {
619*c83a76b0SSuyog Pawar                 ihevce_dmgr_chk_row_row_sync(
620*c83a76b0SSuyog Pawar                     ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
621*c83a76b0SSuyog Pawar                     curr_cu_pos_in_row,
622*c83a76b0SSuyog Pawar                     cu_top_right_offset,
623*c83a76b0SSuyog Pawar                     cu_top_right_dep_pos,
624*c83a76b0SSuyog Pawar                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
625*c83a76b0SSuyog Pawar                     ps_ctxt->thrd_id);
626*c83a76b0SSuyog Pawar             }
627*c83a76b0SSuyog Pawar         }
628*c83a76b0SSuyog Pawar 
629*c83a76b0SSuyog Pawar         if(ps_ctxt->i1_cu_qp_delta_enable)
630*c83a76b0SSuyog Pawar         {
631*c83a76b0SSuyog Pawar             ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, 4, 0);
632*c83a76b0SSuyog Pawar         }
633*c83a76b0SSuyog Pawar 
634*c83a76b0SSuyog Pawar         s_prms.i4_ctb_nbr_map_stride = ps_ctxt->i4_nbr_map_strd;
635*c83a76b0SSuyog Pawar         s_prms.i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands;
636*c83a76b0SSuyog Pawar         s_prms.i4_nbr_4x4_left_strd = nbr_4x4_left_strd;
637*c83a76b0SSuyog Pawar         s_prms.i4_src_strd = ps_cu_prms->i4_luma_src_stride;
638*c83a76b0SSuyog Pawar         s_prms.ps_cu_inter_merge_skip = &ps_ctxt->s_cu_inter_merge_skip;
639*c83a76b0SSuyog Pawar         s_prms.aps_cu_nbr_buf[0] = &ps_ctxt->as_cu_nbr[ps_ctxt->i4_satd_buf_idx][0];
640*c83a76b0SSuyog Pawar         s_prms.aps_cu_nbr_buf[1] = &ps_ctxt->as_cu_nbr[!ps_ctxt->i4_satd_buf_idx][0];
641*c83a76b0SSuyog Pawar         s_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
642*c83a76b0SSuyog Pawar         s_prms.ps_mc_ctxt = &ps_ctxt->s_mc_ctxt;
643*c83a76b0SSuyog Pawar         s_prms.ps_me_cands = ps_cu_analyse->as_cu_inter_cand;
644*c83a76b0SSuyog Pawar         s_prms.ps_mixed_modes_datastore = &ps_ctxt->s_mixed_mode_inter_cu;
645*c83a76b0SSuyog Pawar         s_prms.ps_mv_pred_ctxt = &ps_ctxt->s_mv_pred_ctxt;
646*c83a76b0SSuyog Pawar         s_prms.ps_pred_buf_data = &ps_ctxt->s_pred_buf_data;
647*c83a76b0SSuyog Pawar         s_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
648*c83a76b0SSuyog Pawar         s_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
649*c83a76b0SSuyog Pawar         s_prms.pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map;
650*c83a76b0SSuyog Pawar         s_prms.pv_src = pv_curr_src;
651*c83a76b0SSuyog Pawar         s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 3;
652*c83a76b0SSuyog Pawar         s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 3;
653*c83a76b0SSuyog Pawar         s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
654*c83a76b0SSuyog Pawar         s_prms.u1_max_merge_candidates = ps_ctxt->i4_max_merge_candidates;
655*c83a76b0SSuyog Pawar         s_prms.u1_num_me_cands = ps_cu_analyse->u1_num_inter_cands;
656*c83a76b0SSuyog Pawar         s_prms.u1_use_satd_for_merge_eval = ps_ctxt->i4_use_satd_for_merge_eval;
657*c83a76b0SSuyog Pawar         s_prms.u1_quality_preset = ps_ctxt->i4_quality_preset;
658*c83a76b0SSuyog Pawar         s_prms.i1_slice_type = ps_ctxt->i1_slice_type;
659*c83a76b0SSuyog Pawar         s_prms.ps_cu_me_intra_pred_prms = &ps_ctxt->s_cu_me_intra_pred_prms;
660*c83a76b0SSuyog Pawar         s_prms.u1_is_hbd = (ps_ctxt->u1_bit_depth > 8);
661*c83a76b0SSuyog Pawar         s_prms.ps_inter_cu_mode_info = &s_inter_cu_mode_info;
662*c83a76b0SSuyog Pawar         s_prms.pai4_mv_cost = ps_cu_analyse->ai4_mv_cost;
663*c83a76b0SSuyog Pawar         s_prms.i4_lambda_qf = ps_ctxt->i4_sad_lamda;
664*c83a76b0SSuyog Pawar         s_prms.u1_use_merge_cand_from_top_row =
665*c83a76b0SSuyog Pawar             (u1_enable_top_row_sync || (s_prms.u1_cu_pos_y > 0));
666*c83a76b0SSuyog Pawar         s_prms.u1_merge_idx_cabac_model =
667*c83a76b0SSuyog Pawar             ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[IHEVC_CAB_MERGE_IDX_EXT];
668*c83a76b0SSuyog Pawar #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
669*c83a76b0SSuyog Pawar         s_prms.pai4_me_err_metric = ps_cu_analyse->ai4_err_metric;
670*c83a76b0SSuyog Pawar         s_prms.u1_reuse_me_sad = 1;
671*c83a76b0SSuyog Pawar #else
672*c83a76b0SSuyog Pawar         s_prms.u1_reuse_me_sad = 0;
673*c83a76b0SSuyog Pawar #endif
674*c83a76b0SSuyog Pawar 
675*c83a76b0SSuyog Pawar         if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_type != PSLICE)
676*c83a76b0SSuyog Pawar         {
677*c83a76b0SSuyog Pawar             if(ps_ctxt->i4_temporal_layer == 1)
678*c83a76b0SSuyog Pawar             {
679*c83a76b0SSuyog Pawar                 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_BREF;
680*c83a76b0SSuyog Pawar             }
681*c83a76b0SSuyog Pawar             else
682*c83a76b0SSuyog Pawar             {
683*c83a76b0SSuyog Pawar                 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
684*c83a76b0SSuyog Pawar             }
685*c83a76b0SSuyog Pawar         }
686*c83a76b0SSuyog Pawar         else
687*c83a76b0SSuyog Pawar         {
688*c83a76b0SSuyog Pawar             s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_P;
689*c83a76b0SSuyog Pawar         }
690*c83a76b0SSuyog Pawar         s_prms.u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
691*c83a76b0SSuyog Pawar 
692*c83a76b0SSuyog Pawar         if(s_prms.u1_is_cu_noisy)
693*c83a76b0SSuyog Pawar         {
694*c83a76b0SSuyog Pawar             s_prms.i4_lambda_qf =
695*c83a76b0SSuyog Pawar                 ((float)s_prms.i4_lambda_qf) * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f;
696*c83a76b0SSuyog Pawar         }
697*c83a76b0SSuyog Pawar         s_prms.pf_luma_inter_pred_pu = ihevce_luma_inter_pred_pu;
698*c83a76b0SSuyog Pawar 
699*c83a76b0SSuyog Pawar         s_prms.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
700*c83a76b0SSuyog Pawar 
701*c83a76b0SSuyog Pawar         s_prms.pf_evalsad_pt_npu_mxn_8bit = (FT_SAD_EVALUATOR *)ps_ctxt->pv_evalsad_pt_npu_mxn_8bit;
702*c83a76b0SSuyog Pawar         ihevce_inter_cand_sifter(&s_prms);
703*c83a76b0SSuyog Pawar     }
704*c83a76b0SSuyog Pawar     if(u1_is_422)
705*c83a76b0SSuyog Pawar     {
706*c83a76b0SSuyog Pawar         UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX - 1];
707*c83a76b0SSuyog Pawar         UWORD8 u1_num_bufs_allocated;
708*c83a76b0SSuyog Pawar 
709*c83a76b0SSuyog Pawar         u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
710*c83a76b0SSuyog Pawar             au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX - 1);
711*c83a76b0SSuyog Pawar 
712*c83a76b0SSuyog Pawar         ASSERT(u1_num_bufs_allocated == (NUM_CU_ME_INTRA_PRED_IDX - 1));
713*c83a76b0SSuyog Pawar 
714*c83a76b0SSuyog Pawar         for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
715*c83a76b0SSuyog Pawar             ctr++)
716*c83a76b0SSuyog Pawar         {
717*c83a76b0SSuyog Pawar             {
718*c83a76b0SSuyog Pawar                 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
719*c83a76b0SSuyog Pawar                     (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
720*c83a76b0SSuyog Pawar             }
721*c83a76b0SSuyog Pawar 
722*c83a76b0SSuyog Pawar             ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
723*c83a76b0SSuyog Pawar 
724*c83a76b0SSuyog Pawar             ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
725*c83a76b0SSuyog Pawar         }
726*c83a76b0SSuyog Pawar 
727*c83a76b0SSuyog Pawar         {
728*c83a76b0SSuyog Pawar             ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
729*c83a76b0SSuyog Pawar                 (UWORD8 *)ps_ctxt->pv_422_chroma_intra_pred_buf;
730*c83a76b0SSuyog Pawar         }
731*c83a76b0SSuyog Pawar 
732*c83a76b0SSuyog Pawar         ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
733*c83a76b0SSuyog Pawar 
734*c83a76b0SSuyog Pawar         ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
735*c83a76b0SSuyog Pawar     }
736*c83a76b0SSuyog Pawar     else
737*c83a76b0SSuyog Pawar     {
738*c83a76b0SSuyog Pawar         UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX];
739*c83a76b0SSuyog Pawar         UWORD8 u1_num_bufs_allocated;
740*c83a76b0SSuyog Pawar 
741*c83a76b0SSuyog Pawar         u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
742*c83a76b0SSuyog Pawar             au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX);
743*c83a76b0SSuyog Pawar 
744*c83a76b0SSuyog Pawar         ASSERT(u1_num_bufs_allocated == NUM_CU_ME_INTRA_PRED_IDX);
745*c83a76b0SSuyog Pawar 
746*c83a76b0SSuyog Pawar         for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
747*c83a76b0SSuyog Pawar             ctr++)
748*c83a76b0SSuyog Pawar         {
749*c83a76b0SSuyog Pawar             {
750*c83a76b0SSuyog Pawar                 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
751*c83a76b0SSuyog Pawar                     (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
752*c83a76b0SSuyog Pawar             }
753*c83a76b0SSuyog Pawar 
754*c83a76b0SSuyog Pawar             ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
755*c83a76b0SSuyog Pawar 
756*c83a76b0SSuyog Pawar             ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
757*c83a76b0SSuyog Pawar         }
758*c83a76b0SSuyog Pawar     }
759*c83a76b0SSuyog Pawar 
760*c83a76b0SSuyog Pawar     ihevce_mark_all_modes_to_evaluate(ps_ctxt, ps_cu_analyse);
761*c83a76b0SSuyog Pawar 
762*c83a76b0SSuyog Pawar     ps_ctxt->as_cu_prms[0].s_recon_datastore.u1_is_lumaRecon_available = 0;
763*c83a76b0SSuyog Pawar     ps_ctxt->as_cu_prms[1].s_recon_datastore.u1_is_lumaRecon_available = 0;
764*c83a76b0SSuyog Pawar     ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
765*c83a76b0SSuyog Pawar     ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
766*c83a76b0SSuyog Pawar     ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
767*c83a76b0SSuyog Pawar     ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
768*c83a76b0SSuyog Pawar     ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
769*c83a76b0SSuyog Pawar     ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
770*c83a76b0SSuyog Pawar     /* --------------------------------------- */
771*c83a76b0SSuyog Pawar     /* ------ Inter RD OPT stage ------------- */
772*c83a76b0SSuyog Pawar     /* --------------------------------------- */
773*c83a76b0SSuyog Pawar     if(0 != s_inter_cu_mode_info.u1_num_inter_cands)
774*c83a76b0SSuyog Pawar     {
775*c83a76b0SSuyog Pawar         UWORD8 u1_ssd_bit_info_ctr = 0;
776*c83a76b0SSuyog Pawar 
777*c83a76b0SSuyog Pawar         /* -- run a loop over all Inter rd opt cands ------ */
778*c83a76b0SSuyog Pawar         for(ctr = 0; ctr < s_inter_cu_mode_info.u1_num_inter_cands; ctr++)
779*c83a76b0SSuyog Pawar         {
780*c83a76b0SSuyog Pawar             cu_inter_cand_t *ps_inter_cand;
781*c83a76b0SSuyog Pawar 
782*c83a76b0SSuyog Pawar             LWORD64 rd_opt_cost = 0;
783*c83a76b0SSuyog Pawar 
784*c83a76b0SSuyog Pawar             ps_inter_cand = s_inter_cu_mode_info.aps_cu_data[ctr];
785*c83a76b0SSuyog Pawar 
786*c83a76b0SSuyog Pawar             if((ps_inter_cand->b1_skip_flag) || (ps_inter_cand->as_inter_pu[0].b1_merge_flag) ||
787*c83a76b0SSuyog Pawar                (ps_inter_cand->b3_part_size && ps_inter_cand->as_inter_pu[1].b1_merge_flag))
788*c83a76b0SSuyog Pawar             {
789*c83a76b0SSuyog Pawar                 ps_inter_cand->b1_eval_mark = 1;
790*c83a76b0SSuyog Pawar             }
791*c83a76b0SSuyog Pawar 
792*c83a76b0SSuyog Pawar             /****************************************************************/
793*c83a76b0SSuyog Pawar             /* This check is only valid for derived instances.              */
794*c83a76b0SSuyog Pawar             /* check if this mode needs to be evaluated or not.             */
795*c83a76b0SSuyog Pawar             /* if it is a skip candidate, go ahead and evaluate it even if  */
796*c83a76b0SSuyog Pawar             /* it has not been marked while sorting.                        */
797*c83a76b0SSuyog Pawar             /****************************************************************/
798*c83a76b0SSuyog Pawar             if((0 == ps_inter_cand->b1_eval_mark) && (0 == ps_inter_cand->b1_skip_flag))
799*c83a76b0SSuyog Pawar             {
800*c83a76b0SSuyog Pawar                 continue;
801*c83a76b0SSuyog Pawar             }
802*c83a76b0SSuyog Pawar 
803*c83a76b0SSuyog Pawar             /* RDOPT related copies and settings */
804*c83a76b0SSuyog Pawar             ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
805*c83a76b0SSuyog Pawar 
806*c83a76b0SSuyog Pawar             /* RDOPT copy States : Prev Cu best to current init */
807*c83a76b0SSuyog Pawar             COPY_CABAC_STATES(
808*c83a76b0SSuyog Pawar                 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
809*c83a76b0SSuyog Pawar                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
810*c83a76b0SSuyog Pawar                 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
811*c83a76b0SSuyog Pawar             /* MVP ,MVD calc and Motion compensation */
812*c83a76b0SSuyog Pawar             rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
813*c83a76b0SSuyog Pawar                 ps_ctxt,
814*c83a76b0SSuyog Pawar                 ps_inter_cand,
815*c83a76b0SSuyog Pawar                 ps_cu_analyse->u1_cu_size,
816*c83a76b0SSuyog Pawar                 ps_cu_analyse->b3_cu_pos_x,
817*c83a76b0SSuyog Pawar                 ps_cu_analyse->b3_cu_pos_y,
818*c83a76b0SSuyog Pawar                 ps_left_nbr_4x4,
819*c83a76b0SSuyog Pawar                 ps_top_nbr_4x4,
820*c83a76b0SSuyog Pawar                 ps_topleft_nbr_4x4,
821*c83a76b0SSuyog Pawar                 nbr_4x4_left_strd,
822*c83a76b0SSuyog Pawar                 rd_opt_curr_idx);
823*c83a76b0SSuyog Pawar 
824*c83a76b0SSuyog Pawar #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
825*c83a76b0SSuyog Pawar             if((ps_ctxt->u1_bit_depth == 8) && (!ps_inter_cand->b1_skip_flag))
826*c83a76b0SSuyog Pawar             {
827*c83a76b0SSuyog Pawar                 ihevce_determine_tu_tree_distribution(
828*c83a76b0SSuyog Pawar                     ps_inter_cand,
829*c83a76b0SSuyog Pawar                     (me_func_selector_t *)ps_ctxt->pv_err_func_selector,
830*c83a76b0SSuyog Pawar                     ps_ctxt->ai2_scratch,
831*c83a76b0SSuyog Pawar                     (UWORD8 *)pv_curr_src,
832*c83a76b0SSuyog Pawar                     ps_cu_prms->i4_luma_src_stride,
833*c83a76b0SSuyog Pawar                     ps_ctxt->i4_satd_lamda,
834*c83a76b0SSuyog Pawar                     LAMBDA_Q_SHIFT,
835*c83a76b0SSuyog Pawar                     ps_cu_analyse->u1_cu_size,
836*c83a76b0SSuyog Pawar                     ps_ctxt->u1_max_tr_depth);
837*c83a76b0SSuyog Pawar             }
838*c83a76b0SSuyog Pawar #endif
839*c83a76b0SSuyog Pawar #if DISABLE_ZERO_ZBF_IN_INTER
840*c83a76b0SSuyog Pawar             ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
841*c83a76b0SSuyog Pawar #else
842*c83a76b0SSuyog Pawar             ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
843*c83a76b0SSuyog Pawar #endif
844*c83a76b0SSuyog Pawar             /* Recon loop with different TUs based on partition type*/
845*c83a76b0SSuyog Pawar             rd_opt_cost += ((pf_inter_rdopt_cu_ntu)ps_ctxt->pv_inter_rdopt_cu_ntu)(
846*c83a76b0SSuyog Pawar                 ps_ctxt,
847*c83a76b0SSuyog Pawar                 ps_cu_prms,
848*c83a76b0SSuyog Pawar                 pv_curr_src,
849*c83a76b0SSuyog Pawar                 ps_cu_analyse->u1_cu_size,
850*c83a76b0SSuyog Pawar                 ps_cu_analyse->b3_cu_pos_x,
851*c83a76b0SSuyog Pawar                 ps_cu_analyse->b3_cu_pos_y,
852*c83a76b0SSuyog Pawar                 rd_opt_curr_idx,
853*c83a76b0SSuyog Pawar                 &s_chrm_cu_buf_prms,
854*c83a76b0SSuyog Pawar                 ps_inter_cand,
855*c83a76b0SSuyog Pawar                 ps_cu_analyse,
856*c83a76b0SSuyog Pawar                 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
857*c83a76b0SSuyog Pawar                                        : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
858*c83a76b0SSuyog Pawar                                           (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
859*c83a76b0SSuyog Pawar                                              100.0);
860*c83a76b0SSuyog Pawar 
861*c83a76b0SSuyog Pawar #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
862*c83a76b0SSuyog Pawar             if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
863*c83a76b0SSuyog Pawar             {
864*c83a76b0SSuyog Pawar                 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
865*c83a76b0SSuyog Pawar                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
866*c83a76b0SSuyog Pawar                     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
867*c83a76b0SSuyog Pawar             }
868*c83a76b0SSuyog Pawar #endif
869*c83a76b0SSuyog Pawar 
870*c83a76b0SSuyog Pawar             /* based on the rd opt cost choose the best and current index */
871*c83a76b0SSuyog Pawar             if(rd_opt_cost < rd_opt_least_cost)
872*c83a76b0SSuyog Pawar             {
873*c83a76b0SSuyog Pawar                 /* swap the best and current indx */
874*c83a76b0SSuyog Pawar                 rd_opt_best_idx = !rd_opt_best_idx;
875*c83a76b0SSuyog Pawar                 rd_opt_curr_idx = !rd_opt_curr_idx;
876*c83a76b0SSuyog Pawar 
877*c83a76b0SSuyog Pawar                 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
878*c83a76b0SSuyog Pawar                 rd_opt_least_cost = rd_opt_cost;
879*c83a76b0SSuyog Pawar                 i4_best_cu_qp = ps_ctxt->i4_cu_qp;
880*c83a76b0SSuyog Pawar 
881*c83a76b0SSuyog Pawar                 /* Store the best Inter cand. for final_recon function */
882*c83a76b0SSuyog Pawar                 ps_best_inter_cand = ps_inter_cand;
883*c83a76b0SSuyog Pawar             }
884*c83a76b0SSuyog Pawar 
885*c83a76b0SSuyog Pawar             /* set the neighbour map to 0 */
886*c83a76b0SSuyog Pawar             ihevce_set_nbr_map(
887*c83a76b0SSuyog Pawar                 ps_ctxt->pu1_ctb_nbr_map,
888*c83a76b0SSuyog Pawar                 ps_ctxt->i4_nbr_map_strd,
889*c83a76b0SSuyog Pawar                 (ps_cu_analyse->b3_cu_pos_x << 1),
890*c83a76b0SSuyog Pawar                 (ps_cu_analyse->b3_cu_pos_y << 1),
891*c83a76b0SSuyog Pawar                 (ps_cu_analyse->u1_cu_size >> 2),
892*c83a76b0SSuyog Pawar                 0);
893*c83a76b0SSuyog Pawar 
894*c83a76b0SSuyog Pawar         } /* end of loop for all the Inter RD OPT cand */
895*c83a76b0SSuyog Pawar     }
896*c83a76b0SSuyog Pawar     /* --------------------------------------- */
897*c83a76b0SSuyog Pawar     /* ---- Conditional Eval of Intra -------- */
898*c83a76b0SSuyog Pawar     /* --------------------------------------- */
899*c83a76b0SSuyog Pawar     {
900*c83a76b0SSuyog Pawar         enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
901*c83a76b0SSuyog Pawar         ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
902*c83a76b0SSuyog Pawar 
903*c83a76b0SSuyog Pawar         /* check if inter candidates are valid */
904*c83a76b0SSuyog Pawar         if(0 != ps_cu_analyse->u1_num_inter_cands)
905*c83a76b0SSuyog Pawar         {
906*c83a76b0SSuyog Pawar             /* if skip or no residual inter candidates has won then */
907*c83a76b0SSuyog Pawar             /* evaluation of intra candidates is disabled           */
908*c83a76b0SSuyog Pawar             if((1 == ps_enc_loop_bestprms->u1_skip_flag) ||
909*c83a76b0SSuyog Pawar                (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
910*c83a76b0SSuyog Pawar             {
911*c83a76b0SSuyog Pawar                 enable_intra_eval_flag = 0;
912*c83a76b0SSuyog Pawar             }
913*c83a76b0SSuyog Pawar         }
914*c83a76b0SSuyog Pawar         /* Disable Intra Gating for HIGH QUALITY PRESET */
915*c83a76b0SSuyog Pawar #if !ENABLE_INTRA_GATING_FOR_HQ
916*c83a76b0SSuyog Pawar         if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
917*c83a76b0SSuyog Pawar         {
918*c83a76b0SSuyog Pawar             enable_intra_eval_flag = 1;
919*c83a76b0SSuyog Pawar 
920*c83a76b0SSuyog Pawar #if DISABLE_LARGE_INTRA_PQ
921*c83a76b0SSuyog Pawar             if((IHEVCE_QUALITY_P0 == ps_ctxt->i4_quality_preset) && (ps_cu_prms->u1_is_cu_noisy) &&
922*c83a76b0SSuyog Pawar                (ps_ctxt->i1_slice_type != ISLICE) && (0 != s_inter_cu_mode_info.u1_num_inter_cands))
923*c83a76b0SSuyog Pawar             {
924*c83a76b0SSuyog Pawar                 if(ps_cu_analyse->u1_cu_size > 16)
925*c83a76b0SSuyog Pawar                 {
926*c83a76b0SSuyog Pawar                     /* Disable 32x32 / 64x64 Intra in PQ P and B pics */
927*c83a76b0SSuyog Pawar                     enable_intra_eval_flag = 0;
928*c83a76b0SSuyog Pawar                 }
929*c83a76b0SSuyog Pawar                 else if(ps_cu_analyse->u1_cu_size == 16)
930*c83a76b0SSuyog Pawar                 {
931*c83a76b0SSuyog Pawar                     /* Disable tu equal to cu mode in 16x16 Intra in PQ P and B pics */
932*c83a76b0SSuyog Pawar                     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
933*c83a76b0SSuyog Pawar                 }
934*c83a76b0SSuyog Pawar             }
935*c83a76b0SSuyog Pawar #endif
936*c83a76b0SSuyog Pawar         }
937*c83a76b0SSuyog Pawar #endif
938*c83a76b0SSuyog Pawar     }
939*c83a76b0SSuyog Pawar 
940*c83a76b0SSuyog Pawar     /* --------------------------------------- */
941*c83a76b0SSuyog Pawar     /* ------ Intra RD OPT stage ------------- */
942*c83a76b0SSuyog Pawar     /* --------------------------------------- */
943*c83a76b0SSuyog Pawar 
944*c83a76b0SSuyog Pawar     /* -- run a loop over all Intra rd opt cands ------ */
945*c83a76b0SSuyog Pawar     if((0 != ps_cu_analyse->u1_num_intra_rdopt_cands) && (1 == enable_intra_eval_flag))
946*c83a76b0SSuyog Pawar     {
947*c83a76b0SSuyog Pawar         LWORD64 rd_opt_cost;
948*c83a76b0SSuyog Pawar         WORD32 end_flag = 0;
949*c83a76b0SSuyog Pawar         WORD32 cu_eval_done = 0;
950*c83a76b0SSuyog Pawar         WORD32 subcu_eval_done = 0;
951*c83a76b0SSuyog Pawar         WORD32 subpu_eval_done = 0;
952*c83a76b0SSuyog Pawar         WORD32 max_trans_size;
953*c83a76b0SSuyog Pawar         WORD32 sync_wait_stride;
954*c83a76b0SSuyog Pawar         max_trans_size = MIN(MAX_TU_SIZE, (ps_cu_analyse->u1_cu_size));
955*c83a76b0SSuyog Pawar         sync_wait_stride = (ps_cu_analyse->u1_cu_size) + max_trans_size;
956*c83a76b0SSuyog Pawar 
957*c83a76b0SSuyog Pawar         if(!ps_ctxt->u1_use_top_at_ctb_boundary)
958*c83a76b0SSuyog Pawar         {
959*c83a76b0SSuyog Pawar             /* Wait till top data is ready          */
960*c83a76b0SSuyog Pawar             /* Currently checking till top right CU */
961*c83a76b0SSuyog Pawar             curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
962*c83a76b0SSuyog Pawar 
963*c83a76b0SSuyog Pawar             if(i4_ctb_y_off == 0)
964*c83a76b0SSuyog Pawar             {
965*c83a76b0SSuyog Pawar                 /* No wait for 1st row */
966*c83a76b0SSuyog Pawar                 cu_top_right_offset = -(MAX_CTB_SIZE);
967*c83a76b0SSuyog Pawar                 {
968*c83a76b0SSuyog Pawar                     ihevce_tile_params_t *ps_col_tile_params =
969*c83a76b0SSuyog Pawar                         ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
970*c83a76b0SSuyog Pawar                          ps_ctxt->i4_tile_col_idx);
971*c83a76b0SSuyog Pawar                     /* No wait for 1st row */
972*c83a76b0SSuyog Pawar                     cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
973*c83a76b0SSuyog Pawar                 }
974*c83a76b0SSuyog Pawar                 cu_top_right_dep_pos = 0;
975*c83a76b0SSuyog Pawar             }
976*c83a76b0SSuyog Pawar             else
977*c83a76b0SSuyog Pawar             {
978*c83a76b0SSuyog Pawar                 cu_top_right_offset = sync_wait_stride;
979*c83a76b0SSuyog Pawar                 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
980*c83a76b0SSuyog Pawar             }
981*c83a76b0SSuyog Pawar 
982*c83a76b0SSuyog Pawar             if(0 == ps_cu_analyse->b3_cu_pos_y)
983*c83a76b0SSuyog Pawar             {
984*c83a76b0SSuyog Pawar                 ihevce_dmgr_chk_row_row_sync(
985*c83a76b0SSuyog Pawar                     ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
986*c83a76b0SSuyog Pawar                     curr_cu_pos_in_row,
987*c83a76b0SSuyog Pawar                     cu_top_right_offset,
988*c83a76b0SSuyog Pawar                     cu_top_right_dep_pos,
989*c83a76b0SSuyog Pawar                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
990*c83a76b0SSuyog Pawar                     ps_ctxt->thrd_id);
991*c83a76b0SSuyog Pawar             }
992*c83a76b0SSuyog Pawar         }
993*c83a76b0SSuyog Pawar         ctr = 0;
994*c83a76b0SSuyog Pawar 
995*c83a76b0SSuyog Pawar         /* Zero cbf tool is disabled for intra CUs */
996*c83a76b0SSuyog Pawar #if ENABLE_ZERO_CBF_IN_INTRA
997*c83a76b0SSuyog Pawar         ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
998*c83a76b0SSuyog Pawar #else
999*c83a76b0SSuyog Pawar         ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
1000*c83a76b0SSuyog Pawar #endif
1001*c83a76b0SSuyog Pawar 
1002*c83a76b0SSuyog Pawar         /* Intra Mode gating based on MPM cand list and encoder quality preset */
1003*c83a76b0SSuyog Pawar         if((ps_ctxt->i1_slice_type != ISLICE) && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
1004*c83a76b0SSuyog Pawar         {
1005*c83a76b0SSuyog Pawar             ihevce_mpm_idx_based_filter_RDOPT_cand(
1006*c83a76b0SSuyog Pawar                 ps_ctxt,
1007*c83a76b0SSuyog Pawar                 ps_cu_analyse,
1008*c83a76b0SSuyog Pawar                 ps_left_nbr_4x4,
1009*c83a76b0SSuyog Pawar                 ps_top_nbr_4x4,
1010*c83a76b0SSuyog Pawar                 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0],
1011*c83a76b0SSuyog Pawar                 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[0]);
1012*c83a76b0SSuyog Pawar 
1013*c83a76b0SSuyog Pawar             ihevce_mpm_idx_based_filter_RDOPT_cand(
1014*c83a76b0SSuyog Pawar                 ps_ctxt,
1015*c83a76b0SSuyog Pawar                 ps_cu_analyse,
1016*c83a76b0SSuyog Pawar                 ps_left_nbr_4x4,
1017*c83a76b0SSuyog Pawar                 ps_top_nbr_4x4,
1018*c83a76b0SSuyog Pawar                 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0],
1019*c83a76b0SSuyog Pawar                 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[0]);
1020*c83a76b0SSuyog Pawar         }
1021*c83a76b0SSuyog Pawar 
1022*c83a76b0SSuyog Pawar         /* Call Chroma SATD function for curr_func_mode in HIGH QUALITY mode */
1023*c83a76b0SSuyog Pawar         if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd)
1024*c83a76b0SSuyog Pawar         {
1025*c83a76b0SSuyog Pawar             /* For cu_size = 64, there won't be any TU_EQ_CU case */
1026*c83a76b0SSuyog Pawar             if(64 != ps_cu_analyse->u1_cu_size)
1027*c83a76b0SSuyog Pawar             {
1028*c83a76b0SSuyog Pawar                 /* RDOPT copy States : Prev Cu best to current init */
1029*c83a76b0SSuyog Pawar                 COPY_CABAC_STATES(
1030*c83a76b0SSuyog Pawar                     &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1031*c83a76b0SSuyog Pawar                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1032*c83a76b0SSuyog Pawar                     IHEVC_CAB_CTXT_END);
1033*c83a76b0SSuyog Pawar 
1034*c83a76b0SSuyog Pawar                 /* RDOPT related copies and settings */
1035*c83a76b0SSuyog Pawar                 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1036*c83a76b0SSuyog Pawar 
1037*c83a76b0SSuyog Pawar                 /* Calc. best SATD mode for TU_EQ_CU case */
1038*c83a76b0SSuyog Pawar                 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
1039*c83a76b0SSuyog Pawar                     ps_ctxt,
1040*c83a76b0SSuyog Pawar                     &s_chrm_cu_buf_prms,
1041*c83a76b0SSuyog Pawar                     ps_cu_analyse,
1042*c83a76b0SSuyog Pawar                     rd_opt_curr_idx,
1043*c83a76b0SSuyog Pawar                     TU_EQ_CU,
1044*c83a76b0SSuyog Pawar                     !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1045*c83a76b0SSuyog Pawar                                            : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1046*c83a76b0SSuyog Pawar                                               (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1047*c83a76b0SSuyog Pawar                                                  100.0,
1048*c83a76b0SSuyog Pawar                     ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
1049*c83a76b0SSuyog Pawar 
1050*c83a76b0SSuyog Pawar #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1051*c83a76b0SSuyog Pawar                 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1052*c83a76b0SSuyog Pawar                 {
1053*c83a76b0SSuyog Pawar                     ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1054*c83a76b0SSuyog Pawar                     ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1055*c83a76b0SSuyog Pawar                         ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1056*c83a76b0SSuyog Pawar                 }
1057*c83a76b0SSuyog Pawar #endif
1058*c83a76b0SSuyog Pawar             }
1059*c83a76b0SSuyog Pawar 
1060*c83a76b0SSuyog Pawar             /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
1061*c83a76b0SSuyog Pawar             TU_EQ_CU_DIV2 case */
1062*c83a76b0SSuyog Pawar 
1063*c83a76b0SSuyog Pawar             if((ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] !=
1064*c83a76b0SSuyog Pawar                 255) &&
1065*c83a76b0SSuyog Pawar                (8 != ps_cu_analyse->u1_cu_size))
1066*c83a76b0SSuyog Pawar             {
1067*c83a76b0SSuyog Pawar                 /* RDOPT copy States : Prev Cu best to current init */
1068*c83a76b0SSuyog Pawar                 COPY_CABAC_STATES(
1069*c83a76b0SSuyog Pawar                     &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1070*c83a76b0SSuyog Pawar                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1071*c83a76b0SSuyog Pawar                     IHEVC_CAB_CTXT_END);
1072*c83a76b0SSuyog Pawar 
1073*c83a76b0SSuyog Pawar                 /* RDOPT related copies and settings */
1074*c83a76b0SSuyog Pawar                 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1075*c83a76b0SSuyog Pawar 
1076*c83a76b0SSuyog Pawar                 /* Calc. best SATD mode for TU_EQ_CU_DIV2 case */
1077*c83a76b0SSuyog Pawar                 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
1078*c83a76b0SSuyog Pawar                     ps_ctxt,
1079*c83a76b0SSuyog Pawar                     &s_chrm_cu_buf_prms,
1080*c83a76b0SSuyog Pawar                     ps_cu_analyse,
1081*c83a76b0SSuyog Pawar                     rd_opt_curr_idx,
1082*c83a76b0SSuyog Pawar                     TU_EQ_CU_DIV2,
1083*c83a76b0SSuyog Pawar                     !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1084*c83a76b0SSuyog Pawar                                            : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1085*c83a76b0SSuyog Pawar                                               (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1086*c83a76b0SSuyog Pawar                                                  100.0,
1087*c83a76b0SSuyog Pawar                     ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
1088*c83a76b0SSuyog Pawar 
1089*c83a76b0SSuyog Pawar #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1090*c83a76b0SSuyog Pawar                 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1091*c83a76b0SSuyog Pawar                 {
1092*c83a76b0SSuyog Pawar                     ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1093*c83a76b0SSuyog Pawar                     ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1094*c83a76b0SSuyog Pawar                         ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1095*c83a76b0SSuyog Pawar                 }
1096*c83a76b0SSuyog Pawar #endif
1097*c83a76b0SSuyog Pawar             }
1098*c83a76b0SSuyog Pawar         }
1099*c83a76b0SSuyog Pawar 
1100*c83a76b0SSuyog Pawar         while(0 == end_flag)
1101*c83a76b0SSuyog Pawar         {
1102*c83a76b0SSuyog Pawar             UWORD8 *pu1_mode = NULL;
1103*c83a76b0SSuyog Pawar             WORD32 curr_func_mode = 0;
1104*c83a76b0SSuyog Pawar             void *pv_pred;
1105*c83a76b0SSuyog Pawar 
1106*c83a76b0SSuyog Pawar             ASSERT(ctr < 36);
1107*c83a76b0SSuyog Pawar 
1108*c83a76b0SSuyog Pawar             /* TU equal to CU size evaluation of different modes */
1109*c83a76b0SSuyog Pawar             if(0 == cu_eval_done)
1110*c83a76b0SSuyog Pawar             {
1111*c83a76b0SSuyog Pawar                 /* check if all the modes have been evaluated */
1112*c83a76b0SSuyog Pawar                 if(255 == ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr])
1113*c83a76b0SSuyog Pawar                 {
1114*c83a76b0SSuyog Pawar                     cu_eval_done = 1;
1115*c83a76b0SSuyog Pawar                     ctr = 0;
1116*c83a76b0SSuyog Pawar                 }
1117*c83a76b0SSuyog Pawar                 else if(
1118*c83a76b0SSuyog Pawar                     (1 == ctr) &&
1119*c83a76b0SSuyog Pawar                     ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
1120*c83a76b0SSuyog Pawar                      (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
1121*c83a76b0SSuyog Pawar                     (ps_ctxt->i1_slice_type != ISLICE))
1122*c83a76b0SSuyog Pawar                 {
1123*c83a76b0SSuyog Pawar                     ctr = 0;
1124*c83a76b0SSuyog Pawar                     cu_eval_done = 1;
1125*c83a76b0SSuyog Pawar                     subcu_eval_done = 1;
1126*c83a76b0SSuyog Pawar                     subpu_eval_done = 1;
1127*c83a76b0SSuyog Pawar                 }
1128*c83a76b0SSuyog Pawar                 else
1129*c83a76b0SSuyog Pawar                 {
1130*c83a76b0SSuyog Pawar                     if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr])
1131*c83a76b0SSuyog Pawar                     {
1132*c83a76b0SSuyog Pawar                         ctr++;
1133*c83a76b0SSuyog Pawar                         continue;
1134*c83a76b0SSuyog Pawar                     }
1135*c83a76b0SSuyog Pawar 
1136*c83a76b0SSuyog Pawar                     pu1_mode =
1137*c83a76b0SSuyog Pawar                         &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr];
1138*c83a76b0SSuyog Pawar                     ctr++;
1139*c83a76b0SSuyog Pawar                     curr_func_mode = TU_EQ_CU;
1140*c83a76b0SSuyog Pawar                 }
1141*c83a76b0SSuyog Pawar             }
1142*c83a76b0SSuyog Pawar             /* Sub CU (NXN) mode evaluation of different pred modes */
1143*c83a76b0SSuyog Pawar             if((0 == subpu_eval_done) && (1 == cu_eval_done))
1144*c83a76b0SSuyog Pawar             {
1145*c83a76b0SSuyog Pawar                 /*For NxN modes evaluation all candidates for all PU parts are evaluated */
1146*c83a76b0SSuyog Pawar                 /*inside the ihevce_intra_rdopt_cu_ntu function, so the subpu_eval_done is set to 1 */
1147*c83a76b0SSuyog Pawar                 {
1148*c83a76b0SSuyog Pawar                     pu1_mode = &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][ctr];
1149*c83a76b0SSuyog Pawar 
1150*c83a76b0SSuyog Pawar                     curr_func_mode = TU_EQ_SUBCU;
1151*c83a76b0SSuyog Pawar                     /* check if any modes have to be evaluated */
1152*c83a76b0SSuyog Pawar                     if(255 == *pu1_mode)
1153*c83a76b0SSuyog Pawar                     {
1154*c83a76b0SSuyog Pawar                         subpu_eval_done = 1;
1155*c83a76b0SSuyog Pawar                         ctr = 0;
1156*c83a76b0SSuyog Pawar                     }
1157*c83a76b0SSuyog Pawar                     else if(ctr != 0) /* If the modes have to be evaluated, then terminate, as all modes are already evaluated */
1158*c83a76b0SSuyog Pawar                     {
1159*c83a76b0SSuyog Pawar                         subpu_eval_done = 1;
1160*c83a76b0SSuyog Pawar                         ctr = 0;
1161*c83a76b0SSuyog Pawar                     }
1162*c83a76b0SSuyog Pawar                     else
1163*c83a76b0SSuyog Pawar                     {
1164*c83a76b0SSuyog Pawar                         ctr++;
1165*c83a76b0SSuyog Pawar                     }
1166*c83a76b0SSuyog Pawar                 }
1167*c83a76b0SSuyog Pawar             }
1168*c83a76b0SSuyog Pawar 
1169*c83a76b0SSuyog Pawar             /* TU size equal to CU div2 mode evaluation of different pred modes */
1170*c83a76b0SSuyog Pawar             if((0 == subcu_eval_done) && (1 == subpu_eval_done) && (1 == cu_eval_done))
1171*c83a76b0SSuyog Pawar             {
1172*c83a76b0SSuyog Pawar                 /* check if all the modes have been evaluated */
1173*c83a76b0SSuyog Pawar                 if(255 ==
1174*c83a76b0SSuyog Pawar                    ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr])
1175*c83a76b0SSuyog Pawar                 {
1176*c83a76b0SSuyog Pawar                     subcu_eval_done = 1;
1177*c83a76b0SSuyog Pawar                 }
1178*c83a76b0SSuyog Pawar                 else if(
1179*c83a76b0SSuyog Pawar                     (1 == ctr) &&
1180*c83a76b0SSuyog Pawar                     ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
1181*c83a76b0SSuyog Pawar                      (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
1182*c83a76b0SSuyog Pawar                     (ps_ctxt->i1_slice_type != ISLICE) && (ps_cu_analyse->u1_cu_size == 64))
1183*c83a76b0SSuyog Pawar                 {
1184*c83a76b0SSuyog Pawar                     subcu_eval_done = 1;
1185*c83a76b0SSuyog Pawar                 }
1186*c83a76b0SSuyog Pawar                 else
1187*c83a76b0SSuyog Pawar                 {
1188*c83a76b0SSuyog Pawar                     if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr])
1189*c83a76b0SSuyog Pawar                     {
1190*c83a76b0SSuyog Pawar                         ctr++;
1191*c83a76b0SSuyog Pawar                         continue;
1192*c83a76b0SSuyog Pawar                     }
1193*c83a76b0SSuyog Pawar 
1194*c83a76b0SSuyog Pawar                     pu1_mode = &ps_cu_analyse->s_cu_intra_cand
1195*c83a76b0SSuyog Pawar                                     .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr];
1196*c83a76b0SSuyog Pawar 
1197*c83a76b0SSuyog Pawar                     ctr++;
1198*c83a76b0SSuyog Pawar                     curr_func_mode = TU_EQ_CU_DIV2;
1199*c83a76b0SSuyog Pawar                 }
1200*c83a76b0SSuyog Pawar             }
1201*c83a76b0SSuyog Pawar 
1202*c83a76b0SSuyog Pawar             /* check if all CU options have been evaluated */
1203*c83a76b0SSuyog Pawar             if((1 == cu_eval_done) && (1 == subcu_eval_done) && (1 == subpu_eval_done))
1204*c83a76b0SSuyog Pawar             {
1205*c83a76b0SSuyog Pawar                 break;
1206*c83a76b0SSuyog Pawar             }
1207*c83a76b0SSuyog Pawar 
1208*c83a76b0SSuyog Pawar             /* RDOPT related copies and settings */
1209*c83a76b0SSuyog Pawar             ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1210*c83a76b0SSuyog Pawar 
1211*c83a76b0SSuyog Pawar             /* Assign ME/Intra pred buf. to the current intra cand. since we
1212*c83a76b0SSuyog Pawar             are storing pred data for the final recon function */
1213*c83a76b0SSuyog Pawar             {
1214*c83a76b0SSuyog Pawar                 pv_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_curr_idx];
1215*c83a76b0SSuyog Pawar             }
1216*c83a76b0SSuyog Pawar 
1217*c83a76b0SSuyog Pawar             /* RDOPT copy States : Prev Cu best to current init */
1218*c83a76b0SSuyog Pawar             COPY_CABAC_STATES(
1219*c83a76b0SSuyog Pawar                 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1220*c83a76b0SSuyog Pawar                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1221*c83a76b0SSuyog Pawar                 IHEVC_CAB_CTXT_END);
1222*c83a76b0SSuyog Pawar 
1223*c83a76b0SSuyog Pawar             /* call the function which performs the normative Intra encode */
1224*c83a76b0SSuyog Pawar             rd_opt_cost = ((pf_intra_rdopt_cu_ntu)ps_ctxt->pv_intra_rdopt_cu_ntu)(
1225*c83a76b0SSuyog Pawar                 ps_ctxt,
1226*c83a76b0SSuyog Pawar                 ps_cu_prms,
1227*c83a76b0SSuyog Pawar                 pv_pred,
1228*c83a76b0SSuyog Pawar                 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_curr_idx],
1229*c83a76b0SSuyog Pawar                 &s_chrm_cu_buf_prms,
1230*c83a76b0SSuyog Pawar                 pu1_mode,
1231*c83a76b0SSuyog Pawar                 ps_cu_analyse,
1232*c83a76b0SSuyog Pawar                 pv_curr_src,
1233*c83a76b0SSuyog Pawar                 pv_cu_left,
1234*c83a76b0SSuyog Pawar                 pv_cu_top,
1235*c83a76b0SSuyog Pawar                 pv_cu_top_left,
1236*c83a76b0SSuyog Pawar                 ps_left_nbr_4x4,
1237*c83a76b0SSuyog Pawar                 ps_top_nbr_4x4,
1238*c83a76b0SSuyog Pawar                 nbr_4x4_left_strd,
1239*c83a76b0SSuyog Pawar                 cu_left_stride,
1240*c83a76b0SSuyog Pawar                 rd_opt_curr_idx,
1241*c83a76b0SSuyog Pawar                 curr_func_mode,
1242*c83a76b0SSuyog Pawar                 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1243*c83a76b0SSuyog Pawar                                        : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1244*c83a76b0SSuyog Pawar                                           (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1245*c83a76b0SSuyog Pawar                                              100.0);
1246*c83a76b0SSuyog Pawar 
1247*c83a76b0SSuyog Pawar #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1248*c83a76b0SSuyog Pawar             if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1249*c83a76b0SSuyog Pawar             {
1250*c83a76b0SSuyog Pawar                 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1251*c83a76b0SSuyog Pawar                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1252*c83a76b0SSuyog Pawar                     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1253*c83a76b0SSuyog Pawar             }
1254*c83a76b0SSuyog Pawar #endif
1255*c83a76b0SSuyog Pawar 
1256*c83a76b0SSuyog Pawar             /* based on the rd opt cost choose the best and current index */
1257*c83a76b0SSuyog Pawar             if(rd_opt_cost < rd_opt_least_cost)
1258*c83a76b0SSuyog Pawar             {
1259*c83a76b0SSuyog Pawar                 /* swap the best and current indx */
1260*c83a76b0SSuyog Pawar                 rd_opt_best_idx = !rd_opt_best_idx;
1261*c83a76b0SSuyog Pawar                 rd_opt_curr_idx = !rd_opt_curr_idx;
1262*c83a76b0SSuyog Pawar                 i4_best_cu_qp = ps_ctxt->i4_cu_qp;
1263*c83a76b0SSuyog Pawar 
1264*c83a76b0SSuyog Pawar                 rd_opt_least_cost = rd_opt_cost;
1265*c83a76b0SSuyog Pawar                 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
1266*c83a76b0SSuyog Pawar             }
1267*c83a76b0SSuyog Pawar 
1268*c83a76b0SSuyog Pawar             if((TU_EQ_SUBCU == curr_func_mode) &&
1269*c83a76b0SSuyog Pawar                (ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_intra_flag) &&
1270*c83a76b0SSuyog Pawar                (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P2) && !FORCE_INTRA_TU_DEPTH_TO_0)
1271*c83a76b0SSuyog Pawar             {
1272*c83a76b0SSuyog Pawar                 UWORD8 au1_tu_eq_cu_div2_modes[4];
1273*c83a76b0SSuyog Pawar                 UWORD8 au1_freq_of_mode[4];
1274*c83a76b0SSuyog Pawar 
1275*c83a76b0SSuyog Pawar                 if(ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_part_mode == SIZE_2Nx2N)
1276*c83a76b0SSuyog Pawar                 {
1277*c83a76b0SSuyog Pawar                     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1278*c83a76b0SSuyog Pawar                         255;  //ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode[0];
1279*c83a76b0SSuyog Pawar                     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
1280*c83a76b0SSuyog Pawar                         255;
1281*c83a76b0SSuyog Pawar                 }
1282*c83a76b0SSuyog Pawar                 else
1283*c83a76b0SSuyog Pawar                 {
1284*c83a76b0SSuyog Pawar                     WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
1285*c83a76b0SSuyog Pawar                         ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode,
1286*c83a76b0SSuyog Pawar                         au1_tu_eq_cu_div2_modes,
1287*c83a76b0SSuyog Pawar                         au1_freq_of_mode,
1288*c83a76b0SSuyog Pawar                         4);
1289*c83a76b0SSuyog Pawar 
1290*c83a76b0SSuyog Pawar                     if(2 == i4_num_clusters)
1291*c83a76b0SSuyog Pawar                     {
1292*c83a76b0SSuyog Pawar                         if(au1_freq_of_mode[0] == 3)
1293*c83a76b0SSuyog Pawar                         {
1294*c83a76b0SSuyog Pawar                             ps_cu_analyse->s_cu_intra_cand
1295*c83a76b0SSuyog Pawar                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1296*c83a76b0SSuyog Pawar                                 au1_tu_eq_cu_div2_modes[0];
1297*c83a76b0SSuyog Pawar                             ps_cu_analyse->s_cu_intra_cand
1298*c83a76b0SSuyog Pawar                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
1299*c83a76b0SSuyog Pawar                         }
1300*c83a76b0SSuyog Pawar                         else if(au1_freq_of_mode[1] == 3)
1301*c83a76b0SSuyog Pawar                         {
1302*c83a76b0SSuyog Pawar                             ps_cu_analyse->s_cu_intra_cand
1303*c83a76b0SSuyog Pawar                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1304*c83a76b0SSuyog Pawar                                 au1_tu_eq_cu_div2_modes[1];
1305*c83a76b0SSuyog Pawar                             ps_cu_analyse->s_cu_intra_cand
1306*c83a76b0SSuyog Pawar                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
1307*c83a76b0SSuyog Pawar                         }
1308*c83a76b0SSuyog Pawar                         else
1309*c83a76b0SSuyog Pawar                         {
1310*c83a76b0SSuyog Pawar                             ps_cu_analyse->s_cu_intra_cand
1311*c83a76b0SSuyog Pawar                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1312*c83a76b0SSuyog Pawar                                 au1_tu_eq_cu_div2_modes[0];
1313*c83a76b0SSuyog Pawar                             ps_cu_analyse->s_cu_intra_cand
1314*c83a76b0SSuyog Pawar                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
1315*c83a76b0SSuyog Pawar                                 au1_tu_eq_cu_div2_modes[1];
1316*c83a76b0SSuyog Pawar                             ps_cu_analyse->s_cu_intra_cand
1317*c83a76b0SSuyog Pawar                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[2] = 255;
1318*c83a76b0SSuyog Pawar                         }
1319*c83a76b0SSuyog Pawar                     }
1320*c83a76b0SSuyog Pawar                 }
1321*c83a76b0SSuyog Pawar             }
1322*c83a76b0SSuyog Pawar 
1323*c83a76b0SSuyog Pawar             /* set the neighbour map to 0 */
1324*c83a76b0SSuyog Pawar             ihevce_set_nbr_map(
1325*c83a76b0SSuyog Pawar                 ps_ctxt->pu1_ctb_nbr_map,
1326*c83a76b0SSuyog Pawar                 ps_ctxt->i4_nbr_map_strd,
1327*c83a76b0SSuyog Pawar                 (ps_cu_analyse->b3_cu_pos_x << 1),
1328*c83a76b0SSuyog Pawar                 (ps_cu_analyse->b3_cu_pos_y << 1),
1329*c83a76b0SSuyog Pawar                 (ps_cu_analyse->u1_cu_size >> 2),
1330*c83a76b0SSuyog Pawar                 0);
1331*c83a76b0SSuyog Pawar         }
1332*c83a76b0SSuyog Pawar 
1333*c83a76b0SSuyog Pawar     } /* end of Intra RD OPT cand evaluation */
1334*c83a76b0SSuyog Pawar 
1335*c83a76b0SSuyog Pawar     ASSERT(i4_best_cu_qp > (ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1));
1336*c83a76b0SSuyog Pawar     ps_ctxt->i4_cu_qp = i4_best_cu_qp;
1337*c83a76b0SSuyog Pawar     ps_cu_analyse->i1_cu_qp = i4_best_cu_qp;
1338*c83a76b0SSuyog Pawar 
1339*c83a76b0SSuyog Pawar     /* --------------------------------------- */
1340*c83a76b0SSuyog Pawar     /* --------Final mode Recon ---------- */
1341*c83a76b0SSuyog Pawar     /* --------------------------------------- */
1342*c83a76b0SSuyog Pawar     {
1343*c83a76b0SSuyog Pawar         enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
1344*c83a76b0SSuyog Pawar         void *pv_final_pred = NULL;
1345*c83a76b0SSuyog Pawar         WORD32 final_pred_strd = 0;
1346*c83a76b0SSuyog Pawar         void *pv_final_pred_chrm = NULL;
1347*c83a76b0SSuyog Pawar         WORD32 final_pred_strd_chrm = 0;
1348*c83a76b0SSuyog Pawar         WORD32 packed_pred_mode;
1349*c83a76b0SSuyog Pawar 
1350*c83a76b0SSuyog Pawar #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1351*c83a76b0SSuyog Pawar         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
1352*c83a76b0SSuyog Pawar         {
1353*c83a76b0SSuyog Pawar             pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
1354*c83a76b0SSuyog Pawar         }
1355*c83a76b0SSuyog Pawar #else
1356*c83a76b0SSuyog Pawar         pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
1357*c83a76b0SSuyog Pawar #endif
1358*c83a76b0SSuyog Pawar 
1359*c83a76b0SSuyog Pawar         ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1360*c83a76b0SSuyog Pawar         packed_pred_mode =
1361*c83a76b0SSuyog Pawar             ps_enc_loop_bestprms->u1_intra_flag + (ps_enc_loop_bestprms->u1_skip_flag) * 2;
1362*c83a76b0SSuyog Pawar 
1363*c83a76b0SSuyog Pawar         if(!ps_ctxt->u1_is_input_data_hbd)
1364*c83a76b0SSuyog Pawar         {
1365*c83a76b0SSuyog Pawar             if(ps_enc_loop_bestprms->u1_intra_flag)
1366*c83a76b0SSuyog Pawar             {
1367*c83a76b0SSuyog Pawar                 pv_final_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_best_idx];
1368*c83a76b0SSuyog Pawar                 final_pred_strd =
1369*c83a76b0SSuyog Pawar                     ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_best_idx];
1370*c83a76b0SSuyog Pawar             }
1371*c83a76b0SSuyog Pawar             else
1372*c83a76b0SSuyog Pawar             {
1373*c83a76b0SSuyog Pawar                 pv_final_pred = ps_best_inter_cand->pu1_pred_data;
1374*c83a76b0SSuyog Pawar                 final_pred_strd = ps_best_inter_cand->i4_pred_data_stride;
1375*c83a76b0SSuyog Pawar             }
1376*c83a76b0SSuyog Pawar 
1377*c83a76b0SSuyog Pawar             pv_final_pred_chrm =
1378*c83a76b0SSuyog Pawar                 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
1379*c83a76b0SSuyog Pawar                 rd_opt_best_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
1380*c83a76b0SSuyog Pawar                                    (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
1381*c83a76b0SSuyog Pawar             final_pred_strd_chrm =
1382*c83a76b0SSuyog Pawar                 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
1383*c83a76b0SSuyog Pawar         }
1384*c83a76b0SSuyog Pawar 
1385*c83a76b0SSuyog Pawar         ihevce_set_eval_flags(ps_ctxt, ps_enc_loop_bestprms);
1386*c83a76b0SSuyog Pawar 
1387*c83a76b0SSuyog Pawar         {
1388*c83a76b0SSuyog Pawar             final_mode_process_prms_t s_prms;
1389*c83a76b0SSuyog Pawar 
1390*c83a76b0SSuyog Pawar             void *pv_cu_luma_recon;
1391*c83a76b0SSuyog Pawar             void *pv_cu_chroma_recon;
1392*c83a76b0SSuyog Pawar             WORD32 luma_stride, chroma_stride;
1393*c83a76b0SSuyog Pawar 
1394*c83a76b0SSuyog Pawar             if(!ps_ctxt->u1_is_input_data_hbd)
1395*c83a76b0SSuyog Pawar             {
1396*c83a76b0SSuyog Pawar #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1397*c83a76b0SSuyog Pawar                 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
1398*c83a76b0SSuyog Pawar                 {
1399*c83a76b0SSuyog Pawar                     pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
1400*c83a76b0SSuyog Pawar                     pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
1401*c83a76b0SSuyog Pawar                     luma_stride = ps_cu_analyse->u1_cu_size;
1402*c83a76b0SSuyog Pawar                     chroma_stride = ps_cu_analyse->u1_cu_size;
1403*c83a76b0SSuyog Pawar                 }
1404*c83a76b0SSuyog Pawar                 else
1405*c83a76b0SSuyog Pawar                 {
1406*c83a76b0SSuyog Pawar                     /* based on CU position derive the luma pointers */
1407*c83a76b0SSuyog Pawar                     pv_cu_luma_recon = pu1_final_recon;
1408*c83a76b0SSuyog Pawar 
1409*c83a76b0SSuyog Pawar                     /* based on CU position derive the chroma pointers */
1410*c83a76b0SSuyog Pawar                     pv_cu_chroma_recon = s_chrm_cu_buf_prms.pu1_final_recon;
1411*c83a76b0SSuyog Pawar 
1412*c83a76b0SSuyog Pawar                     luma_stride = ps_cu_prms->i4_luma_recon_stride;
1413*c83a76b0SSuyog Pawar 
1414*c83a76b0SSuyog Pawar                     chroma_stride = ps_cu_prms->i4_chrm_recon_stride;
1415*c83a76b0SSuyog Pawar                 }
1416*c83a76b0SSuyog Pawar #else
1417*c83a76b0SSuyog Pawar                 pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
1418*c83a76b0SSuyog Pawar                 pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
1419*c83a76b0SSuyog Pawar                 luma_stride = ps_cu_analyse->u1_cu_size;
1420*c83a76b0SSuyog Pawar                 chroma_stride = ps_cu_analyse->u1_cu_size;
1421*c83a76b0SSuyog Pawar #endif
1422*c83a76b0SSuyog Pawar 
1423*c83a76b0SSuyog Pawar                 s_prms.ps_cu_nbr_prms = &s_cu_nbr_prms;
1424*c83a76b0SSuyog Pawar                 s_prms.ps_best_inter_cand = ps_best_inter_cand;
1425*c83a76b0SSuyog Pawar                 s_prms.ps_chrm_cu_buf_prms = &s_chrm_cu_buf_prms;
1426*c83a76b0SSuyog Pawar                 s_prms.packed_pred_mode = packed_pred_mode;
1427*c83a76b0SSuyog Pawar                 s_prms.rd_opt_best_idx = rd_opt_best_idx;
1428*c83a76b0SSuyog Pawar                 s_prms.pv_src = pu1_curr_src;
1429*c83a76b0SSuyog Pawar                 s_prms.src_strd = ps_cu_prms->i4_luma_src_stride;
1430*c83a76b0SSuyog Pawar                 s_prms.pv_pred = pv_final_pred;
1431*c83a76b0SSuyog Pawar                 s_prms.pred_strd = final_pred_strd;
1432*c83a76b0SSuyog Pawar                 s_prms.pv_pred_chrm = pv_final_pred_chrm;
1433*c83a76b0SSuyog Pawar                 s_prms.pred_chrm_strd = final_pred_strd_chrm;
1434*c83a76b0SSuyog Pawar                 s_prms.pu1_final_ecd_data = pu1_ecd_data;
1435*c83a76b0SSuyog Pawar                 s_prms.pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
1436*c83a76b0SSuyog Pawar                 s_prms.csbf_strd = ps_ctxt->i4_cu_csbf_strd;
1437*c83a76b0SSuyog Pawar                 s_prms.pv_luma_recon = pv_cu_luma_recon;
1438*c83a76b0SSuyog Pawar                 s_prms.recon_luma_strd = luma_stride;
1439*c83a76b0SSuyog Pawar                 s_prms.pv_chrm_recon = pv_cu_chroma_recon;
1440*c83a76b0SSuyog Pawar                 s_prms.recon_chrma_strd = chroma_stride;
1441*c83a76b0SSuyog Pawar                 s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
1442*c83a76b0SSuyog Pawar                 s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
1443*c83a76b0SSuyog Pawar                 s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
1444*c83a76b0SSuyog Pawar                 s_prms.i1_cu_qp = ps_cu_analyse->i1_cu_qp;
1445*c83a76b0SSuyog Pawar                 s_prms.u1_will_cabac_state_change = 1;
1446*c83a76b0SSuyog Pawar                 s_prms.u1_recompute_sbh_and_rdoq = 0;
1447*c83a76b0SSuyog Pawar                 s_prms.u1_is_first_pass = 1;
1448*c83a76b0SSuyog Pawar             }
1449*c83a76b0SSuyog Pawar 
1450*c83a76b0SSuyog Pawar #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
1451*c83a76b0SSuyog Pawar             s_prms.u1_is_cu_noisy = !ps_enc_loop_bestprms->u1_intra_flag
1452*c83a76b0SSuyog Pawar                                         ? ps_cu_prms->u1_is_cu_noisy
1453*c83a76b0SSuyog Pawar                                         : ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
1454*c83a76b0SSuyog Pawar #endif
1455*c83a76b0SSuyog Pawar 
1456*c83a76b0SSuyog Pawar             ((pf_final_rdopt_mode_prcs)ps_ctxt->pv_final_rdopt_mode_prcs)(ps_ctxt, &s_prms);
1457*c83a76b0SSuyog Pawar 
1458*c83a76b0SSuyog Pawar #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1459*c83a76b0SSuyog Pawar             if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1460*c83a76b0SSuyog Pawar             {
1461*c83a76b0SSuyog Pawar                 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1462*c83a76b0SSuyog Pawar                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1463*c83a76b0SSuyog Pawar                     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1464*c83a76b0SSuyog Pawar             }
1465*c83a76b0SSuyog Pawar #endif
1466*c83a76b0SSuyog Pawar         }
1467*c83a76b0SSuyog Pawar     }
1468*c83a76b0SSuyog Pawar 
1469*c83a76b0SSuyog Pawar     /* --------------------------------------- */
1470*c83a76b0SSuyog Pawar     /* --------Populate CU out prms ---------- */
1471*c83a76b0SSuyog Pawar     /* --------------------------------------- */
1472*c83a76b0SSuyog Pawar     {
1473*c83a76b0SSuyog Pawar         enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
1474*c83a76b0SSuyog Pawar         UWORD8 *pu1_pu_map;
1475*c83a76b0SSuyog Pawar         ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1476*c83a76b0SSuyog Pawar 
1477*c83a76b0SSuyog Pawar         /* Corner case : If Part is 2Nx2N and Merge has all TU with zero cbf */
1478*c83a76b0SSuyog Pawar         /* then it has to be coded as skip CU */
1479*c83a76b0SSuyog Pawar         if((SIZE_2Nx2N == ps_enc_loop_bestprms->u1_part_mode) &&
1480*c83a76b0SSuyog Pawar            (1 == ps_enc_loop_bestprms->as_pu_enc_loop[0].b1_merge_flag) &&
1481*c83a76b0SSuyog Pawar            (0 == ps_enc_loop_bestprms->u1_skip_flag) && (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
1482*c83a76b0SSuyog Pawar         {
1483*c83a76b0SSuyog Pawar             ps_enc_loop_bestprms->u1_skip_flag = 1;
1484*c83a76b0SSuyog Pawar         }
1485*c83a76b0SSuyog Pawar 
1486*c83a76b0SSuyog Pawar         /* update number PUs in CU */
1487*c83a76b0SSuyog Pawar         ps_cu_prms->i4_num_pus_in_cu = ps_enc_loop_bestprms->u2_num_pus_in_cu;
1488*c83a76b0SSuyog Pawar 
1489*c83a76b0SSuyog Pawar         /* ---- populate the colocated pu map index --- */
1490*c83a76b0SSuyog Pawar         for(ctr = 0; ctr < ps_enc_loop_bestprms->u2_num_pus_in_cu; ctr++)
1491*c83a76b0SSuyog Pawar         {
1492*c83a76b0SSuyog Pawar             WORD32 i;
1493*c83a76b0SSuyog Pawar             WORD32 vert_ht;
1494*c83a76b0SSuyog Pawar             WORD32 horz_wd;
1495*c83a76b0SSuyog Pawar 
1496*c83a76b0SSuyog Pawar             if(ps_enc_loop_bestprms->u1_intra_flag)
1497*c83a76b0SSuyog Pawar             {
1498*c83a76b0SSuyog Pawar                 ps_enc_loop_bestprms->as_col_pu_enc_loop[ctr].b1_intra_flag = 1;
1499*c83a76b0SSuyog Pawar                 vert_ht = ps_cu_analyse->u1_cu_size >> 2;
1500*c83a76b0SSuyog Pawar                 horz_wd = ps_cu_analyse->u1_cu_size >> 2;
1501*c83a76b0SSuyog Pawar             }
1502*c83a76b0SSuyog Pawar             else
1503*c83a76b0SSuyog Pawar             {
1504*c83a76b0SSuyog Pawar                 vert_ht = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_ht + 1) << 2) >> 2);
1505*c83a76b0SSuyog Pawar                 horz_wd = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_wd + 1) << 2) >> 2);
1506*c83a76b0SSuyog Pawar             }
1507*c83a76b0SSuyog Pawar 
1508*c83a76b0SSuyog Pawar             pu1_pu_map = pu1_col_pu_map + ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_x;
1509*c83a76b0SSuyog Pawar             pu1_pu_map += (ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_y * num_4x4_in_ctb);
1510*c83a76b0SSuyog Pawar 
1511*c83a76b0SSuyog Pawar             for(i = 0; i < vert_ht; i++)
1512*c83a76b0SSuyog Pawar             {
1513*c83a76b0SSuyog Pawar                 memset(pu1_pu_map, col_start_pu_idx, horz_wd);
1514*c83a76b0SSuyog Pawar                 pu1_pu_map += num_4x4_in_ctb;
1515*c83a76b0SSuyog Pawar             }
1516*c83a76b0SSuyog Pawar             /* increment the index */
1517*c83a76b0SSuyog Pawar             col_start_pu_idx++;
1518*c83a76b0SSuyog Pawar         }
1519*c83a76b0SSuyog Pawar         /* ---- copy the colocated PUs to frm pu ----- */
1520*c83a76b0SSuyog Pawar         memcpy(
1521*c83a76b0SSuyog Pawar             ps_col_pu,
1522*c83a76b0SSuyog Pawar             &ps_enc_loop_bestprms->as_col_pu_enc_loop[0],
1523*c83a76b0SSuyog Pawar             ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_col_mv_t));
1524*c83a76b0SSuyog Pawar 
1525*c83a76b0SSuyog Pawar         /*---populate qp for 4x4 nbr array based on skip and cbf zero flag---*/
1526*c83a76b0SSuyog Pawar         {
1527*c83a76b0SSuyog Pawar             entropy_context_t *ps_entropy_ctxt;
1528*c83a76b0SSuyog Pawar 
1529*c83a76b0SSuyog Pawar             WORD32 diff_cu_qp_delta_depth, log2_ctb_size;
1530*c83a76b0SSuyog Pawar 
1531*c83a76b0SSuyog Pawar             WORD32 log2_min_cu_qp_delta_size;
1532*c83a76b0SSuyog Pawar             UWORD32 block_addr_align;
1533*c83a76b0SSuyog Pawar             ps_entropy_ctxt = ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt;
1534*c83a76b0SSuyog Pawar 
1535*c83a76b0SSuyog Pawar             log2_ctb_size = ps_entropy_ctxt->i1_log2_ctb_size;
1536*c83a76b0SSuyog Pawar             diff_cu_qp_delta_depth = ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth;
1537*c83a76b0SSuyog Pawar 
1538*c83a76b0SSuyog Pawar             log2_min_cu_qp_delta_size = log2_ctb_size - diff_cu_qp_delta_depth;
1539*c83a76b0SSuyog Pawar             block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3);
1540*c83a76b0SSuyog Pawar 
1541*c83a76b0SSuyog Pawar             ps_entropy_ctxt->i4_qg_pos_x = ps_cu_analyse->b3_cu_pos_x & block_addr_align;
1542*c83a76b0SSuyog Pawar             ps_entropy_ctxt->i4_qg_pos_y = ps_cu_analyse->b3_cu_pos_y & block_addr_align;
1543*c83a76b0SSuyog Pawar             /*Update the Qp value used. It will not have a valid value iff
1544*c83a76b0SSuyog Pawar             current CU is (skipped/no_cbf). In that case the Qp needed for
1545*c83a76b0SSuyog Pawar             deblocking is calculated from top/left/previous coded CU*/
1546*c83a76b0SSuyog Pawar 
1547*c83a76b0SSuyog Pawar             ps_ctxt->ps_enc_out_ctxt->i1_cu_qp = ps_cu_analyse->i1_cu_qp;
1548*c83a76b0SSuyog Pawar 
1549*c83a76b0SSuyog Pawar             if(ps_entropy_ctxt->i4_qg_pos_x == ps_cu_analyse->b3_cu_pos_x &&
1550*c83a76b0SSuyog Pawar                ps_entropy_ctxt->i4_qg_pos_y == ps_cu_analyse->b3_cu_pos_y)
1551*c83a76b0SSuyog Pawar             {
1552*c83a76b0SSuyog Pawar                 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 1;
1553*c83a76b0SSuyog Pawar             }
1554*c83a76b0SSuyog Pawar             else
1555*c83a76b0SSuyog Pawar             {
1556*c83a76b0SSuyog Pawar                 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 0;
1557*c83a76b0SSuyog Pawar             }
1558*c83a76b0SSuyog Pawar         }
1559*c83a76b0SSuyog Pawar 
1560*c83a76b0SSuyog Pawar         /* -- at the end of CU set the neighbour map to 1 -- */
1561*c83a76b0SSuyog Pawar         ihevce_set_nbr_map(
1562*c83a76b0SSuyog Pawar             ps_ctxt->pu1_ctb_nbr_map,
1563*c83a76b0SSuyog Pawar             ps_ctxt->i4_nbr_map_strd,
1564*c83a76b0SSuyog Pawar             (ps_cu_analyse->b3_cu_pos_x << 1),
1565*c83a76b0SSuyog Pawar             (ps_cu_analyse->b3_cu_pos_y << 1),
1566*c83a76b0SSuyog Pawar             (ps_cu_analyse->u1_cu_size >> 2),
1567*c83a76b0SSuyog Pawar             1);
1568*c83a76b0SSuyog Pawar 
1569*c83a76b0SSuyog Pawar         /* -- at the end of CU update best cabac rdopt states -- */
1570*c83a76b0SSuyog Pawar         /* -- and also set the top row skip flags  ------------- */
1571*c83a76b0SSuyog Pawar         ihevce_entropy_update_best_cu_states(
1572*c83a76b0SSuyog Pawar             &ps_ctxt->s_rdopt_entropy_ctxt,
1573*c83a76b0SSuyog Pawar             ps_cu_analyse->b3_cu_pos_x,
1574*c83a76b0SSuyog Pawar             ps_cu_analyse->b3_cu_pos_y,
1575*c83a76b0SSuyog Pawar             ps_cu_analyse->u1_cu_size,
1576*c83a76b0SSuyog Pawar             0,
1577*c83a76b0SSuyog Pawar             rd_opt_best_idx);
1578*c83a76b0SSuyog Pawar     }
1579*c83a76b0SSuyog Pawar 
1580*c83a76b0SSuyog Pawar     /* Store Output struct */
1581*c83a76b0SSuyog Pawar #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1582*c83a76b0SSuyog Pawar     {
1583*c83a76b0SSuyog Pawar         {
1584*c83a76b0SSuyog Pawar             memcpy(
1585*c83a76b0SSuyog Pawar                 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
1586*c83a76b0SSuyog Pawar                 &ps_ctxt->as_cu_prms[rd_opt_best_idx],
1587*c83a76b0SSuyog Pawar                 sizeof(enc_loop_cu_final_prms_t));
1588*c83a76b0SSuyog Pawar         }
1589*c83a76b0SSuyog Pawar 
1590*c83a76b0SSuyog Pawar         memcpy(
1591*c83a76b0SSuyog Pawar             &ps_ctxt->as_cu_recur_nbr[0],
1592*c83a76b0SSuyog Pawar             &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
1593*c83a76b0SSuyog Pawar             sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
1594*c83a76b0SSuyog Pawar                 (ps_cu_analyse->u1_cu_size >> 2));
1595*c83a76b0SSuyog Pawar 
1596*c83a76b0SSuyog Pawar         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
1597*c83a76b0SSuyog Pawar 
1598*c83a76b0SSuyog Pawar         ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
1599*c83a76b0SSuyog Pawar     }
1600*c83a76b0SSuyog Pawar #else
1601*c83a76b0SSuyog Pawar     if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
1602*c83a76b0SSuyog Pawar     {
1603*c83a76b0SSuyog Pawar         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1604*c83a76b0SSuyog Pawar 
1605*c83a76b0SSuyog Pawar         ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
1606*c83a76b0SSuyog Pawar 
1607*c83a76b0SSuyog Pawar         if(ps_ctxt->u1_disable_intra_eval && ps_ctxt->i4_deblk_pad_hpel_cur_pic)
1608*c83a76b0SSuyog Pawar         {
1609*c83a76b0SSuyog Pawar             /* Wait till top data is ready          */
1610*c83a76b0SSuyog Pawar             /* Currently checking till top right CU */
1611*c83a76b0SSuyog Pawar             curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1612*c83a76b0SSuyog Pawar 
1613*c83a76b0SSuyog Pawar             if(i4_ctb_y_off == 0)
1614*c83a76b0SSuyog Pawar             {
1615*c83a76b0SSuyog Pawar                 /* No wait for 1st row */
1616*c83a76b0SSuyog Pawar                 cu_top_right_offset = -(MAX_CTB_SIZE);
1617*c83a76b0SSuyog Pawar                 {
1618*c83a76b0SSuyog Pawar                     ihevce_tile_params_t *ps_col_tile_params =
1619*c83a76b0SSuyog Pawar                         ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
1620*c83a76b0SSuyog Pawar                          ps_ctxt->i4_tile_col_idx);
1621*c83a76b0SSuyog Pawar 
1622*c83a76b0SSuyog Pawar                     /* No wait for 1st row */
1623*c83a76b0SSuyog Pawar                     cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
1624*c83a76b0SSuyog Pawar                 }
1625*c83a76b0SSuyog Pawar                 cu_top_right_dep_pos = 0;
1626*c83a76b0SSuyog Pawar             }
1627*c83a76b0SSuyog Pawar             else
1628*c83a76b0SSuyog Pawar             {
1629*c83a76b0SSuyog Pawar                 cu_top_right_offset = (ps_cu_analyse->u1_cu_size);
1630*c83a76b0SSuyog Pawar                 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
1631*c83a76b0SSuyog Pawar             }
1632*c83a76b0SSuyog Pawar 
1633*c83a76b0SSuyog Pawar             if(0 == ps_cu_analyse->b3_cu_pos_y)
1634*c83a76b0SSuyog Pawar             {
1635*c83a76b0SSuyog Pawar                 ihevce_dmgr_chk_row_row_sync(
1636*c83a76b0SSuyog Pawar                     ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1637*c83a76b0SSuyog Pawar                     curr_cu_pos_in_row,
1638*c83a76b0SSuyog Pawar                     cu_top_right_offset,
1639*c83a76b0SSuyog Pawar                     cu_top_right_dep_pos,
1640*c83a76b0SSuyog Pawar                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1641*c83a76b0SSuyog Pawar                     ps_ctxt->thrd_id);
1642*c83a76b0SSuyog Pawar             }
1643*c83a76b0SSuyog Pawar         }
1644*c83a76b0SSuyog Pawar     }
1645*c83a76b0SSuyog Pawar     else
1646*c83a76b0SSuyog Pawar     {
1647*c83a76b0SSuyog Pawar         {
1648*c83a76b0SSuyog Pawar             memcpy(
1649*c83a76b0SSuyog Pawar                 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
1650*c83a76b0SSuyog Pawar                 &ps_ctxt->as_cu_prms[rd_opt_best_idx],
1651*c83a76b0SSuyog Pawar                 sizeof(enc_loop_cu_final_prms_t));
1652*c83a76b0SSuyog Pawar         }
1653*c83a76b0SSuyog Pawar 
1654*c83a76b0SSuyog Pawar         memcpy(
1655*c83a76b0SSuyog Pawar             &ps_ctxt->as_cu_recur_nbr[0],
1656*c83a76b0SSuyog Pawar             &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
1657*c83a76b0SSuyog Pawar             sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
1658*c83a76b0SSuyog Pawar                 (ps_cu_analyse->u1_cu_size >> 2));
1659*c83a76b0SSuyog Pawar 
1660*c83a76b0SSuyog Pawar         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
1661*c83a76b0SSuyog Pawar 
1662*c83a76b0SSuyog Pawar         ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
1663*c83a76b0SSuyog Pawar     }
1664*c83a76b0SSuyog Pawar #endif
1665*c83a76b0SSuyog Pawar 
1666*c83a76b0SSuyog Pawar     ps_ctxt->s_pred_buf_data.u4_is_buf_in_use &=
1667*c83a76b0SSuyog Pawar         ~((1 << (ps_ctxt->i4_max_num_inter_rdopt_cands + 4)) - 1);
1668*c83a76b0SSuyog Pawar 
1669*c83a76b0SSuyog Pawar     return rd_opt_least_cost;
1670*c83a76b0SSuyog Pawar }
1671*c83a76b0SSuyog Pawar 
1672*c83a76b0SSuyog Pawar /*!
1673*c83a76b0SSuyog Pawar ******************************************************************************
1674*c83a76b0SSuyog Pawar * \if Function name : ihevce_enc_loop_process_row \endif
1675*c83a76b0SSuyog Pawar *
1676*c83a76b0SSuyog Pawar * \brief
1677*c83a76b0SSuyog Pawar *    Row level enc_loop pass function
1678*c83a76b0SSuyog Pawar *
1679*c83a76b0SSuyog Pawar * \param[in] ps_ctxt : pointer to enc_loop module
1680*c83a76b0SSuyog Pawar * \param[in] ps_curr_src_bufs  : pointer to input yuv buffer (row buffer)
1681*c83a76b0SSuyog Pawar * \param[out] ps_curr_recon_bufs : pointer recon picture structure pointer (row buffer)
1682*c83a76b0SSuyog Pawar * \param[in] ps_ctb_in : pointer CTB structure (output of ME/IPE) (row buffer)
1683*c83a76b0SSuyog Pawar * \param[out] ps_ctb_out : pointer CTB output structure (row buffer)
1684*c83a76b0SSuyog Pawar * \param[out] ps_cu_out : pointer CU output structure (row buffer)
1685*c83a76b0SSuyog Pawar * \param[out] ps_tu_out : pointer TU output structure (row buffer)
1686*c83a76b0SSuyog Pawar * \param[out] pi2_frm_coeffs : pointer coeff output (row buffer)
1687*c83a76b0SSuyog Pawar * \param[in] i4_poc : current poc. Needed to send recon in dist-client mode
1688*c83a76b0SSuyog Pawar *
1689*c83a76b0SSuyog Pawar * \return
1690*c83a76b0SSuyog Pawar *    None
1691*c83a76b0SSuyog Pawar *
1692*c83a76b0SSuyog Pawar * Note : Currently the frame level calculations assume that the
1693*c83a76b0SSuyog Pawar *        frame width of the input/recon is an exact multiple of ctbsize
1694*c83a76b0SSuyog Pawar *
1695*c83a76b0SSuyog Pawar * \author
1696*c83a76b0SSuyog Pawar *  Ittiam
1697*c83a76b0SSuyog Pawar *
1698*c83a76b0SSuyog Pawar *****************************************************************************
1699*c83a76b0SSuyog Pawar */
ihevce_enc_loop_process_row(ihevce_enc_loop_ctxt_t * ps_ctxt,iv_enc_yuv_buf_t * ps_curr_src_bufs,iv_enc_yuv_buf_t * ps_curr_recon_bufs,iv_enc_yuv_buf_src_t * ps_curr_recon_bufs_src,UWORD8 ** ppu1_y_subpel_planes,ctb_analyse_t * ps_ctb_in,ctb_enc_loop_out_t * ps_ctb_out,ipe_l0_ctb_analyse_for_me_t * ps_row_ipe_analyse,cur_ctb_cu_tree_t * ps_row_cu_tree,cu_enc_loop_out_t * ps_row_cu,tu_enc_loop_out_t * ps_row_tu,pu_t * ps_row_pu,pu_col_mv_t * ps_row_col_pu,UWORD16 * pu2_num_pu_map,UWORD8 * pu1_row_pu_map,UWORD8 * pu1_row_ecd_data,UWORD32 * pu4_pu_offsets,frm_ctb_ctxt_t * ps_frm_ctb_prms,WORD32 vert_ctr,recon_pic_buf_t * ps_frm_recon,void * pv_dep_mngr_encloop_dep_me,pad_interp_recon_frm_t * ps_pad_interp_recon,WORD32 i4_pass,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,ihevce_tile_params_t * ps_tile_params)1700*c83a76b0SSuyog Pawar void ihevce_enc_loop_process_row(
1701*c83a76b0SSuyog Pawar     ihevce_enc_loop_ctxt_t *ps_ctxt,
1702*c83a76b0SSuyog Pawar     iv_enc_yuv_buf_t *ps_curr_src_bufs,
1703*c83a76b0SSuyog Pawar     iv_enc_yuv_buf_t *ps_curr_recon_bufs,
1704*c83a76b0SSuyog Pawar     iv_enc_yuv_buf_src_t *ps_curr_recon_bufs_src,
1705*c83a76b0SSuyog Pawar     UWORD8 **ppu1_y_subpel_planes,
1706*c83a76b0SSuyog Pawar     ctb_analyse_t *ps_ctb_in,
1707*c83a76b0SSuyog Pawar     ctb_enc_loop_out_t *ps_ctb_out,
1708*c83a76b0SSuyog Pawar     ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse,
1709*c83a76b0SSuyog Pawar     cur_ctb_cu_tree_t *ps_row_cu_tree,
1710*c83a76b0SSuyog Pawar     cu_enc_loop_out_t *ps_row_cu,
1711*c83a76b0SSuyog Pawar     tu_enc_loop_out_t *ps_row_tu,
1712*c83a76b0SSuyog Pawar     pu_t *ps_row_pu,
1713*c83a76b0SSuyog Pawar     pu_col_mv_t *ps_row_col_pu,
1714*c83a76b0SSuyog Pawar     UWORD16 *pu2_num_pu_map,
1715*c83a76b0SSuyog Pawar     UWORD8 *pu1_row_pu_map,
1716*c83a76b0SSuyog Pawar     UWORD8 *pu1_row_ecd_data,
1717*c83a76b0SSuyog Pawar     UWORD32 *pu4_pu_offsets,
1718*c83a76b0SSuyog Pawar     frm_ctb_ctxt_t *ps_frm_ctb_prms,
1719*c83a76b0SSuyog Pawar     WORD32 vert_ctr,
1720*c83a76b0SSuyog Pawar     recon_pic_buf_t *ps_frm_recon,
1721*c83a76b0SSuyog Pawar     void *pv_dep_mngr_encloop_dep_me,
1722*c83a76b0SSuyog Pawar     pad_interp_recon_frm_t *ps_pad_interp_recon,
1723*c83a76b0SSuyog Pawar     WORD32 i4_pass,
1724*c83a76b0SSuyog Pawar     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
1725*c83a76b0SSuyog Pawar     ihevce_tile_params_t *ps_tile_params)
1726*c83a76b0SSuyog Pawar {
1727*c83a76b0SSuyog Pawar     enc_loop_cu_prms_t s_cu_prms;
1728*c83a76b0SSuyog Pawar     ctb_enc_loop_out_t *ps_ctb_out_dblk;
1729*c83a76b0SSuyog Pawar 
1730*c83a76b0SSuyog Pawar     WORD32 ctb_ctr, ctb_start, ctb_end;
1731*c83a76b0SSuyog Pawar     WORD32 col_pu_map_idx;
1732*c83a76b0SSuyog Pawar     WORD32 num_ctbs_horz_pic;
1733*c83a76b0SSuyog Pawar     WORD32 ctb_size;
1734*c83a76b0SSuyog Pawar     WORD32 last_ctb_row_flag;
1735*c83a76b0SSuyog Pawar     WORD32 last_ctb_col_flag;
1736*c83a76b0SSuyog Pawar     WORD32 last_hz_ctb_wd;
1737*c83a76b0SSuyog Pawar     WORD32 last_vt_ctb_ht;
1738*c83a76b0SSuyog Pawar     void *pv_dep_mngr_enc_loop_dblk = ps_ctxt->pv_dep_mngr_enc_loop_dblk;
1739*c83a76b0SSuyog Pawar     void *pv_dep_mngr_enc_loop_sao = ps_ctxt->pv_dep_mngr_enc_loop_sao;
1740*c83a76b0SSuyog Pawar     void *pv_dep_mngr_enc_loop_cu_top_right = ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right;
1741*c83a76b0SSuyog Pawar     WORD32 dblk_offset, dblk_check_dep_pos;
1742*c83a76b0SSuyog Pawar     WORD32 sao_offset, sao_check_dep_pos;
1743*c83a76b0SSuyog Pawar     WORD32 aux_offset, aux_check_dep_pos;
1744*c83a76b0SSuyog Pawar     void *pv_dep_mngr_me_dep_encloop;
1745*c83a76b0SSuyog Pawar     ctb_enc_loop_out_t *ps_ctb_out_sao;
1746*c83a76b0SSuyog Pawar     /*Structure to store deblocking parameters at CTB-row level*/
1747*c83a76b0SSuyog Pawar     deblk_ctbrow_prms_t s_deblk_ctb_row_params;
1748*c83a76b0SSuyog Pawar     UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
1749*c83a76b0SSuyog Pawar 
1750*c83a76b0SSuyog Pawar     pv_dep_mngr_me_dep_encloop = (void *)ps_frm_recon->pv_dep_mngr_recon;
1751*c83a76b0SSuyog Pawar     num_ctbs_horz_pic = ps_frm_ctb_prms->i4_num_ctbs_horz;
1752*c83a76b0SSuyog Pawar     ctb_size = ps_frm_ctb_prms->i4_ctb_size;
1753*c83a76b0SSuyog Pawar 
1754*c83a76b0SSuyog Pawar     /* Store the num_ctb_horz in sao context*/
1755*c83a76b0SSuyog Pawar     ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_horz = ps_frm_ctb_prms->i4_num_ctbs_horz;
1756*c83a76b0SSuyog Pawar     ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_vert = ps_frm_ctb_prms->i4_num_ctbs_vert;
1757*c83a76b0SSuyog Pawar 
1758*c83a76b0SSuyog Pawar     /* Set Variables for Dep. Checking and Setting */
1759*c83a76b0SSuyog Pawar     aux_check_dep_pos = vert_ctr;
1760*c83a76b0SSuyog Pawar     aux_offset = 2; /* Should be there for 0th row also */
1761*c83a76b0SSuyog Pawar     if(vert_ctr > 0)
1762*c83a76b0SSuyog Pawar     {
1763*c83a76b0SSuyog Pawar         dblk_check_dep_pos = vert_ctr - 1;
1764*c83a76b0SSuyog Pawar         dblk_offset = 2;
1765*c83a76b0SSuyog Pawar     }
1766*c83a76b0SSuyog Pawar     else
1767*c83a76b0SSuyog Pawar     {
1768*c83a76b0SSuyog Pawar         /* First row should run without waiting */
1769*c83a76b0SSuyog Pawar         dblk_check_dep_pos = 0;
1770*c83a76b0SSuyog Pawar         dblk_offset = -(ps_tile_params->i4_first_sample_x + 1);
1771*c83a76b0SSuyog Pawar     }
1772*c83a76b0SSuyog Pawar 
1773*c83a76b0SSuyog Pawar     /* Set sao_offset and sao_check_dep_pos */
1774*c83a76b0SSuyog Pawar     if(vert_ctr > 1)
1775*c83a76b0SSuyog Pawar     {
1776*c83a76b0SSuyog Pawar         sao_check_dep_pos = vert_ctr - 2;
1777*c83a76b0SSuyog Pawar         sao_offset = 2;
1778*c83a76b0SSuyog Pawar     }
1779*c83a76b0SSuyog Pawar     else
1780*c83a76b0SSuyog Pawar     {
1781*c83a76b0SSuyog Pawar         /* First row should run without waiting */
1782*c83a76b0SSuyog Pawar         sao_check_dep_pos = 0;
1783*c83a76b0SSuyog Pawar         sao_offset = -(ps_tile_params->i4_first_sample_x + 1);
1784*c83a76b0SSuyog Pawar     }
1785*c83a76b0SSuyog Pawar 
1786*c83a76b0SSuyog Pawar     /* check if the current row processed in last CTb row */
1787*c83a76b0SSuyog Pawar     last_ctb_row_flag = (vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1));
1788*c83a76b0SSuyog Pawar 
1789*c83a76b0SSuyog Pawar     /* Valid Width (pixels) in the last CTB in every row (padding cases) */
1790*c83a76b0SSuyog Pawar     last_hz_ctb_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd - ((num_ctbs_horz_pic - 1) * ctb_size);
1791*c83a76b0SSuyog Pawar 
1792*c83a76b0SSuyog Pawar     /* Valid Height (pixels) in the last CTB row (padding cases) */
1793*c83a76b0SSuyog Pawar     last_vt_ctb_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht -
1794*c83a76b0SSuyog Pawar                      ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ctb_size);
1795*c83a76b0SSuyog Pawar     /* reset the states copied flag */
1796*c83a76b0SSuyog Pawar     ps_ctxt->u1_cabac_states_next_row_copied_flag = 0;
1797*c83a76b0SSuyog Pawar     ps_ctxt->u1_cabac_states_first_cu_copied_flag = 0;
1798*c83a76b0SSuyog Pawar 
1799*c83a76b0SSuyog Pawar     /* populate the cu prms which are common for entire ctb row */
1800*c83a76b0SSuyog Pawar     s_cu_prms.i4_luma_src_stride = ps_curr_src_bufs->i4_y_strd;
1801*c83a76b0SSuyog Pawar     s_cu_prms.i4_chrm_src_stride = ps_curr_src_bufs->i4_uv_strd;
1802*c83a76b0SSuyog Pawar     s_cu_prms.i4_luma_recon_stride = ps_curr_recon_bufs->i4_y_strd;
1803*c83a76b0SSuyog Pawar     s_cu_prms.i4_chrm_recon_stride = ps_curr_recon_bufs->i4_uv_strd;
1804*c83a76b0SSuyog Pawar     s_cu_prms.i4_ctb_size = ctb_size;
1805*c83a76b0SSuyog Pawar 
1806*c83a76b0SSuyog Pawar     ps_ctxt->i4_is_first_cu_qg_coded = 0;
1807*c83a76b0SSuyog Pawar 
1808*c83a76b0SSuyog Pawar     /* Initialize the number of PUs for the first CTB to 0 */
1809*c83a76b0SSuyog Pawar     *pu2_num_pu_map = 0;
1810*c83a76b0SSuyog Pawar 
1811*c83a76b0SSuyog Pawar     /*Getting the address of BS and Qp arrays and other info*/
1812*c83a76b0SSuyog Pawar     memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
1813*c83a76b0SSuyog Pawar     {
1814*c83a76b0SSuyog Pawar         WORD32 num_ctbs_horz_tile;
1815*c83a76b0SSuyog Pawar         /* Update the pointers which are accessed not by using ctb_ctr
1816*c83a76b0SSuyog Pawar         to the tile start here! */
1817*c83a76b0SSuyog Pawar         ps_ctb_in += ps_tile_params->i4_first_ctb_x;
1818*c83a76b0SSuyog Pawar         ps_ctb_out += ps_tile_params->i4_first_ctb_x;
1819*c83a76b0SSuyog Pawar 
1820*c83a76b0SSuyog Pawar         ps_row_cu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_cus_in_ctb);
1821*c83a76b0SSuyog Pawar         ps_row_tu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_tus_in_ctb);
1822*c83a76b0SSuyog Pawar         ps_row_pu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
1823*c83a76b0SSuyog Pawar         pu1_row_pu_map += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
1824*c83a76b0SSuyog Pawar         pu1_row_ecd_data +=
1825*c83a76b0SSuyog Pawar             (ps_tile_params->i4_first_ctb_x *
1826*c83a76b0SSuyog Pawar              ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_num_tus_in_ctb << 1)
1827*c83a76b0SSuyog Pawar                                 : ((ps_frm_ctb_prms->i4_num_tus_in_ctb * 3) >> 1)) *
1828*c83a76b0SSuyog Pawar              MAX_SCAN_COEFFS_BYTES_4x4);
1829*c83a76b0SSuyog Pawar 
1830*c83a76b0SSuyog Pawar         /* Update the pointers to the tile start */
1831*c83a76b0SSuyog Pawar         s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
1832*c83a76b0SSuyog Pawar             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one vertical edge per 8x8 block
1833*c83a76b0SSuyog Pawar         s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
1834*c83a76b0SSuyog Pawar             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one horizontal edge per 8x8 block
1835*c83a76b0SSuyog Pawar         s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
1836*c83a76b0SSuyog Pawar 
1837*c83a76b0SSuyog Pawar         num_ctbs_horz_tile = ps_tile_params->i4_curr_tile_wd_in_ctb_unit;
1838*c83a76b0SSuyog Pawar 
1839*c83a76b0SSuyog Pawar         ctb_start = ps_tile_params->i4_first_ctb_x;
1840*c83a76b0SSuyog Pawar         ctb_end = ps_tile_params->i4_first_ctb_x + num_ctbs_horz_tile;
1841*c83a76b0SSuyog Pawar     }
1842*c83a76b0SSuyog Pawar     ps_ctb_out_dblk = ps_ctb_out;
1843*c83a76b0SSuyog Pawar 
1844*c83a76b0SSuyog Pawar     ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_frame_qp;
1845*c83a76b0SSuyog Pawar 
1846*c83a76b0SSuyog Pawar     /* --------- Loop over all the CTBs in a row --------------- */
1847*c83a76b0SSuyog Pawar     for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
1848*c83a76b0SSuyog Pawar     {
1849*c83a76b0SSuyog Pawar         cu_final_update_prms s_cu_update_prms;
1850*c83a76b0SSuyog Pawar 
1851*c83a76b0SSuyog Pawar         cur_ctb_cu_tree_t *ps_cu_tree_analyse;
1852*c83a76b0SSuyog Pawar         me_ctb_data_t *ps_cu_me_data;
1853*c83a76b0SSuyog Pawar         ipe_l0_ctb_analyse_for_me_t *ps_ctb_ipe_analyse;
1854*c83a76b0SSuyog Pawar         cu_enc_loop_out_t *ps_cu_final;
1855*c83a76b0SSuyog Pawar         pu_col_mv_t *ps_ctb_col_pu;
1856*c83a76b0SSuyog Pawar 
1857*c83a76b0SSuyog Pawar         WORD32 cur_ctb_ht, cur_ctb_wd;
1858*c83a76b0SSuyog Pawar         WORD32 last_cu_pos_in_ctb;
1859*c83a76b0SSuyog Pawar         WORD32 last_cu_size;
1860*c83a76b0SSuyog Pawar         WORD32 num_pus_in_ctb;
1861*c83a76b0SSuyog Pawar         UWORD8 u1_is_ctb_noisy;
1862*c83a76b0SSuyog Pawar         ps_ctb_col_pu = ps_row_col_pu + ctb_ctr * ps_frm_ctb_prms->i4_num_pus_in_ctb;
1863*c83a76b0SSuyog Pawar 
1864*c83a76b0SSuyog Pawar         if(ctb_ctr)
1865*c83a76b0SSuyog Pawar         {
1866*c83a76b0SSuyog Pawar             ps_ctxt->i4_prev_QP = ps_ctxt->i4_last_cu_qp_from_prev_ctb;
1867*c83a76b0SSuyog Pawar         }
1868*c83a76b0SSuyog Pawar         /* If sub pic rc is enabled */
1869*c83a76b0SSuyog Pawar         if(ps_ctxt->i4_sub_pic_level_rc)
1870*c83a76b0SSuyog Pawar         {
1871*c83a76b0SSuyog Pawar             ihevce_sub_pic_rc_scale_query((void *)ps_multi_thrd_ctxt, (void *)ps_ctxt);
1872*c83a76b0SSuyog Pawar         }
1873*c83a76b0SSuyog Pawar         /* check if the current CTB is the last CTB column of the picture */
1874*c83a76b0SSuyog Pawar         last_ctb_col_flag = (ctb_ctr == (num_ctbs_horz_pic - 1));
1875*c83a76b0SSuyog Pawar         if(1 == last_ctb_col_flag)
1876*c83a76b0SSuyog Pawar         {
1877*c83a76b0SSuyog Pawar             cur_ctb_wd = last_hz_ctb_wd;
1878*c83a76b0SSuyog Pawar         }
1879*c83a76b0SSuyog Pawar         else
1880*c83a76b0SSuyog Pawar         {
1881*c83a76b0SSuyog Pawar             cur_ctb_wd = ctb_size;
1882*c83a76b0SSuyog Pawar         }
1883*c83a76b0SSuyog Pawar 
1884*c83a76b0SSuyog Pawar         /* If it's the last CTB, get the actual ht of CTB */
1885*c83a76b0SSuyog Pawar         if(1 == last_ctb_row_flag)
1886*c83a76b0SSuyog Pawar         {
1887*c83a76b0SSuyog Pawar             cur_ctb_ht = last_vt_ctb_ht;
1888*c83a76b0SSuyog Pawar         }
1889*c83a76b0SSuyog Pawar         else
1890*c83a76b0SSuyog Pawar         {
1891*c83a76b0SSuyog Pawar             cur_ctb_ht = ctb_size;
1892*c83a76b0SSuyog Pawar         }
1893*c83a76b0SSuyog Pawar 
1894*c83a76b0SSuyog Pawar         ps_ctxt->u4_cur_ctb_ht = cur_ctb_ht;
1895*c83a76b0SSuyog Pawar         ps_ctxt->u4_cur_ctb_wd = cur_ctb_wd;
1896*c83a76b0SSuyog Pawar 
1897*c83a76b0SSuyog Pawar         /* Wait till reference frame recon is available */
1898*c83a76b0SSuyog Pawar 
1899*c83a76b0SSuyog Pawar         /* ------------ Wait till current data is ready from ME -------------- */
1900*c83a76b0SSuyog Pawar 
1901*c83a76b0SSuyog Pawar         /*only for ref instance and Non I pics */
1902*c83a76b0SSuyog Pawar         if((ps_ctxt->i4_bitrate_instance_num == 0) &&
1903*c83a76b0SSuyog Pawar            ((ISLICE != ps_ctxt->i1_slice_type) || L0ME_IN_OPENLOOP_MODE))
1904*c83a76b0SSuyog Pawar         {
1905*c83a76b0SSuyog Pawar             if(ctb_ctr < (num_ctbs_horz_pic))
1906*c83a76b0SSuyog Pawar             {
1907*c83a76b0SSuyog Pawar                 ihevce_dmgr_chk_row_row_sync(
1908*c83a76b0SSuyog Pawar                     pv_dep_mngr_encloop_dep_me,
1909*c83a76b0SSuyog Pawar                     ctb_ctr,
1910*c83a76b0SSuyog Pawar                     1,
1911*c83a76b0SSuyog Pawar                     vert_ctr,
1912*c83a76b0SSuyog Pawar                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1913*c83a76b0SSuyog Pawar                     ps_ctxt->thrd_id);
1914*c83a76b0SSuyog Pawar             }
1915*c83a76b0SSuyog Pawar         }
1916*c83a76b0SSuyog Pawar 
1917*c83a76b0SSuyog Pawar         /* store the cu pointer for current ctb out */
1918*c83a76b0SSuyog Pawar         ps_ctb_out->ps_enc_cu = ps_row_cu;
1919*c83a76b0SSuyog Pawar         ps_cu_final = ps_row_cu;
1920*c83a76b0SSuyog Pawar 
1921*c83a76b0SSuyog Pawar         /* Get the base point of CU recursion tree */
1922*c83a76b0SSuyog Pawar         if(ISLICE != ps_ctxt->i1_slice_type)
1923*c83a76b0SSuyog Pawar         {
1924*c83a76b0SSuyog Pawar             ps_cu_tree_analyse = ps_ctb_in->ps_cu_tree;
1925*c83a76b0SSuyog Pawar             ASSERT(ps_ctb_in->ps_cu_tree == (ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE)));
1926*c83a76b0SSuyog Pawar         }
1927*c83a76b0SSuyog Pawar         else
1928*c83a76b0SSuyog Pawar         {
1929*c83a76b0SSuyog Pawar             /* Initialize ptr to current CTB */
1930*c83a76b0SSuyog Pawar             ps_cu_tree_analyse = ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE);
1931*c83a76b0SSuyog Pawar         }
1932*c83a76b0SSuyog Pawar 
1933*c83a76b0SSuyog Pawar         /* Get the ME data pointer for 16x16 block data in ctb */
1934*c83a76b0SSuyog Pawar         ps_cu_me_data = ps_ctb_in->ps_me_ctb_data;
1935*c83a76b0SSuyog Pawar         u1_is_ctb_noisy = ps_ctb_in->s_ctb_noise_params.i4_noise_present;
1936*c83a76b0SSuyog Pawar         s_cu_prms.u1_is_cu_noisy = u1_is_ctb_noisy;
1937*c83a76b0SSuyog Pawar         s_cu_prms.pu1_is_8x8Blk_noisy = ps_ctb_in->s_ctb_noise_params.au1_is_8x8Blk_noisy;
1938*c83a76b0SSuyog Pawar 
1939*c83a76b0SSuyog Pawar         /* store the ctb level prms in cu prms */
1940*c83a76b0SSuyog Pawar         s_cu_prms.i4_ctb_pos = ctb_ctr;
1941*c83a76b0SSuyog Pawar 
1942*c83a76b0SSuyog Pawar         s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
1943*c83a76b0SSuyog Pawar         s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
1944*c83a76b0SSuyog Pawar 
1945*c83a76b0SSuyog Pawar         {
1946*c83a76b0SSuyog Pawar             s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
1947*c83a76b0SSuyog Pawar             s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
1948*c83a76b0SSuyog Pawar         }
1949*c83a76b0SSuyog Pawar 
1950*c83a76b0SSuyog Pawar         s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
1951*c83a76b0SSuyog Pawar 
1952*c83a76b0SSuyog Pawar         s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
1953*c83a76b0SSuyog Pawar 
1954*c83a76b0SSuyog Pawar         s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
1955*c83a76b0SSuyog Pawar 
1956*c83a76b0SSuyog Pawar         /* Initialize ptr to current CTB */
1957*c83a76b0SSuyog Pawar         ps_ctb_ipe_analyse = ps_row_ipe_analyse + ctb_ctr;  // * ctb_size;
1958*c83a76b0SSuyog Pawar 
1959*c83a76b0SSuyog Pawar         /* reset the map idx for current ctb */
1960*c83a76b0SSuyog Pawar         col_pu_map_idx = 0;
1961*c83a76b0SSuyog Pawar         num_pus_in_ctb = 0;
1962*c83a76b0SSuyog Pawar 
1963*c83a76b0SSuyog Pawar         /* reset the map buffer to 0*/
1964*c83a76b0SSuyog Pawar 
1965*c83a76b0SSuyog Pawar         memset(
1966*c83a76b0SSuyog Pawar             &ps_ctxt->au1_nbr_ctb_map[0][0],
1967*c83a76b0SSuyog Pawar             0,
1968*c83a76b0SSuyog Pawar             (MAX_PU_IN_CTB_ROW + 1 + 8) * (MAX_PU_IN_CTB_ROW + 1 + 8));
1969*c83a76b0SSuyog Pawar 
1970*c83a76b0SSuyog Pawar         /* set the CTB neighbour availability flags */
1971*c83a76b0SSuyog Pawar         ihevce_set_ctb_nbr(
1972*c83a76b0SSuyog Pawar             &ps_ctb_out->s_ctb_nbr_avail_flags,
1973*c83a76b0SSuyog Pawar             ps_ctxt->pu1_ctb_nbr_map,
1974*c83a76b0SSuyog Pawar             ps_ctxt->i4_nbr_map_strd,
1975*c83a76b0SSuyog Pawar             ctb_ctr,
1976*c83a76b0SSuyog Pawar             vert_ctr,
1977*c83a76b0SSuyog Pawar             ps_frm_ctb_prms);
1978*c83a76b0SSuyog Pawar 
1979*c83a76b0SSuyog Pawar         /* -------- update the cur CTB offsets for inter prediction-------- */
1980*c83a76b0SSuyog Pawar         ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = ctb_ctr * ctb_size;
1981*c83a76b0SSuyog Pawar         ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = vert_ctr * ctb_size;
1982*c83a76b0SSuyog Pawar 
1983*c83a76b0SSuyog Pawar         /* -------- update the cur CTB offsets for MV prediction-------- */
1984*c83a76b0SSuyog Pawar         ps_ctxt->s_mv_pred_ctxt.i4_ctb_x = ctb_ctr;
1985*c83a76b0SSuyog Pawar         ps_ctxt->s_mv_pred_ctxt.i4_ctb_y = vert_ctr;
1986*c83a76b0SSuyog Pawar 
1987*c83a76b0SSuyog Pawar         /* -------------- Boundary Strength Initialization ----------- */
1988*c83a76b0SSuyog Pawar         if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
1989*c83a76b0SSuyog Pawar         {
1990*c83a76b0SSuyog Pawar             ihevce_bs_init_ctb(&ps_ctxt->s_deblk_bs_prms, ps_frm_ctb_prms, ctb_ctr, vert_ctr);
1991*c83a76b0SSuyog Pawar         }
1992*c83a76b0SSuyog Pawar 
1993*c83a76b0SSuyog Pawar         /* -------- update cur CTB offsets for entropy rdopt context------- */
1994*c83a76b0SSuyog Pawar         ihevce_entropy_rdo_ctb_init(&ps_ctxt->s_rdopt_entropy_ctxt, ctb_ctr, vert_ctr);
1995*c83a76b0SSuyog Pawar 
1996*c83a76b0SSuyog Pawar         /* --------- CU Recursion --------------- */
1997*c83a76b0SSuyog Pawar 
1998*c83a76b0SSuyog Pawar         {
1999*c83a76b0SSuyog Pawar #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2000*c83a76b0SSuyog Pawar             WORD32 i4_max_tree_depth = 4;
2001*c83a76b0SSuyog Pawar #endif
2002*c83a76b0SSuyog Pawar             WORD32 i4_tree_depth = 0;
2003*c83a76b0SSuyog Pawar             /* Init no. of CU in CTB to 0*/
2004*c83a76b0SSuyog Pawar             ps_ctb_out->u1_num_cus_in_ctb = 0;
2005*c83a76b0SSuyog Pawar 
2006*c83a76b0SSuyog Pawar #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2007*c83a76b0SSuyog Pawar             if(ps_ctxt->i4_bitrate_instance_num == 0)
2008*c83a76b0SSuyog Pawar             {
2009*c83a76b0SSuyog Pawar                 WORD32 i4_max_tree_depth = 4;
2010*c83a76b0SSuyog Pawar                 WORD32 i;
2011*c83a76b0SSuyog Pawar                 for(i = 0; i < i4_max_tree_depth; i++)
2012*c83a76b0SSuyog Pawar                 {
2013*c83a76b0SSuyog Pawar                     COPY_CABAC_STATES(
2014*c83a76b0SSuyog Pawar                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2015*c83a76b0SSuyog Pawar                         &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2016*c83a76b0SSuyog Pawar                         IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2017*c83a76b0SSuyog Pawar                 }
2018*c83a76b0SSuyog Pawar             }
2019*c83a76b0SSuyog Pawar #else
2020*c83a76b0SSuyog Pawar             if(ps_ctxt->i4_bitrate_instance_num == 0)
2021*c83a76b0SSuyog Pawar             {
2022*c83a76b0SSuyog Pawar                 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2023*c83a76b0SSuyog Pawar                 {
2024*c83a76b0SSuyog Pawar                     WORD32 i4_max_tree_depth = 4;
2025*c83a76b0SSuyog Pawar                     WORD32 i;
2026*c83a76b0SSuyog Pawar                     for(i = 0; i < i4_max_tree_depth; i++)
2027*c83a76b0SSuyog Pawar                     {
2028*c83a76b0SSuyog Pawar                         COPY_CABAC_STATES(
2029*c83a76b0SSuyog Pawar                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2030*c83a76b0SSuyog Pawar                             &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2031*c83a76b0SSuyog Pawar                             IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2032*c83a76b0SSuyog Pawar                     }
2033*c83a76b0SSuyog Pawar                 }
2034*c83a76b0SSuyog Pawar             }
2035*c83a76b0SSuyog Pawar 
2036*c83a76b0SSuyog Pawar #endif
2037*c83a76b0SSuyog Pawar             if(ps_ctxt->i4_bitrate_instance_num == 0)
2038*c83a76b0SSuyog Pawar             {
2039*c83a76b0SSuyog Pawar                 /* FOR I- PIC populate the curr_ctb accordingly */
2040*c83a76b0SSuyog Pawar                 if(ISLICE == ps_ctxt->i1_slice_type)
2041*c83a76b0SSuyog Pawar                 {
2042*c83a76b0SSuyog Pawar                     ps_ctb_ipe_analyse->ps_cu_tree_root = ps_cu_tree_analyse;
2043*c83a76b0SSuyog Pawar                     ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
2044*c83a76b0SSuyog Pawar 
2045*c83a76b0SSuyog Pawar                     ihevce_populate_cu_tree(
2046*c83a76b0SSuyog Pawar                         ps_ctb_ipe_analyse,
2047*c83a76b0SSuyog Pawar                         ps_cu_tree_analyse,
2048*c83a76b0SSuyog Pawar                         0,
2049*c83a76b0SSuyog Pawar                         (IHEVCE_QUALITY_CONFIG_T)ps_ctxt->i4_quality_preset,
2050*c83a76b0SSuyog Pawar                         POS_NA,
2051*c83a76b0SSuyog Pawar                         POS_NA,
2052*c83a76b0SSuyog Pawar                         POS_NA);
2053*c83a76b0SSuyog Pawar                 }
2054*c83a76b0SSuyog Pawar             }
2055*c83a76b0SSuyog Pawar             ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
2056*c83a76b0SSuyog Pawar             ps_ctxt->ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2057*c83a76b0SSuyog Pawar             ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2058*c83a76b0SSuyog Pawar 
2059*c83a76b0SSuyog Pawar             s_cu_update_prms.ppu1_row_ecd_data = &pu1_row_ecd_data;
2060*c83a76b0SSuyog Pawar             s_cu_update_prms.pi4_last_cu_pos_in_ctb = &last_cu_pos_in_ctb;
2061*c83a76b0SSuyog Pawar             s_cu_update_prms.pi4_last_cu_size = &last_cu_size;
2062*c83a76b0SSuyog Pawar             s_cu_update_prms.pi4_num_pus_in_ctb = &num_pus_in_ctb;
2063*c83a76b0SSuyog Pawar             s_cu_update_prms.pps_cu_final = &ps_cu_final;
2064*c83a76b0SSuyog Pawar             s_cu_update_prms.pps_row_pu = &ps_row_pu;
2065*c83a76b0SSuyog Pawar             s_cu_update_prms.pps_row_tu = &ps_row_tu;
2066*c83a76b0SSuyog Pawar             s_cu_update_prms.pu1_num_cus_in_ctb_out = &ps_ctb_out->u1_num_cus_in_ctb;
2067*c83a76b0SSuyog Pawar 
2068*c83a76b0SSuyog Pawar             // source satd computation
2069*c83a76b0SSuyog Pawar             /* compute the source 8x8 SATD for the current CTB */
2070*c83a76b0SSuyog Pawar             /* populate  pui4_source_satd in some structure and pass it inside */
2071*c83a76b0SSuyog Pawar             if(ps_ctxt->u1_enable_psyRDOPT)
2072*c83a76b0SSuyog Pawar             {
2073*c83a76b0SSuyog Pawar                 /* declare local variables */
2074*c83a76b0SSuyog Pawar                 WORD32 i;
2075*c83a76b0SSuyog Pawar                 WORD32 ctb_size;
2076*c83a76b0SSuyog Pawar                 WORD32 num_comp_had_blocks;
2077*c83a76b0SSuyog Pawar                 UWORD8 *pu1_l0_block;
2078*c83a76b0SSuyog Pawar                 WORD32 block_ht;
2079*c83a76b0SSuyog Pawar                 WORD32 block_wd;
2080*c83a76b0SSuyog Pawar                 WORD32 ht_offset;
2081*c83a76b0SSuyog Pawar                 WORD32 wd_offset;
2082*c83a76b0SSuyog Pawar 
2083*c83a76b0SSuyog Pawar                 WORD32 num_horz_blocks;
2084*c83a76b0SSuyog Pawar                 WORD32 had_block_size;
2085*c83a76b0SSuyog Pawar                 WORD32 total_had_block_size;
2086*c83a76b0SSuyog Pawar                 WORD16 pi2_residue_had_zscan[64];
2087*c83a76b0SSuyog Pawar                 UWORD8 ai1_zeros_buffer[64];
2088*c83a76b0SSuyog Pawar 
2089*c83a76b0SSuyog Pawar                 WORD32 index_satd;
2090*c83a76b0SSuyog Pawar                 WORD32 is_hbd;
2091*c83a76b0SSuyog Pawar                 /* initialize the variables */
2092*c83a76b0SSuyog Pawar                 block_ht = cur_ctb_ht;
2093*c83a76b0SSuyog Pawar                 block_wd = cur_ctb_wd;
2094*c83a76b0SSuyog Pawar 
2095*c83a76b0SSuyog Pawar                 is_hbd = ps_ctxt->u1_is_input_data_hbd;
2096*c83a76b0SSuyog Pawar 
2097*c83a76b0SSuyog Pawar                 had_block_size = 8;
2098*c83a76b0SSuyog Pawar                 total_had_block_size = had_block_size * had_block_size;
2099*c83a76b0SSuyog Pawar 
2100*c83a76b0SSuyog Pawar                 for(i = 0; i < total_had_block_size; i++)
2101*c83a76b0SSuyog Pawar                 {
2102*c83a76b0SSuyog Pawar                     ai1_zeros_buffer[i] = 0;
2103*c83a76b0SSuyog Pawar                 }
2104*c83a76b0SSuyog Pawar 
2105*c83a76b0SSuyog Pawar                 ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
2106*c83a76b0SSuyog Pawar                 num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
2107*c83a76b0SSuyog Pawar 
2108*c83a76b0SSuyog Pawar                 num_horz_blocks = block_wd / had_block_size;  //ctb_width / had_block_size;
2109*c83a76b0SSuyog Pawar                 ht_offset = -had_block_size;
2110*c83a76b0SSuyog Pawar                 wd_offset = -had_block_size;
2111*c83a76b0SSuyog Pawar 
2112*c83a76b0SSuyog Pawar                 index_satd = 0;
2113*c83a76b0SSuyog Pawar                 /*Loop over all 8x8 blocks in the CTB*/
2114*c83a76b0SSuyog Pawar                 for(i = 0; i < num_comp_had_blocks; i++)
2115*c83a76b0SSuyog Pawar                 {
2116*c83a76b0SSuyog Pawar                     if(i % num_horz_blocks == 0)
2117*c83a76b0SSuyog Pawar                     {
2118*c83a76b0SSuyog Pawar                         wd_offset = -had_block_size;
2119*c83a76b0SSuyog Pawar                         ht_offset += had_block_size;
2120*c83a76b0SSuyog Pawar                     }
2121*c83a76b0SSuyog Pawar                     wd_offset += had_block_size;
2122*c83a76b0SSuyog Pawar 
2123*c83a76b0SSuyog Pawar                     if(!is_hbd)
2124*c83a76b0SSuyog Pawar                     {
2125*c83a76b0SSuyog Pawar                         /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
2126*c83a76b0SSuyog Pawar                         pu1_l0_block = s_cu_prms.pu1_luma_src +
2127*c83a76b0SSuyog Pawar                                        ps_curr_src_bufs->i4_y_strd * ht_offset + wd_offset;
2128*c83a76b0SSuyog Pawar 
2129*c83a76b0SSuyog Pawar                         ps_ctxt->ai4_source_satd_8x8[index_satd] =
2130*c83a76b0SSuyog Pawar 
2131*c83a76b0SSuyog Pawar                             ps_ctxt->s_cmn_opt_func.pf_AC_HAD_8x8_8bit(
2132*c83a76b0SSuyog Pawar                                 pu1_l0_block,
2133*c83a76b0SSuyog Pawar                                 ps_curr_src_bufs->i4_y_strd,
2134*c83a76b0SSuyog Pawar                                 ai1_zeros_buffer,
2135*c83a76b0SSuyog Pawar                                 had_block_size,
2136*c83a76b0SSuyog Pawar                                 pi2_residue_had_zscan,
2137*c83a76b0SSuyog Pawar                                 had_block_size);
2138*c83a76b0SSuyog Pawar                     }
2139*c83a76b0SSuyog Pawar                     index_satd++;
2140*c83a76b0SSuyog Pawar                 }
2141*c83a76b0SSuyog Pawar             }
2142*c83a76b0SSuyog Pawar 
2143*c83a76b0SSuyog Pawar             if(ps_ctxt->u1_enable_psyRDOPT)
2144*c83a76b0SSuyog Pawar             {
2145*c83a76b0SSuyog Pawar                 /* declare local variables */
2146*c83a76b0SSuyog Pawar                 WORD32 i;
2147*c83a76b0SSuyog Pawar                 WORD32 ctb_size;
2148*c83a76b0SSuyog Pawar                 WORD32 num_comp_had_blocks;
2149*c83a76b0SSuyog Pawar                 UWORD8 *pu1_l0_block;
2150*c83a76b0SSuyog Pawar                 UWORD8 *pu1_l0_block_prev = NULL;
2151*c83a76b0SSuyog Pawar                 WORD32 block_ht;
2152*c83a76b0SSuyog Pawar                 WORD32 block_wd;
2153*c83a76b0SSuyog Pawar                 WORD32 ht_offset;
2154*c83a76b0SSuyog Pawar                 WORD32 wd_offset;
2155*c83a76b0SSuyog Pawar 
2156*c83a76b0SSuyog Pawar                 WORD32 num_horz_blocks;
2157*c83a76b0SSuyog Pawar                 WORD32 had_block_size;
2158*c83a76b0SSuyog Pawar                 WORD16 pi2_residue_had[64];
2159*c83a76b0SSuyog Pawar                 UWORD8 ai1_zeros_buffer[64];
2160*c83a76b0SSuyog Pawar                 WORD32 index_satd = 0;
2161*c83a76b0SSuyog Pawar 
2162*c83a76b0SSuyog Pawar                 WORD32 is_hbd;
2163*c83a76b0SSuyog Pawar                 is_hbd = ps_ctxt->u1_is_input_data_hbd;  // 8 bit
2164*c83a76b0SSuyog Pawar 
2165*c83a76b0SSuyog Pawar                 /* initialize the variables */
2166*c83a76b0SSuyog Pawar                 /* change this based on the bit depth */
2167*c83a76b0SSuyog Pawar                 // ps_ctxt->u1_chroma_array_type
2168*c83a76b0SSuyog Pawar                 if(ps_ctxt->u1_chroma_array_type == 1)
2169*c83a76b0SSuyog Pawar                 {
2170*c83a76b0SSuyog Pawar                     block_ht = cur_ctb_ht / 2;
2171*c83a76b0SSuyog Pawar                     block_wd = cur_ctb_wd / 2;
2172*c83a76b0SSuyog Pawar                 }
2173*c83a76b0SSuyog Pawar                 else
2174*c83a76b0SSuyog Pawar                 {
2175*c83a76b0SSuyog Pawar                     block_ht = cur_ctb_ht;
2176*c83a76b0SSuyog Pawar                     block_wd = cur_ctb_wd / 2;
2177*c83a76b0SSuyog Pawar                 }
2178*c83a76b0SSuyog Pawar 
2179*c83a76b0SSuyog Pawar                 had_block_size = 4;
2180*c83a76b0SSuyog Pawar                 memset(ai1_zeros_buffer, 0, 64 * sizeof(UWORD8));
2181*c83a76b0SSuyog Pawar 
2182*c83a76b0SSuyog Pawar                 ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
2183*c83a76b0SSuyog Pawar                 num_comp_had_blocks = 2 * ctb_size / (had_block_size * had_block_size);
2184*c83a76b0SSuyog Pawar 
2185*c83a76b0SSuyog Pawar                 num_horz_blocks = 2 * block_wd / had_block_size;  //ctb_width / had_block_size;
2186*c83a76b0SSuyog Pawar                 ht_offset = -had_block_size;
2187*c83a76b0SSuyog Pawar                 wd_offset = -had_block_size;
2188*c83a76b0SSuyog Pawar 
2189*c83a76b0SSuyog Pawar                 if(!is_hbd)
2190*c83a76b0SSuyog Pawar                 {
2191*c83a76b0SSuyog Pawar                     /* loop over for every 4x4 blocks in the CU for Cb */
2192*c83a76b0SSuyog Pawar                     for(i = 0; i < num_comp_had_blocks; i++)
2193*c83a76b0SSuyog Pawar                     {
2194*c83a76b0SSuyog Pawar                         if(i % num_horz_blocks == 0)
2195*c83a76b0SSuyog Pawar                         {
2196*c83a76b0SSuyog Pawar                             wd_offset = -had_block_size;
2197*c83a76b0SSuyog Pawar                             ht_offset += had_block_size;
2198*c83a76b0SSuyog Pawar                         }
2199*c83a76b0SSuyog Pawar                         wd_offset += had_block_size;
2200*c83a76b0SSuyog Pawar 
2201*c83a76b0SSuyog Pawar                         /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
2202*c83a76b0SSuyog Pawar                         if(i % 2 != 0)
2203*c83a76b0SSuyog Pawar                         {
2204*c83a76b0SSuyog Pawar                             if(!is_hbd)
2205*c83a76b0SSuyog Pawar                             {
2206*c83a76b0SSuyog Pawar                                 pu1_l0_block = pu1_l0_block_prev + 1;
2207*c83a76b0SSuyog Pawar                             }
2208*c83a76b0SSuyog Pawar                         }
2209*c83a76b0SSuyog Pawar                         else
2210*c83a76b0SSuyog Pawar                         {
2211*c83a76b0SSuyog Pawar                             if(!is_hbd)
2212*c83a76b0SSuyog Pawar                             {
2213*c83a76b0SSuyog Pawar                                 pu1_l0_block = s_cu_prms.pu1_chrm_src +
2214*c83a76b0SSuyog Pawar                                                s_cu_prms.i4_chrm_src_stride * ht_offset + wd_offset;
2215*c83a76b0SSuyog Pawar                                 pu1_l0_block_prev = pu1_l0_block;
2216*c83a76b0SSuyog Pawar                             }
2217*c83a76b0SSuyog Pawar                         }
2218*c83a76b0SSuyog Pawar 
2219*c83a76b0SSuyog Pawar                         if(had_block_size == 4)
2220*c83a76b0SSuyog Pawar                         {
2221*c83a76b0SSuyog Pawar                             if(!is_hbd)
2222*c83a76b0SSuyog Pawar                             {
2223*c83a76b0SSuyog Pawar                                 ps_ctxt->ai4_source_chroma_satd[index_satd] =
2224*c83a76b0SSuyog Pawar                                     ps_ctxt->s_cmn_opt_func.pf_chroma_AC_HAD_4x4_8bit(
2225*c83a76b0SSuyog Pawar                                         pu1_l0_block,
2226*c83a76b0SSuyog Pawar                                         s_cu_prms.i4_chrm_src_stride,
2227*c83a76b0SSuyog Pawar                                         ai1_zeros_buffer,
2228*c83a76b0SSuyog Pawar                                         had_block_size,
2229*c83a76b0SSuyog Pawar                                         pi2_residue_had,
2230*c83a76b0SSuyog Pawar                                         had_block_size);
2231*c83a76b0SSuyog Pawar                             }
2232*c83a76b0SSuyog Pawar 
2233*c83a76b0SSuyog Pawar                             index_satd++;
2234*c83a76b0SSuyog Pawar 
2235*c83a76b0SSuyog Pawar                         }  // block size of 4x4
2236*c83a76b0SSuyog Pawar 
2237*c83a76b0SSuyog Pawar                     }  // for all blocks
2238*c83a76b0SSuyog Pawar 
2239*c83a76b0SSuyog Pawar                 }  // is hbd check
2240*c83a76b0SSuyog Pawar             }
2241*c83a76b0SSuyog Pawar 
2242*c83a76b0SSuyog Pawar             ihevce_cu_recurse_decide(
2243*c83a76b0SSuyog Pawar                 ps_ctxt,
2244*c83a76b0SSuyog Pawar                 &s_cu_prms,
2245*c83a76b0SSuyog Pawar                 ps_cu_tree_analyse,
2246*c83a76b0SSuyog Pawar                 ps_cu_tree_analyse,
2247*c83a76b0SSuyog Pawar                 ps_ctb_ipe_analyse,
2248*c83a76b0SSuyog Pawar                 ps_cu_me_data,
2249*c83a76b0SSuyog Pawar                 &ps_ctb_col_pu,
2250*c83a76b0SSuyog Pawar                 &s_cu_update_prms,
2251*c83a76b0SSuyog Pawar                 pu1_row_pu_map,
2252*c83a76b0SSuyog Pawar                 &col_pu_map_idx,
2253*c83a76b0SSuyog Pawar                 i4_tree_depth,
2254*c83a76b0SSuyog Pawar                 ctb_ctr << 6,
2255*c83a76b0SSuyog Pawar                 vert_ctr << 6,
2256*c83a76b0SSuyog Pawar                 cur_ctb_ht);
2257*c83a76b0SSuyog Pawar 
2258*c83a76b0SSuyog Pawar             if(ps_ctxt->i1_slice_type != ISLICE)
2259*c83a76b0SSuyog Pawar             {
2260*c83a76b0SSuyog Pawar                 ASSERT(
2261*c83a76b0SSuyog Pawar                     (cur_ctb_wd * cur_ctb_ht) <=
2262*c83a76b0SSuyog Pawar                     ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree_analyse));
2263*c83a76b0SSuyog Pawar             }
2264*c83a76b0SSuyog Pawar             /*If Sub pic rc is enabled*/
2265*c83a76b0SSuyog Pawar             if(1 == ps_ctxt->i4_sub_pic_level_rc)
2266*c83a76b0SSuyog Pawar             {
2267*c83a76b0SSuyog Pawar                 /*In a row, after the required CTB is reached, send data and query scale from Bit Control thread */
2268*c83a76b0SSuyog Pawar                 ihevce_sub_pic_rc_in_data(
2269*c83a76b0SSuyog Pawar                     (void *)ps_multi_thrd_ctxt,
2270*c83a76b0SSuyog Pawar                     (void *)ps_ctxt,
2271*c83a76b0SSuyog Pawar                     (void *)ps_ctb_ipe_analyse,
2272*c83a76b0SSuyog Pawar                     (void *)ps_frm_ctb_prms);
2273*c83a76b0SSuyog Pawar             }
2274*c83a76b0SSuyog Pawar 
2275*c83a76b0SSuyog Pawar             ps_ctxt->ps_enc_out_ctxt->u1_cu_size = 128;
2276*c83a76b0SSuyog Pawar 
2277*c83a76b0SSuyog Pawar         } /* End of CU recursion block */
2278*c83a76b0SSuyog Pawar 
2279*c83a76b0SSuyog Pawar #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2280*c83a76b0SSuyog Pawar         {
2281*c83a76b0SSuyog Pawar             ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2282*c83a76b0SSuyog Pawar             enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
2283*c83a76b0SSuyog Pawar             ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2284*c83a76b0SSuyog Pawar 
2285*c83a76b0SSuyog Pawar             do
2286*c83a76b0SSuyog Pawar             {
2287*c83a76b0SSuyog Pawar                 ihevce_update_final_cu_results(
2288*c83a76b0SSuyog Pawar                     ps_ctxt,
2289*c83a76b0SSuyog Pawar                     ps_enc_out_ctxt,
2290*c83a76b0SSuyog Pawar                     ps_cu_prms,
2291*c83a76b0SSuyog Pawar                     NULL, /* &ps_ctb_col_pu */
2292*c83a76b0SSuyog Pawar                     NULL, /* &col_pu_map_idx */
2293*c83a76b0SSuyog Pawar                     &s_cu_update_prms,
2294*c83a76b0SSuyog Pawar                     ctb_ctr,
2295*c83a76b0SSuyog Pawar                     vert_ctr);
2296*c83a76b0SSuyog Pawar 
2297*c83a76b0SSuyog Pawar                 ps_enc_out_ctxt++;
2298*c83a76b0SSuyog Pawar 
2299*c83a76b0SSuyog Pawar                 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
2300*c83a76b0SSuyog Pawar 
2301*c83a76b0SSuyog Pawar             } while(ps_enc_out_ctxt->u1_cu_size != 128);
2302*c83a76b0SSuyog Pawar         }
2303*c83a76b0SSuyog Pawar #else
2304*c83a76b0SSuyog Pawar         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2305*c83a76b0SSuyog Pawar         {
2306*c83a76b0SSuyog Pawar             ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2307*c83a76b0SSuyog Pawar             enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
2308*c83a76b0SSuyog Pawar             ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2309*c83a76b0SSuyog Pawar 
2310*c83a76b0SSuyog Pawar             do
2311*c83a76b0SSuyog Pawar             {
2312*c83a76b0SSuyog Pawar                 ihevce_update_final_cu_results(
2313*c83a76b0SSuyog Pawar                     ps_ctxt,
2314*c83a76b0SSuyog Pawar                     ps_enc_out_ctxt,
2315*c83a76b0SSuyog Pawar                     ps_cu_prms,
2316*c83a76b0SSuyog Pawar                     NULL, /* &ps_ctb_col_pu */
2317*c83a76b0SSuyog Pawar                     NULL, /* &col_pu_map_idx */
2318*c83a76b0SSuyog Pawar                     &s_cu_update_prms,
2319*c83a76b0SSuyog Pawar                     ctb_ctr,
2320*c83a76b0SSuyog Pawar                     vert_ctr);
2321*c83a76b0SSuyog Pawar 
2322*c83a76b0SSuyog Pawar                 ps_enc_out_ctxt++;
2323*c83a76b0SSuyog Pawar 
2324*c83a76b0SSuyog Pawar                 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
2325*c83a76b0SSuyog Pawar 
2326*c83a76b0SSuyog Pawar             } while(ps_enc_out_ctxt->u1_cu_size != 128);
2327*c83a76b0SSuyog Pawar         }
2328*c83a76b0SSuyog Pawar #endif
2329*c83a76b0SSuyog Pawar 
2330*c83a76b0SSuyog Pawar         /* --- ctb level copy of data to left buffers--*/
2331*c83a76b0SSuyog Pawar         ((pf_enc_loop_ctb_left_copy)ps_ctxt->pv_enc_loop_ctb_left_copy)(ps_ctxt, &s_cu_prms);
2332*c83a76b0SSuyog Pawar 
2333*c83a76b0SSuyog Pawar         if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2334*c83a76b0SSuyog Pawar         {
2335*c83a76b0SSuyog Pawar             /* For the Unaligned CTB, make the invalid edge boundary strength 0 */
2336*c83a76b0SSuyog Pawar             ihevce_bs_clear_invalid(
2337*c83a76b0SSuyog Pawar                 &ps_ctxt->s_deblk_bs_prms,
2338*c83a76b0SSuyog Pawar                 last_ctb_row_flag,
2339*c83a76b0SSuyog Pawar                 (ctb_ctr == (num_ctbs_horz_pic - 1)),
2340*c83a76b0SSuyog Pawar                 last_hz_ctb_wd,
2341*c83a76b0SSuyog Pawar                 last_vt_ctb_ht);
2342*c83a76b0SSuyog Pawar 
2343*c83a76b0SSuyog Pawar             /* -----------------Read boundary strengths for current CTB------------- */
2344*c83a76b0SSuyog Pawar 
2345*c83a76b0SSuyog Pawar             if((0 == ps_ctxt->i4_deblock_type) && (ps_ctxt->i4_deblk_pad_hpel_cur_pic))
2346*c83a76b0SSuyog Pawar             {
2347*c83a76b0SSuyog Pawar                 /*Storing boundary strengths of current CTB*/
2348*c83a76b0SSuyog Pawar                 UWORD32 *pu4_bs_horz = &ps_ctxt->s_deblk_bs_prms.au4_horz_bs[0];
2349*c83a76b0SSuyog Pawar                 UWORD32 *pu4_bs_vert = &ps_ctxt->s_deblk_bs_prms.au4_vert_bs[0];
2350*c83a76b0SSuyog Pawar 
2351*c83a76b0SSuyog Pawar                 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_vert, pu4_bs_vert, (ctb_size * 4) / 8);
2352*c83a76b0SSuyog Pawar                 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_horz, pu4_bs_horz, (ctb_size * 4) / 8);
2353*c83a76b0SSuyog Pawar             }
2354*c83a76b0SSuyog Pawar             //Increment for storing next CTB info
2355*c83a76b0SSuyog Pawar             s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2356*c83a76b0SSuyog Pawar                 (ctb_size >> 3);  //one vertical edge per 8x8 block
2357*c83a76b0SSuyog Pawar             s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2358*c83a76b0SSuyog Pawar                 (ctb_size >> 3);  //one horizontal edge per 8x8 block
2359*c83a76b0SSuyog Pawar         }
2360*c83a76b0SSuyog Pawar 
2361*c83a76b0SSuyog Pawar         /* -------------- ctb level updates ----------------- */
2362*c83a76b0SSuyog Pawar         ps_row_cu += ps_ctb_out->u1_num_cus_in_ctb;
2363*c83a76b0SSuyog Pawar 
2364*c83a76b0SSuyog Pawar         pu1_row_pu_map += (ctb_size >> 2) * (ctb_size >> 2);
2365*c83a76b0SSuyog Pawar 
2366*c83a76b0SSuyog Pawar         /* first ctb offset will be populated by the caller */
2367*c83a76b0SSuyog Pawar         if(0 != ctb_ctr)
2368*c83a76b0SSuyog Pawar         {
2369*c83a76b0SSuyog Pawar             pu4_pu_offsets[ctb_ctr] = pu4_pu_offsets[ctb_ctr - 1] + num_pus_in_ctb;
2370*c83a76b0SSuyog Pawar         }
2371*c83a76b0SSuyog Pawar         pu2_num_pu_map[ctb_ctr] = num_pus_in_ctb;
2372*c83a76b0SSuyog Pawar         ASSERT(ps_ctb_out->u1_num_cus_in_ctb != 0);
2373*c83a76b0SSuyog Pawar 
2374*c83a76b0SSuyog Pawar         ps_ctb_in++;
2375*c83a76b0SSuyog Pawar         ps_ctb_out++;
2376*c83a76b0SSuyog Pawar     }
2377*c83a76b0SSuyog Pawar 
2378*c83a76b0SSuyog Pawar     /* ---------- Encloop end of row updates ----------------- */
2379*c83a76b0SSuyog Pawar 
2380*c83a76b0SSuyog Pawar     /* at the end of row processing cu pixel counter is set to */
2381*c83a76b0SSuyog Pawar     /* (num ctb * ctb size) + ctb size                         */
2382*c83a76b0SSuyog Pawar     /* this is to set the dependency for right most cu of last */
2383*c83a76b0SSuyog Pawar     /* ctb's top right data dependency                         */
2384*c83a76b0SSuyog Pawar     /* this even takes care of entropy dependency for          */
2385*c83a76b0SSuyog Pawar     /* incomplete ctb as well                                  */
2386*c83a76b0SSuyog Pawar     ihevce_dmgr_set_row_row_sync(
2387*c83a76b0SSuyog Pawar         pv_dep_mngr_enc_loop_cu_top_right,
2388*c83a76b0SSuyog Pawar         (ctb_ctr * ctb_size + ctb_size),
2389*c83a76b0SSuyog Pawar         vert_ctr,
2390*c83a76b0SSuyog Pawar         ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2391*c83a76b0SSuyog Pawar 
2392*c83a76b0SSuyog Pawar     ps_ctxt->s_sao_ctxt_t.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
2393*c83a76b0SSuyog Pawar 
2394*c83a76b0SSuyog Pawar     /* Restore structure.
2395*c83a76b0SSuyog Pawar     Getting the address of stored-BS and Qp-map and other info */
2396*c83a76b0SSuyog Pawar     memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
2397*c83a76b0SSuyog Pawar     {
2398*c83a76b0SSuyog Pawar         /* Update the pointers to the tile start */
2399*c83a76b0SSuyog Pawar         s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2400*c83a76b0SSuyog Pawar             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one vertical edge per 8x8 block
2401*c83a76b0SSuyog Pawar         s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2402*c83a76b0SSuyog Pawar             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one horizontal edge per 8x8 block
2403*c83a76b0SSuyog Pawar         s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
2404*c83a76b0SSuyog Pawar     }
2405*c83a76b0SSuyog Pawar 
2406*c83a76b0SSuyog Pawar #if PROFILE_ENC_REG_DATA
2407*c83a76b0SSuyog Pawar     s_profile.u8_enc_reg_data[vert_ctr] = 0;
2408*c83a76b0SSuyog Pawar #endif
2409*c83a76b0SSuyog Pawar 
2410*c83a76b0SSuyog Pawar     /* -- Loop over all the CTBs in a row for Deblocking and Subpel gen --- */
2411*c83a76b0SSuyog Pawar     if(!ps_ctxt->u1_is_input_data_hbd)
2412*c83a76b0SSuyog Pawar     {
2413*c83a76b0SSuyog Pawar         WORD32 last_col_pic, last_col_tile;
2414*c83a76b0SSuyog Pawar 
2415*c83a76b0SSuyog Pawar         for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2416*c83a76b0SSuyog Pawar         {
2417*c83a76b0SSuyog Pawar             /* store the ctb level prms in cu prms */
2418*c83a76b0SSuyog Pawar             s_cu_prms.i4_ctb_pos = ctb_ctr;
2419*c83a76b0SSuyog Pawar             s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
2420*c83a76b0SSuyog Pawar             s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
2421*c83a76b0SSuyog Pawar 
2422*c83a76b0SSuyog Pawar             s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
2423*c83a76b0SSuyog Pawar             s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
2424*c83a76b0SSuyog Pawar             s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
2425*c83a76b0SSuyog Pawar 
2426*c83a76b0SSuyog Pawar             s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
2427*c83a76b0SSuyog Pawar 
2428*c83a76b0SSuyog Pawar             s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
2429*c83a76b0SSuyog Pawar 
2430*c83a76b0SSuyog Pawar             /* If last ctb in the horizontal row */
2431*c83a76b0SSuyog Pawar             if(ctb_ctr == (num_ctbs_horz_pic - 1))
2432*c83a76b0SSuyog Pawar             {
2433*c83a76b0SSuyog Pawar                 last_col_pic = 1;
2434*c83a76b0SSuyog Pawar             }
2435*c83a76b0SSuyog Pawar             else
2436*c83a76b0SSuyog Pawar             {
2437*c83a76b0SSuyog Pawar                 last_col_pic = 0;
2438*c83a76b0SSuyog Pawar             }
2439*c83a76b0SSuyog Pawar 
2440*c83a76b0SSuyog Pawar             /* If last ctb in the tile row */
2441*c83a76b0SSuyog Pawar             if(ctb_ctr == (ctb_end - 1))
2442*c83a76b0SSuyog Pawar             {
2443*c83a76b0SSuyog Pawar                 last_col_tile = 1;
2444*c83a76b0SSuyog Pawar             }
2445*c83a76b0SSuyog Pawar             else
2446*c83a76b0SSuyog Pawar             {
2447*c83a76b0SSuyog Pawar                 last_col_tile = 0;
2448*c83a76b0SSuyog Pawar             }
2449*c83a76b0SSuyog Pawar 
2450*c83a76b0SSuyog Pawar             if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2451*c83a76b0SSuyog Pawar             {
2452*c83a76b0SSuyog Pawar                 /* for last ctb of a row check top instead of top right */
2453*c83a76b0SSuyog Pawar                 if(((ctb_ctr + 1) == ctb_end) && (vert_ctr > 0))
2454*c83a76b0SSuyog Pawar                 {
2455*c83a76b0SSuyog Pawar                     dblk_offset = 1;
2456*c83a76b0SSuyog Pawar                 }
2457*c83a76b0SSuyog Pawar                 /* Wait till top neighbour CTB has done its deblocking*/
2458*c83a76b0SSuyog Pawar                 ihevce_dmgr_chk_row_row_sync(
2459*c83a76b0SSuyog Pawar                     pv_dep_mngr_enc_loop_dblk,
2460*c83a76b0SSuyog Pawar                     ctb_ctr,
2461*c83a76b0SSuyog Pawar                     dblk_offset,
2462*c83a76b0SSuyog Pawar                     dblk_check_dep_pos,
2463*c83a76b0SSuyog Pawar                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2464*c83a76b0SSuyog Pawar                     ps_ctxt->thrd_id);
2465*c83a76b0SSuyog Pawar 
2466*c83a76b0SSuyog Pawar                 if((0 == ps_ctxt->i4_deblock_type))
2467*c83a76b0SSuyog Pawar                 {
2468*c83a76b0SSuyog Pawar                     /* Populate Qp-map */
2469*c83a76b0SSuyog Pawar                     if(ctb_start == ctb_ctr)
2470*c83a76b0SSuyog Pawar                     {
2471*c83a76b0SSuyog Pawar                         ihevce_deblk_populate_qp_map(
2472*c83a76b0SSuyog Pawar                             ps_ctxt,
2473*c83a76b0SSuyog Pawar                             &s_deblk_ctb_row_params,
2474*c83a76b0SSuyog Pawar                             ps_ctb_out_dblk,
2475*c83a76b0SSuyog Pawar                             vert_ctr,
2476*c83a76b0SSuyog Pawar                             ps_frm_ctb_prms,
2477*c83a76b0SSuyog Pawar                             ps_tile_params);
2478*c83a76b0SSuyog Pawar                     }
2479*c83a76b0SSuyog Pawar                     ps_ctxt->s_deblk_prms.i4_ctb_size = ctb_size;
2480*c83a76b0SSuyog Pawar 
2481*c83a76b0SSuyog Pawar                     /* recon pointers and stride */
2482*c83a76b0SSuyog Pawar                     ps_ctxt->s_deblk_prms.pu1_ctb_y = s_cu_prms.pu1_luma_recon;
2483*c83a76b0SSuyog Pawar                     ps_ctxt->s_deblk_prms.pu1_ctb_uv = s_cu_prms.pu1_chrm_recon;
2484*c83a76b0SSuyog Pawar                     ps_ctxt->s_deblk_prms.i4_luma_pic_stride = s_cu_prms.i4_luma_recon_stride;
2485*c83a76b0SSuyog Pawar                     ps_ctxt->s_deblk_prms.i4_chroma_pic_stride = s_cu_prms.i4_chrm_recon_stride;
2486*c83a76b0SSuyog Pawar 
2487*c83a76b0SSuyog Pawar                     ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge = (0 == vert_ctr) ? 0 : 1;
2488*c83a76b0SSuyog Pawar                     {
2489*c83a76b0SSuyog Pawar                         ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge =
2490*c83a76b0SSuyog Pawar                             (ps_tile_params->i4_first_ctb_y == vert_ctr) ? 0 : 1;
2491*c83a76b0SSuyog Pawar                     }
2492*c83a76b0SSuyog Pawar                     ps_ctxt->s_deblk_prms.i4_deblock_left_ctb_edge = (ctb_start == ctb_ctr) ? 0 : 1;
2493*c83a76b0SSuyog Pawar                     //or according to slice boundary. Support yet to be added !!!!
2494*c83a76b0SSuyog Pawar 
2495*c83a76b0SSuyog Pawar                     ihevce_deblk_ctb(
2496*c83a76b0SSuyog Pawar                         &ps_ctxt->s_deblk_prms, last_col_tile, &s_deblk_ctb_row_params);
2497*c83a76b0SSuyog Pawar 
2498*c83a76b0SSuyog Pawar                     //Increment for storing next CTB info
2499*c83a76b0SSuyog Pawar                     s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2500*c83a76b0SSuyog Pawar                         (ctb_size >> 3);  //one vertical edge per 8x8 block
2501*c83a76b0SSuyog Pawar                     s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2502*c83a76b0SSuyog Pawar                         (ctb_size >> 3);  //one horizontal edge per 8x8 block
2503*c83a76b0SSuyog Pawar                     s_deblk_ctb_row_params.pi1_ctb_row_qp +=
2504*c83a76b0SSuyog Pawar                         (ctb_size >> 2);  //one qp per 4x4 block.
2505*c83a76b0SSuyog Pawar                 }
2506*c83a76b0SSuyog Pawar             }  // end of if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2507*c83a76b0SSuyog Pawar 
2508*c83a76b0SSuyog Pawar             /* update the number of ctbs deblocked for this row */
2509*c83a76b0SSuyog Pawar             ihevce_dmgr_set_row_row_sync(
2510*c83a76b0SSuyog Pawar                 pv_dep_mngr_enc_loop_dblk,
2511*c83a76b0SSuyog Pawar                 (ctb_ctr + 1),
2512*c83a76b0SSuyog Pawar                 vert_ctr,
2513*c83a76b0SSuyog Pawar                 ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2514*c83a76b0SSuyog Pawar 
2515*c83a76b0SSuyog Pawar         }  //end of loop over CTBs in current CTB-row
2516*c83a76b0SSuyog Pawar 
2517*c83a76b0SSuyog Pawar         /* Apply SAO over the previous CTB-row */
2518*c83a76b0SSuyog Pawar         for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2519*c83a76b0SSuyog Pawar         {
2520*c83a76b0SSuyog Pawar             if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2521*c83a76b0SSuyog Pawar                ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2522*c83a76b0SSuyog Pawar             {
2523*c83a76b0SSuyog Pawar                 sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2524*c83a76b0SSuyog Pawar 
2525*c83a76b0SSuyog Pawar                 if(vert_ctr > ps_tile_params->i4_first_ctb_y)
2526*c83a76b0SSuyog Pawar                 {
2527*c83a76b0SSuyog Pawar                     /*For last ctb check top dep only*/
2528*c83a76b0SSuyog Pawar                     if((vert_ctr > 1) && ((ctb_ctr + 1) == ctb_end))
2529*c83a76b0SSuyog Pawar                     {
2530*c83a76b0SSuyog Pawar                         sao_offset = 1;
2531*c83a76b0SSuyog Pawar                     }
2532*c83a76b0SSuyog Pawar 
2533*c83a76b0SSuyog Pawar                     ihevce_dmgr_chk_row_row_sync(
2534*c83a76b0SSuyog Pawar                         pv_dep_mngr_enc_loop_sao,
2535*c83a76b0SSuyog Pawar                         ctb_ctr,
2536*c83a76b0SSuyog Pawar                         sao_offset,
2537*c83a76b0SSuyog Pawar                         sao_check_dep_pos,
2538*c83a76b0SSuyog Pawar                         ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2539*c83a76b0SSuyog Pawar                         ps_ctxt->thrd_id);
2540*c83a76b0SSuyog Pawar 
2541*c83a76b0SSuyog Pawar                     /* Call the sao function to do sao for the current ctb*/
2542*c83a76b0SSuyog Pawar 
2543*c83a76b0SSuyog Pawar                     /* Register the curr ctb's x pos in sao context*/
2544*c83a76b0SSuyog Pawar                     ps_sao_ctxt->i4_ctb_x = ctb_ctr;
2545*c83a76b0SSuyog Pawar 
2546*c83a76b0SSuyog Pawar                     /* Register the curr ctb's y pos in sao context*/
2547*c83a76b0SSuyog Pawar                     ps_sao_ctxt->i4_ctb_y = vert_ctr - 1;
2548*c83a76b0SSuyog Pawar 
2549*c83a76b0SSuyog Pawar                     ps_ctb_out_sao = ps_sao_ctxt->ps_ctb_out +
2550*c83a76b0SSuyog Pawar                                      (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz + ctb_ctr;
2551*c83a76b0SSuyog Pawar                     ps_sao_ctxt->ps_sao = &ps_ctb_out_sao->s_sao;
2552*c83a76b0SSuyog Pawar                     ps_sao_ctxt->i4_sao_blk_wd = ctb_size;
2553*c83a76b0SSuyog Pawar                     ps_sao_ctxt->i4_sao_blk_ht = ctb_size;
2554*c83a76b0SSuyog Pawar 
2555*c83a76b0SSuyog Pawar                     ps_sao_ctxt->i4_is_last_ctb_row = 0;
2556*c83a76b0SSuyog Pawar                     ps_sao_ctxt->i4_is_last_ctb_col = 0;
2557*c83a76b0SSuyog Pawar 
2558*c83a76b0SSuyog Pawar                     if((ctb_ctr + 1) == ctb_end)
2559*c83a76b0SSuyog Pawar                     {
2560*c83a76b0SSuyog Pawar                         ps_sao_ctxt->i4_is_last_ctb_col = 1;
2561*c83a76b0SSuyog Pawar                         ps_sao_ctxt->i4_sao_blk_wd =
2562*c83a76b0SSuyog Pawar                             ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
2563*c83a76b0SSuyog Pawar                                         ps_tile_params->i4_curr_tile_width);
2564*c83a76b0SSuyog Pawar                     }
2565*c83a76b0SSuyog Pawar 
2566*c83a76b0SSuyog Pawar                     /* Calculate the recon buf pointer and stride for the current ctb */
2567*c83a76b0SSuyog Pawar                     ps_sao_ctxt->pu1_cur_luma_recon_buf =
2568*c83a76b0SSuyog Pawar                         ps_sao_ctxt->pu1_frm_luma_recon_buf +
2569*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2570*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2571*c83a76b0SSuyog Pawar 
2572*c83a76b0SSuyog Pawar                     ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2573*c83a76b0SSuyog Pawar 
2574*c83a76b0SSuyog Pawar                     ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2575*c83a76b0SSuyog Pawar                         ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2576*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2577*c83a76b0SSuyog Pawar                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2578*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2579*c83a76b0SSuyog Pawar 
2580*c83a76b0SSuyog Pawar                     ps_sao_ctxt->i4_cur_chroma_recon_stride =
2581*c83a76b0SSuyog Pawar                         ps_sao_ctxt->i4_frm_chroma_recon_stride;
2582*c83a76b0SSuyog Pawar 
2583*c83a76b0SSuyog Pawar                     ps_sao_ctxt->pu1_cur_luma_src_buf =
2584*c83a76b0SSuyog Pawar                         ps_sao_ctxt->pu1_frm_luma_src_buf +
2585*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2586*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2587*c83a76b0SSuyog Pawar 
2588*c83a76b0SSuyog Pawar                     ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2589*c83a76b0SSuyog Pawar 
2590*c83a76b0SSuyog Pawar                     ps_sao_ctxt->pu1_cur_chroma_src_buf =
2591*c83a76b0SSuyog Pawar                         ps_sao_ctxt->pu1_frm_chroma_src_buf +
2592*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2593*c83a76b0SSuyog Pawar                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2594*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2595*c83a76b0SSuyog Pawar 
2596*c83a76b0SSuyog Pawar                     ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2597*c83a76b0SSuyog Pawar 
2598*c83a76b0SSuyog Pawar                     /* Calculate the pointer to buff to store the (x,y)th sao
2599*c83a76b0SSuyog Pawar                     * for the top merge of (x,y+1)th ctb
2600*c83a76b0SSuyog Pawar                     */
2601*c83a76b0SSuyog Pawar                     ps_sao_ctxt->ps_top_ctb_sao =
2602*c83a76b0SSuyog Pawar                         &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2603*c83a76b0SSuyog Pawar                                                          [ps_sao_ctxt->i4_ctb_x +
2604*c83a76b0SSuyog Pawar                                                           (ps_sao_ctxt->i4_ctb_y) *
2605*c83a76b0SSuyog Pawar                                                               ps_frm_ctb_prms->i4_num_ctbs_horz +
2606*c83a76b0SSuyog Pawar                                                           (ps_ctxt->i4_bitrate_instance_num *
2607*c83a76b0SSuyog Pawar                                                            ps_sao_ctxt->i4_num_ctb_units)];
2608*c83a76b0SSuyog Pawar 
2609*c83a76b0SSuyog Pawar                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2610*c83a76b0SSuyog Pawar                     ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2611*c83a76b0SSuyog Pawar                         ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2612*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2613*c83a76b0SSuyog Pawar                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2614*c83a76b0SSuyog Pawar                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2615*c83a76b0SSuyog Pawar                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2616*c83a76b0SSuyog Pawar 
2617*c83a76b0SSuyog Pawar                     /* Calculate the pointer to buff to store the top chroma pixels of curr ctb */
2618*c83a76b0SSuyog Pawar                     ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2619*c83a76b0SSuyog Pawar                         ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2620*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2621*c83a76b0SSuyog Pawar                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2622*c83a76b0SSuyog Pawar                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2623*c83a76b0SSuyog Pawar                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2624*c83a76b0SSuyog Pawar 
2625*c83a76b0SSuyog Pawar                     {
2626*c83a76b0SSuyog Pawar                         UWORD32 u4_ctb_sao_bits;
2627*c83a76b0SSuyog Pawar 
2628*c83a76b0SSuyog Pawar                         ihevce_sao_analyse(
2629*c83a76b0SSuyog Pawar                             &ps_ctxt->s_sao_ctxt_t,
2630*c83a76b0SSuyog Pawar                             ps_ctb_out_sao,
2631*c83a76b0SSuyog Pawar                             &u4_ctb_sao_bits,
2632*c83a76b0SSuyog Pawar                             ps_tile_params);
2633*c83a76b0SSuyog Pawar                         ps_ctxt
2634*c83a76b0SSuyog Pawar                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2635*c83a76b0SSuyog Pawar                                                      [ps_ctxt->i4_bitrate_instance_num]
2636*c83a76b0SSuyog Pawar                             ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2637*c83a76b0SSuyog Pawar                         ps_ctxt
2638*c83a76b0SSuyog Pawar                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2639*c83a76b0SSuyog Pawar                                                      [ps_ctxt->i4_bitrate_instance_num]
2640*c83a76b0SSuyog Pawar                             ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2641*c83a76b0SSuyog Pawar                     }
2642*c83a76b0SSuyog Pawar                     /** Subpel generation not done for non-ref picture **/
2643*c83a76b0SSuyog Pawar                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2644*c83a76b0SSuyog Pawar                     {
2645*c83a76b0SSuyog Pawar                         /* Recon Padding */
2646*c83a76b0SSuyog Pawar                         ihevce_recon_padding(
2647*c83a76b0SSuyog Pawar                             ps_pad_interp_recon,
2648*c83a76b0SSuyog Pawar                             ctb_ctr,
2649*c83a76b0SSuyog Pawar                             vert_ctr - 1,
2650*c83a76b0SSuyog Pawar                             ps_frm_ctb_prms,
2651*c83a76b0SSuyog Pawar                             ps_ctxt->ps_func_selector);
2652*c83a76b0SSuyog Pawar                     }
2653*c83a76b0SSuyog Pawar                     /* update the number of SAO ctbs for this row */
2654*c83a76b0SSuyog Pawar                     ihevce_dmgr_set_row_row_sync(
2655*c83a76b0SSuyog Pawar                         pv_dep_mngr_enc_loop_sao,
2656*c83a76b0SSuyog Pawar                         ctb_ctr + 1,
2657*c83a76b0SSuyog Pawar                         vert_ctr - 1,
2658*c83a76b0SSuyog Pawar                         ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2659*c83a76b0SSuyog Pawar                 }
2660*c83a76b0SSuyog Pawar             }
2661*c83a76b0SSuyog Pawar             else  //SAO Disabled
2662*c83a76b0SSuyog Pawar             {
2663*c83a76b0SSuyog Pawar                 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2664*c83a76b0SSuyog Pawar                 {
2665*c83a76b0SSuyog Pawar                     /* Recon Padding */
2666*c83a76b0SSuyog Pawar                     ihevce_recon_padding(
2667*c83a76b0SSuyog Pawar                         ps_pad_interp_recon,
2668*c83a76b0SSuyog Pawar                         ctb_ctr,
2669*c83a76b0SSuyog Pawar                         vert_ctr,
2670*c83a76b0SSuyog Pawar                         ps_frm_ctb_prms,
2671*c83a76b0SSuyog Pawar                         ps_ctxt->ps_func_selector);
2672*c83a76b0SSuyog Pawar                 }
2673*c83a76b0SSuyog Pawar             }
2674*c83a76b0SSuyog Pawar         }  // end of SAO for loop
2675*c83a76b0SSuyog Pawar 
2676*c83a76b0SSuyog Pawar         /* Call the sao function again for the last ctb row of frame */
2677*c83a76b0SSuyog Pawar         if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2678*c83a76b0SSuyog Pawar            ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2679*c83a76b0SSuyog Pawar         {
2680*c83a76b0SSuyog Pawar             sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2681*c83a76b0SSuyog Pawar 
2682*c83a76b0SSuyog Pawar             if(vert_ctr ==
2683*c83a76b0SSuyog Pawar                (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1))
2684*c83a76b0SSuyog Pawar             {
2685*c83a76b0SSuyog Pawar                 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2686*c83a76b0SSuyog Pawar                 {
2687*c83a76b0SSuyog Pawar                     /* Register the curr ctb's x pos in sao context*/
2688*c83a76b0SSuyog Pawar                     ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr;
2689*c83a76b0SSuyog Pawar 
2690*c83a76b0SSuyog Pawar                     /* Register the curr ctb's y pos in sao context*/
2691*c83a76b0SSuyog Pawar                     ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr;
2692*c83a76b0SSuyog Pawar 
2693*c83a76b0SSuyog Pawar                     ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
2694*c83a76b0SSuyog Pawar                                      vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz + ctb_ctr;
2695*c83a76b0SSuyog Pawar 
2696*c83a76b0SSuyog Pawar                     ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
2697*c83a76b0SSuyog Pawar 
2698*c83a76b0SSuyog Pawar                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd = ps_ctxt->s_sao_ctxt_t.i4_ctb_size;
2699*c83a76b0SSuyog Pawar                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 0;
2700*c83a76b0SSuyog Pawar 
2701*c83a76b0SSuyog Pawar                     if((ctb_ctr + 1) == ctb_end)
2702*c83a76b0SSuyog Pawar                     {
2703*c83a76b0SSuyog Pawar                         ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1;
2704*c83a76b0SSuyog Pawar                         ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd =
2705*c83a76b0SSuyog Pawar                             ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
2706*c83a76b0SSuyog Pawar                                         ps_tile_params->i4_curr_tile_width);
2707*c83a76b0SSuyog Pawar                     }
2708*c83a76b0SSuyog Pawar 
2709*c83a76b0SSuyog Pawar                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht =
2710*c83a76b0SSuyog Pawar                         ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) -
2711*c83a76b0SSuyog Pawar                                     ps_tile_params->i4_curr_tile_height);
2712*c83a76b0SSuyog Pawar 
2713*c83a76b0SSuyog Pawar                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1;
2714*c83a76b0SSuyog Pawar 
2715*c83a76b0SSuyog Pawar                     /* Calculate the recon buf pointer and stride for the current ctb */
2716*c83a76b0SSuyog Pawar                     ps_sao_ctxt->pu1_cur_luma_recon_buf =
2717*c83a76b0SSuyog Pawar                         ps_sao_ctxt->pu1_frm_luma_recon_buf +
2718*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2719*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2720*c83a76b0SSuyog Pawar 
2721*c83a76b0SSuyog Pawar                     ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2722*c83a76b0SSuyog Pawar 
2723*c83a76b0SSuyog Pawar                     ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2724*c83a76b0SSuyog Pawar                         ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2725*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2726*c83a76b0SSuyog Pawar                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2727*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2728*c83a76b0SSuyog Pawar 
2729*c83a76b0SSuyog Pawar                     ps_sao_ctxt->i4_cur_chroma_recon_stride =
2730*c83a76b0SSuyog Pawar                         ps_sao_ctxt->i4_frm_chroma_recon_stride;
2731*c83a76b0SSuyog Pawar 
2732*c83a76b0SSuyog Pawar                     ps_sao_ctxt->pu1_cur_luma_src_buf =
2733*c83a76b0SSuyog Pawar                         ps_sao_ctxt->pu1_frm_luma_src_buf +
2734*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2735*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2736*c83a76b0SSuyog Pawar 
2737*c83a76b0SSuyog Pawar                     ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2738*c83a76b0SSuyog Pawar 
2739*c83a76b0SSuyog Pawar                     ps_sao_ctxt->pu1_cur_chroma_src_buf =
2740*c83a76b0SSuyog Pawar                         ps_sao_ctxt->pu1_frm_chroma_src_buf +
2741*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2742*c83a76b0SSuyog Pawar                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2743*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2744*c83a76b0SSuyog Pawar 
2745*c83a76b0SSuyog Pawar                     ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2746*c83a76b0SSuyog Pawar 
2747*c83a76b0SSuyog Pawar                     /* Calculate the pointer to buff to store the (x,y)th sao
2748*c83a76b0SSuyog Pawar                     * for the top merge of (x,y+1)th ctb
2749*c83a76b0SSuyog Pawar                     */
2750*c83a76b0SSuyog Pawar                     ps_sao_ctxt->ps_top_ctb_sao =
2751*c83a76b0SSuyog Pawar                         &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2752*c83a76b0SSuyog Pawar                                                          [ps_sao_ctxt->i4_ctb_x +
2753*c83a76b0SSuyog Pawar                                                           (ps_sao_ctxt->i4_ctb_y) *
2754*c83a76b0SSuyog Pawar                                                               ps_frm_ctb_prms->i4_num_ctbs_horz +
2755*c83a76b0SSuyog Pawar                                                           (ps_ctxt->i4_bitrate_instance_num *
2756*c83a76b0SSuyog Pawar                                                            ps_sao_ctxt->i4_num_ctb_units)];
2757*c83a76b0SSuyog Pawar 
2758*c83a76b0SSuyog Pawar                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2759*c83a76b0SSuyog Pawar                     ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2760*c83a76b0SSuyog Pawar                         ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2761*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2762*c83a76b0SSuyog Pawar                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2763*c83a76b0SSuyog Pawar                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2764*c83a76b0SSuyog Pawar                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2765*c83a76b0SSuyog Pawar 
2766*c83a76b0SSuyog Pawar                     /* Calculate the pointer to buff to store the top chroma pixels of curr ctb */
2767*c83a76b0SSuyog Pawar                     ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2768*c83a76b0SSuyog Pawar                         ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2769*c83a76b0SSuyog Pawar                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2770*c83a76b0SSuyog Pawar                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2771*c83a76b0SSuyog Pawar                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2772*c83a76b0SSuyog Pawar                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2773*c83a76b0SSuyog Pawar 
2774*c83a76b0SSuyog Pawar                     {
2775*c83a76b0SSuyog Pawar                         UWORD32 u4_ctb_sao_bits;
2776*c83a76b0SSuyog Pawar                         ihevce_sao_analyse(
2777*c83a76b0SSuyog Pawar                             &ps_ctxt->s_sao_ctxt_t,
2778*c83a76b0SSuyog Pawar                             ps_ctb_out_sao,
2779*c83a76b0SSuyog Pawar                             &u4_ctb_sao_bits,
2780*c83a76b0SSuyog Pawar                             ps_tile_params);
2781*c83a76b0SSuyog Pawar                         ps_ctxt
2782*c83a76b0SSuyog Pawar                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2783*c83a76b0SSuyog Pawar                                                      [ps_ctxt->i4_bitrate_instance_num]
2784*c83a76b0SSuyog Pawar                             ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2785*c83a76b0SSuyog Pawar                         ps_ctxt
2786*c83a76b0SSuyog Pawar                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2787*c83a76b0SSuyog Pawar                                                      [ps_ctxt->i4_bitrate_instance_num]
2788*c83a76b0SSuyog Pawar                             ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2789*c83a76b0SSuyog Pawar                     }
2790*c83a76b0SSuyog Pawar                     /** Subpel generation not done for non-ref picture **/
2791*c83a76b0SSuyog Pawar                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2792*c83a76b0SSuyog Pawar                     {
2793*c83a76b0SSuyog Pawar                         /* Recon Padding */
2794*c83a76b0SSuyog Pawar                         ihevce_recon_padding(
2795*c83a76b0SSuyog Pawar                             ps_pad_interp_recon,
2796*c83a76b0SSuyog Pawar                             ctb_ctr,
2797*c83a76b0SSuyog Pawar                             vert_ctr,
2798*c83a76b0SSuyog Pawar                             ps_frm_ctb_prms,
2799*c83a76b0SSuyog Pawar                             ps_ctxt->ps_func_selector);
2800*c83a76b0SSuyog Pawar                     }
2801*c83a76b0SSuyog Pawar                 }
2802*c83a76b0SSuyog Pawar             }  //end of loop over CTBs in current CTB-row
2803*c83a76b0SSuyog Pawar         }
2804*c83a76b0SSuyog Pawar 
2805*c83a76b0SSuyog Pawar         /* Subpel Plane Generation*/
2806*c83a76b0SSuyog Pawar         for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2807*c83a76b0SSuyog Pawar         {
2808*c83a76b0SSuyog Pawar             if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2809*c83a76b0SSuyog Pawar                ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2810*c83a76b0SSuyog Pawar             {
2811*c83a76b0SSuyog Pawar                 if(0 != vert_ctr)
2812*c83a76b0SSuyog Pawar                 {
2813*c83a76b0SSuyog Pawar                     /** Subpel generation not done for non-ref picture **/
2814*c83a76b0SSuyog Pawar                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2815*c83a76b0SSuyog Pawar                     {
2816*c83a76b0SSuyog Pawar                         /* Padding and Subpel Plane Generation */
2817*c83a76b0SSuyog Pawar                         ihevce_pad_interp_recon_ctb(
2818*c83a76b0SSuyog Pawar                             ps_pad_interp_recon,
2819*c83a76b0SSuyog Pawar                             ctb_ctr,
2820*c83a76b0SSuyog Pawar                             vert_ctr - 1,
2821*c83a76b0SSuyog Pawar                             ps_ctxt->i4_quality_preset,
2822*c83a76b0SSuyog Pawar                             ps_frm_ctb_prms,
2823*c83a76b0SSuyog Pawar                             ps_ctxt->ai2_scratch,
2824*c83a76b0SSuyog Pawar                             ps_ctxt->i4_bitrate_instance_num,
2825*c83a76b0SSuyog Pawar                             ps_ctxt->ps_func_selector);
2826*c83a76b0SSuyog Pawar                     }
2827*c83a76b0SSuyog Pawar                 }
2828*c83a76b0SSuyog Pawar             }
2829*c83a76b0SSuyog Pawar             else
2830*c83a76b0SSuyog Pawar             {  // SAO Disabled
2831*c83a76b0SSuyog Pawar                 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2832*c83a76b0SSuyog Pawar                 {
2833*c83a76b0SSuyog Pawar                     /* Padding and Subpel Plane Generation */
2834*c83a76b0SSuyog Pawar                     ihevce_pad_interp_recon_ctb(
2835*c83a76b0SSuyog Pawar                         ps_pad_interp_recon,
2836*c83a76b0SSuyog Pawar                         ctb_ctr,
2837*c83a76b0SSuyog Pawar                         vert_ctr,
2838*c83a76b0SSuyog Pawar                         ps_ctxt->i4_quality_preset,
2839*c83a76b0SSuyog Pawar                         ps_frm_ctb_prms,
2840*c83a76b0SSuyog Pawar                         ps_ctxt->ai2_scratch,
2841*c83a76b0SSuyog Pawar                         ps_ctxt->i4_bitrate_instance_num,
2842*c83a76b0SSuyog Pawar                         ps_ctxt->ps_func_selector);
2843*c83a76b0SSuyog Pawar                 }
2844*c83a76b0SSuyog Pawar             }
2845*c83a76b0SSuyog Pawar         }
2846*c83a76b0SSuyog Pawar 
2847*c83a76b0SSuyog Pawar         {
2848*c83a76b0SSuyog Pawar             if(!ps_ctxt->i4_bitrate_instance_num)
2849*c83a76b0SSuyog Pawar             {
2850*c83a76b0SSuyog Pawar                 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2851*c83a76b0SSuyog Pawar                    ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2852*c83a76b0SSuyog Pawar                 {
2853*c83a76b0SSuyog Pawar                     /* If SAO is on, then signal completion of previous CTB row */
2854*c83a76b0SSuyog Pawar                     if(0 != vert_ctr)
2855*c83a76b0SSuyog Pawar                     {
2856*c83a76b0SSuyog Pawar                         {
2857*c83a76b0SSuyog Pawar                             WORD32 post_ctb_ctr;
2858*c83a76b0SSuyog Pawar 
2859*c83a76b0SSuyog Pawar                             for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2860*c83a76b0SSuyog Pawar                             {
2861*c83a76b0SSuyog Pawar                                 ihevce_dmgr_map_set_sync(
2862*c83a76b0SSuyog Pawar                                     pv_dep_mngr_me_dep_encloop,
2863*c83a76b0SSuyog Pawar                                     post_ctb_ctr,
2864*c83a76b0SSuyog Pawar                                     (vert_ctr - 1),
2865*c83a76b0SSuyog Pawar                                     MAP_CTB_COMPLETE);
2866*c83a76b0SSuyog Pawar                             }
2867*c83a76b0SSuyog Pawar                         }
2868*c83a76b0SSuyog Pawar                     }
2869*c83a76b0SSuyog Pawar                 }
2870*c83a76b0SSuyog Pawar                 else
2871*c83a76b0SSuyog Pawar                 {
2872*c83a76b0SSuyog Pawar                     {
2873*c83a76b0SSuyog Pawar                         WORD32 post_ctb_ctr;
2874*c83a76b0SSuyog Pawar 
2875*c83a76b0SSuyog Pawar                         for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2876*c83a76b0SSuyog Pawar                         {
2877*c83a76b0SSuyog Pawar                             ihevce_dmgr_map_set_sync(
2878*c83a76b0SSuyog Pawar                                 pv_dep_mngr_me_dep_encloop,
2879*c83a76b0SSuyog Pawar                                 post_ctb_ctr,
2880*c83a76b0SSuyog Pawar                                 vert_ctr,
2881*c83a76b0SSuyog Pawar                                 MAP_CTB_COMPLETE);
2882*c83a76b0SSuyog Pawar                         }
2883*c83a76b0SSuyog Pawar                     }
2884*c83a76b0SSuyog Pawar                 }
2885*c83a76b0SSuyog Pawar             }
2886*c83a76b0SSuyog Pawar         }
2887*c83a76b0SSuyog Pawar 
2888*c83a76b0SSuyog Pawar         /*process last ctb row*/
2889*c83a76b0SSuyog Pawar         if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2890*c83a76b0SSuyog Pawar            ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2891*c83a76b0SSuyog Pawar         {
2892*c83a76b0SSuyog Pawar             sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2893*c83a76b0SSuyog Pawar 
2894*c83a76b0SSuyog Pawar             if(vert_ctr ==
2895*c83a76b0SSuyog Pawar                (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1))
2896*c83a76b0SSuyog Pawar             {
2897*c83a76b0SSuyog Pawar                 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2898*c83a76b0SSuyog Pawar                 {
2899*c83a76b0SSuyog Pawar                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2900*c83a76b0SSuyog Pawar                     {
2901*c83a76b0SSuyog Pawar                         /* Padding and Subpel Plane Generation */
2902*c83a76b0SSuyog Pawar                         ihevce_pad_interp_recon_ctb(
2903*c83a76b0SSuyog Pawar                             ps_pad_interp_recon,
2904*c83a76b0SSuyog Pawar                             ctb_ctr,
2905*c83a76b0SSuyog Pawar                             vert_ctr,
2906*c83a76b0SSuyog Pawar                             ps_ctxt->i4_quality_preset,
2907*c83a76b0SSuyog Pawar                             ps_frm_ctb_prms,
2908*c83a76b0SSuyog Pawar                             ps_ctxt->ai2_scratch,
2909*c83a76b0SSuyog Pawar                             ps_ctxt->i4_bitrate_instance_num,
2910*c83a76b0SSuyog Pawar                             ps_ctxt->ps_func_selector);
2911*c83a76b0SSuyog Pawar                     }
2912*c83a76b0SSuyog Pawar                 }
2913*c83a76b0SSuyog Pawar             }
2914*c83a76b0SSuyog Pawar             /* If SAO is on, then signal completion of the last CTB row of frame */
2915*c83a76b0SSuyog Pawar             {
2916*c83a76b0SSuyog Pawar                 if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
2917*c83a76b0SSuyog Pawar                 {
2918*c83a76b0SSuyog Pawar                     if(!ps_ctxt->i4_bitrate_instance_num)
2919*c83a76b0SSuyog Pawar                     {
2920*c83a76b0SSuyog Pawar                         {
2921*c83a76b0SSuyog Pawar                             WORD32 post_ctb_ctr;
2922*c83a76b0SSuyog Pawar 
2923*c83a76b0SSuyog Pawar                             for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2924*c83a76b0SSuyog Pawar                             {
2925*c83a76b0SSuyog Pawar                                 ihevce_dmgr_map_set_sync(
2926*c83a76b0SSuyog Pawar                                     pv_dep_mngr_me_dep_encloop,
2927*c83a76b0SSuyog Pawar                                     post_ctb_ctr,
2928*c83a76b0SSuyog Pawar                                     vert_ctr,
2929*c83a76b0SSuyog Pawar                                     MAP_CTB_COMPLETE);
2930*c83a76b0SSuyog Pawar                             }
2931*c83a76b0SSuyog Pawar                         }
2932*c83a76b0SSuyog Pawar                     }
2933*c83a76b0SSuyog Pawar                 }
2934*c83a76b0SSuyog Pawar             }
2935*c83a76b0SSuyog Pawar         }
2936*c83a76b0SSuyog Pawar     }
2937*c83a76b0SSuyog Pawar 
2938*c83a76b0SSuyog Pawar     return;
2939*c83a76b0SSuyog Pawar }
2940*c83a76b0SSuyog Pawar 
2941*c83a76b0SSuyog Pawar /*!
2942*c83a76b0SSuyog Pawar ******************************************************************************
2943*c83a76b0SSuyog Pawar * \if Function name : ihevce_enc_loop_process \endif
2944*c83a76b0SSuyog Pawar *
2945*c83a76b0SSuyog Pawar * \brief
2946*c83a76b0SSuyog Pawar *    Frame level enc_loop pass function
2947*c83a76b0SSuyog Pawar *
2948*c83a76b0SSuyog Pawar * \param[in] pv_ctxt : pointer to enc_loop module
2949*c83a76b0SSuyog Pawar * \param[in] ps_frm_lamda : Frame level Lambda params
2950*c83a76b0SSuyog Pawar * \param[in] ps_inp  : pointer to input yuv buffer (frame buffer)
2951*c83a76b0SSuyog Pawar * \param[in] ps_ctb_in : pointer to CTB structure (output of ME/IPE) (frame buffer)
2952*c83a76b0SSuyog Pawar * \param[out] ps_frm_recon : pointer to recon picture structure (frame buffer)
2953*c83a76b0SSuyog Pawar * \param[out] ps_ctb_out : pointer to CTB output structure (frame buffer)
2954*c83a76b0SSuyog Pawar * \param[out] ps_cu_out : pointer to CU output structure (frame buffer)
2955*c83a76b0SSuyog Pawar * \param[out] ps_tu_out : pointer to TU output structure (frame buffer)
2956*c83a76b0SSuyog Pawar * \param[out] pi2_frm_coeffs : pointer to coeff output (frame buffer)
2957*c83a76b0SSuyog Pawar *
2958*c83a76b0SSuyog Pawar * \return
2959*c83a76b0SSuyog Pawar *    None
2960*c83a76b0SSuyog Pawar *
2961*c83a76b0SSuyog Pawar * Note : Currently the frame level calculations assume that the
2962*c83a76b0SSuyog Pawar *        frame width of the input/recon is an exact multiple of the CTB size
2963*c83a76b0SSuyog Pawar *
2964*c83a76b0SSuyog Pawar * \author
2965*c83a76b0SSuyog Pawar *  Ittiam
2966*c83a76b0SSuyog Pawar *
2967*c83a76b0SSuyog Pawar *****************************************************************************
2968*c83a76b0SSuyog Pawar */
ihevce_enc_loop_process(void * pv_ctxt,ihevce_lap_enc_buf_t * ps_curr_inp,ctb_analyse_t * ps_ctb_in,ipe_l0_ctb_analyse_for_me_t * ps_ipe_analyse,recon_pic_buf_t * ps_frm_recon,cur_ctb_cu_tree_t * ps_cu_tree_out,ctb_enc_loop_out_t * ps_ctb_out,cu_enc_loop_out_t * ps_cu_out,tu_enc_loop_out_t * ps_tu_out,pu_t * ps_pu_out,UWORD8 * pu1_frm_ecd_data,frm_ctb_ctxt_t * ps_frm_ctb_prms,frm_lambda_ctxt_t * ps_frm_lamda,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,WORD32 thrd_id,WORD32 i4_enc_frm_id,WORD32 i4_pass)2969*c83a76b0SSuyog Pawar void ihevce_enc_loop_process(
2970*c83a76b0SSuyog Pawar     void *pv_ctxt,
2971*c83a76b0SSuyog Pawar     ihevce_lap_enc_buf_t *ps_curr_inp,
2972*c83a76b0SSuyog Pawar     ctb_analyse_t *ps_ctb_in,
2973*c83a76b0SSuyog Pawar     ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse,
2974*c83a76b0SSuyog Pawar     recon_pic_buf_t *ps_frm_recon,
2975*c83a76b0SSuyog Pawar     cur_ctb_cu_tree_t *ps_cu_tree_out,
2976*c83a76b0SSuyog Pawar     ctb_enc_loop_out_t *ps_ctb_out,
2977*c83a76b0SSuyog Pawar     cu_enc_loop_out_t *ps_cu_out,
2978*c83a76b0SSuyog Pawar     tu_enc_loop_out_t *ps_tu_out,
2979*c83a76b0SSuyog Pawar     pu_t *ps_pu_out,
2980*c83a76b0SSuyog Pawar     UWORD8 *pu1_frm_ecd_data,
2981*c83a76b0SSuyog Pawar     frm_ctb_ctxt_t *ps_frm_ctb_prms,
2982*c83a76b0SSuyog Pawar     frm_lambda_ctxt_t *ps_frm_lamda,
2983*c83a76b0SSuyog Pawar     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
2984*c83a76b0SSuyog Pawar     WORD32 thrd_id,
2985*c83a76b0SSuyog Pawar     WORD32 i4_enc_frm_id,
2986*c83a76b0SSuyog Pawar     WORD32 i4_pass)
2987*c83a76b0SSuyog Pawar {
2988*c83a76b0SSuyog Pawar     WORD32 vert_ctr;
2989*c83a76b0SSuyog Pawar     WORD32 tile_col_idx;
2990*c83a76b0SSuyog Pawar     iv_enc_yuv_buf_t s_curr_src_bufs;
2991*c83a76b0SSuyog Pawar     iv_enc_yuv_buf_t s_curr_recon_bufs;
2992*c83a76b0SSuyog Pawar     iv_enc_yuv_buf_src_t s_curr_recon_bufs_src;
2993*c83a76b0SSuyog Pawar     UWORD32 *pu4_pu_offsets;
2994*c83a76b0SSuyog Pawar     WORD32 end_of_frame;
2995*c83a76b0SSuyog Pawar     UWORD8 *apu1_y_sub_pel_planes[3];
2996*c83a76b0SSuyog Pawar     pad_interp_recon_frm_t s_pad_interp_recon;
2997*c83a76b0SSuyog Pawar     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_ctxt;
2998*c83a76b0SSuyog Pawar 
2999*c83a76b0SSuyog Pawar     ihevce_enc_loop_ctxt_t *ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[thrd_id];
3000*c83a76b0SSuyog Pawar 
3001*c83a76b0SSuyog Pawar     WORD32 i4_bitrate_instance_num = ps_ctxt->i4_bitrate_instance_num;
3002*c83a76b0SSuyog Pawar 
3003*c83a76b0SSuyog Pawar     /* initialize the closed loop lambda for the current frame */
3004*c83a76b0SSuyog Pawar     ps_ctxt->i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
3005*c83a76b0SSuyog Pawar     ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
3006*c83a76b0SSuyog Pawar     ps_ctxt->u4_chroma_cost_weighing_factor = ps_frm_lamda->u4_chroma_cost_weighing_factor;
3007*c83a76b0SSuyog Pawar     ps_ctxt->i4_satd_lamda = ps_frm_lamda->i4_cl_satd_lambda_qf;
3008*c83a76b0SSuyog Pawar     ps_ctxt->i4_sad_lamda = ps_frm_lamda->i4_cl_sad_type2_lambda_qf;
3009*c83a76b0SSuyog Pawar     ps_ctxt->thrd_id = thrd_id;
3010*c83a76b0SSuyog Pawar     ps_ctxt->u1_is_refPic = ps_curr_inp->s_lap_out.i4_is_ref_pic;
3011*c83a76b0SSuyog Pawar 
3012*c83a76b0SSuyog Pawar #if DISABLE_SAO_WHEN_NOISY
3013*c83a76b0SSuyog Pawar     ps_ctxt->s_sao_ctxt_t.ps_ctb_data = ps_ctb_in;
3014*c83a76b0SSuyog Pawar     ps_ctxt->s_sao_ctxt_t.i4_ctb_data_stride = ps_frm_ctb_prms->i4_num_ctbs_horz;
3015*c83a76b0SSuyog Pawar #endif
3016*c83a76b0SSuyog Pawar 
3017*c83a76b0SSuyog Pawar #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
3018*c83a76b0SSuyog Pawar     ps_ctxt->pv_err_func_selector = ps_func_selector;
3019*c83a76b0SSuyog Pawar #endif
3020*c83a76b0SSuyog Pawar 
3021*c83a76b0SSuyog Pawar     ps_ctxt->i4_deblk_pad_hpel_cur_pic =
3022*c83a76b0SSuyog Pawar         ps_frm_recon->i4_deblk_pad_hpel_cur_pic ||
3023*c83a76b0SSuyog Pawar         ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
3024*c83a76b0SSuyog Pawar         ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag;
3025*c83a76b0SSuyog Pawar 
3026*c83a76b0SSuyog Pawar     /* Share all reference pictures with nbr clients. This flag will be used only
3027*c83a76b0SSuyog Pawar     in case of dist-enc mode */
3028*c83a76b0SSuyog Pawar     ps_ctxt->i4_share_flag = (ps_frm_recon->i4_is_reference != 0);
3029*c83a76b0SSuyog Pawar     ps_ctxt->pv_frm_recon = (void *)ps_frm_recon;
3030*c83a76b0SSuyog Pawar 
3031*c83a76b0SSuyog Pawar     /* Register the frame level ssd lamda for both luma and chroma*/
3032*c83a76b0SSuyog Pawar     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
3033*c83a76b0SSuyog Pawar     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
3034*c83a76b0SSuyog Pawar 
3035*c83a76b0SSuyog Pawar     ihevce_populate_cl_cu_lambda_prms(
3036*c83a76b0SSuyog Pawar         ps_ctxt,
3037*c83a76b0SSuyog Pawar         ps_frm_lamda,
3038*c83a76b0SSuyog Pawar         (WORD32)ps_ctxt->i1_slice_type,
3039*c83a76b0SSuyog Pawar         ps_curr_inp->s_lap_out.i4_temporal_lyr_id,
3040*c83a76b0SSuyog Pawar         ENC_LOOP_LAMBDA_TYPE);
3041*c83a76b0SSuyog Pawar 
3042*c83a76b0SSuyog Pawar     ps_ctxt->u1_disable_intra_eval = DISABLE_INTRA_IN_BPICS &&
3043*c83a76b0SSuyog Pawar                                      (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
3044*c83a76b0SSuyog Pawar                                      (ps_ctxt->i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE);
3045*c83a76b0SSuyog Pawar 
3046*c83a76b0SSuyog Pawar     end_of_frame = 0;
3047*c83a76b0SSuyog Pawar 
3048*c83a76b0SSuyog Pawar     /* ----------------------------------------------------- */
3049*c83a76b0SSuyog Pawar     /* store the stride and dimensions of source and recon   */
3050*c83a76b0SSuyog Pawar     /* buffer pointers will be over written at every CTB row */
3051*c83a76b0SSuyog Pawar     /* ----------------------------------------------------- */
3052*c83a76b0SSuyog Pawar     memcpy(&s_curr_src_bufs, &ps_curr_inp->s_lap_out.s_input_buf, sizeof(iv_enc_yuv_buf_t));
3053*c83a76b0SSuyog Pawar 
3054*c83a76b0SSuyog Pawar     memcpy(&s_curr_recon_bufs, &ps_frm_recon->s_yuv_buf_desc, sizeof(iv_enc_yuv_buf_t));
3055*c83a76b0SSuyog Pawar 
3056*c83a76b0SSuyog Pawar     memcpy(&s_curr_recon_bufs_src, &ps_frm_recon->s_yuv_buf_desc_src, sizeof(iv_enc_yuv_buf_src_t));
3057*c83a76b0SSuyog Pawar 
3058*c83a76b0SSuyog Pawar     /* get the frame level pu offset pointer*/
3059*c83a76b0SSuyog Pawar     pu4_pu_offsets = ps_frm_recon->pu4_pu_off;
3060*c83a76b0SSuyog Pawar 
3061*c83a76b0SSuyog Pawar     s_pad_interp_recon.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
3062*c83a76b0SSuyog Pawar 
3063*c83a76b0SSuyog Pawar     /* ------------ Loop over all the CTB rows --------------- */
3064*c83a76b0SSuyog Pawar     while(0 == end_of_frame)
3065*c83a76b0SSuyog Pawar     {
3066*c83a76b0SSuyog Pawar         UWORD8 *pu1_tmp;
3067*c83a76b0SSuyog Pawar         UWORD8 *pu1_row_pu_map;
3068*c83a76b0SSuyog Pawar         UWORD8 *pu1_row_ecd_data;
3069*c83a76b0SSuyog Pawar         ctb_analyse_t *ps_ctb_row_in;
3070*c83a76b0SSuyog Pawar         ctb_enc_loop_out_t *ps_ctb_row_out;
3071*c83a76b0SSuyog Pawar         cu_enc_loop_out_t *ps_row_cu;
3072*c83a76b0SSuyog Pawar         tu_enc_loop_out_t *ps_row_tu;
3073*c83a76b0SSuyog Pawar         pu_t *ps_row_pu;
3074*c83a76b0SSuyog Pawar         pu_col_mv_t *ps_row_col_pu;
3075*c83a76b0SSuyog Pawar         job_queue_t *ps_job;
3076*c83a76b0SSuyog Pawar         UWORD32 *pu4_pu_row_offsets;
3077*c83a76b0SSuyog Pawar         UWORD16 *pu2_num_pu_row;
3078*c83a76b0SSuyog Pawar 
3079*c83a76b0SSuyog Pawar         ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse;
3080*c83a76b0SSuyog Pawar         cur_ctb_cu_tree_t *ps_row_cu_tree;
3081*c83a76b0SSuyog Pawar         UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
3082*c83a76b0SSuyog Pawar 
3083*c83a76b0SSuyog Pawar         /* Get the current row from the job queue */
3084*c83a76b0SSuyog Pawar         ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
3085*c83a76b0SSuyog Pawar             ps_multi_thrd_ctxt, ENC_LOOP_JOB + i4_bitrate_instance_num, 1, i4_enc_frm_id);
3086*c83a76b0SSuyog Pawar 
3087*c83a76b0SSuyog Pawar         /* Register the pointer to ctb out of the current frame*/
3088*c83a76b0SSuyog Pawar         ps_ctxt->s_sao_ctxt_t.ps_ctb_out = ps_ctb_out;
3089*c83a76b0SSuyog Pawar 
3090*c83a76b0SSuyog Pawar         /* If all rows are done, set the end of process flag to 1, */
3091*c83a76b0SSuyog Pawar         /* and the current row to -1 */
3092*c83a76b0SSuyog Pawar         if(NULL == ps_job)
3093*c83a76b0SSuyog Pawar         {
3094*c83a76b0SSuyog Pawar             vert_ctr = -1;
3095*c83a76b0SSuyog Pawar             tile_col_idx = -1;
3096*c83a76b0SSuyog Pawar             end_of_frame = 1;
3097*c83a76b0SSuyog Pawar         }
3098*c83a76b0SSuyog Pawar         else
3099*c83a76b0SSuyog Pawar         {
3100*c83a76b0SSuyog Pawar             ihevce_tile_params_t *ps_col_tile_params_temp;
3101*c83a76b0SSuyog Pawar             ihevce_tile_params_t *ps_tile_params;
3102*c83a76b0SSuyog Pawar             WORD32 i4_tile_id;
3103*c83a76b0SSuyog Pawar 
3104*c83a76b0SSuyog Pawar             ASSERT((ENC_LOOP_JOB + i4_bitrate_instance_num) == ps_job->i4_task_type);
3105*c83a76b0SSuyog Pawar             /* set the output dependency */
3106*c83a76b0SSuyog Pawar             ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_enc_frm_id);
3107*c83a76b0SSuyog Pawar 
3108*c83a76b0SSuyog Pawar             /* Obtain the current row's details from the job */
3109*c83a76b0SSuyog Pawar             vert_ctr = ps_job->s_job_info.s_enc_loop_job_info.i4_ctb_row_no;
3110*c83a76b0SSuyog Pawar             {
3111*c83a76b0SSuyog Pawar                 /* Obtain the current colum tile index from the job */
3112*c83a76b0SSuyog Pawar                 tile_col_idx = ps_job->s_job_info.s_enc_loop_job_info.i4_tile_col_idx;
3113*c83a76b0SSuyog Pawar 
3114*c83a76b0SSuyog Pawar                 /* The tile parameter for the col. idx. Use only the properties
3115*c83a76b0SSuyog Pawar                 which is same for all the bottom tiles like width, start_x, etc.
3116*c83a76b0SSuyog Pawar                 Don't use height, start_y, etc.                                  */
3117*c83a76b0SSuyog Pawar                 ps_col_tile_params_temp =
3118*c83a76b0SSuyog Pawar                     ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + tile_col_idx);
3119*c83a76b0SSuyog Pawar 
3120*c83a76b0SSuyog Pawar                 /* Derive actual tile_id based on vert_ctr */
3121*c83a76b0SSuyog Pawar                 i4_tile_id =
3122*c83a76b0SSuyog Pawar                     *(ps_frm_ctb_prms->pi4_tile_id_map +
3123*c83a76b0SSuyog Pawar                       vert_ctr * ps_frm_ctb_prms->i4_tile_id_ctb_map_stride +
3124*c83a76b0SSuyog Pawar                       ps_col_tile_params_temp->i4_first_ctb_x);
3125*c83a76b0SSuyog Pawar                 /* Derive pointer to current tile prms */
3126*c83a76b0SSuyog Pawar                 ps_tile_params =
3127*c83a76b0SSuyog Pawar                     ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + i4_tile_id);
3128*c83a76b0SSuyog Pawar             }
3129*c83a76b0SSuyog Pawar 
3130*c83a76b0SSuyog Pawar             ps_ctxt->i4_tile_col_idx = tile_col_idx;
3131*c83a76b0SSuyog Pawar             /* derive the current ctb row pointers */
3132*c83a76b0SSuyog Pawar 
3133*c83a76b0SSuyog Pawar             /* luma src */
3134*c83a76b0SSuyog Pawar             pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
3135*c83a76b0SSuyog Pawar                       (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
3136*c83a76b0SSuyog Pawar                        ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
3137*c83a76b0SSuyog Pawar                       ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
3138*c83a76b0SSuyog Pawar 
3139*c83a76b0SSuyog Pawar             pu1_tmp +=
3140*c83a76b0SSuyog Pawar                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size *
3141*c83a76b0SSuyog Pawar                  ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd);
3142*c83a76b0SSuyog Pawar 
3143*c83a76b0SSuyog Pawar             s_curr_src_bufs.pv_y_buf = pu1_tmp;
3144*c83a76b0SSuyog Pawar 
3145*c83a76b0SSuyog Pawar             if(!ps_ctxt->u1_is_input_data_hbd)
3146*c83a76b0SSuyog Pawar             {
3147*c83a76b0SSuyog Pawar                 /* cb src */
3148*c83a76b0SSuyog Pawar                 pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
3149*c83a76b0SSuyog Pawar                 pu1_tmp +=
3150*c83a76b0SSuyog Pawar                     (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
3151*c83a76b0SSuyog Pawar                      ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd);
3152*c83a76b0SSuyog Pawar 
3153*c83a76b0SSuyog Pawar                 s_curr_src_bufs.pv_u_buf = pu1_tmp;
3154*c83a76b0SSuyog Pawar             }
3155*c83a76b0SSuyog Pawar 
3156*c83a76b0SSuyog Pawar             /* luma recon */
3157*c83a76b0SSuyog Pawar             pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3158*c83a76b0SSuyog Pawar             pu1_tmp +=
3159*c83a76b0SSuyog Pawar                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3160*c83a76b0SSuyog Pawar 
3161*c83a76b0SSuyog Pawar             s_curr_recon_bufs.pv_y_buf = pu1_tmp;
3162*c83a76b0SSuyog Pawar             s_pad_interp_recon.pu1_luma_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3163*c83a76b0SSuyog Pawar             s_pad_interp_recon.i4_luma_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
3164*c83a76b0SSuyog Pawar             if(!ps_ctxt->u1_is_input_data_hbd)
3165*c83a76b0SSuyog Pawar             {
3166*c83a76b0SSuyog Pawar                 /* cb recon */
3167*c83a76b0SSuyog Pawar                 pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3168*c83a76b0SSuyog Pawar                 pu1_tmp +=
3169*c83a76b0SSuyog Pawar                     (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
3170*c83a76b0SSuyog Pawar                      ps_frm_recon->s_yuv_buf_desc.i4_uv_strd);
3171*c83a76b0SSuyog Pawar 
3172*c83a76b0SSuyog Pawar                 s_curr_recon_bufs.pv_u_buf = pu1_tmp;
3173*c83a76b0SSuyog Pawar                 s_pad_interp_recon.pu1_chrm_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3174*c83a76b0SSuyog Pawar                 s_pad_interp_recon.i4_chrm_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
3175*c83a76b0SSuyog Pawar 
3176*c83a76b0SSuyog Pawar                 s_pad_interp_recon.i4_ctb_size = ps_frm_ctb_prms->i4_ctb_size;
3177*c83a76b0SSuyog Pawar 
3178*c83a76b0SSuyog Pawar                 /* Register the source buffer pointers in sao context*/
3179*c83a76b0SSuyog Pawar                 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_src_buf =
3180*c83a76b0SSuyog Pawar                     (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
3181*c83a76b0SSuyog Pawar                     (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
3182*c83a76b0SSuyog Pawar                      ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
3183*c83a76b0SSuyog Pawar                     ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
3184*c83a76b0SSuyog Pawar 
3185*c83a76b0SSuyog Pawar                 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_src_stride =
3186*c83a76b0SSuyog Pawar                     ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd;
3187*c83a76b0SSuyog Pawar 
3188*c83a76b0SSuyog Pawar                 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_src_buf =
3189*c83a76b0SSuyog Pawar                     (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
3190*c83a76b0SSuyog Pawar 
3191*c83a76b0SSuyog Pawar                 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_src_stride =
3192*c83a76b0SSuyog Pawar                     ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd;
3193*c83a76b0SSuyog Pawar             }
3194*c83a76b0SSuyog Pawar 
3195*c83a76b0SSuyog Pawar             /* Subpel planes hxfy, fxhy, hxhy*/
3196*c83a76b0SSuyog Pawar             pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[0];
3197*c83a76b0SSuyog Pawar             pu1_tmp +=
3198*c83a76b0SSuyog Pawar                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3199*c83a76b0SSuyog Pawar             apu1_y_sub_pel_planes[0] = pu1_tmp;
3200*c83a76b0SSuyog Pawar             s_pad_interp_recon.pu1_sbpel_hxfy = ps_frm_recon->apu1_y_sub_pel_planes[0];
3201*c83a76b0SSuyog Pawar 
3202*c83a76b0SSuyog Pawar             pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[1];
3203*c83a76b0SSuyog Pawar             pu1_tmp +=
3204*c83a76b0SSuyog Pawar                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3205*c83a76b0SSuyog Pawar             apu1_y_sub_pel_planes[1] = pu1_tmp;
3206*c83a76b0SSuyog Pawar             s_pad_interp_recon.pu1_sbpel_fxhy = ps_frm_recon->apu1_y_sub_pel_planes[1];
3207*c83a76b0SSuyog Pawar 
3208*c83a76b0SSuyog Pawar             pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[2];
3209*c83a76b0SSuyog Pawar             pu1_tmp +=
3210*c83a76b0SSuyog Pawar                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3211*c83a76b0SSuyog Pawar             apu1_y_sub_pel_planes[2] = pu1_tmp;
3212*c83a76b0SSuyog Pawar             s_pad_interp_recon.pu1_sbpel_hxhy = ps_frm_recon->apu1_y_sub_pel_planes[2];
3213*c83a76b0SSuyog Pawar 
3214*c83a76b0SSuyog Pawar             /* row level coeffs buffer */
3215*c83a76b0SSuyog Pawar             pu1_row_ecd_data =
3216*c83a76b0SSuyog Pawar                 pu1_frm_ecd_data +
3217*c83a76b0SSuyog Pawar                 (vert_ctr *
3218*c83a76b0SSuyog Pawar                  ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_max_tus_in_row << 1)
3219*c83a76b0SSuyog Pawar                                     : ((ps_frm_ctb_prms->i4_max_tus_in_row * 3) >> 1)) *
3220*c83a76b0SSuyog Pawar                  MAX_SCAN_COEFFS_BYTES_4x4);
3221*c83a76b0SSuyog Pawar 
3222*c83a76b0SSuyog Pawar             /* Row level CU buffer */
3223*c83a76b0SSuyog Pawar             ps_row_cu = ps_cu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_cus_in_row);
3224*c83a76b0SSuyog Pawar 
3225*c83a76b0SSuyog Pawar             /* Row level TU buffer */
3226*c83a76b0SSuyog Pawar             ps_row_tu = ps_tu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_tus_in_row);
3227*c83a76b0SSuyog Pawar 
3228*c83a76b0SSuyog Pawar             /* Row level PU buffer */
3229*c83a76b0SSuyog Pawar             ps_row_pu = ps_pu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row);
3230*c83a76b0SSuyog Pawar 
3231*c83a76b0SSuyog Pawar             /* Row level colocated PU buffer */
3232*c83a76b0SSuyog Pawar             /* ps_frm_col_mv has (i4_num_ctbs_horz + 1) CTBs for stride */
3233*c83a76b0SSuyog Pawar             ps_row_col_pu =
3234*c83a76b0SSuyog Pawar                 ps_frm_recon->ps_frm_col_mv + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
3235*c83a76b0SSuyog Pawar                                                ps_frm_ctb_prms->i4_num_pus_in_ctb);
3236*c83a76b0SSuyog Pawar             /* Row level col PU map buffer */
3237*c83a76b0SSuyog Pawar             /* pu1_frm_pu_map has (i4_num_ctbs_horz + 1) CTBs for stride */
3238*c83a76b0SSuyog Pawar             pu1_row_pu_map =
3239*c83a76b0SSuyog Pawar                 ps_frm_recon->pu1_frm_pu_map + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
3240*c83a76b0SSuyog Pawar                                                 ps_frm_ctb_prms->i4_num_pus_in_ctb);
3241*c83a76b0SSuyog Pawar             /* row ctb in pointer  */
3242*c83a76b0SSuyog Pawar             ps_ctb_row_in = ps_ctb_in + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3243*c83a76b0SSuyog Pawar 
3244*c83a76b0SSuyog Pawar             /* row ctb out pointer  */
3245*c83a76b0SSuyog Pawar             ps_ctb_row_out = ps_ctb_out + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3246*c83a76b0SSuyog Pawar 
3247*c83a76b0SSuyog Pawar             /* row number of PUs map pointer */
3248*c83a76b0SSuyog Pawar             pu2_num_pu_row =
3249*c83a76b0SSuyog Pawar                 ps_frm_recon->pu2_num_pu_map + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3250*c83a76b0SSuyog Pawar 
3251*c83a76b0SSuyog Pawar             /* row pu offsets pointer  */
3252*c83a76b0SSuyog Pawar             pu4_pu_row_offsets = pu4_pu_offsets + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3253*c83a76b0SSuyog Pawar             /* store the first CTB pu offset pointer */
3254*c83a76b0SSuyog Pawar             *pu4_pu_row_offsets = vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row;
3255*c83a76b0SSuyog Pawar             /* Initialize ptr to current IPE row */
3256*c83a76b0SSuyog Pawar             ps_row_ipe_analyse = ps_ipe_analyse + (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz);
3257*c83a76b0SSuyog Pawar 
3258*c83a76b0SSuyog Pawar             /* Initialize ptr to current row */
3259*c83a76b0SSuyog Pawar             ps_row_cu_tree = ps_cu_tree_out +
3260*c83a76b0SSuyog Pawar                              (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE);
3261*c83a76b0SSuyog Pawar 
3262*c83a76b0SSuyog Pawar             /* Get the EncLoop Top-Right CU Dep Mngr */
3263*c83a76b0SSuyog Pawar             ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right =
3264*c83a76b0SSuyog Pawar                 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[ps_ctxt->i4_enc_frm_id]
3265*c83a76b0SSuyog Pawar                                                                    [i4_bitrate_instance_num];
3266*c83a76b0SSuyog Pawar             /* Get the EncLoop Deblock Dep Mngr */
3267*c83a76b0SSuyog Pawar             ps_ctxt->pv_dep_mngr_enc_loop_dblk =
3268*c83a76b0SSuyog Pawar                 ps_master_ctxt
3269*c83a76b0SSuyog Pawar                     ->aapv_dep_mngr_enc_loop_dblk[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num];
3270*c83a76b0SSuyog Pawar             /* Get the EncLoop Sao Dep Mngr */
3271*c83a76b0SSuyog Pawar             ps_ctxt->pv_dep_mngr_enc_loop_sao =
3272*c83a76b0SSuyog Pawar                 ps_master_ctxt
3273*c83a76b0SSuyog Pawar                     ->aapv_dep_mngr_enc_loop_sao[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num];
3274*c83a76b0SSuyog Pawar 
3275*c83a76b0SSuyog Pawar             ps_ctxt->pu1_curr_row_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr][0];
3276*c83a76b0SSuyog Pawar 
3277*c83a76b0SSuyog Pawar             {
3278*c83a76b0SSuyog Pawar                 /* derive the pointers of top row buffers */
3279*c83a76b0SSuyog Pawar                 ps_ctxt->pv_top_row_luma =
3280*c83a76b0SSuyog Pawar                     (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
3281*c83a76b0SSuyog Pawar                     (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
3282*c83a76b0SSuyog Pawar                     (vert_ctr - 1) * ps_ctxt->i4_top_row_luma_stride;
3283*c83a76b0SSuyog Pawar 
3284*c83a76b0SSuyog Pawar                 ps_ctxt->pv_top_row_chroma =
3285*c83a76b0SSuyog Pawar                     (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
3286*c83a76b0SSuyog Pawar                     (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
3287*c83a76b0SSuyog Pawar                     (vert_ctr - 1) * ps_ctxt->i4_top_row_chroma_stride;
3288*c83a76b0SSuyog Pawar 
3289*c83a76b0SSuyog Pawar                 /* derive the pointers of bottom row buffers to update current row data */
3290*c83a76b0SSuyog Pawar                 ps_ctxt->pv_bot_row_luma =
3291*c83a76b0SSuyog Pawar                     (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
3292*c83a76b0SSuyog Pawar                     (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
3293*c83a76b0SSuyog Pawar                     (vert_ctr)*ps_ctxt->i4_top_row_luma_stride;
3294*c83a76b0SSuyog Pawar 
3295*c83a76b0SSuyog Pawar                 ps_ctxt->pv_bot_row_chroma =
3296*c83a76b0SSuyog Pawar                     (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
3297*c83a76b0SSuyog Pawar                     (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
3298*c83a76b0SSuyog Pawar                     (vert_ctr)*ps_ctxt->i4_top_row_chroma_stride;
3299*c83a76b0SSuyog Pawar 
3300*c83a76b0SSuyog Pawar                 /* Register the buffer pointers in sao context*/
3301*c83a76b0SSuyog Pawar                 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_recon_buf =
3302*c83a76b0SSuyog Pawar                     (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3303*c83a76b0SSuyog Pawar                 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_recon_stride =
3304*c83a76b0SSuyog Pawar                     ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
3305*c83a76b0SSuyog Pawar 
3306*c83a76b0SSuyog Pawar                 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_recon_buf =
3307*c83a76b0SSuyog Pawar                     (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3308*c83a76b0SSuyog Pawar                 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_recon_stride =
3309*c83a76b0SSuyog Pawar                     ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
3310*c83a76b0SSuyog Pawar 
3311*c83a76b0SSuyog Pawar                 ps_ctxt->s_sao_ctxt_t.ps_rdopt_entropy_ctxt = &ps_ctxt->s_rdopt_entropy_ctxt;
3312*c83a76b0SSuyog Pawar 
3313*c83a76b0SSuyog Pawar                 ps_ctxt->s_sao_ctxt_t.i4_frm_top_luma_buf_stride =
3314*c83a76b0SSuyog Pawar                     ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 1;
3315*c83a76b0SSuyog Pawar 
3316*c83a76b0SSuyog Pawar                 ps_ctxt->s_sao_ctxt_t.i4_frm_top_chroma_buf_stride =
3317*c83a76b0SSuyog Pawar                     ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 2;
3318*c83a76b0SSuyog Pawar             }
3319*c83a76b0SSuyog Pawar 
3320*c83a76b0SSuyog Pawar             ps_ctxt->ps_top_row_nbr =
3321*c83a76b0SSuyog Pawar                 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
3322*c83a76b0SSuyog Pawar                 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
3323*c83a76b0SSuyog Pawar                 (vert_ctr - 1) * ps_ctxt->i4_top_row_nbr_stride;
3324*c83a76b0SSuyog Pawar 
3325*c83a76b0SSuyog Pawar             ps_ctxt->ps_bot_row_nbr =
3326*c83a76b0SSuyog Pawar                 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
3327*c83a76b0SSuyog Pawar                 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
3328*c83a76b0SSuyog Pawar                 (vert_ctr)*ps_ctxt->i4_top_row_nbr_stride;
3329*c83a76b0SSuyog Pawar 
3330*c83a76b0SSuyog Pawar             if(vert_ctr > 0)
3331*c83a76b0SSuyog Pawar             {
3332*c83a76b0SSuyog Pawar                 ps_ctxt->pu1_top_rt_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr - 1][0];
3333*c83a76b0SSuyog Pawar             }
3334*c83a76b0SSuyog Pawar             else
3335*c83a76b0SSuyog Pawar             {
3336*c83a76b0SSuyog Pawar                 ps_ctxt->pu1_top_rt_cabac_state = NULL;
3337*c83a76b0SSuyog Pawar             }
3338*c83a76b0SSuyog Pawar 
3339*c83a76b0SSuyog Pawar             ASSERT(
3340*c83a76b0SSuyog Pawar                 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0]
3341*c83a76b0SSuyog Pawar                     .ps_pps->i1_sign_data_hiding_flag ==
3342*c83a76b0SSuyog Pawar                 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1]
3343*c83a76b0SSuyog Pawar                     .ps_pps->i1_sign_data_hiding_flag);
3344*c83a76b0SSuyog Pawar 
3345*c83a76b0SSuyog Pawar             /* call the row level processing function */
3346*c83a76b0SSuyog Pawar             ihevce_enc_loop_process_row(
3347*c83a76b0SSuyog Pawar                 ps_ctxt,
3348*c83a76b0SSuyog Pawar                 &s_curr_src_bufs,
3349*c83a76b0SSuyog Pawar                 &s_curr_recon_bufs,
3350*c83a76b0SSuyog Pawar                 &s_curr_recon_bufs_src,
3351*c83a76b0SSuyog Pawar                 &apu1_y_sub_pel_planes[0],
3352*c83a76b0SSuyog Pawar                 ps_ctb_row_in,
3353*c83a76b0SSuyog Pawar                 ps_ctb_row_out,
3354*c83a76b0SSuyog Pawar                 ps_row_ipe_analyse,
3355*c83a76b0SSuyog Pawar                 ps_row_cu_tree,
3356*c83a76b0SSuyog Pawar                 ps_row_cu,
3357*c83a76b0SSuyog Pawar                 ps_row_tu,
3358*c83a76b0SSuyog Pawar                 ps_row_pu,
3359*c83a76b0SSuyog Pawar                 ps_row_col_pu,
3360*c83a76b0SSuyog Pawar                 pu2_num_pu_row,
3361*c83a76b0SSuyog Pawar                 pu1_row_pu_map,
3362*c83a76b0SSuyog Pawar                 pu1_row_ecd_data,
3363*c83a76b0SSuyog Pawar                 pu4_pu_row_offsets,
3364*c83a76b0SSuyog Pawar                 ps_frm_ctb_prms,
3365*c83a76b0SSuyog Pawar                 vert_ctr,
3366*c83a76b0SSuyog Pawar                 ps_frm_recon,
3367*c83a76b0SSuyog Pawar                 ps_ctxt->pv_dep_mngr_encloop_dep_me,
3368*c83a76b0SSuyog Pawar                 &s_pad_interp_recon,
3369*c83a76b0SSuyog Pawar                 i4_pass,
3370*c83a76b0SSuyog Pawar                 ps_multi_thrd_ctxt,
3371*c83a76b0SSuyog Pawar                 ps_tile_params);
3372*c83a76b0SSuyog Pawar         }
3373*c83a76b0SSuyog Pawar     }
3374*c83a76b0SSuyog Pawar }
3375*c83a76b0SSuyog Pawar 
3376*c83a76b0SSuyog Pawar /*!
3377*c83a76b0SSuyog Pawar ******************************************************************************
3378*c83a76b0SSuyog Pawar * \if Function name : ihevce_enc_loop_dblk_get_prms_dep_mngr \endif
3379*c83a76b0SSuyog Pawar *
3380*c83a76b0SSuyog Pawar * \brief Returns to the caller key attributes relevant for dependency manager,
3381*c83a76b0SSuyog Pawar *        ie, the number of vertical units in l0 layer
3382*c83a76b0SSuyog Pawar *
3383*c83a76b0SSuyog Pawar * \par Description:
3384*c83a76b0SSuyog Pawar *
3385*c83a76b0SSuyog Pawar * \param[in] pai4_ht    : ht
3386*c83a76b0SSuyog Pawar * \param[out] pi4_num_vert_units_in_lyr : Pointer to store num vertical units
3387*c83a76b0SSuyog Pawar *                                         for deblocking
3388*c83a76b0SSuyog Pawar *
3389*c83a76b0SSuyog Pawar * \return
3390*c83a76b0SSuyog Pawar *    None
3391*c83a76b0SSuyog Pawar *
3392*c83a76b0SSuyog Pawar * \author
3393*c83a76b0SSuyog Pawar *  Ittiam
3394*c83a76b0SSuyog Pawar *
3395*c83a76b0SSuyog Pawar *****************************************************************************
3396*c83a76b0SSuyog Pawar */
ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht,WORD32 * pi4_num_vert_units_in_lyr)3397*c83a76b0SSuyog Pawar void ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht, WORD32 *pi4_num_vert_units_in_lyr)
3398*c83a76b0SSuyog Pawar {
3399*c83a76b0SSuyog Pawar     /* Blk ht at a given layer*/
3400*c83a76b0SSuyog Pawar     WORD32 unit_ht_c;
3401*c83a76b0SSuyog Pawar     WORD32 ctb_size = 64;
3402*c83a76b0SSuyog Pawar 
3403*c83a76b0SSuyog Pawar     /* compute blk ht and unit ht */
3404*c83a76b0SSuyog Pawar     unit_ht_c = ctb_size;
3405*c83a76b0SSuyog Pawar 
3406*c83a76b0SSuyog Pawar     /* set the numebr of vertical units */
3407*c83a76b0SSuyog Pawar     *pi4_num_vert_units_in_lyr = (i4_ht + unit_ht_c - 1) / unit_ht_c;
3408*c83a76b0SSuyog Pawar }
3409*c83a76b0SSuyog Pawar 
3410*c83a76b0SSuyog Pawar /*!
3411*c83a76b0SSuyog Pawar ******************************************************************************
3412*c83a76b0SSuyog Pawar * \if Function name : ihevce_enc_loop_get_num_mem_recs \endif
3413*c83a76b0SSuyog Pawar *
3414*c83a76b0SSuyog Pawar * \brief
3415*c83a76b0SSuyog Pawar *    Number of memory records are returned for enc_loop module
3416*c83a76b0SSuyog Pawar * Note : Include TOT MEM. req. for ENC.LOOP + TOT MEM. req. for Dep Mngr for Dblk
3417*c83a76b0SSuyog Pawar *
3418*c83a76b0SSuyog Pawar * \return
3419*c83a76b0SSuyog Pawar *    None
3420*c83a76b0SSuyog Pawar *
3421*c83a76b0SSuyog Pawar * \author
3422*c83a76b0SSuyog Pawar *  Ittiam
3423*c83a76b0SSuyog Pawar *
3424*c83a76b0SSuyog Pawar *****************************************************************************
3425*c83a76b0SSuyog Pawar */
3426*c83a76b0SSuyog Pawar WORD32
ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst,WORD32 i4_num_enc_loop_frm_pllel)3427*c83a76b0SSuyog Pawar     ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst, WORD32 i4_num_enc_loop_frm_pllel)
3428*c83a76b0SSuyog Pawar {
3429*c83a76b0SSuyog Pawar     WORD32 enc_loop_mem_recs = NUM_ENC_LOOP_MEM_RECS;
3430*c83a76b0SSuyog Pawar     WORD32 enc_loop_dblk_dep_mngr_mem_recs =
3431*c83a76b0SSuyog Pawar         i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3432*c83a76b0SSuyog Pawar     WORD32 enc_loop_sao_dep_mngr_mem_recs =
3433*c83a76b0SSuyog Pawar         i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3434*c83a76b0SSuyog Pawar     WORD32 enc_loop_cu_top_right_dep_mngr_mem_recs =
3435*c83a76b0SSuyog Pawar         i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3436*c83a76b0SSuyog Pawar     WORD32 enc_loop_aux_br_dep_mngr_mem_recs =
3437*c83a76b0SSuyog Pawar         i4_num_enc_loop_frm_pllel * (i4_num_bitrate_inst - 1) * ihevce_dmgr_get_num_mem_recs();
3438*c83a76b0SSuyog Pawar 
3439*c83a76b0SSuyog Pawar     return (
3440*c83a76b0SSuyog Pawar         (enc_loop_mem_recs + enc_loop_dblk_dep_mngr_mem_recs + enc_loop_sao_dep_mngr_mem_recs +
3441*c83a76b0SSuyog Pawar          enc_loop_cu_top_right_dep_mngr_mem_recs + enc_loop_aux_br_dep_mngr_mem_recs));
3442*c83a76b0SSuyog Pawar }
3443*c83a76b0SSuyog Pawar /*!
3444*c83a76b0SSuyog Pawar ******************************************************************************
3445*c83a76b0SSuyog Pawar * \if Function name : ihevce_enc_loop_get_mem_recs \endif
3446*c83a76b0SSuyog Pawar *
3447*c83a76b0SSuyog Pawar * \brief
3448*c83a76b0SSuyog Pawar *    Memory requirements are returned for ENC_LOOP.
3449*c83a76b0SSuyog Pawar *
3450*c83a76b0SSuyog Pawar * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
3451*c83a76b0SSuyog Pawar * \param[in] ps_init_prms : Create time static parameters
3452*c83a76b0SSuyog Pawar * \param[in] i4_num_proc_thrds : Number of processing threads for this module
* \param[in] i4_mem_space : memspace in which memory request should be done
3454*c83a76b0SSuyog Pawar *
3455*c83a76b0SSuyog Pawar * \return
3456*c83a76b0SSuyog Pawar *    None
3457*c83a76b0SSuyog Pawar *
3458*c83a76b0SSuyog Pawar * \author
3459*c83a76b0SSuyog Pawar *  Ittiam
3460*c83a76b0SSuyog Pawar *
3461*c83a76b0SSuyog Pawar *****************************************************************************
3462*c83a76b0SSuyog Pawar */
ihevce_enc_loop_get_mem_recs(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,WORD32 i4_num_bitrate_inst,WORD32 i4_num_enc_loop_frm_pllel,WORD32 i4_mem_space,WORD32 i4_resolution_id)3463*c83a76b0SSuyog Pawar WORD32 ihevce_enc_loop_get_mem_recs(
3464*c83a76b0SSuyog Pawar     iv_mem_rec_t *ps_mem_tab,
3465*c83a76b0SSuyog Pawar     ihevce_static_cfg_params_t *ps_init_prms,
3466*c83a76b0SSuyog Pawar     WORD32 i4_num_proc_thrds,
3467*c83a76b0SSuyog Pawar     WORD32 i4_num_bitrate_inst,
3468*c83a76b0SSuyog Pawar     WORD32 i4_num_enc_loop_frm_pllel,
3469*c83a76b0SSuyog Pawar     WORD32 i4_mem_space,
3470*c83a76b0SSuyog Pawar     WORD32 i4_resolution_id)
3471*c83a76b0SSuyog Pawar {
3472*c83a76b0SSuyog Pawar     UWORD32 u4_width, u4_height, n_tabs;
3473*c83a76b0SSuyog Pawar     UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
3474*c83a76b0SSuyog Pawar     WORD32 ctr;
3475*c83a76b0SSuyog Pawar     WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
3476*c83a76b0SSuyog Pawar 
3477*c83a76b0SSuyog Pawar     /* derive frame dimensions */
3478*c83a76b0SSuyog Pawar     /*width of the input YUV to be encoded */
3479*c83a76b0SSuyog Pawar     u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
3480*c83a76b0SSuyog Pawar     /*making the width a multiple of CTB size*/
3481*c83a76b0SSuyog Pawar     u4_width += SET_CTB_ALIGN(
3482*c83a76b0SSuyog Pawar         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
3483*c83a76b0SSuyog Pawar 
3484*c83a76b0SSuyog Pawar     /*height of the input YUV to be encoded */
3485*c83a76b0SSuyog Pawar     u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
3486*c83a76b0SSuyog Pawar     /*making the height a multiple of CTB size*/
3487*c83a76b0SSuyog Pawar     u4_height += SET_CTB_ALIGN(
3488*c83a76b0SSuyog Pawar         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
3489*c83a76b0SSuyog Pawar     u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
3490*c83a76b0SSuyog Pawar     u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
3491*c83a76b0SSuyog Pawar     /* memories should be requested assuming worst case requirememnts */
3492*c83a76b0SSuyog Pawar 
3493*c83a76b0SSuyog Pawar     /* Module context structure */
3494*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_CTXT].i4_mem_size = sizeof(ihevce_enc_loop_master_ctxt_t);
3495*c83a76b0SSuyog Pawar 
3496*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3497*c83a76b0SSuyog Pawar 
3498*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_CTXT].i4_mem_alignment = 8;
3499*c83a76b0SSuyog Pawar 
3500*c83a76b0SSuyog Pawar     /* Thread context structure */
3501*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_size =
3502*c83a76b0SSuyog Pawar         i4_num_proc_thrds * sizeof(ihevce_enc_loop_ctxt_t);
3503*c83a76b0SSuyog Pawar 
3504*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_THRDS_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3505*c83a76b0SSuyog Pawar 
3506*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_alignment = 16;
3507*c83a76b0SSuyog Pawar 
3508*c83a76b0SSuyog Pawar     /* Scale matrices */
3509*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
3510*c83a76b0SSuyog Pawar 
3511*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_SCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3512*c83a76b0SSuyog Pawar 
3513*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_alignment = 8;
3514*c83a76b0SSuyog Pawar 
3515*c83a76b0SSuyog Pawar     /* Rescale matrices */
3516*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
3517*c83a76b0SSuyog Pawar 
3518*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_RESCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3519*c83a76b0SSuyog Pawar 
3520*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_alignment = 8;
3521*c83a76b0SSuyog Pawar 
3522*c83a76b0SSuyog Pawar     /* top row luma one row of pixel data per CTB row */
3523*c83a76b0SSuyog Pawar     if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
3524*c83a76b0SSuyog Pawar     {
3525*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
3526*c83a76b0SSuyog Pawar                                                     (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD16) *
3527*c83a76b0SSuyog Pawar                                                     i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3528*c83a76b0SSuyog Pawar     }
3529*c83a76b0SSuyog Pawar     else
3530*c83a76b0SSuyog Pawar     {
3531*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
3532*c83a76b0SSuyog Pawar                                                     (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD8) *
3533*c83a76b0SSuyog Pawar                                                     i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3534*c83a76b0SSuyog Pawar     }
3535*c83a76b0SSuyog Pawar 
3536*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_TOP_LUMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3537*c83a76b0SSuyog Pawar 
3538*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_alignment = 8;
3539*c83a76b0SSuyog Pawar 
3540*c83a76b0SSuyog Pawar     /* top row chroma */
3541*c83a76b0SSuyog Pawar     if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
3542*c83a76b0SSuyog Pawar     {
3543*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
3544*c83a76b0SSuyog Pawar             (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD16) *
3545*c83a76b0SSuyog Pawar             i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3546*c83a76b0SSuyog Pawar     }
3547*c83a76b0SSuyog Pawar     else
3548*c83a76b0SSuyog Pawar     {
3549*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
3550*c83a76b0SSuyog Pawar             (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD8) *
3551*c83a76b0SSuyog Pawar             i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3552*c83a76b0SSuyog Pawar     }
3553*c83a76b0SSuyog Pawar 
3554*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_TOP_CHROMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3555*c83a76b0SSuyog Pawar 
3556*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_alignment = 8;
3557*c83a76b0SSuyog Pawar 
3558*c83a76b0SSuyog Pawar     /* top row neighbour 4x4 */
3559*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_size =
3560*c83a76b0SSuyog Pawar         (u4_ctb_rows_in_a_frame + 1) * (((u4_width + MAX_CU_SIZE) >> 2) + 1) * sizeof(nbr_4x4_t) *
3561*c83a76b0SSuyog Pawar         i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3562*c83a76b0SSuyog Pawar 
3563*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_TOP_NBR4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3564*c83a76b0SSuyog Pawar 
3565*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_alignment = 8;
3566*c83a76b0SSuyog Pawar 
3567*c83a76b0SSuyog Pawar     /* memory to dump rate control parameters by each thread for each bit-rate instance */
3568*c83a76b0SSuyog Pawar     /* RC params collated by each thread for each bit-rate instance separately */
3569*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_size = i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel *
3570*c83a76b0SSuyog Pawar                                                  i4_num_proc_thrds * sizeof(enc_loop_rc_params_t);
3571*c83a76b0SSuyog Pawar 
3572*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_RC_PARAMS].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3573*c83a76b0SSuyog Pawar 
3574*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_alignment = 8;
3575*c83a76b0SSuyog Pawar     /* Memory required for deblocking */
3576*c83a76b0SSuyog Pawar     {
3577*c83a76b0SSuyog Pawar         /* Memory to store Qp of top4x4 blocks for each CTB row.
3578*c83a76b0SSuyog Pawar         This memory is allocated at frame level and shared across
3579*c83a76b0SSuyog Pawar         all cores. The Qp values are needed to form Qp-map(described
3580*c83a76b0SSuyog Pawar         in the ENC_LOOP_DEBLOCKING section below)*/
3581*c83a76b0SSuyog Pawar 
3582*c83a76b0SSuyog Pawar         UWORD32 u4_size_bs_memory, u4_size_qp_memory;
3583*c83a76b0SSuyog Pawar         UWORD32 u4_size_top_4x4_qp_memory;
3584*c83a76b0SSuyog Pawar 
3585*c83a76b0SSuyog Pawar         /*Memory required to store Qp of top4x4 blocks for a CTB row for entire frame*/
3586*c83a76b0SSuyog Pawar         /*Space required per CTB*/
3587*c83a76b0SSuyog Pawar         u4_size_top_4x4_qp_memory = (MAX_CTB_SIZE / 4);
3588*c83a76b0SSuyog Pawar         /*Space required for entire CTB row*/
3589*c83a76b0SSuyog Pawar         u4_size_top_4x4_qp_memory *= u4_ctb_in_a_row;
3590*c83a76b0SSuyog Pawar         /*Space required for entire frame*/
3591*c83a76b0SSuyog Pawar         u4_size_top_4x4_qp_memory *= u4_ctb_rows_in_a_frame;
3592*c83a76b0SSuyog Pawar         /*Space required for multiple bitrate*/
3593*c83a76b0SSuyog Pawar         u4_size_top_4x4_qp_memory *= i4_num_bitrate_inst;
3594*c83a76b0SSuyog Pawar         /*Space required for multiple frames in parallel*/
3595*c83a76b0SSuyog Pawar         u4_size_top_4x4_qp_memory *= i4_num_enc_loop_frm_pllel;
3596*c83a76b0SSuyog Pawar 
3597*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_size = u4_size_top_4x4_qp_memory;
3598*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_QP_TOP_4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3599*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_alignment = 8;
3600*c83a76b0SSuyog Pawar 
3601*c83a76b0SSuyog Pawar         /* Memory allocation of BS and Qp-map for deblocking at CTB-row level:
3602*c83a76b0SSuyog Pawar         ## Boundary Strength(Vertical):
3603*c83a76b0SSuyog Pawar         BS stored per CTB at one stretch i.e. for a 64x CTB first 8 entries belongs to first CTB
3604*c83a76b0SSuyog Pawar         of the row followed by 8 entries of second CTB and so on.
3605*c83a76b0SSuyog Pawar         8 entries: Includes left edge of current CTB and excludes right edge.
3606*c83a76b0SSuyog Pawar         ## Boundary Strength(Horizontal):
3607*c83a76b0SSuyog Pawar         Same as Vertical.
3608*c83a76b0SSuyog Pawar         8 entries:  Includes top edge of current CTB and excludes bottom edge.
3609*c83a76b0SSuyog Pawar 
3610*c83a76b0SSuyog Pawar         ## Qp-map storage:
3611*c83a76b0SSuyog Pawar         T0 T1 T2 T3 T4 T5 ..........to the end of the CTB row
3612*c83a76b0SSuyog Pawar         00 01 02 03 04 05 ..........to the end of the CTB row
3613*c83a76b0SSuyog Pawar         10 11 12 13 14 15 ..........to the end of the CTB row
3614*c83a76b0SSuyog Pawar         20 21 22 23 24 25 ..........to the end of the CTB row
3615*c83a76b0SSuyog Pawar         30 31 32 33 34 35 ..........to the end of the CTB row
3616*c83a76b0SSuyog Pawar         40 41 42 43 44 45 ..........to the end of the CTB row
3617*c83a76b0SSuyog Pawar         ............................to the end of the CTB row
3618*c83a76b0SSuyog Pawar         upto height_of_CTB..........to the end of the CTB row
3619*c83a76b0SSuyog Pawar 
3620*c83a76b0SSuyog Pawar         Qp is stored for each "4x4 block" in a proper 2-D array format (One entry for each 4x4).
3621*c83a76b0SSuyog Pawar         A 2-D array of height= (height_of_CTB +1), and width = (width_of_CTB).
3622*c83a76b0SSuyog Pawar         where,
3623*c83a76b0SSuyog Pawar         => height_of_CTB = number of 4x4 blocks in a CTB  vertically,
3624*c83a76b0SSuyog Pawar         => +1 is done to store Qp of lowest 4x4-block layer of top-CTB
3625*c83a76b0SSuyog Pawar         in order to deblock top edge of current CTB.
3626*c83a76b0SSuyog Pawar         => width_of_CTB  = number of 4x4 blocks in a CTB  horizontally,
3627*c83a76b0SSuyog Pawar         */
3628*c83a76b0SSuyog Pawar 
3629*c83a76b0SSuyog Pawar         /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
3630*c83a76b0SSuyog Pawar         /*1 vertical edge per 8 pixel*/
3631*c83a76b0SSuyog Pawar         u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
3632*c83a76b0SSuyog Pawar         /*Vertical edges for entire width of CTB row*/
3633*c83a76b0SSuyog Pawar         u4_size_bs_memory *= u4_ctb_in_a_row;
3634*c83a76b0SSuyog Pawar         /*Each vertical edge of CTB row is 4 bytes*/
3635*c83a76b0SSuyog Pawar         u4_size_bs_memory = u4_size_bs_memory << 2;
3636*c83a76b0SSuyog Pawar         /*Adding Memory required for storing horizontal BS by doubling*/
3637*c83a76b0SSuyog Pawar         u4_size_bs_memory = u4_size_bs_memory << 1;
3638*c83a76b0SSuyog Pawar 
3639*c83a76b0SSuyog Pawar         /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
3640*c83a76b0SSuyog Pawar         /*Number of 4x4 blocks in the width of a CTB*/
3641*c83a76b0SSuyog Pawar         u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
3642*c83a76b0SSuyog Pawar         /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
3643*c83a76b0SSuyog Pawar         4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
3644*c83a76b0SSuyog Pawar         u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
3645*c83a76b0SSuyog Pawar         /*Storage for entire CTB row*/
3646*c83a76b0SSuyog Pawar         u4_size_qp_memory *= u4_ctb_in_a_row;
3647*c83a76b0SSuyog Pawar 
3648*c83a76b0SSuyog Pawar         /*Multiplying by i4_num_proc_thrds to assign memory for each core*/
3649*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_size =
3650*c83a76b0SSuyog Pawar             i4_num_proc_thrds * (u4_size_bs_memory + u4_size_qp_memory);
3651*c83a76b0SSuyog Pawar 
3652*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_DEBLOCKING].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3653*c83a76b0SSuyog Pawar 
3654*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_alignment = 8;
3655*c83a76b0SSuyog Pawar     }
3656*c83a76b0SSuyog Pawar 
3657*c83a76b0SSuyog Pawar     /* Memory required to store pred for 422 chroma */
3658*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_size =
3659*c83a76b0SSuyog Pawar         i4_num_proc_thrds * MAX_CTB_SIZE * MAX_CTB_SIZE * 2 *
3660*c83a76b0SSuyog Pawar         (i4_chroma_format == IV_YUV_422SP_UV) *
3661*c83a76b0SSuyog Pawar         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3662*c83a76b0SSuyog Pawar 
3663*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3664*c83a76b0SSuyog Pawar 
3665*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_alignment = 8;
3666*c83a76b0SSuyog Pawar 
3667*c83a76b0SSuyog Pawar     /* Memory for inter pred buffers */
3668*c83a76b0SSuyog Pawar     {
3669*c83a76b0SSuyog Pawar         WORD32 i4_num_bufs_per_thread = 0;
3670*c83a76b0SSuyog Pawar 
3671*c83a76b0SSuyog Pawar         WORD32 i4_buf_size_per_cand =
3672*c83a76b0SSuyog Pawar             (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
3673*c83a76b0SSuyog Pawar             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3674*c83a76b0SSuyog Pawar         WORD32 i4_quality_preset =
3675*c83a76b0SSuyog Pawar             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
3676*c83a76b0SSuyog Pawar         switch(i4_quality_preset)
3677*c83a76b0SSuyog Pawar         {
3678*c83a76b0SSuyog Pawar         case IHEVCE_QUALITY_P0:
3679*c83a76b0SSuyog Pawar         {
3680*c83a76b0SSuyog Pawar             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_PQ;
3681*c83a76b0SSuyog Pawar             break;
3682*c83a76b0SSuyog Pawar         }
3683*c83a76b0SSuyog Pawar         case IHEVCE_QUALITY_P2:
3684*c83a76b0SSuyog Pawar         {
3685*c83a76b0SSuyog Pawar             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HQ;
3686*c83a76b0SSuyog Pawar             break;
3687*c83a76b0SSuyog Pawar         }
3688*c83a76b0SSuyog Pawar         case IHEVCE_QUALITY_P3:
3689*c83a76b0SSuyog Pawar         {
3690*c83a76b0SSuyog Pawar             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_MS;
3691*c83a76b0SSuyog Pawar             break;
3692*c83a76b0SSuyog Pawar         }
3693*c83a76b0SSuyog Pawar         case IHEVCE_QUALITY_P4:
3694*c83a76b0SSuyog Pawar         {
3695*c83a76b0SSuyog Pawar             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HS;
3696*c83a76b0SSuyog Pawar             break;
3697*c83a76b0SSuyog Pawar         }
3698*c83a76b0SSuyog Pawar         case IHEVCE_QUALITY_P5:
3699*c83a76b0SSuyog Pawar         case IHEVCE_QUALITY_P6:
3700*c83a76b0SSuyog Pawar         case IHEVCE_QUALITY_P7:
3701*c83a76b0SSuyog Pawar         {
3702*c83a76b0SSuyog Pawar             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_ES;
3703*c83a76b0SSuyog Pawar             break;
3704*c83a76b0SSuyog Pawar         }
3705*c83a76b0SSuyog Pawar         default:
3706*c83a76b0SSuyog Pawar         {
3707*c83a76b0SSuyog Pawar             ASSERT(0);
3708*c83a76b0SSuyog Pawar         }
3709*c83a76b0SSuyog Pawar         }
3710*c83a76b0SSuyog Pawar 
3711*c83a76b0SSuyog Pawar         i4_num_bufs_per_thread += 4;
3712*c83a76b0SSuyog Pawar 
3713*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size =
3714*c83a76b0SSuyog Pawar             i4_num_bufs_per_thread * i4_num_proc_thrds * i4_buf_size_per_cand;
3715*c83a76b0SSuyog Pawar 
3716*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_INTER_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3717*c83a76b0SSuyog Pawar 
3718*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_alignment = 8;
3719*c83a76b0SSuyog Pawar     }
3720*c83a76b0SSuyog Pawar 
3721*c83a76b0SSuyog Pawar     /* Memory required to store chroma intra pred */
3722*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_size =
3723*c83a76b0SSuyog Pawar         i4_num_proc_thrds * (MAX_TU_SIZE) * (MAX_TU_SIZE)*2 * NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD *
3724*c83a76b0SSuyog Pawar         ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3725*c83a76b0SSuyog Pawar         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3726*c83a76b0SSuyog Pawar 
3727*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3728*c83a76b0SSuyog Pawar 
3729*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_alignment = 8;
3730*c83a76b0SSuyog Pawar 
3731*c83a76b0SSuyog Pawar     /* Memory required to store pred for reference substitution output */
3732*c83a76b0SSuyog Pawar     /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
3733*c83a76b0SSuyog Pawar        allocate 16 bytes to the left and 7 bytes to the right to facilitate
3734*c83a76b0SSuyog Pawar        SIMD access */
3735*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_size =
3736*c83a76b0SSuyog Pawar         i4_num_proc_thrds * (((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING)
3737*c83a76b0SSuyog Pawar         + INTRAPRED_SIMD_LEFT_PADDING)*
3738*c83a76b0SSuyog Pawar         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3739*c83a76b0SSuyog Pawar 
3740*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_REF_SUB_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3741*c83a76b0SSuyog Pawar 
3742*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_alignment = 8;
3743*c83a76b0SSuyog Pawar 
3744*c83a76b0SSuyog Pawar     /* Memory required to store pred for reference filtering output */
3745*c83a76b0SSuyog Pawar     /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
3746*c83a76b0SSuyog Pawar        allocate 16 bytes to the left and 7 bytes to the right to facilitate
3747*c83a76b0SSuyog Pawar        SIMD access */
3748*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_size =
3749*c83a76b0SSuyog Pawar         i4_num_proc_thrds * (((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING)
3750*c83a76b0SSuyog Pawar         + INTRAPRED_SIMD_LEFT_PADDING)*
3751*c83a76b0SSuyog Pawar         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3752*c83a76b0SSuyog Pawar 
3753*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_REF_FILT_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3754*c83a76b0SSuyog Pawar 
3755*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_alignment = 8;
3756*c83a76b0SSuyog Pawar 
3757*c83a76b0SSuyog Pawar #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3758*c83a76b0SSuyog Pawar     if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
3759*c83a76b0SSuyog Pawar #endif
3760*c83a76b0SSuyog Pawar     {
3761*c83a76b0SSuyog Pawar         /* Memory assignments for recon storage during CU Recursion */
3762*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size =
3763*c83a76b0SSuyog Pawar             i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
3764*c83a76b0SSuyog Pawar             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3765*c83a76b0SSuyog Pawar 
3766*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3767*c83a76b0SSuyog Pawar 
3768*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
3769*c83a76b0SSuyog Pawar 
3770*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size =
3771*c83a76b0SSuyog Pawar             i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
3772*c83a76b0SSuyog Pawar             ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3773*c83a76b0SSuyog Pawar             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3774*c83a76b0SSuyog Pawar 
3775*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3776*c83a76b0SSuyog Pawar 
3777*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
3778*c83a76b0SSuyog Pawar     }
3779*c83a76b0SSuyog Pawar #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3780*c83a76b0SSuyog Pawar     else
3781*c83a76b0SSuyog Pawar     {
3782*c83a76b0SSuyog Pawar         /* Memory assignments for recon storage during CU Recursion */
3783*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size = 0;
3784*c83a76b0SSuyog Pawar 
3785*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3786*c83a76b0SSuyog Pawar 
3787*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
3788*c83a76b0SSuyog Pawar 
3789*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size = 0;
3790*c83a76b0SSuyog Pawar 
3791*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3792*c83a76b0SSuyog Pawar 
3793*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
3794*c83a76b0SSuyog Pawar     }
3795*c83a76b0SSuyog Pawar #endif
3796*c83a76b0SSuyog Pawar 
3797*c83a76b0SSuyog Pawar #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3798*c83a76b0SSuyog Pawar     if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
3799*c83a76b0SSuyog Pawar #endif
3800*c83a76b0SSuyog Pawar     {
3801*c83a76b0SSuyog Pawar         /* Memory assignments for pred storage during CU Recursion */
3802*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size =
3803*c83a76b0SSuyog Pawar             i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
3804*c83a76b0SSuyog Pawar             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3805*c83a76b0SSuyog Pawar 
3806*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3807*c83a76b0SSuyog Pawar 
3808*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
3809*c83a76b0SSuyog Pawar 
3810*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size =
3811*c83a76b0SSuyog Pawar             i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
3812*c83a76b0SSuyog Pawar             ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3813*c83a76b0SSuyog Pawar             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3814*c83a76b0SSuyog Pawar 
3815*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3816*c83a76b0SSuyog Pawar 
3817*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
3818*c83a76b0SSuyog Pawar     }
3819*c83a76b0SSuyog Pawar #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3820*c83a76b0SSuyog Pawar     else
3821*c83a76b0SSuyog Pawar     {
3822*c83a76b0SSuyog Pawar         /* Memory assignments for pred storage during CU Recursion */
3823*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size = 0;
3824*c83a76b0SSuyog Pawar 
3825*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3826*c83a76b0SSuyog Pawar 
3827*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
3828*c83a76b0SSuyog Pawar 
3829*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size = 0;
3830*c83a76b0SSuyog Pawar 
3831*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3832*c83a76b0SSuyog Pawar 
3833*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
3834*c83a76b0SSuyog Pawar     }
3835*c83a76b0SSuyog Pawar #endif
3836*c83a76b0SSuyog Pawar 
3837*c83a76b0SSuyog Pawar     /* Memory assignments for CTB left luma data storage */
3838*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_size =
3839*c83a76b0SSuyog Pawar         i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
3840*c83a76b0SSuyog Pawar         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3841*c83a76b0SSuyog Pawar 
3842*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3843*c83a76b0SSuyog Pawar 
3844*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_alignment = 8;
3845*c83a76b0SSuyog Pawar 
3846*c83a76b0SSuyog Pawar     /* Memory assignments for CTB left chroma data storage */
3847*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size =
3848*c83a76b0SSuyog Pawar         i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
3849*c83a76b0SSuyog Pawar         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3850*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size <<=
3851*c83a76b0SSuyog Pawar         ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0);
3852*c83a76b0SSuyog Pawar 
3853*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3854*c83a76b0SSuyog Pawar 
3855*c83a76b0SSuyog Pawar     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_alignment = 8;
3856*c83a76b0SSuyog Pawar 
3857*c83a76b0SSuyog Pawar     /* Memory required for SAO */
3858*c83a76b0SSuyog Pawar     {
3859*c83a76b0SSuyog Pawar         WORD32 num_vert_units;
3860*c83a76b0SSuyog Pawar         WORD32 num_horz_units;
3861*c83a76b0SSuyog Pawar         WORD32 ctb_aligned_ht, ctb_aligned_wd;
3862*c83a76b0SSuyog Pawar         WORD32 luma_buf, chroma_buf;
3863*c83a76b0SSuyog Pawar 
3864*c83a76b0SSuyog Pawar         num_vert_units = u4_height / MAX_CTB_SIZE;
3865*c83a76b0SSuyog Pawar         num_horz_units = u4_width / MAX_CTB_SIZE;
3866*c83a76b0SSuyog Pawar 
3867*c83a76b0SSuyog Pawar         ctb_aligned_ht = u4_height;
3868*c83a76b0SSuyog Pawar         ctb_aligned_wd = u4_width;
3869*c83a76b0SSuyog Pawar 
3870*c83a76b0SSuyog Pawar         /* Memory for top buffer. 1 extra width is required for top buf ptr for row 0
3871*c83a76b0SSuyog Pawar         * and 1 extra location is required for top left buf ptr for row 0
3872*c83a76b0SSuyog Pawar         * Also 1 extra byte is required for every row for top left pixel if
3873*c83a76b0SSuyog Pawar         * the top left ptr is to be passed to leaf level unconditionally
3874*c83a76b0SSuyog Pawar         */
3875*c83a76b0SSuyog Pawar         luma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 1) * (num_vert_units + 1)) *
3876*c83a76b0SSuyog Pawar                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3877*c83a76b0SSuyog Pawar         chroma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 2) * (num_vert_units + 1)) *
3878*c83a76b0SSuyog Pawar                      ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3879*c83a76b0SSuyog Pawar 
3880*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_SAO].i4_mem_size =
3881*c83a76b0SSuyog Pawar             (luma_buf + chroma_buf) * (i4_num_bitrate_inst) * (i4_num_enc_loop_frm_pllel);
3882*c83a76b0SSuyog Pawar 
3883*c83a76b0SSuyog Pawar         /* Add the memory required to store the sao information of top ctb for top merge
3884*c83a76b0SSuyog Pawar         * This is frame level buffer.
3885*c83a76b0SSuyog Pawar         */
3886*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_SAO].i4_mem_size +=
3887*c83a76b0SSuyog Pawar             ((num_horz_units * sizeof(sao_enc_t)) * num_vert_units) * (i4_num_bitrate_inst) *
3888*c83a76b0SSuyog Pawar             (i4_num_enc_loop_frm_pllel);
3889*c83a76b0SSuyog Pawar 
3890*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_SAO].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3891*c83a76b0SSuyog Pawar 
3892*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_SAO].i4_mem_alignment = 8;
3893*c83a76b0SSuyog Pawar     }
3894*c83a76b0SSuyog Pawar 
3895*c83a76b0SSuyog Pawar     /* Memory for CU level Coeff data buffer */
3896*c83a76b0SSuyog Pawar     {
3897*c83a76b0SSuyog Pawar         /* 16 additional bytes are required to ensure alignment */
3898*c83a76b0SSuyog Pawar         {
3899*c83a76b0SSuyog Pawar             ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_size =
3900*c83a76b0SSuyog Pawar                 i4_num_proc_thrds *
3901*c83a76b0SSuyog Pawar                 (((MAX_LUMA_COEFFS_CTB +
3902*c83a76b0SSuyog Pawar                    (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
3903*c83a76b0SSuyog Pawar                   16) *
3904*c83a76b0SSuyog Pawar                  (2) * sizeof(UWORD8));
3905*c83a76b0SSuyog Pawar         }
3906*c83a76b0SSuyog Pawar 
3907*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3908*c83a76b0SSuyog Pawar 
3909*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_alignment = 16;
3910*c83a76b0SSuyog Pawar 
3911*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_size =
3912*c83a76b0SSuyog Pawar             i4_num_proc_thrds *
3913*c83a76b0SSuyog Pawar             (MAX_LUMA_COEFFS_CTB +
3914*c83a76b0SSuyog Pawar              (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) *
3915*c83a76b0SSuyog Pawar             sizeof(UWORD8);
3916*c83a76b0SSuyog Pawar 
3917*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3918*c83a76b0SSuyog Pawar 
3919*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_alignment = 16;
3920*c83a76b0SSuyog Pawar     }
3921*c83a76b0SSuyog Pawar 
3922*c83a76b0SSuyog Pawar     /* Memory for CU dequant data buffer */
3923*c83a76b0SSuyog Pawar     {
3924*c83a76b0SSuyog Pawar         /* 16 additional bytes are required to ensure alignment */
3925*c83a76b0SSuyog Pawar         {
3926*c83a76b0SSuyog Pawar             ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_size =
3927*c83a76b0SSuyog Pawar                 i4_num_proc_thrds *
3928*c83a76b0SSuyog Pawar                 (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
3929*c83a76b0SSuyog Pawar                                                         : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
3930*c83a76b0SSuyog Pawar                  8) *
3931*c83a76b0SSuyog Pawar                 (2) * sizeof(WORD16);
3932*c83a76b0SSuyog Pawar         }
3933*c83a76b0SSuyog Pawar 
3934*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3935*c83a76b0SSuyog Pawar 
3936*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_alignment = 16;
3937*c83a76b0SSuyog Pawar     }
3938*c83a76b0SSuyog Pawar 
3939*c83a76b0SSuyog Pawar     /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
3940*c83a76b0SSuyog Pawar     {
3941*c83a76b0SSuyog Pawar         WORD32 i4_memSize_perThread;
3942*c83a76b0SSuyog Pawar 
3943*c83a76b0SSuyog Pawar         WORD32 i4_chroma_memSize_perThread = 0;
3944*c83a76b0SSuyog Pawar         /* 2 bufs each allocated to the two 'enc_loop_cu_final_prms_t' structs */
3945*c83a76b0SSuyog Pawar         /* used in RDOPT to store cur and best modes' data */
3946*c83a76b0SSuyog Pawar         WORD32 i4_luma_memSize_perThread =
3947*c83a76b0SSuyog Pawar             4 * MAX_CU_SIZE * MAX_CU_SIZE *
3948*c83a76b0SSuyog Pawar             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3949*c83a76b0SSuyog Pawar 
3950*c83a76b0SSuyog Pawar         /* 'Glossary' for comments in the following codeBlock */
3951*c83a76b0SSuyog Pawar         /* 1 - 2 Bufs for storing recons of the best modes determined in the */
3952*c83a76b0SSuyog Pawar         /* function 'ihevce_intra_chroma_pred_mode_selector' */
3953*c83a76b0SSuyog Pawar         /* 2 - 1 buf each allocated to the two 'enc_loop_cu_final_prms_t' structs */
3954*c83a76b0SSuyog Pawar         /* used in RDOPT to store cur and best modes' data */
3955*c83a76b0SSuyog Pawar         if(i4_chroma_format == IV_YUV_422SP_UV)
3956*c83a76b0SSuyog Pawar         {
3957*c83a76b0SSuyog Pawar             WORD32 i4_quality_preset =
3958*c83a76b0SSuyog Pawar                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
3959*c83a76b0SSuyog Pawar             switch(i4_quality_preset)
3960*c83a76b0SSuyog Pawar             {
3961*c83a76b0SSuyog Pawar             case IHEVCE_QUALITY_P0:
3962*c83a76b0SSuyog Pawar             {
3963*c83a76b0SSuyog Pawar                 /* 1 */
3964*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
3965*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
3966*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3967*c83a76b0SSuyog Pawar 
3968*c83a76b0SSuyog Pawar                 /* 2 */
3969*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
3970*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
3971*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3972*c83a76b0SSuyog Pawar 
3973*c83a76b0SSuyog Pawar                 break;
3974*c83a76b0SSuyog Pawar             }
3975*c83a76b0SSuyog Pawar             case IHEVCE_QUALITY_P2:
3976*c83a76b0SSuyog Pawar             {
3977*c83a76b0SSuyog Pawar                 /* 1 */
3978*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
3979*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
3980*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3981*c83a76b0SSuyog Pawar 
3982*c83a76b0SSuyog Pawar                 /* 2 */
3983*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
3984*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
3985*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3986*c83a76b0SSuyog Pawar 
3987*c83a76b0SSuyog Pawar                 break;
3988*c83a76b0SSuyog Pawar             }
3989*c83a76b0SSuyog Pawar             case IHEVCE_QUALITY_P3:
3990*c83a76b0SSuyog Pawar             {
3991*c83a76b0SSuyog Pawar                 /* 1 */
3992*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
3993*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
3994*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3995*c83a76b0SSuyog Pawar 
3996*c83a76b0SSuyog Pawar                 /* 2 */
3997*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
3998*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
3999*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4000*c83a76b0SSuyog Pawar 
4001*c83a76b0SSuyog Pawar                 break;
4002*c83a76b0SSuyog Pawar             }
4003*c83a76b0SSuyog Pawar             case IHEVCE_QUALITY_P4:
4004*c83a76b0SSuyog Pawar             {
4005*c83a76b0SSuyog Pawar                 /* 1 */
4006*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4007*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
4008*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4009*c83a76b0SSuyog Pawar 
4010*c83a76b0SSuyog Pawar                 /* 2 */
4011*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4012*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
4013*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4014*c83a76b0SSuyog Pawar 
4015*c83a76b0SSuyog Pawar                 break;
4016*c83a76b0SSuyog Pawar             }
4017*c83a76b0SSuyog Pawar             case IHEVCE_QUALITY_P5:
4018*c83a76b0SSuyog Pawar             {
4019*c83a76b0SSuyog Pawar                 /* 1 */
4020*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4021*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
4022*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4023*c83a76b0SSuyog Pawar 
4024*c83a76b0SSuyog Pawar                 /* 2 */
4025*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4026*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
4027*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4028*c83a76b0SSuyog Pawar 
4029*c83a76b0SSuyog Pawar                 break;
4030*c83a76b0SSuyog Pawar             }
4031*c83a76b0SSuyog Pawar             case IHEVCE_QUALITY_P6:
4032*c83a76b0SSuyog Pawar             case IHEVCE_QUALITY_P7:
4033*c83a76b0SSuyog Pawar             {
4034*c83a76b0SSuyog Pawar                 /* 1 */
4035*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4036*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
4037*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4038*c83a76b0SSuyog Pawar 
4039*c83a76b0SSuyog Pawar                 /* 2 */
4040*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4041*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
4042*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4043*c83a76b0SSuyog Pawar 
4044*c83a76b0SSuyog Pawar                 break;
4045*c83a76b0SSuyog Pawar             }
4046*c83a76b0SSuyog Pawar             }
4047*c83a76b0SSuyog Pawar         }
4048*c83a76b0SSuyog Pawar         else
4049*c83a76b0SSuyog Pawar         {
4050*c83a76b0SSuyog Pawar             WORD32 i4_quality_preset =
4051*c83a76b0SSuyog Pawar                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
4052*c83a76b0SSuyog Pawar             switch(i4_quality_preset)
4053*c83a76b0SSuyog Pawar             {
4054*c83a76b0SSuyog Pawar             case IHEVCE_QUALITY_P0:
4055*c83a76b0SSuyog Pawar             {
4056*c83a76b0SSuyog Pawar                 /* 1 */
4057*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4058*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
4059*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4060*c83a76b0SSuyog Pawar 
4061*c83a76b0SSuyog Pawar                 /* 2 */
4062*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4063*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4064*c83a76b0SSuyog Pawar                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
4065*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4066*c83a76b0SSuyog Pawar 
4067*c83a76b0SSuyog Pawar                 break;
4068*c83a76b0SSuyog Pawar             }
4069*c83a76b0SSuyog Pawar             case IHEVCE_QUALITY_P2:
4070*c83a76b0SSuyog Pawar             {
4071*c83a76b0SSuyog Pawar                 /* 1 */
4072*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4073*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
4074*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4075*c83a76b0SSuyog Pawar 
4076*c83a76b0SSuyog Pawar                 /* 2 */
4077*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4078*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4079*c83a76b0SSuyog Pawar                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
4080*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4081*c83a76b0SSuyog Pawar 
4082*c83a76b0SSuyog Pawar                 break;
4083*c83a76b0SSuyog Pawar             }
4084*c83a76b0SSuyog Pawar             case IHEVCE_QUALITY_P3:
4085*c83a76b0SSuyog Pawar             {
4086*c83a76b0SSuyog Pawar                 /* 1 */
4087*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4088*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
4089*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4090*c83a76b0SSuyog Pawar 
4091*c83a76b0SSuyog Pawar                 /* 2 */
4092*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4093*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4094*c83a76b0SSuyog Pawar                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
4095*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4096*c83a76b0SSuyog Pawar 
4097*c83a76b0SSuyog Pawar                 break;
4098*c83a76b0SSuyog Pawar             }
4099*c83a76b0SSuyog Pawar             case IHEVCE_QUALITY_P4:
4100*c83a76b0SSuyog Pawar             {
4101*c83a76b0SSuyog Pawar                 /* 1 */
4102*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4103*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
4104*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4105*c83a76b0SSuyog Pawar 
4106*c83a76b0SSuyog Pawar                 /* 2 */
4107*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4108*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4109*c83a76b0SSuyog Pawar                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
4110*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4111*c83a76b0SSuyog Pawar 
4112*c83a76b0SSuyog Pawar                 break;
4113*c83a76b0SSuyog Pawar             }
4114*c83a76b0SSuyog Pawar             case IHEVCE_QUALITY_P5:
4115*c83a76b0SSuyog Pawar             {
4116*c83a76b0SSuyog Pawar                 /* 1 */
4117*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4118*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
4119*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4120*c83a76b0SSuyog Pawar 
4121*c83a76b0SSuyog Pawar                 /* 2 */
4122*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4123*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4124*c83a76b0SSuyog Pawar                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
4125*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4126*c83a76b0SSuyog Pawar 
4127*c83a76b0SSuyog Pawar                 break;
4128*c83a76b0SSuyog Pawar             }
4129*c83a76b0SSuyog Pawar             case IHEVCE_QUALITY_P6:
4130*c83a76b0SSuyog Pawar             case IHEVCE_QUALITY_P7:
4131*c83a76b0SSuyog Pawar             {
4132*c83a76b0SSuyog Pawar                 /* 1 */
4133*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4134*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
4135*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4136*c83a76b0SSuyog Pawar 
4137*c83a76b0SSuyog Pawar                 /* 2 */
4138*c83a76b0SSuyog Pawar                 i4_chroma_memSize_perThread +=
4139*c83a76b0SSuyog Pawar                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4140*c83a76b0SSuyog Pawar                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
4141*c83a76b0SSuyog Pawar                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4142*c83a76b0SSuyog Pawar 
4143*c83a76b0SSuyog Pawar                 break;
4144*c83a76b0SSuyog Pawar             }
4145*c83a76b0SSuyog Pawar             }
4146*c83a76b0SSuyog Pawar         }
4147*c83a76b0SSuyog Pawar 
4148*c83a76b0SSuyog Pawar         i4_memSize_perThread = i4_luma_memSize_perThread + i4_chroma_memSize_perThread;
4149*c83a76b0SSuyog Pawar 
4150*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size =
4151*c83a76b0SSuyog Pawar             i4_num_proc_thrds * i4_memSize_perThread * sizeof(UWORD8);
4152*c83a76b0SSuyog Pawar 
4153*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
4154*c83a76b0SSuyog Pawar 
4155*c83a76b0SSuyog Pawar         ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_alignment = 16;
4156*c83a76b0SSuyog Pawar     }
4157*c83a76b0SSuyog Pawar 
4158*c83a76b0SSuyog Pawar     n_tabs = NUM_ENC_LOOP_MEM_RECS;
4159*c83a76b0SSuyog Pawar 
4160*c83a76b0SSuyog Pawar     /*************************************************************************/
4161*c83a76b0SSuyog Pawar     /* --- EncLoop Deblock and SAO sync Dep Mngr Mem requests --                     */
4162*c83a76b0SSuyog Pawar     /*************************************************************************/
4163*c83a76b0SSuyog Pawar 
4164*c83a76b0SSuyog Pawar     /* Fill the memtabs for  EncLoop Deblock Dep Mngr */
4165*c83a76b0SSuyog Pawar     {
4166*c83a76b0SSuyog Pawar         WORD32 count;
4167*c83a76b0SSuyog Pawar         WORD32 num_vert_units;
4168*c83a76b0SSuyog Pawar         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4169*c83a76b0SSuyog Pawar 
4170*c83a76b0SSuyog Pawar         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4171*c83a76b0SSuyog Pawar         ASSERT(num_vert_units > 0);
4172*c83a76b0SSuyog Pawar         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4173*c83a76b0SSuyog Pawar         {
4174*c83a76b0SSuyog Pawar             for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4175*c83a76b0SSuyog Pawar             {
4176*c83a76b0SSuyog Pawar                 n_tabs += ihevce_dmgr_get_mem_recs(
4177*c83a76b0SSuyog Pawar                     &ps_mem_tab[n_tabs],
4178*c83a76b0SSuyog Pawar                     DEP_MNGR_ROW_ROW_SYNC,
4179*c83a76b0SSuyog Pawar                     num_vert_units,
4180*c83a76b0SSuyog Pawar                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4181*c83a76b0SSuyog Pawar                     i4_num_proc_thrds,
4182*c83a76b0SSuyog Pawar                     i4_mem_space);
4183*c83a76b0SSuyog Pawar             }
4184*c83a76b0SSuyog Pawar         }
4185*c83a76b0SSuyog Pawar 
4186*c83a76b0SSuyog Pawar         /* Fill the memtabs for  EncLoop SAO Dep Mngr */
4187*c83a76b0SSuyog Pawar         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4188*c83a76b0SSuyog Pawar         {
4189*c83a76b0SSuyog Pawar             for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4190*c83a76b0SSuyog Pawar             {
4191*c83a76b0SSuyog Pawar                 n_tabs += ihevce_dmgr_get_mem_recs(
4192*c83a76b0SSuyog Pawar                     &ps_mem_tab[n_tabs],
4193*c83a76b0SSuyog Pawar                     DEP_MNGR_ROW_ROW_SYNC,
4194*c83a76b0SSuyog Pawar                     num_vert_units,
4195*c83a76b0SSuyog Pawar                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4196*c83a76b0SSuyog Pawar                     i4_num_proc_thrds,
4197*c83a76b0SSuyog Pawar                     i4_mem_space);
4198*c83a76b0SSuyog Pawar             }
4199*c83a76b0SSuyog Pawar         }
4200*c83a76b0SSuyog Pawar     }
4201*c83a76b0SSuyog Pawar 
4202*c83a76b0SSuyog Pawar     /*************************************************************************/
4203*c83a76b0SSuyog Pawar     /* --- EncLoop Top-Right CU sync Dep Mngr Mem requests --                */
4204*c83a76b0SSuyog Pawar     /*************************************************************************/
4205*c83a76b0SSuyog Pawar 
4206*c83a76b0SSuyog Pawar     /* Fill the memtabs for  Top-Right CU sync Dep Mngr */
4207*c83a76b0SSuyog Pawar     {
4208*c83a76b0SSuyog Pawar         WORD32 count;
4209*c83a76b0SSuyog Pawar         WORD32 num_vert_units;
4210*c83a76b0SSuyog Pawar         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4211*c83a76b0SSuyog Pawar         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4212*c83a76b0SSuyog Pawar         ASSERT(num_vert_units > 0);
4213*c83a76b0SSuyog Pawar 
4214*c83a76b0SSuyog Pawar         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4215*c83a76b0SSuyog Pawar         {
4216*c83a76b0SSuyog Pawar             for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4217*c83a76b0SSuyog Pawar             {
4218*c83a76b0SSuyog Pawar                 n_tabs += ihevce_dmgr_get_mem_recs(
4219*c83a76b0SSuyog Pawar                     &ps_mem_tab[n_tabs],
4220*c83a76b0SSuyog Pawar                     DEP_MNGR_ROW_ROW_SYNC,
4221*c83a76b0SSuyog Pawar                     num_vert_units,
4222*c83a76b0SSuyog Pawar                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4223*c83a76b0SSuyog Pawar                     i4_num_proc_thrds,
4224*c83a76b0SSuyog Pawar                     i4_mem_space);
4225*c83a76b0SSuyog Pawar             }
4226*c83a76b0SSuyog Pawar         }
4227*c83a76b0SSuyog Pawar     }
4228*c83a76b0SSuyog Pawar 
4229*c83a76b0SSuyog Pawar     /*************************************************************************/
4230*c83a76b0SSuyog Pawar     /* --- EncLoop Aux. on Ref. bitrate sync Dep Mngr Mem requests --        */
4231*c83a76b0SSuyog Pawar     /*************************************************************************/
4232*c83a76b0SSuyog Pawar 
4233*c83a76b0SSuyog Pawar     /* Fill the memtabs for  EncLoop Aux. on Ref. bitrate Dep Mngr */
4234*c83a76b0SSuyog Pawar     {
4235*c83a76b0SSuyog Pawar         WORD32 count;
4236*c83a76b0SSuyog Pawar         WORD32 num_vert_units;
4237*c83a76b0SSuyog Pawar         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4238*c83a76b0SSuyog Pawar 
4239*c83a76b0SSuyog Pawar         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4240*c83a76b0SSuyog Pawar         ASSERT(num_vert_units > 0);
4241*c83a76b0SSuyog Pawar 
4242*c83a76b0SSuyog Pawar         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4243*c83a76b0SSuyog Pawar         {
4244*c83a76b0SSuyog Pawar             for(ctr = 1; ctr < i4_num_bitrate_inst; ctr++)
4245*c83a76b0SSuyog Pawar             {
4246*c83a76b0SSuyog Pawar                 n_tabs += ihevce_dmgr_get_mem_recs(
4247*c83a76b0SSuyog Pawar                     &ps_mem_tab[n_tabs],
4248*c83a76b0SSuyog Pawar                     DEP_MNGR_ROW_ROW_SYNC,
4249*c83a76b0SSuyog Pawar                     num_vert_units,
4250*c83a76b0SSuyog Pawar                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4251*c83a76b0SSuyog Pawar                     i4_num_proc_thrds,
4252*c83a76b0SSuyog Pawar                     i4_mem_space);
4253*c83a76b0SSuyog Pawar             }
4254*c83a76b0SSuyog Pawar         }
4255*c83a76b0SSuyog Pawar     }
4256*c83a76b0SSuyog Pawar 
4257*c83a76b0SSuyog Pawar     return (n_tabs);
4258*c83a76b0SSuyog Pawar }
4259*c83a76b0SSuyog Pawar 
4260*c83a76b0SSuyog Pawar /*!
4261*c83a76b0SSuyog Pawar ******************************************************************************
4262*c83a76b0SSuyog Pawar * \if Function name : ihevce_enc_loop_init \endif
4263*c83a76b0SSuyog Pawar *
4264*c83a76b0SSuyog Pawar * \brief
4265*c83a76b0SSuyog Pawar *    Initialization for ENC_LOOP context state structure.
4266*c83a76b0SSuyog Pawar *
4267*c83a76b0SSuyog Pawar * \param[in] ps_mem_tab : pointer to memory descriptors table
4268*c83a76b0SSuyog Pawar * \param[in] ps_init_prms : Create time static parameters
4269*c83a76b0SSuyog Pawar * \param[in] pv_osal_handle : Osal handle
4270*c83a76b0SSuyog Pawar *
4271*c83a76b0SSuyog Pawar * \return
4272*c83a76b0SSuyog Pawar *    None
4273*c83a76b0SSuyog Pawar *
4274*c83a76b0SSuyog Pawar * \author
4275*c83a76b0SSuyog Pawar *  Ittiam
4276*c83a76b0SSuyog Pawar *
4277*c83a76b0SSuyog Pawar *****************************************************************************
4278*c83a76b0SSuyog Pawar */
ihevce_enc_loop_init(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,void * pv_osal_handle,func_selector_t * ps_func_selector,rc_quant_t * ps_rc_quant_ctxt,ihevce_tile_params_t * ps_tile_params_base,WORD32 i4_resolution_id,WORD32 i4_num_enc_loop_frm_pllel,UWORD8 u1_is_popcnt_available)4279*c83a76b0SSuyog Pawar void *ihevce_enc_loop_init(
4280*c83a76b0SSuyog Pawar     iv_mem_rec_t *ps_mem_tab,
4281*c83a76b0SSuyog Pawar     ihevce_static_cfg_params_t *ps_init_prms,
4282*c83a76b0SSuyog Pawar     WORD32 i4_num_proc_thrds,
4283*c83a76b0SSuyog Pawar     void *pv_osal_handle,
4284*c83a76b0SSuyog Pawar     func_selector_t *ps_func_selector,
4285*c83a76b0SSuyog Pawar     rc_quant_t *ps_rc_quant_ctxt,
4286*c83a76b0SSuyog Pawar     ihevce_tile_params_t *ps_tile_params_base,
4287*c83a76b0SSuyog Pawar     WORD32 i4_resolution_id,
4288*c83a76b0SSuyog Pawar     WORD32 i4_num_enc_loop_frm_pllel,
4289*c83a76b0SSuyog Pawar     UWORD8 u1_is_popcnt_available)
4290*c83a76b0SSuyog Pawar {
4291*c83a76b0SSuyog Pawar     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
4292*c83a76b0SSuyog Pawar     ihevce_enc_loop_ctxt_t *ps_ctxt;
4293*c83a76b0SSuyog Pawar     WORD32 ctr, n_tabs;
4294*c83a76b0SSuyog Pawar     UWORD32 u4_width, u4_height;
4295*c83a76b0SSuyog Pawar     UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
4296*c83a76b0SSuyog Pawar     UWORD32 u4_size_bs_memory, u4_size_qp_memory;
4297*c83a76b0SSuyog Pawar     UWORD8 *pu1_deblk_base; /*Store the base address of deblcoking memory*/
4298*c83a76b0SSuyog Pawar     WORD32 i;
4299*c83a76b0SSuyog Pawar     WORD32 i4_num_bitrate_inst =
4300*c83a76b0SSuyog Pawar         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_num_bitrate_instances;
4301*c83a76b0SSuyog Pawar     enc_loop_rc_params_t *ps_enc_loop_rc_params;
4302*c83a76b0SSuyog Pawar     UWORD8 *pu1_sao_base; /* store the base address of sao*/
4303*c83a76b0SSuyog Pawar     UWORD32 u4_ctb_aligned_wd, ctb_size, u4_ctb_aligned_ht, num_vert_units;
4304*c83a76b0SSuyog Pawar     WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
4305*c83a76b0SSuyog Pawar     WORD32 is_hbd_mode = (ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8);
4306*c83a76b0SSuyog Pawar     WORD32 i4_enc_frm_id;
4307*c83a76b0SSuyog Pawar     WORD32 num_cu_in_ctb;
4308*c83a76b0SSuyog Pawar     WORD32 i4_num_tile_cols = 1;  //Default value is 1
4309*c83a76b0SSuyog Pawar 
4310*c83a76b0SSuyog Pawar     /* ENC_LOOP state structure */
4311*c83a76b0SSuyog Pawar     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)ps_mem_tab[ENC_LOOP_CTXT].pv_base;
4312*c83a76b0SSuyog Pawar 
4313*c83a76b0SSuyog Pawar     ps_master_ctxt->i4_num_proc_thrds = i4_num_proc_thrds;
4314*c83a76b0SSuyog Pawar 
4315*c83a76b0SSuyog Pawar     ps_ctxt = (ihevce_enc_loop_ctxt_t *)ps_mem_tab[ENC_LOOP_THRDS_CTXT].pv_base;
4316*c83a76b0SSuyog Pawar     ps_enc_loop_rc_params = (enc_loop_rc_params_t *)ps_mem_tab[ENC_LOOP_RC_PARAMS].pv_base;
4317*c83a76b0SSuyog Pawar     ps_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
4318*c83a76b0SSuyog Pawar     /*Calculation of memory sizes for deblocking*/
4319*c83a76b0SSuyog Pawar     {
4320*c83a76b0SSuyog Pawar         /*width of the input YUV to be encoded. */
4321*c83a76b0SSuyog Pawar         u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
4322*c83a76b0SSuyog Pawar         /*making the width a multiple of CTB size*/
4323*c83a76b0SSuyog Pawar         u4_width += SET_CTB_ALIGN(
4324*c83a76b0SSuyog Pawar             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
4325*c83a76b0SSuyog Pawar 
4326*c83a76b0SSuyog Pawar         u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
4327*c83a76b0SSuyog Pawar 
4328*c83a76b0SSuyog Pawar         /*height of the input YUV to be encoded */
4329*c83a76b0SSuyog Pawar         u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4330*c83a76b0SSuyog Pawar         /*making the height a multiple of CTB size*/
4331*c83a76b0SSuyog Pawar         u4_height += SET_CTB_ALIGN(
4332*c83a76b0SSuyog Pawar             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
4333*c83a76b0SSuyog Pawar 
4334*c83a76b0SSuyog Pawar         u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
4335*c83a76b0SSuyog Pawar 
4336*c83a76b0SSuyog Pawar         /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
4337*c83a76b0SSuyog Pawar         /*1 vertical edge per 8 pixel*/
4338*c83a76b0SSuyog Pawar         u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
4339*c83a76b0SSuyog Pawar         /*Vertical edges for entire width of CTB row*/
4340*c83a76b0SSuyog Pawar         u4_size_bs_memory *= u4_ctb_in_a_row;
4341*c83a76b0SSuyog Pawar         /*Each vertical edge of CTB row is 4 bytes*/
4342*c83a76b0SSuyog Pawar         u4_size_bs_memory = u4_size_bs_memory << 2;
4343*c83a76b0SSuyog Pawar         /*Adding Memory required for storing horizontal BS by doubling*/
4344*c83a76b0SSuyog Pawar         u4_size_bs_memory = u4_size_bs_memory << 1;
4345*c83a76b0SSuyog Pawar 
4346*c83a76b0SSuyog Pawar         /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
4347*c83a76b0SSuyog Pawar         /*Number of 4x4 blocks in the width of a CTB*/
4348*c83a76b0SSuyog Pawar         u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
4349*c83a76b0SSuyog Pawar         /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
4350*c83a76b0SSuyog Pawar         4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
4351*c83a76b0SSuyog Pawar         u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
4352*c83a76b0SSuyog Pawar         /*Storage for entire CTB row*/
4353*c83a76b0SSuyog Pawar         u4_size_qp_memory *= u4_ctb_in_a_row;
4354*c83a76b0SSuyog Pawar 
4355*c83a76b0SSuyog Pawar         pu1_deblk_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_DEBLOCKING].pv_base;
4356*c83a76b0SSuyog Pawar     }
4357*c83a76b0SSuyog Pawar 
4358*c83a76b0SSuyog Pawar     /*Derive the base pointer of sao*/
4359*c83a76b0SSuyog Pawar     pu1_sao_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_SAO].pv_base;
4360*c83a76b0SSuyog Pawar     ctb_size = (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
4361*c83a76b0SSuyog Pawar     u4_ctb_aligned_wd = u4_width;
4362*c83a76b0SSuyog Pawar     u4_ctb_aligned_ht = u4_height;
4363*c83a76b0SSuyog Pawar     num_vert_units = (u4_height) / ctb_size;
4364*c83a76b0SSuyog Pawar 
4365*c83a76b0SSuyog Pawar     for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
4366*c83a76b0SSuyog Pawar     {
4367*c83a76b0SSuyog Pawar         ps_master_ctxt->aps_enc_loop_thrd_ctxt[ctr] = ps_ctxt;
4368*c83a76b0SSuyog Pawar         /* Store Tile params base into EncLoop context */
4369*c83a76b0SSuyog Pawar         ps_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
4370*c83a76b0SSuyog Pawar         ihevce_cmn_utils_instr_set_router(
4371*c83a76b0SSuyog Pawar             &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
4372*c83a76b0SSuyog Pawar         ihevce_sifter_sad_fxn_assigner(
4373*c83a76b0SSuyog Pawar             (FT_SAD_EVALUATOR **)(&ps_ctxt->pv_evalsad_pt_npu_mxn_8bit), ps_init_prms->e_arch_type);
4374*c83a76b0SSuyog Pawar         ps_ctxt->i4_max_search_range_horizontal =
4375*c83a76b0SSuyog Pawar             ps_init_prms->s_config_prms.i4_max_search_range_horz;
4376*c83a76b0SSuyog Pawar         ps_ctxt->i4_max_search_range_vertical =
4377*c83a76b0SSuyog Pawar             ps_init_prms->s_config_prms.i4_max_search_range_vert;
4378*c83a76b0SSuyog Pawar 
4379*c83a76b0SSuyog Pawar         ps_ctxt->i4_quality_preset =
4380*c83a76b0SSuyog Pawar             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
4381*c83a76b0SSuyog Pawar 
4382*c83a76b0SSuyog Pawar         if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7)
4383*c83a76b0SSuyog Pawar         {
4384*c83a76b0SSuyog Pawar             ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6;
4385*c83a76b0SSuyog Pawar         }
4386*c83a76b0SSuyog Pawar 
4387*c83a76b0SSuyog Pawar         ps_ctxt->i4_num_proc_thrds = ps_master_ctxt->i4_num_proc_thrds;
4388*c83a76b0SSuyog Pawar 
4389*c83a76b0SSuyog Pawar         ps_ctxt->i4_rc_pass = ps_init_prms->s_pass_prms.i4_pass;
4390*c83a76b0SSuyog Pawar 
4391*c83a76b0SSuyog Pawar         ps_ctxt->u1_chroma_array_type = (i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1;
4392*c83a76b0SSuyog Pawar 
4393*c83a76b0SSuyog Pawar         ps_ctxt->s_deblk_prms.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
4394*c83a76b0SSuyog Pawar 
4395*c83a76b0SSuyog Pawar         ps_ctxt->pi2_scal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_SCALE_MAT].pv_base;
4396*c83a76b0SSuyog Pawar 
4397*c83a76b0SSuyog Pawar         ps_ctxt->pi2_rescal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_RESCALE_MAT].pv_base;
4398*c83a76b0SSuyog Pawar 
4399*c83a76b0SSuyog Pawar         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
4400*c83a76b0SSuyog Pawar         {
4401*c83a76b0SSuyog Pawar             ps_ctxt->i4_use_ctb_level_lamda = 0;
4402*c83a76b0SSuyog Pawar         }
4403*c83a76b0SSuyog Pawar         else
4404*c83a76b0SSuyog Pawar         {
4405*c83a76b0SSuyog Pawar             ps_ctxt->i4_use_ctb_level_lamda = 0;
4406*c83a76b0SSuyog Pawar         }
4407*c83a76b0SSuyog Pawar 
4408*c83a76b0SSuyog Pawar         /** Register the function selector pointer*/
4409*c83a76b0SSuyog Pawar         ps_ctxt->ps_func_selector = ps_func_selector;
4410*c83a76b0SSuyog Pawar 
4411*c83a76b0SSuyog Pawar         ps_ctxt->s_mc_ctxt.ps_func_selector = ps_func_selector;
4412*c83a76b0SSuyog Pawar 
4413*c83a76b0SSuyog Pawar         /* Initialization for non-distributed mode */
4414*c83a76b0SSuyog Pawar         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[0] = 0;
4415*c83a76b0SSuyog Pawar         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[1] = 0;
4416*c83a76b0SSuyog Pawar         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[2] = 0;
4417*c83a76b0SSuyog Pawar         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[3] = 0;
4418*c83a76b0SSuyog Pawar 
4419*c83a76b0SSuyog Pawar         ps_ctxt->s_deblk_prms.ps_func_selector = ps_func_selector;
4420*c83a76b0SSuyog Pawar         ps_ctxt->i4_top_row_luma_stride = (u4_width + MAX_CU_SIZE + 1);
4421*c83a76b0SSuyog Pawar 
4422*c83a76b0SSuyog Pawar         ps_ctxt->i4_frm_top_row_luma_size =
4423*c83a76b0SSuyog Pawar             ps_ctxt->i4_top_row_luma_stride * (u4_ctb_rows_in_a_frame + 1);
4424*c83a76b0SSuyog Pawar 
4425*c83a76b0SSuyog Pawar         ps_ctxt->i4_top_row_chroma_stride = (u4_width + MAX_CU_SIZE + 2);
4426*c83a76b0SSuyog Pawar 
4427*c83a76b0SSuyog Pawar         ps_ctxt->i4_frm_top_row_chroma_size =
4428*c83a76b0SSuyog Pawar             ps_ctxt->i4_top_row_chroma_stride * (u4_ctb_rows_in_a_frame + 1);
4429*c83a76b0SSuyog Pawar 
4430*c83a76b0SSuyog Pawar         {
4431*c83a76b0SSuyog Pawar             for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4432*c83a76b0SSuyog Pawar             {
4433*c83a76b0SSuyog Pawar                 /* +1 is to provision top left pel */
4434*c83a76b0SSuyog Pawar                 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
4435*c83a76b0SSuyog Pawar                     (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_LUMA].pv_base + 1 +
4436*c83a76b0SSuyog Pawar                     (ps_ctxt->i4_frm_top_row_luma_size * i4_enc_frm_id * i4_num_bitrate_inst);
4437*c83a76b0SSuyog Pawar 
4438*c83a76b0SSuyog Pawar                 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4439*c83a76b0SSuyog Pawar                 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
4440*c83a76b0SSuyog Pawar                     (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] +
4441*c83a76b0SSuyog Pawar                     ps_ctxt->i4_top_row_luma_stride;
4442*c83a76b0SSuyog Pawar 
4443*c83a76b0SSuyog Pawar                 /* +2 is to provision top left pel */
4444*c83a76b0SSuyog Pawar                 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
4445*c83a76b0SSuyog Pawar                     (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_CHROMA].pv_base + 2 +
4446*c83a76b0SSuyog Pawar                     (ps_ctxt->i4_frm_top_row_chroma_size * i4_enc_frm_id * i4_num_bitrate_inst);
4447*c83a76b0SSuyog Pawar 
4448*c83a76b0SSuyog Pawar                 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4449*c83a76b0SSuyog Pawar                 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
4450*c83a76b0SSuyog Pawar                     (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] +
4451*c83a76b0SSuyog Pawar                     ps_ctxt->i4_top_row_chroma_stride;
4452*c83a76b0SSuyog Pawar             }
4453*c83a76b0SSuyog Pawar         }
4454*c83a76b0SSuyog Pawar 
4455*c83a76b0SSuyog Pawar         /* +1 is to provision top left nbr */
4456*c83a76b0SSuyog Pawar         ps_ctxt->i4_top_row_nbr_stride = (((u4_width + MAX_CU_SIZE) >> 2) + 1);
4457*c83a76b0SSuyog Pawar         ps_ctxt->i4_frm_top_row_nbr_size =
4458*c83a76b0SSuyog Pawar             ps_ctxt->i4_top_row_nbr_stride * (u4_ctb_rows_in_a_frame + 1);
4459*c83a76b0SSuyog Pawar         for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4460*c83a76b0SSuyog Pawar         {
4461*c83a76b0SSuyog Pawar             ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] =
4462*c83a76b0SSuyog Pawar                 (nbr_4x4_t *)ps_mem_tab[ENC_LOOP_TOP_NBR4X4].pv_base + 1 +
4463*c83a76b0SSuyog Pawar                 (ps_ctxt->i4_frm_top_row_nbr_size * i4_enc_frm_id * i4_num_bitrate_inst);
4464*c83a76b0SSuyog Pawar             ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] += ps_ctxt->i4_top_row_nbr_stride;
4465*c83a76b0SSuyog Pawar         }
4466*c83a76b0SSuyog Pawar 
4467*c83a76b0SSuyog Pawar         num_cu_in_ctb = ctb_size / MIN_CU_SIZE;
4468*c83a76b0SSuyog Pawar         num_cu_in_ctb *= num_cu_in_ctb;
4469*c83a76b0SSuyog Pawar 
4470*c83a76b0SSuyog Pawar         /* pointer incremented by 1 row to avoid OOB access in 0th row */
4471*c83a76b0SSuyog Pawar 
4472*c83a76b0SSuyog Pawar         /* Memory for CU level Coeff data buffer */
4473*c83a76b0SSuyog Pawar         {
4474*c83a76b0SSuyog Pawar             WORD32 i4_16byte_boundary_overshoot;
4475*c83a76b0SSuyog Pawar             WORD32 buf_size_per_cu;
4476*c83a76b0SSuyog Pawar             WORD32 buf_size_per_thread_wo_alignment_req;
4477*c83a76b0SSuyog Pawar             WORD32 buf_size_per_thread;
4478*c83a76b0SSuyog Pawar 
4479*c83a76b0SSuyog Pawar             buf_size_per_cu =
4480*c83a76b0SSuyog Pawar                 ((MAX_LUMA_COEFFS_CTB +
4481*c83a76b0SSuyog Pawar                   (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
4482*c83a76b0SSuyog Pawar                  16) *
4483*c83a76b0SSuyog Pawar                 sizeof(UWORD8);
4484*c83a76b0SSuyog Pawar             buf_size_per_thread_wo_alignment_req = buf_size_per_cu - 16 * sizeof(UWORD8);
4485*c83a76b0SSuyog Pawar 
4486*c83a76b0SSuyog Pawar             {
4487*c83a76b0SSuyog Pawar                 buf_size_per_thread = buf_size_per_cu * (2);
4488*c83a76b0SSuyog Pawar 
4489*c83a76b0SSuyog Pawar                 for(i = 0; i < 2; i++)
4490*c83a76b0SSuyog Pawar                 {
4491*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[i].pu1_cu_coeffs =
4492*c83a76b0SSuyog Pawar                         (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].pv_base +
4493*c83a76b0SSuyog Pawar                         (ctr * buf_size_per_thread) + (i * buf_size_per_cu);
4494*c83a76b0SSuyog Pawar 
4495*c83a76b0SSuyog Pawar                     i4_16byte_boundary_overshoot =
4496*c83a76b0SSuyog Pawar                         ((LWORD64)ps_ctxt->as_cu_prms[i].pu1_cu_coeffs & 0xf);
4497*c83a76b0SSuyog Pawar 
4498*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[i].pu1_cu_coeffs += (16 - i4_16byte_boundary_overshoot);
4499*c83a76b0SSuyog Pawar                 }
4500*c83a76b0SSuyog Pawar             }
4501*c83a76b0SSuyog Pawar 
4502*c83a76b0SSuyog Pawar             ps_ctxt->pu1_cu_recur_coeffs =
4503*c83a76b0SSuyog Pawar                 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].pv_base +
4504*c83a76b0SSuyog Pawar                 (ctr * buf_size_per_thread_wo_alignment_req);
4505*c83a76b0SSuyog Pawar         }
4506*c83a76b0SSuyog Pawar 
4507*c83a76b0SSuyog Pawar         /* Memory for CU dequant data buffer */
4508*c83a76b0SSuyog Pawar         {
4509*c83a76b0SSuyog Pawar             WORD32 buf_size_per_thread;
4510*c83a76b0SSuyog Pawar             WORD32 i4_16byte_boundary_overshoot;
4511*c83a76b0SSuyog Pawar 
4512*c83a76b0SSuyog Pawar             WORD32 buf_size_per_cu =
4513*c83a76b0SSuyog Pawar                 (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
4514*c83a76b0SSuyog Pawar                                                         : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
4515*c83a76b0SSuyog Pawar                  8) *
4516*c83a76b0SSuyog Pawar                 sizeof(WORD16);
4517*c83a76b0SSuyog Pawar 
4518*c83a76b0SSuyog Pawar             {
4519*c83a76b0SSuyog Pawar                 buf_size_per_thread = buf_size_per_cu * 2;
4520*c83a76b0SSuyog Pawar 
4521*c83a76b0SSuyog Pawar                 for(i = 0; i < 2; i++)
4522*c83a76b0SSuyog Pawar                 {
4523*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
4524*c83a76b0SSuyog Pawar                         (WORD16
4525*c83a76b0SSuyog Pawar                              *)((UWORD8 *)ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].pv_base + (ctr * buf_size_per_thread) + (i * buf_size_per_cu));
4526*c83a76b0SSuyog Pawar 
4527*c83a76b0SSuyog Pawar                     i4_16byte_boundary_overshoot =
4528*c83a76b0SSuyog Pawar                         ((LWORD64)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs & 0xf);
4529*c83a76b0SSuyog Pawar 
4530*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
4531*c83a76b0SSuyog Pawar                         (WORD16
4532*c83a76b0SSuyog Pawar                              *)((UWORD8 *)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs + (16 - i4_16byte_boundary_overshoot));
4533*c83a76b0SSuyog Pawar                 }
4534*c83a76b0SSuyog Pawar             }
4535*c83a76b0SSuyog Pawar         }
4536*c83a76b0SSuyog Pawar 
4537*c83a76b0SSuyog Pawar         /*------ Deblocking memory's pointer assignment starts ------*/
4538*c83a76b0SSuyog Pawar 
4539*c83a76b0SSuyog Pawar         /*Assign stride = 4x4 blocks in horizontal edge*/
4540*c83a76b0SSuyog Pawar         ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
4541*c83a76b0SSuyog Pawar 
4542*c83a76b0SSuyog Pawar         ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size =
4543*c83a76b0SSuyog Pawar             ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd * u4_ctb_rows_in_a_frame;
4544*c83a76b0SSuyog Pawar 
4545*c83a76b0SSuyog Pawar         /*Assign frame level memory to store the Qp of
4546*c83a76b0SSuyog Pawar         top 4x4 neighbours of each CTB row*/
4547*c83a76b0SSuyog Pawar         for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4548*c83a76b0SSuyog Pawar         {
4549*c83a76b0SSuyog Pawar             ps_ctxt->s_deblk_ctbrow_prms.api1_qp_top_4x4_ctb_row[i4_enc_frm_id] =
4550*c83a76b0SSuyog Pawar                 (WORD8 *)ps_mem_tab[ENC_LOOP_QP_TOP_4X4].pv_base +
4551*c83a76b0SSuyog Pawar                 (ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size * i4_num_bitrate_inst *
4552*c83a76b0SSuyog Pawar                  i4_enc_frm_id);
4553*c83a76b0SSuyog Pawar         }
4554*c83a76b0SSuyog Pawar 
4555*c83a76b0SSuyog Pawar         ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_vert = (UWORD32 *)pu1_deblk_base;
4556*c83a76b0SSuyog Pawar 
4557*c83a76b0SSuyog Pawar         ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_horz =
4558*c83a76b0SSuyog Pawar             (UWORD32 *)(pu1_deblk_base + (u4_size_bs_memory >> 1));
4559*c83a76b0SSuyog Pawar 
4560*c83a76b0SSuyog Pawar         ps_ctxt->s_deblk_ctbrow_prms.pi1_ctb_row_qp = (WORD8 *)pu1_deblk_base + u4_size_bs_memory;
4561*c83a76b0SSuyog Pawar 
4562*c83a76b0SSuyog Pawar         /*Assign stride = 4x4 blocks in horizontal edge*/
4563*c83a76b0SSuyog Pawar         ps_ctxt->s_deblk_ctbrow_prms.u4_qp_buffer_stride = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
4564*c83a76b0SSuyog Pawar 
4565*c83a76b0SSuyog Pawar         pu1_deblk_base += (u4_size_bs_memory + u4_size_qp_memory);
4566*c83a76b0SSuyog Pawar 
4567*c83a76b0SSuyog Pawar         /*------Deblocking memory's pointer assignment ends ------*/
4568*c83a76b0SSuyog Pawar 
4569*c83a76b0SSuyog Pawar         /*------SAO memory's pointer assignment starts------------*/
4570*c83a76b0SSuyog Pawar         if(!is_hbd_mode)
4571*c83a76b0SSuyog Pawar         {
4572*c83a76b0SSuyog Pawar             /* 2 is added to allocate top left pixel */
4573*c83a76b0SSuyog Pawar             ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size =
4574*c83a76b0SSuyog Pawar                 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1);
4575*c83a76b0SSuyog Pawar             ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size =
4576*c83a76b0SSuyog Pawar                 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 2) * (num_vert_units + 1);
4577*c83a76b0SSuyog Pawar             ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units =
4578*c83a76b0SSuyog Pawar                 num_vert_units * (u4_ctb_aligned_wd / MAX_CTB_SIZE);
4579*c83a76b0SSuyog Pawar 
4580*c83a76b0SSuyog Pawar             for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4581*c83a76b0SSuyog Pawar             {
4582*c83a76b0SSuyog Pawar                 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_luma[i4_enc_frm_id] =
4583*c83a76b0SSuyog Pawar                     pu1_sao_base +
4584*c83a76b0SSuyog Pawar                     ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
4585*c83a76b0SSuyog Pawar                       ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
4586*c83a76b0SSuyog Pawar                      i4_num_bitrate_inst * i4_enc_frm_id) +  // move to the next frame_id
4587*c83a76b0SSuyog Pawar                     u4_ctb_aligned_wd +
4588*c83a76b0SSuyog Pawar                     2;
4589*c83a76b0SSuyog Pawar 
4590*c83a76b0SSuyog Pawar                 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_chroma[i4_enc_frm_id] =
4591*c83a76b0SSuyog Pawar                     pu1_sao_base +
4592*c83a76b0SSuyog Pawar                     ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
4593*c83a76b0SSuyog Pawar                       ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
4594*c83a76b0SSuyog Pawar                      i4_num_bitrate_inst * i4_enc_frm_id) +
4595*c83a76b0SSuyog Pawar                     +u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1) +
4596*c83a76b0SSuyog Pawar                     u4_ctb_aligned_wd + 4;
4597*c83a76b0SSuyog Pawar 
4598*c83a76b0SSuyog Pawar                 ps_ctxt->s_sao_ctxt_t.aps_frm_top_ctb_sao[i4_enc_frm_id] = (sao_enc_t *) (pu1_sao_base +
4599*c83a76b0SSuyog Pawar                     ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size + ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size)
4600*c83a76b0SSuyog Pawar                     *i4_num_bitrate_inst*i4_num_enc_loop_frm_pllel) +
4601*c83a76b0SSuyog Pawar                     (ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units * sizeof(sao_enc_t) *i4_num_bitrate_inst * i4_enc_frm_id));
4602*c83a76b0SSuyog Pawar             }
4603*c83a76b0SSuyog Pawar             ps_ctxt->s_sao_ctxt_t.i4_ctb_size =
4604*c83a76b0SSuyog Pawar                 (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
4605*c83a76b0SSuyog Pawar             ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd = u4_ctb_aligned_wd;
4606*c83a76b0SSuyog Pawar         }
4607*c83a76b0SSuyog Pawar 
4608*c83a76b0SSuyog Pawar         /*------SAO memory's pointer assignment ends------------*/
4609*c83a76b0SSuyog Pawar 
4610*c83a76b0SSuyog Pawar         /* perform all one time initialisation here */
4611*c83a76b0SSuyog Pawar         ps_ctxt->i4_nbr_map_strd = MAX_PU_IN_CTB_ROW + 1 + 8;
4612*c83a76b0SSuyog Pawar 
4613*c83a76b0SSuyog Pawar         ps_ctxt->pu1_ctb_nbr_map = ps_ctxt->au1_nbr_ctb_map[0];
4614*c83a76b0SSuyog Pawar 
4615*c83a76b0SSuyog Pawar         ps_ctxt->i4_deblock_type = ps_init_prms->s_coding_tools_prms.i4_deblocking_type;
4616*c83a76b0SSuyog Pawar 
4617*c83a76b0SSuyog Pawar         /* move the pointer to 1,2 location */
4618*c83a76b0SSuyog Pawar         ps_ctxt->pu1_ctb_nbr_map += ps_ctxt->i4_nbr_map_strd;
4619*c83a76b0SSuyog Pawar         ps_ctxt->pu1_ctb_nbr_map++;
4620*c83a76b0SSuyog Pawar 
4621*c83a76b0SSuyog Pawar         ps_ctxt->i4_cu_csbf_strd = MAX_TU_IN_CTB_ROW;
4622*c83a76b0SSuyog Pawar 
4623*c83a76b0SSuyog Pawar         CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map4x4TU, 1, 4, ps_ctxt->i4_cu_csbf_strd);
4624*c83a76b0SSuyog Pawar 
4625*c83a76b0SSuyog Pawar         CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map8x8TU, 4, 8, ps_ctxt->i4_cu_csbf_strd);
4626*c83a76b0SSuyog Pawar 
4627*c83a76b0SSuyog Pawar         CREATE_SUBBLOCK2CSBFID_MAP(
4628*c83a76b0SSuyog Pawar             gai4_subBlock2csbfId_map16x16TU, 16, 16, ps_ctxt->i4_cu_csbf_strd);
4629*c83a76b0SSuyog Pawar 
4630*c83a76b0SSuyog Pawar         CREATE_SUBBLOCK2CSBFID_MAP(
4631*c83a76b0SSuyog Pawar             gai4_subBlock2csbfId_map32x32TU, 64, 32, ps_ctxt->i4_cu_csbf_strd);
4632*c83a76b0SSuyog Pawar 
4633*c83a76b0SSuyog Pawar         /* For both instance initialise the chroma dequant start idx */
4634*c83a76b0SSuyog Pawar         ps_ctxt->as_cu_prms[0].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
4635*c83a76b0SSuyog Pawar         ps_ctxt->as_cu_prms[1].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
4636*c83a76b0SSuyog Pawar 
4637*c83a76b0SSuyog Pawar         /* initialise all the function pointer tables */
4638*c83a76b0SSuyog Pawar         {
4639*c83a76b0SSuyog Pawar             ps_ctxt->pv_inter_rdopt_cu_mc_mvp =
4640*c83a76b0SSuyog Pawar                 (pf_inter_rdopt_cu_mc_mvp)ihevce_inter_rdopt_cu_mc_mvp;
4641*c83a76b0SSuyog Pawar 
4642*c83a76b0SSuyog Pawar             ps_ctxt->pv_inter_rdopt_cu_ntu = (pf_inter_rdopt_cu_ntu)ihevce_inter_rdopt_cu_ntu;
4643*c83a76b0SSuyog Pawar 
4644*c83a76b0SSuyog Pawar #if ENABLE_RDO_BASED_TU_RECURSION
4645*c83a76b0SSuyog Pawar             if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4646*c83a76b0SSuyog Pawar             {
4647*c83a76b0SSuyog Pawar                 ps_ctxt->pv_inter_rdopt_cu_ntu =
4648*c83a76b0SSuyog Pawar                     (pf_inter_rdopt_cu_ntu)ihevce_inter_tu_tree_selector_and_rdopt_cost_computer;
4649*c83a76b0SSuyog Pawar             }
4650*c83a76b0SSuyog Pawar #endif
4651*c83a76b0SSuyog Pawar             ps_ctxt->pv_intra_chroma_pred_mode_selector =
4652*c83a76b0SSuyog Pawar                 (pf_intra_chroma_pred_mode_selector)ihevce_intra_chroma_pred_mode_selector;
4653*c83a76b0SSuyog Pawar             ps_ctxt->pv_intra_rdopt_cu_ntu = (pf_intra_rdopt_cu_ntu)ihevce_intra_rdopt_cu_ntu;
4654*c83a76b0SSuyog Pawar             ps_ctxt->pv_final_rdopt_mode_prcs =
4655*c83a76b0SSuyog Pawar                 (pf_final_rdopt_mode_prcs)ihevce_final_rdopt_mode_prcs;
4656*c83a76b0SSuyog Pawar             ps_ctxt->pv_store_cu_results = (pf_store_cu_results)ihevce_store_cu_results;
4657*c83a76b0SSuyog Pawar             ps_ctxt->pv_enc_loop_cu_bot_copy = (pf_enc_loop_cu_bot_copy)ihevce_enc_loop_cu_bot_copy;
4658*c83a76b0SSuyog Pawar             ps_ctxt->pv_enc_loop_ctb_left_copy =
4659*c83a76b0SSuyog Pawar                 (pf_enc_loop_ctb_left_copy)ihevce_enc_loop_ctb_left_copy;
4660*c83a76b0SSuyog Pawar 
4661*c83a76b0SSuyog Pawar             /* Memory assignments for chroma intra pred buffer */
4662*c83a76b0SSuyog Pawar             {
4663*c83a76b0SSuyog Pawar                 WORD32 pred_buf_size =
4664*c83a76b0SSuyog Pawar                     MAX_TU_SIZE * MAX_TU_SIZE * 2 * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4665*c83a76b0SSuyog Pawar                 WORD32 pred_buf_size_per_thread =
4666*c83a76b0SSuyog Pawar                     NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD * pred_buf_size;
4667*c83a76b0SSuyog Pawar                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].pv_base +
4668*c83a76b0SSuyog Pawar                                    (ctr * pred_buf_size_per_thread);
4669*c83a76b0SSuyog Pawar 
4670*c83a76b0SSuyog Pawar                 for(i = 0; i < NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD; i++)
4671*c83a76b0SSuyog Pawar                 {
4672*c83a76b0SSuyog Pawar                     ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[i].pv_pred_data = pu1_base;
4673*c83a76b0SSuyog Pawar                     pu1_base += pred_buf_size;
4674*c83a76b0SSuyog Pawar                 }
4675*c83a76b0SSuyog Pawar             }
4676*c83a76b0SSuyog Pawar 
4677*c83a76b0SSuyog Pawar             /* Memory assignments for reference substitution output */
4678*c83a76b0SSuyog Pawar             {
4679*c83a76b0SSuyog Pawar                 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING
4680*c83a76b0SSuyog Pawar                                        + INTRAPRED_SIMD_LEFT_PADDING);
4681*c83a76b0SSuyog Pawar                 WORD32 pred_buf_size_per_thread = pred_buf_size;
4682*c83a76b0SSuyog Pawar                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_SUB_OUT].pv_base +
4683*c83a76b0SSuyog Pawar                                    (ctr * pred_buf_size_per_thread);
4684*c83a76b0SSuyog Pawar 
4685*c83a76b0SSuyog Pawar                 ps_ctxt->pv_ref_sub_out = pu1_base + INTRAPRED_SIMD_LEFT_PADDING;
4686*c83a76b0SSuyog Pawar             }
4687*c83a76b0SSuyog Pawar 
4688*c83a76b0SSuyog Pawar             /* Memory assignments for reference filtering output */
4689*c83a76b0SSuyog Pawar             {
4690*c83a76b0SSuyog Pawar                 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING
4691*c83a76b0SSuyog Pawar                                        + INTRAPRED_SIMD_LEFT_PADDING);
4692*c83a76b0SSuyog Pawar                 WORD32 pred_buf_size_per_thread = pred_buf_size;
4693*c83a76b0SSuyog Pawar                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_FILT_OUT].pv_base +
4694*c83a76b0SSuyog Pawar                                    (ctr * pred_buf_size_per_thread);
4695*c83a76b0SSuyog Pawar 
4696*c83a76b0SSuyog Pawar                 ps_ctxt->pv_ref_filt_out = pu1_base + INTRAPRED_SIMD_LEFT_PADDING;
4697*c83a76b0SSuyog Pawar             }
4698*c83a76b0SSuyog Pawar 
4699*c83a76b0SSuyog Pawar             /* Memory assignments for recon storage during CU Recursion */
4700*c83a76b0SSuyog Pawar #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4701*c83a76b0SSuyog Pawar             if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4702*c83a76b0SSuyog Pawar #endif
4703*c83a76b0SSuyog Pawar             {
4704*c83a76b0SSuyog Pawar                 {
4705*c83a76b0SSuyog Pawar                     WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
4706*c83a76b0SSuyog Pawar                     WORD32 pred_buf_size_per_thread = pred_buf_size;
4707*c83a76b0SSuyog Pawar                     UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].pv_base +
4708*c83a76b0SSuyog Pawar                                        (ctr * pred_buf_size_per_thread);
4709*c83a76b0SSuyog Pawar 
4710*c83a76b0SSuyog Pawar                     ps_ctxt->pv_cu_luma_recon = pu1_base;
4711*c83a76b0SSuyog Pawar                 }
4712*c83a76b0SSuyog Pawar 
4713*c83a76b0SSuyog Pawar                 {
4714*c83a76b0SSuyog Pawar                     WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
4715*c83a76b0SSuyog Pawar                                            ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4716*c83a76b0SSuyog Pawar                     WORD32 pred_buf_size_per_thread = pred_buf_size;
4717*c83a76b0SSuyog Pawar                     UWORD8 *pu1_base =
4718*c83a76b0SSuyog Pawar                         (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].pv_base +
4719*c83a76b0SSuyog Pawar                         (ctr * pred_buf_size_per_thread);
4720*c83a76b0SSuyog Pawar 
4721*c83a76b0SSuyog Pawar                     ps_ctxt->pv_cu_chrma_recon = pu1_base;
4722*c83a76b0SSuyog Pawar                 }
4723*c83a76b0SSuyog Pawar             }
4724*c83a76b0SSuyog Pawar 
4725*c83a76b0SSuyog Pawar             /* Memory assignments for pred storage during CU Recursion */
4726*c83a76b0SSuyog Pawar #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4727*c83a76b0SSuyog Pawar             if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4728*c83a76b0SSuyog Pawar #endif
4729*c83a76b0SSuyog Pawar             {
4730*c83a76b0SSuyog Pawar                 {
4731*c83a76b0SSuyog Pawar                     WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
4732*c83a76b0SSuyog Pawar                     WORD32 pred_buf_size_per_thread = pred_buf_size;
4733*c83a76b0SSuyog Pawar                     UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].pv_base +
4734*c83a76b0SSuyog Pawar                                        (ctr * pred_buf_size_per_thread);
4735*c83a76b0SSuyog Pawar 
4736*c83a76b0SSuyog Pawar                     ps_ctxt->pv_CTB_pred_luma = pu1_base;
4737*c83a76b0SSuyog Pawar                 }
4738*c83a76b0SSuyog Pawar 
4739*c83a76b0SSuyog Pawar                 {
4740*c83a76b0SSuyog Pawar                     WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
4741*c83a76b0SSuyog Pawar                                            ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4742*c83a76b0SSuyog Pawar                     WORD32 pred_buf_size_per_thread = pred_buf_size;
4743*c83a76b0SSuyog Pawar                     UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].pv_base +
4744*c83a76b0SSuyog Pawar                                        (ctr * pred_buf_size_per_thread);
4745*c83a76b0SSuyog Pawar 
4746*c83a76b0SSuyog Pawar                     ps_ctxt->pv_CTB_pred_chroma = pu1_base;
4747*c83a76b0SSuyog Pawar                 }
4748*c83a76b0SSuyog Pawar             }
4749*c83a76b0SSuyog Pawar 
4750*c83a76b0SSuyog Pawar             /* Memory assignments for CTB left luma data storage */
4751*c83a76b0SSuyog Pawar             {
4752*c83a76b0SSuyog Pawar                 WORD32 pred_buf_size = (MAX_CTB_SIZE + MAX_TU_SIZE);
4753*c83a76b0SSuyog Pawar                 WORD32 pred_buf_size_per_thread = pred_buf_size;
4754*c83a76b0SSuyog Pawar                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].pv_base +
4755*c83a76b0SSuyog Pawar                                    (ctr * pred_buf_size_per_thread);
4756*c83a76b0SSuyog Pawar 
4757*c83a76b0SSuyog Pawar                 ps_ctxt->pv_left_luma_data = pu1_base;
4758*c83a76b0SSuyog Pawar             }
4759*c83a76b0SSuyog Pawar 
4760*c83a76b0SSuyog Pawar             /* Memory assignments for CTB left chroma data storage */
4761*c83a76b0SSuyog Pawar             {
4762*c83a76b0SSuyog Pawar                 WORD32 pred_buf_size =
4763*c83a76b0SSuyog Pawar                     (MAX_CTB_SIZE + MAX_TU_SIZE) * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4764*c83a76b0SSuyog Pawar                 WORD32 pred_buf_size_per_thread = pred_buf_size;
4765*c83a76b0SSuyog Pawar                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].pv_base +
4766*c83a76b0SSuyog Pawar                                    (ctr * pred_buf_size_per_thread);
4767*c83a76b0SSuyog Pawar 
4768*c83a76b0SSuyog Pawar                 ps_ctxt->pv_left_chrm_data = pu1_base;
4769*c83a76b0SSuyog Pawar             }
4770*c83a76b0SSuyog Pawar         }
4771*c83a76b0SSuyog Pawar 
4772*c83a76b0SSuyog Pawar         /* Memory for inter pred buffers */
4773*c83a76b0SSuyog Pawar         {
4774*c83a76b0SSuyog Pawar             WORD32 i4_num_bufs_per_thread;
4775*c83a76b0SSuyog Pawar 
4776*c83a76b0SSuyog Pawar             WORD32 i4_buf_size_per_cand =
4777*c83a76b0SSuyog Pawar                 (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
4778*c83a76b0SSuyog Pawar                 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
4779*c83a76b0SSuyog Pawar 
4780*c83a76b0SSuyog Pawar             i4_num_bufs_per_thread =
4781*c83a76b0SSuyog Pawar                 (ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size / i4_num_proc_thrds) /
4782*c83a76b0SSuyog Pawar                 i4_buf_size_per_cand;
4783*c83a76b0SSuyog Pawar 
4784*c83a76b0SSuyog Pawar             ps_ctxt->i4_max_num_inter_rdopt_cands = i4_num_bufs_per_thread - 4;
4785*c83a76b0SSuyog Pawar 
4786*c83a76b0SSuyog Pawar             ps_ctxt->s_pred_buf_data.u4_is_buf_in_use = UINT_MAX;
4787*c83a76b0SSuyog Pawar 
4788*c83a76b0SSuyog Pawar             {
4789*c83a76b0SSuyog Pawar                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_INTER_PRED].pv_base +
4790*c83a76b0SSuyog Pawar                                    +(ctr * i4_buf_size_per_cand * i4_num_bufs_per_thread);
4791*c83a76b0SSuyog Pawar 
4792*c83a76b0SSuyog Pawar                 for(i = 0; i < i4_num_bufs_per_thread; i++)
4793*c83a76b0SSuyog Pawar                 {
4794*c83a76b0SSuyog Pawar                     ps_ctxt->s_pred_buf_data.apv_inter_pred_data[i] =
4795*c83a76b0SSuyog Pawar                         pu1_base + i * i4_buf_size_per_cand;
4796*c83a76b0SSuyog Pawar                     ps_ctxt->s_pred_buf_data.u4_is_buf_in_use ^= (1 << i);
4797*c83a76b0SSuyog Pawar                 }
4798*c83a76b0SSuyog Pawar             }
4799*c83a76b0SSuyog Pawar         }
4800*c83a76b0SSuyog Pawar 
4801*c83a76b0SSuyog Pawar         /* Memory required to store pred for 422 chroma */
4802*c83a76b0SSuyog Pawar         if(i4_chroma_format == IV_YUV_422SP_UV)
4803*c83a76b0SSuyog Pawar         {
4804*c83a76b0SSuyog Pawar             WORD32 pred_buf_size = MAX_CTB_SIZE * MAX_CTB_SIZE * 2;
4805*c83a76b0SSuyog Pawar             WORD32 pred_buf_size_per_thread =
4806*c83a76b0SSuyog Pawar                 pred_buf_size * ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) *
4807*c83a76b0SSuyog Pawar                 sizeof(UWORD8);
4808*c83a76b0SSuyog Pawar             void *pv_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].pv_base +
4809*c83a76b0SSuyog Pawar                             (ctr * pred_buf_size_per_thread);
4810*c83a76b0SSuyog Pawar 
4811*c83a76b0SSuyog Pawar             ps_ctxt->pv_422_chroma_intra_pred_buf = pv_base;
4812*c83a76b0SSuyog Pawar         }
4813*c83a76b0SSuyog Pawar         else
4814*c83a76b0SSuyog Pawar         {
4815*c83a76b0SSuyog Pawar             ps_ctxt->pv_422_chroma_intra_pred_buf = NULL;
4816*c83a76b0SSuyog Pawar         }
4817*c83a76b0SSuyog Pawar 
4818*c83a76b0SSuyog Pawar         /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
4819*c83a76b0SSuyog Pawar         {
4820*c83a76b0SSuyog Pawar             WORD32 i4_lumaBufSize = MAX_CU_SIZE * MAX_CU_SIZE;
4821*c83a76b0SSuyog Pawar             WORD32 i4_chromaBufSize =
4822*c83a76b0SSuyog Pawar                 MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ((i4_chroma_format == IV_YUV_422SP_UV) + 1);
4823*c83a76b0SSuyog Pawar             WORD32 i4_memSize_perThread = ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size /
4824*c83a76b0SSuyog Pawar                                           (i4_num_proc_thrds * sizeof(UWORD8) * (is_hbd_mode + 1));
4825*c83a76b0SSuyog Pawar             WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset;
4826*c83a76b0SSuyog Pawar             {
4827*c83a76b0SSuyog Pawar                 UWORD8 *pu1_mem_base =
4828*c83a76b0SSuyog Pawar                     (((UWORD8 *)ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].pv_base) +
4829*c83a76b0SSuyog Pawar                      ctr * i4_memSize_perThread);
4830*c83a76b0SSuyog Pawar 
4831*c83a76b0SSuyog Pawar                 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[0] =
4832*c83a76b0SSuyog Pawar                     pu1_mem_base + i4_lumaBufSize * 0;
4833*c83a76b0SSuyog Pawar                 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[1] =
4834*c83a76b0SSuyog Pawar                     pu1_mem_base + i4_lumaBufSize * 1;
4835*c83a76b0SSuyog Pawar                 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[0] =
4836*c83a76b0SSuyog Pawar                     pu1_mem_base + i4_lumaBufSize * 2;
4837*c83a76b0SSuyog Pawar                 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[1] =
4838*c83a76b0SSuyog Pawar                     pu1_mem_base + i4_lumaBufSize * 3;
4839*c83a76b0SSuyog Pawar 
4840*c83a76b0SSuyog Pawar                 pu1_mem_base += i4_lumaBufSize * 4;
4841*c83a76b0SSuyog Pawar 
4842*c83a76b0SSuyog Pawar                 switch(i4_quality_preset)
4843*c83a76b0SSuyog Pawar                 {
4844*c83a76b0SSuyog Pawar                 case IHEVCE_QUALITY_P0:
4845*c83a76b0SSuyog Pawar                 {
4846*c83a76b0SSuyog Pawar #if ENABLE_CHROMA_RDOPT_EVAL_IN_PQ
4847*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4848*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 0;
4849*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4850*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 1;
4851*c83a76b0SSuyog Pawar #else
4852*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4853*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4854*c83a76b0SSuyog Pawar #endif
4855*c83a76b0SSuyog Pawar 
4856*c83a76b0SSuyog Pawar #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ
4857*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4858*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 2;
4859*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4860*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 3;
4861*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4862*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 2;
4863*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4864*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 3;
4865*c83a76b0SSuyog Pawar #else
4866*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4867*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4868*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4869*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4870*c83a76b0SSuyog Pawar #endif
4871*c83a76b0SSuyog Pawar 
4872*c83a76b0SSuyog Pawar                     break;
4873*c83a76b0SSuyog Pawar                 }
4874*c83a76b0SSuyog Pawar                 case IHEVCE_QUALITY_P2:
4875*c83a76b0SSuyog Pawar                 {
4876*c83a76b0SSuyog Pawar #if ENABLE_CHROMA_RDOPT_EVAL_IN_HQ
4877*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4878*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 0;
4879*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4880*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 1;
4881*c83a76b0SSuyog Pawar #else
4882*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4883*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4884*c83a76b0SSuyog Pawar #endif
4885*c83a76b0SSuyog Pawar 
4886*c83a76b0SSuyog Pawar #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ
4887*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4888*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 2;
4889*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4890*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 3;
4891*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4892*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 2;
4893*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4894*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 3;
4895*c83a76b0SSuyog Pawar #else
4896*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4897*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4898*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4899*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4900*c83a76b0SSuyog Pawar #endif
4901*c83a76b0SSuyog Pawar 
4902*c83a76b0SSuyog Pawar                     break;
4903*c83a76b0SSuyog Pawar                 }
4904*c83a76b0SSuyog Pawar                 case IHEVCE_QUALITY_P3:
4905*c83a76b0SSuyog Pawar                 {
4906*c83a76b0SSuyog Pawar #if ENABLE_CHROMA_RDOPT_EVAL_IN_MS
4907*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4908*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 0;
4909*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4910*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 1;
4911*c83a76b0SSuyog Pawar #else
4912*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4913*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4914*c83a76b0SSuyog Pawar #endif
4915*c83a76b0SSuyog Pawar 
4916*c83a76b0SSuyog Pawar #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS
4917*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4918*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 2;
4919*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4920*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 3;
4921*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4922*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 2;
4923*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4924*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 3;
4925*c83a76b0SSuyog Pawar #else
4926*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4927*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4928*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4929*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4930*c83a76b0SSuyog Pawar #endif
4931*c83a76b0SSuyog Pawar 
4932*c83a76b0SSuyog Pawar                     break;
4933*c83a76b0SSuyog Pawar                 }
4934*c83a76b0SSuyog Pawar                 case IHEVCE_QUALITY_P4:
4935*c83a76b0SSuyog Pawar                 {
4936*c83a76b0SSuyog Pawar #if ENABLE_CHROMA_RDOPT_EVAL_IN_HS
4937*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4938*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 0;
4939*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4940*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 1;
4941*c83a76b0SSuyog Pawar #else
4942*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4943*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4944*c83a76b0SSuyog Pawar #endif
4945*c83a76b0SSuyog Pawar 
4946*c83a76b0SSuyog Pawar #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS
4947*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4948*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 2;
4949*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4950*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 3;
4951*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4952*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 2;
4953*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4954*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 3;
4955*c83a76b0SSuyog Pawar #else
4956*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4957*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4958*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4959*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4960*c83a76b0SSuyog Pawar #endif
4961*c83a76b0SSuyog Pawar 
4962*c83a76b0SSuyog Pawar                     break;
4963*c83a76b0SSuyog Pawar                 }
4964*c83a76b0SSuyog Pawar                 case IHEVCE_QUALITY_P5:
4965*c83a76b0SSuyog Pawar                 {
4966*c83a76b0SSuyog Pawar #if ENABLE_CHROMA_RDOPT_EVAL_IN_XS
4967*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4968*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 0;
4969*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4970*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 1;
4971*c83a76b0SSuyog Pawar #else
4972*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4973*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4974*c83a76b0SSuyog Pawar #endif
4975*c83a76b0SSuyog Pawar 
4976*c83a76b0SSuyog Pawar #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS
4977*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4978*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 2;
4979*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4980*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 3;
4981*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4982*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 2;
4983*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4984*c83a76b0SSuyog Pawar                         pu1_mem_base + i4_chromaBufSize * 3;
4985*c83a76b0SSuyog Pawar #else
4986*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4987*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4988*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4989*c83a76b0SSuyog Pawar                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4990*c83a76b0SSuyog Pawar #endif
4991*c83a76b0SSuyog Pawar 
4992*c83a76b0SSuyog Pawar                     break;
4993*c83a76b0SSuyog Pawar                 }
4994*c83a76b0SSuyog Pawar                 }
4995*c83a76b0SSuyog Pawar             }
4996*c83a76b0SSuyog Pawar 
4997*c83a76b0SSuyog Pawar             ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
4998*c83a76b0SSuyog Pawar             ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
4999*c83a76b0SSuyog Pawar             ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
5000*c83a76b0SSuyog Pawar             ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
5001*c83a76b0SSuyog Pawar 
5002*c83a76b0SSuyog Pawar         } /* Recon Datastore */
5003*c83a76b0SSuyog Pawar 
5004*c83a76b0SSuyog Pawar         /****************************************************/
5005*c83a76b0SSuyog Pawar         /****************************************************/
5006*c83a76b0SSuyog Pawar         /* ps_pps->i1_sign_data_hiding_flag  == UNHIDDEN    */
5007*c83a76b0SSuyog Pawar         /* when NO_SBH. else HIDDEN                         */
5008*c83a76b0SSuyog Pawar         /****************************************************/
5009*c83a76b0SSuyog Pawar         /****************************************************/
5010*c83a76b0SSuyog Pawar         /* Zero cbf tool is enabled by default for all presets */
5011*c83a76b0SSuyog Pawar         ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
5012*c83a76b0SSuyog Pawar 
5013*c83a76b0SSuyog Pawar         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3)
5014*c83a76b0SSuyog Pawar         {
5015*c83a76b0SSuyog Pawar             ps_ctxt->i4_quant_rounding_level = CU_LEVEL_QUANT_ROUNDING;
5016*c83a76b0SSuyog Pawar             ps_ctxt->i4_chroma_quant_rounding_level = CHROMA_QUANT_ROUNDING;
5017*c83a76b0SSuyog Pawar             ps_ctxt->i4_rdoq_level = ALL_CAND_RDOQ;
5018*c83a76b0SSuyog Pawar             ps_ctxt->i4_sbh_level = ALL_CAND_SBH;
5019*c83a76b0SSuyog Pawar         }
5020*c83a76b0SSuyog Pawar         else if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P3)
5021*c83a76b0SSuyog Pawar         {
5022*c83a76b0SSuyog Pawar             ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5023*c83a76b0SSuyog Pawar             ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5024*c83a76b0SSuyog Pawar             ps_ctxt->i4_rdoq_level = NO_RDOQ;
5025*c83a76b0SSuyog Pawar             ps_ctxt->i4_sbh_level = NO_SBH;
5026*c83a76b0SSuyog Pawar         }
5027*c83a76b0SSuyog Pawar         else
5028*c83a76b0SSuyog Pawar         {
5029*c83a76b0SSuyog Pawar             ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5030*c83a76b0SSuyog Pawar             ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5031*c83a76b0SSuyog Pawar             ps_ctxt->i4_rdoq_level = NO_RDOQ;
5032*c83a76b0SSuyog Pawar             ps_ctxt->i4_sbh_level = NO_SBH;
5033*c83a76b0SSuyog Pawar         }
5034*c83a76b0SSuyog Pawar 
5035*c83a76b0SSuyog Pawar #if DISABLE_QUANT_ROUNDING
5036*c83a76b0SSuyog Pawar         ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5037*c83a76b0SSuyog Pawar         ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5038*c83a76b0SSuyog Pawar #endif
5039*c83a76b0SSuyog Pawar         /* RDOQ is additionally disabled whenever spatial modulation is enabled
5040*c83a76b0SSuyog Pawar            (bit 0 of i4_cu_level_rc set), as RDOQ degrades visual quality */
5041*c83a76b0SSuyog Pawar         if(ps_init_prms->s_config_prms.i4_cu_level_rc & 1)
5042*c83a76b0SSuyog Pawar         {
5043*c83a76b0SSuyog Pawar             ps_ctxt->i4_rdoq_level = NO_RDOQ;
5044*c83a76b0SSuyog Pawar         }
5045*c83a76b0SSuyog Pawar 
5046*c83a76b0SSuyog Pawar #if DISABLE_RDOQ
5047*c83a76b0SSuyog Pawar         ps_ctxt->i4_rdoq_level = NO_RDOQ;
5048*c83a76b0SSuyog Pawar #endif
5049*c83a76b0SSuyog Pawar 
5050*c83a76b0SSuyog Pawar #if DISABLE_SBH
5051*c83a76b0SSuyog Pawar         ps_ctxt->i4_sbh_level = NO_SBH;
5052*c83a76b0SSuyog Pawar #endif
5053*c83a76b0SSuyog Pawar 
5054*c83a76b0SSuyog Pawar         /*Rounding factor calc based on previous cabac states */
5055*c83a76b0SSuyog Pawar 
5056*c83a76b0SSuyog Pawar         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_4x4[0][0];
5057*c83a76b0SSuyog Pawar         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_8x8[0][0];
5058*c83a76b0SSuyog Pawar         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_16x16[0][0];
5059*c83a76b0SSuyog Pawar         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[4] = &ps_ctxt->i4_quant_round_32x32[0][0];
5060*c83a76b0SSuyog Pawar 
5061*c83a76b0SSuyog Pawar         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_4x4[1][0];
5062*c83a76b0SSuyog Pawar         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_8x8[1][0];
5063*c83a76b0SSuyog Pawar         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_16x16[1][0];
5064*c83a76b0SSuyog Pawar         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[4] = &ps_ctxt->i4_quant_round_32x32[1][0];
5065*c83a76b0SSuyog Pawar 
5066*c83a76b0SSuyog Pawar         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_cr_4x4[0][0];
5067*c83a76b0SSuyog Pawar         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_cr_8x8[0][0];
5068*c83a76b0SSuyog Pawar         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_cr_16x16[0][0];
5069*c83a76b0SSuyog Pawar 
5070*c83a76b0SSuyog Pawar         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_cr_4x4[1][0];
5071*c83a76b0SSuyog Pawar         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_cr_8x8[1][0];
5072*c83a76b0SSuyog Pawar         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_cr_16x16[1][0];
5073*c83a76b0SSuyog Pawar 
5074*c83a76b0SSuyog Pawar         /****************************************************************************************/
5075*c83a76b0SSuyog Pawar         /* Setting the perform rdoq and sbh flags appropriately                                 */
5076*c83a76b0SSuyog Pawar         /****************************************************************************************/
5077*c83a76b0SSuyog Pawar         {
5078*c83a76b0SSuyog Pawar             /******************************************/
5079*c83a76b0SSuyog Pawar             /* For best cand rdoq and/or sbh          */
5080*c83a76b0SSuyog Pawar             /******************************************/
5081*c83a76b0SSuyog Pawar             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
5082*c83a76b0SSuyog Pawar                 (ps_ctxt->i4_rdoq_level == BEST_CAND_RDOQ);
5083*c83a76b0SSuyog Pawar             /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
5084*c83a76b0SSuyog Pawar             we would have to do RDOQ again.*/
5085*c83a76b0SSuyog Pawar             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
5086*c83a76b0SSuyog Pawar                 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq ||
5087*c83a76b0SSuyog Pawar                 ((BEST_CAND_SBH == ps_ctxt->i4_sbh_level) &&
5088*c83a76b0SSuyog Pawar                  (ALL_CAND_RDOQ == ps_ctxt->i4_rdoq_level));
5089*c83a76b0SSuyog Pawar 
5090*c83a76b0SSuyog Pawar             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
5091*c83a76b0SSuyog Pawar                 (ps_ctxt->i4_sbh_level == BEST_CAND_SBH);
5092*c83a76b0SSuyog Pawar 
5093*c83a76b0SSuyog Pawar             /* SBH should be performed if
5094*c83a76b0SSuyog Pawar             a) i4_sbh_level is BEST_CAND_SBH, or
5095*c83a76b0SSuyog Pawar             b) the quality preset is above medium speed (i.e. high speed or extreme speed)
5096*c83a76b0SSuyog Pawar             and SBH has to be done, because for these presets the quant, iquant and scan
5097*c83a76b0SSuyog Pawar             coeff data are calculated in this function and not during the RDOPT stage */
5098*c83a76b0SSuyog Pawar 
5099*c83a76b0SSuyog Pawar             /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
5100*c83a76b0SSuyog Pawar             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
5101*c83a76b0SSuyog Pawar                 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh ||
5102*c83a76b0SSuyog Pawar                 ((BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level) &&
5103*c83a76b0SSuyog Pawar                  (ALL_CAND_SBH == ps_ctxt->i4_sbh_level));
5104*c83a76b0SSuyog Pawar 
5105*c83a76b0SSuyog Pawar             /******************************************/
5106*c83a76b0SSuyog Pawar             /* For all cand rdoq and/or sbh          */
5107*c83a76b0SSuyog Pawar             /******************************************/
5108*c83a76b0SSuyog Pawar             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq =
5109*c83a76b0SSuyog Pawar                 (ps_ctxt->i4_rdoq_level == ALL_CAND_RDOQ);
5110*c83a76b0SSuyog Pawar             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh =
5111*c83a76b0SSuyog Pawar                 (ps_ctxt->i4_sbh_level == ALL_CAND_SBH);
5112*c83a76b0SSuyog Pawar             ps_ctxt->s_rdoq_sbh_ctxt.i4_bit_depth =
5113*c83a76b0SSuyog Pawar                 ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
5114*c83a76b0SSuyog Pawar         }
5115*c83a76b0SSuyog Pawar 
5116*c83a76b0SSuyog Pawar         if(!is_hbd_mode)
5117*c83a76b0SSuyog Pawar         {
5118*c83a76b0SSuyog Pawar             if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
5119*c83a76b0SSuyog Pawar             {
5120*c83a76b0SSuyog Pawar                 if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
5121*c83a76b0SSuyog Pawar                 {
5122*c83a76b0SSuyog Pawar                     ps_ctxt->apf_quant_iquant_ssd[0] =
5123*c83a76b0SSuyog Pawar                         ps_func_selector->ihevc_quant_iquant_ssd_fptr;
5124*c83a76b0SSuyog Pawar                     ps_ctxt->apf_quant_iquant_ssd[2] = ps_func_selector->ihevc_quant_iquant_fptr;
5125*c83a76b0SSuyog Pawar                 }
5126*c83a76b0SSuyog Pawar                 else
5127*c83a76b0SSuyog Pawar                 {
5128*c83a76b0SSuyog Pawar                     ps_ctxt->apf_quant_iquant_ssd[0] =
5129*c83a76b0SSuyog Pawar                         ps_func_selector->ihevc_quant_iquant_ssd_rdoq_fptr;
5130*c83a76b0SSuyog Pawar                     ps_ctxt->apf_quant_iquant_ssd[2] =
5131*c83a76b0SSuyog Pawar                         ps_func_selector->ihevc_quant_iquant_rdoq_fptr;
5132*c83a76b0SSuyog Pawar                 }
5133*c83a76b0SSuyog Pawar 
5134*c83a76b0SSuyog Pawar                 /* If quant rounding is not fixed, quantization based on corr. error (variable rounding factor) is to be done */
5135*c83a76b0SSuyog Pawar                 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
5136*c83a76b0SSuyog Pawar                 {
5137*c83a76b0SSuyog Pawar                     ps_ctxt->apf_quant_iquant_ssd[1] =
5138*c83a76b0SSuyog Pawar                         ps_func_selector->ihevc_q_iq_ssd_var_rnd_fact_fptr;
5139*c83a76b0SSuyog Pawar                     ps_ctxt->apf_quant_iquant_ssd[3] =
5140*c83a76b0SSuyog Pawar                         ps_func_selector->ihevc_q_iq_var_rnd_fact_fptr;
5141*c83a76b0SSuyog Pawar                 }
5142*c83a76b0SSuyog Pawar                 else
5143*c83a76b0SSuyog Pawar                 {
5144*c83a76b0SSuyog Pawar                     ps_ctxt->apf_quant_iquant_ssd[1] =
5145*c83a76b0SSuyog Pawar                         ps_func_selector->ihevc_quant_iquant_ssd_fptr;
5146*c83a76b0SSuyog Pawar                     ps_ctxt->apf_quant_iquant_ssd[3] = ps_func_selector->ihevc_quant_iquant_fptr;
5147*c83a76b0SSuyog Pawar                 }
5148*c83a76b0SSuyog Pawar             }
5149*c83a76b0SSuyog Pawar             else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
5150*c83a76b0SSuyog Pawar             {
5151*c83a76b0SSuyog Pawar                 if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
5152*c83a76b0SSuyog Pawar                 {
5153*c83a76b0SSuyog Pawar                     ps_ctxt->apf_quant_iquant_ssd[0] =
5154*c83a76b0SSuyog Pawar                         ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
5155*c83a76b0SSuyog Pawar                     ps_ctxt->apf_quant_iquant_ssd[2] =
5156*c83a76b0SSuyog Pawar                         ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
5157*c83a76b0SSuyog Pawar                 }
5158*c83a76b0SSuyog Pawar                 else
5159*c83a76b0SSuyog Pawar                 {
5160*c83a76b0SSuyog Pawar                     ps_ctxt->apf_quant_iquant_ssd[0] =
5161*c83a76b0SSuyog Pawar                         ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_fptr;
5162*c83a76b0SSuyog Pawar                     ps_ctxt->apf_quant_iquant_ssd[2] =
5163*c83a76b0SSuyog Pawar                         ps_func_selector->ihevc_quant_iquant_flat_scale_mat_rdoq_fptr;
5164*c83a76b0SSuyog Pawar                 }
5165*c83a76b0SSuyog Pawar 
5166*c83a76b0SSuyog Pawar                 /*If coef level RDOQ is enabled, quantization based on corr. error to be done */
5167*c83a76b0SSuyog Pawar                 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
5168*c83a76b0SSuyog Pawar                 {
5169*c83a76b0SSuyog Pawar                     ps_ctxt->apf_quant_iquant_ssd[1] =
5170*c83a76b0SSuyog Pawar                         ps_func_selector->ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_fptr;
5171*c83a76b0SSuyog Pawar                     ps_ctxt->apf_quant_iquant_ssd[3] =
5172*c83a76b0SSuyog Pawar                         ps_func_selector->ihevc_q_iq_flat_scale_mat_var_rnd_fact_fptr;
5173*c83a76b0SSuyog Pawar                 }
5174*c83a76b0SSuyog Pawar                 else
5175*c83a76b0SSuyog Pawar                 {
5176*c83a76b0SSuyog Pawar                     ps_ctxt->apf_quant_iquant_ssd[1] =
5177*c83a76b0SSuyog Pawar                         ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
5178*c83a76b0SSuyog Pawar                     ps_ctxt->apf_quant_iquant_ssd[3] =
5179*c83a76b0SSuyog Pawar                         ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
5180*c83a76b0SSuyog Pawar                 }
5181*c83a76b0SSuyog Pawar             }
5182*c83a76b0SSuyog Pawar 
5183*c83a76b0SSuyog Pawar             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[0] =
5184*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_sao_edge_offset_class0_fptr;
5185*c83a76b0SSuyog Pawar             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[1] =
5186*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_sao_edge_offset_class1_fptr;
5187*c83a76b0SSuyog Pawar             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[2] =
5188*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_sao_edge_offset_class2_fptr;
5189*c83a76b0SSuyog Pawar             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[3] =
5190*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_sao_edge_offset_class3_fptr;
5191*c83a76b0SSuyog Pawar 
5192*c83a76b0SSuyog Pawar             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[0] =
5193*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr;
5194*c83a76b0SSuyog Pawar             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[1] =
5195*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr;
5196*c83a76b0SSuyog Pawar             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[2] =
5197*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr;
5198*c83a76b0SSuyog Pawar             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[3] =
5199*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr;
5200*c83a76b0SSuyog Pawar 
5201*c83a76b0SSuyog Pawar             ps_ctxt->apf_it_recon[0] = ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr;
5202*c83a76b0SSuyog Pawar             ps_ctxt->apf_it_recon[1] = ps_func_selector->ihevc_itrans_recon_4x4_fptr;
5203*c83a76b0SSuyog Pawar             ps_ctxt->apf_it_recon[2] = ps_func_selector->ihevc_itrans_recon_8x8_fptr;
5204*c83a76b0SSuyog Pawar             ps_ctxt->apf_it_recon[3] = ps_func_selector->ihevc_itrans_recon_16x16_fptr;
5205*c83a76b0SSuyog Pawar             ps_ctxt->apf_it_recon[4] = ps_func_selector->ihevc_itrans_recon_32x32_fptr;
5206*c83a76b0SSuyog Pawar 
5207*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_it_recon[0] = ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr;
5208*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_it_recon[1] = ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr;
5209*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_it_recon[2] = ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr;
5210*c83a76b0SSuyog Pawar 
5211*c83a76b0SSuyog Pawar             ps_ctxt->apf_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr;
5212*c83a76b0SSuyog Pawar             ps_ctxt->apf_resd_trns[1] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
5213*c83a76b0SSuyog Pawar             ps_ctxt->apf_resd_trns[2] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
5214*c83a76b0SSuyog Pawar             ps_ctxt->apf_resd_trns[3] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
5215*c83a76b0SSuyog Pawar             ps_ctxt->apf_resd_trns[4] = ps_func_selector->ihevc_resi_trans_32x32_fptr;
5216*c83a76b0SSuyog Pawar 
5217*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
5218*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_resd_trns[1] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
5219*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_resd_trns[2] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
5220*c83a76b0SSuyog Pawar 
5221*c83a76b0SSuyog Pawar             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_0] =
5222*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_luma_planar_fptr;
5223*c83a76b0SSuyog Pawar             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_1] = ps_func_selector->ihevc_intra_pred_luma_dc_fptr;
5224*c83a76b0SSuyog Pawar             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_2] =
5225*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_luma_mode2_fptr;
5226*c83a76b0SSuyog Pawar             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_3TO9] =
5227*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr;
5228*c83a76b0SSuyog Pawar             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_10] =
5229*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_luma_horz_fptr;
5230*c83a76b0SSuyog Pawar             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_11TO17] =
5231*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr;
5232*c83a76b0SSuyog Pawar             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_18_34] =
5233*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr;
5234*c83a76b0SSuyog Pawar             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_19TO25] =
5235*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr;
5236*c83a76b0SSuyog Pawar             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_26] = ps_func_selector->ihevc_intra_pred_luma_ver_fptr;
5237*c83a76b0SSuyog Pawar             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_27TO33] =
5238*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr;
5239*c83a76b0SSuyog Pawar 
5240*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_0] =
5241*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_chroma_planar_fptr;
5242*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_1] =
5243*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_chroma_dc_fptr;
5244*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_2] =
5245*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr;
5246*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_3TO9] =
5247*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr;
5248*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_10] =
5249*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_chroma_horz_fptr;
5250*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_11TO17] =
5251*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr;
5252*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_18_34] =
5253*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr;
5254*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_19TO25] =
5255*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr;
5256*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_26] =
5257*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_chroma_ver_fptr;
5258*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_27TO33] =
5259*c83a76b0SSuyog Pawar                 ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr;
5260*c83a76b0SSuyog Pawar 
5261*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_resd_trns_had[0] =
5262*c83a76b0SSuyog Pawar                 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_4x4_8bit;
5263*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_resd_trns_had[1] =
5264*c83a76b0SSuyog Pawar                 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_8x8_8bit;
5265*c83a76b0SSuyog Pawar             ps_ctxt->apf_chrm_resd_trns_had[2] =
5266*c83a76b0SSuyog Pawar                 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_16x16_8bit;
5267*c83a76b0SSuyog Pawar         }
5268*c83a76b0SSuyog Pawar 
5269*c83a76b0SSuyog Pawar         if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
5270*c83a76b0SSuyog Pawar         {
5271*c83a76b0SSuyog Pawar             /* initialise the scale & rescale matrices */
5272*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5273*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5274*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
5275*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
5276*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
5277*c83a76b0SSuyog Pawar             /*init for inter matrix*/
5278*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5279*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5280*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
5281*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
5282*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
5283*c83a76b0SSuyog Pawar 
5284*c83a76b0SSuyog Pawar             /*init for rescale matrix*/
5285*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5286*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5287*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
5288*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
5289*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
5290*c83a76b0SSuyog Pawar             /*init for rescale inter matrix*/
5291*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5292*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5293*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
5294*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
5295*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
5296*c83a76b0SSuyog Pawar         }
5297*c83a76b0SSuyog Pawar         else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
5298*c83a76b0SSuyog Pawar         {
5299*c83a76b0SSuyog Pawar             /* initialise the scale & rescale matrices */
5300*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5301*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5302*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_intra_default_scale_mat_8x8[0];
5303*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_intra_default_scale_mat_16x16[0];
5304*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_intra_default_scale_mat_32x32[0];
5305*c83a76b0SSuyog Pawar             /*init for inter matrix*/
5306*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5307*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5308*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_inter_default_scale_mat_8x8[0];
5309*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_inter_default_scale_mat_16x16[0];
5310*c83a76b0SSuyog Pawar             ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_inter_default_scale_mat_32x32[0];
5311*c83a76b0SSuyog Pawar 
5312*c83a76b0SSuyog Pawar             /*init for rescale matrix*/
5313*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5314*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5315*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_intra_default_rescale_mat_8x8[0];
5316*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_intra_default_rescale_mat_16x16[0];
5317*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_intra_default_rescale_mat_32x32[0];
5318*c83a76b0SSuyog Pawar             /*init for rescale inter matrix*/
5319*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5320*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5321*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_inter_default_rescale_mat_8x8[0];
5322*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_inter_default_rescale_mat_16x16[0];
5323*c83a76b0SSuyog Pawar             ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_inter_default_rescale_mat_32x32[0];
5324*c83a76b0SSuyog Pawar         }
5325*c83a76b0SSuyog Pawar         else
5326*c83a76b0SSuyog Pawar         {
5327*c83a76b0SSuyog Pawar             ASSERT(0);
5328*c83a76b0SSuyog Pawar         }
5329*c83a76b0SSuyog Pawar 
5330*c83a76b0SSuyog Pawar         /* Not recomputing Luma pred-data and header data for any preset now */
5331*c83a76b0SSuyog Pawar         ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
5332*c83a76b0SSuyog Pawar         ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
5333*c83a76b0SSuyog Pawar         ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
5334*c83a76b0SSuyog Pawar 
5335*c83a76b0SSuyog Pawar         switch(ps_ctxt->i4_quality_preset)
5336*c83a76b0SSuyog Pawar         {
5337*c83a76b0SSuyog Pawar         case IHEVCE_QUALITY_P0:
5338*c83a76b0SSuyog Pawar         {
5339*c83a76b0SSuyog Pawar             ps_ctxt->i4_max_merge_candidates = 5;
5340*c83a76b0SSuyog Pawar             ps_ctxt->i4_use_satd_for_merge_eval = 1;
5341*c83a76b0SSuyog Pawar             ps_ctxt->u1_use_top_at_ctb_boundary = 1;
5342*c83a76b0SSuyog Pawar             ps_ctxt->u1_use_early_cbf_data = 0;
5343*c83a76b0SSuyog Pawar             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_PQ;
5344*c83a76b0SSuyog Pawar             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5345*c83a76b0SSuyog Pawar                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ;
5346*c83a76b0SSuyog Pawar 
5347*c83a76b0SSuyog Pawar             break;
5348*c83a76b0SSuyog Pawar         }
5349*c83a76b0SSuyog Pawar         case IHEVCE_QUALITY_P2:
5350*c83a76b0SSuyog Pawar         {
5351*c83a76b0SSuyog Pawar             ps_ctxt->i4_max_merge_candidates = 5;
5352*c83a76b0SSuyog Pawar             ps_ctxt->i4_use_satd_for_merge_eval = 1;
5353*c83a76b0SSuyog Pawar             ps_ctxt->u1_use_top_at_ctb_boundary = 1;
5354*c83a76b0SSuyog Pawar             ps_ctxt->u1_use_early_cbf_data = 0;
5355*c83a76b0SSuyog Pawar 
5356*c83a76b0SSuyog Pawar             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HQ;
5357*c83a76b0SSuyog Pawar             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5358*c83a76b0SSuyog Pawar                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ;
5359*c83a76b0SSuyog Pawar 
5360*c83a76b0SSuyog Pawar             break;
5361*c83a76b0SSuyog Pawar         }
5362*c83a76b0SSuyog Pawar         case IHEVCE_QUALITY_P3:
5363*c83a76b0SSuyog Pawar         {
5364*c83a76b0SSuyog Pawar             ps_ctxt->i4_max_merge_candidates = 3;
5365*c83a76b0SSuyog Pawar             ps_ctxt->i4_use_satd_for_merge_eval = 1;
5366*c83a76b0SSuyog Pawar             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5367*c83a76b0SSuyog Pawar 
5368*c83a76b0SSuyog Pawar             ps_ctxt->u1_use_early_cbf_data = 0;
5369*c83a76b0SSuyog Pawar             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_MS;
5370*c83a76b0SSuyog Pawar             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5371*c83a76b0SSuyog Pawar                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS;
5372*c83a76b0SSuyog Pawar 
5373*c83a76b0SSuyog Pawar             break;
5374*c83a76b0SSuyog Pawar         }
5375*c83a76b0SSuyog Pawar         case IHEVCE_QUALITY_P4:
5376*c83a76b0SSuyog Pawar         {
5377*c83a76b0SSuyog Pawar             ps_ctxt->i4_max_merge_candidates = 2;
5378*c83a76b0SSuyog Pawar             ps_ctxt->i4_use_satd_for_merge_eval = 1;
5379*c83a76b0SSuyog Pawar             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5380*c83a76b0SSuyog Pawar             ps_ctxt->u1_use_early_cbf_data = 0;
5381*c83a76b0SSuyog Pawar             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HS;
5382*c83a76b0SSuyog Pawar             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5383*c83a76b0SSuyog Pawar                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS;
5384*c83a76b0SSuyog Pawar 
5385*c83a76b0SSuyog Pawar             break;
5386*c83a76b0SSuyog Pawar         }
5387*c83a76b0SSuyog Pawar         case IHEVCE_QUALITY_P5:
5388*c83a76b0SSuyog Pawar         {
5389*c83a76b0SSuyog Pawar             ps_ctxt->i4_max_merge_candidates = 2;
5390*c83a76b0SSuyog Pawar             ps_ctxt->i4_use_satd_for_merge_eval = 0;
5391*c83a76b0SSuyog Pawar             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5392*c83a76b0SSuyog Pawar             ps_ctxt->u1_use_early_cbf_data = 0;
5393*c83a76b0SSuyog Pawar             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_XS;
5394*c83a76b0SSuyog Pawar             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5395*c83a76b0SSuyog Pawar                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS;
5396*c83a76b0SSuyog Pawar 
5397*c83a76b0SSuyog Pawar             break;
5398*c83a76b0SSuyog Pawar         }
5399*c83a76b0SSuyog Pawar         case IHEVCE_QUALITY_P6:
5400*c83a76b0SSuyog Pawar         {
5401*c83a76b0SSuyog Pawar             ps_ctxt->i4_max_merge_candidates = 2;
5402*c83a76b0SSuyog Pawar             ps_ctxt->i4_use_satd_for_merge_eval = 0;
5403*c83a76b0SSuyog Pawar             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5404*c83a76b0SSuyog Pawar             ps_ctxt->u1_use_early_cbf_data = EARLY_CBF_ON;
5405*c83a76b0SSuyog Pawar             break;
5406*c83a76b0SSuyog Pawar         }
5407*c83a76b0SSuyog Pawar         default:
5408*c83a76b0SSuyog Pawar         {
5409*c83a76b0SSuyog Pawar             ASSERT(0);
5410*c83a76b0SSuyog Pawar         }
5411*c83a76b0SSuyog Pawar         }
5412*c83a76b0SSuyog Pawar 
5413*c83a76b0SSuyog Pawar #if DISABLE_SKIP_AND_MERGE_EVAL
5414*c83a76b0SSuyog Pawar         ps_ctxt->i4_max_merge_candidates = 0;
5415*c83a76b0SSuyog Pawar #endif
5416*c83a76b0SSuyog Pawar 
5417*c83a76b0SSuyog Pawar         ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
5418*c83a76b0SSuyog Pawar             !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
5419*c83a76b0SSuyog Pawar 
5420*c83a76b0SSuyog Pawar         /*initialize memory for RC related parameters required/populated by enc_loop */
5421*c83a76b0SSuyog Pawar         /* the allocated memory is distributed as follows assuming encoder is running for 3 bit-rate instances
5422*c83a76b0SSuyog Pawar         |-------|-> Thread 0, instance 0
5423*c83a76b0SSuyog Pawar         |       |
5424*c83a76b0SSuyog Pawar         |       |
5425*c83a76b0SSuyog Pawar         |       |
5426*c83a76b0SSuyog Pawar         |-------|-> thread 0, instance 1
5427*c83a76b0SSuyog Pawar         |       |
5428*c83a76b0SSuyog Pawar         |       |
5429*c83a76b0SSuyog Pawar         |       |
5430*c83a76b0SSuyog Pawar         |-------|-> thread 0, instance 2
5431*c83a76b0SSuyog Pawar         |       |
5432*c83a76b0SSuyog Pawar         |       |
5433*c83a76b0SSuyog Pawar         |       |
5434*c83a76b0SSuyog Pawar         |-------|-> thread 1, instance 0
5435*c83a76b0SSuyog Pawar         |       |
5436*c83a76b0SSuyog Pawar         |       |
5437*c83a76b0SSuyog Pawar         |       |
5438*c83a76b0SSuyog Pawar         |-------|-> thread 1, instance 1
5439*c83a76b0SSuyog Pawar         |       |
5440*c83a76b0SSuyog Pawar         |       |
5441*c83a76b0SSuyog Pawar         |       |
5442*c83a76b0SSuyog Pawar         |-------|-> thread 1, instance 2
5443*c83a76b0SSuyog Pawar         ...         ...
5444*c83a76b0SSuyog Pawar 
5445*c83a76b0SSuyog Pawar         Each thread will collate the data corresponding to the bit-rate instance it is running at the appropriate place.
5446*c83a76b0SSuyog Pawar         Finally, one thread will become master and collate the data from all the threads */
5447*c83a76b0SSuyog Pawar         for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
5448*c83a76b0SSuyog Pawar         {
5449*c83a76b0SSuyog Pawar             for(i = 0; i < i4_num_bitrate_inst; i++)
5450*c83a76b0SSuyog Pawar             {
5451*c83a76b0SSuyog Pawar                 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i] = ps_enc_loop_rc_params;
5452*c83a76b0SSuyog Pawar                 ps_enc_loop_rc_params++;
5453*c83a76b0SSuyog Pawar             }
5454*c83a76b0SSuyog Pawar         }
5455*c83a76b0SSuyog Pawar         /* Non-Luma modes for Chroma are evaluated only in HIGH QUALITY preset */
5456*c83a76b0SSuyog Pawar 
5457*c83a76b0SSuyog Pawar #if !ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE
5458*c83a76b0SSuyog Pawar         ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 0;
5459*c83a76b0SSuyog Pawar #endif
5460*c83a76b0SSuyog Pawar 
5461*c83a76b0SSuyog Pawar         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_iq_buff_stride =
5462*c83a76b0SSuyog Pawar             MAX_TU_SIZE;
5463*c83a76b0SSuyog Pawar         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_iq_buff_stride =
5464*c83a76b0SSuyog Pawar             MAX_TU_SIZE;
5465*c83a76b0SSuyog Pawar         /*Multiplying by two to account for interleaving of cb and cr*/
5466*c83a76b0SSuyog Pawar         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_pred_stride = MAX_TU_SIZE
5467*c83a76b0SSuyog Pawar                                                                                        << 1;
5468*c83a76b0SSuyog Pawar         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_pred_stride =
5469*c83a76b0SSuyog Pawar             MAX_TU_SIZE << 1;
5470*c83a76b0SSuyog Pawar 
5471*c83a76b0SSuyog Pawar         /*     Memory for a frame level memory to store tile-id                  */
5472*c83a76b0SSuyog Pawar         /*              corresponding to each CTB of frame                       */
5473*c83a76b0SSuyog Pawar         ps_ctxt->pi4_offset_for_last_cu_qp = &ps_master_ctxt->ai4_offset_for_last_cu_qp[0];
5474*c83a76b0SSuyog Pawar 
5475*c83a76b0SSuyog Pawar         ps_ctxt->i4_qp_mod = ps_init_prms->s_config_prms.i4_cu_level_rc & 1;
5476*c83a76b0SSuyog Pawar         /* psy rd strength is a run time parameter controlled by bit field 5-7 in the VQET field.*/
5477*c83a76b0SSuyog Pawar         /* we disable psyrd if the psy strength is zero or the BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER field is not set */
5478*c83a76b0SSuyog Pawar         if(ps_init_prms->s_coding_tools_prms.i4_vqet &
5479*c83a76b0SSuyog Pawar            (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER))
5480*c83a76b0SSuyog Pawar         {
5481*c83a76b0SSuyog Pawar             UWORD32 psy_strength;
5482*c83a76b0SSuyog Pawar             UWORD32 psy_strength_mask =
5483*c83a76b0SSuyog Pawar                 224;  // only bits 5,6,7 are ones. These three bits represent the psy strength
5484*c83a76b0SSuyog Pawar             psy_strength = ps_init_prms->s_coding_tools_prms.i4_vqet & psy_strength_mask;
5485*c83a76b0SSuyog Pawar             ps_ctxt->u1_enable_psyRDOPT = 1;
5486*c83a76b0SSuyog Pawar             ps_ctxt->u4_psy_strength = psy_strength >> BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1;
5487*c83a76b0SSuyog Pawar             if(psy_strength == 0)
5488*c83a76b0SSuyog Pawar             {
5489*c83a76b0SSuyog Pawar                 ps_ctxt->u1_enable_psyRDOPT = 0;
5490*c83a76b0SSuyog Pawar                 ps_ctxt->u4_psy_strength = 0;
5491*c83a76b0SSuyog Pawar             }
5492*c83a76b0SSuyog Pawar         }
5493*c83a76b0SSuyog Pawar 
5494*c83a76b0SSuyog Pawar         ps_ctxt->u1_is_stasino_enabled =
5495*c83a76b0SSuyog Pawar             ((ps_init_prms->s_coding_tools_prms.i4_vqet &
5496*c83a76b0SSuyog Pawar               (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
5497*c83a76b0SSuyog Pawar              (ps_init_prms->s_coding_tools_prms.i4_vqet &
5498*c83a76b0SSuyog Pawar               (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)));
5499*c83a76b0SSuyog Pawar 
5500*c83a76b0SSuyog Pawar         ps_ctxt->u1_max_inter_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_nI;
5501*c83a76b0SSuyog Pawar         ps_ctxt->u1_max_intra_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_I;
5502*c83a76b0SSuyog Pawar         ps_ctxt++;
5503*c83a76b0SSuyog Pawar     }
5504*c83a76b0SSuyog Pawar     /* Store Tile params base into EncLoop Master context */
5505*c83a76b0SSuyog Pawar     ps_master_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
5506*c83a76b0SSuyog Pawar 
5507*c83a76b0SSuyog Pawar     if(1 == ps_tile_params_base->i4_tiles_enabled_flag)
5508*c83a76b0SSuyog Pawar     {
5509*c83a76b0SSuyog Pawar         i4_num_tile_cols = ps_tile_params_base->i4_num_tile_cols;
5510*c83a76b0SSuyog Pawar     }
5511*c83a76b0SSuyog Pawar 
5512*c83a76b0SSuyog Pawar     /* Updating ai4_offset_for_last_cu_qp[] array for all tile-columns of frame */
5513*c83a76b0SSuyog Pawar     /* Loop over all tile-cols in frame */
5514*c83a76b0SSuyog Pawar     for(ctr = 0; ctr < i4_num_tile_cols; ctr++)
5515*c83a76b0SSuyog Pawar     {
5516*c83a76b0SSuyog Pawar         WORD32 i4_tile_col_wd_in_ctb_unit =
5517*c83a76b0SSuyog Pawar             (ps_tile_params_base + ctr)->i4_curr_tile_wd_in_ctb_unit;
5518*c83a76b0SSuyog Pawar         WORD32 offset_x;
5519*c83a76b0SSuyog Pawar 
5520*c83a76b0SSuyog Pawar         if(ctr == (i4_num_tile_cols - 1))
5521*c83a76b0SSuyog Pawar         { /* Last tile-row of frame */
5522*c83a76b0SSuyog Pawar             WORD32 min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
5523*c83a76b0SSuyog Pawar 
5524*c83a76b0SSuyog Pawar             WORD32 cu_aligned_pic_wd =
5525*c83a76b0SSuyog Pawar                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
5526*c83a76b0SSuyog Pawar                 SET_CTB_ALIGN(
5527*c83a76b0SSuyog Pawar                     ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width,
5528*c83a76b0SSuyog Pawar                     min_cu_size);
5529*c83a76b0SSuyog Pawar 
5530*c83a76b0SSuyog Pawar             WORD32 last_hz_ctb_wd = MAX_CTB_SIZE - (u4_width - cu_aligned_pic_wd);
5531*c83a76b0SSuyog Pawar 
5532*c83a76b0SSuyog Pawar             offset_x = (i4_tile_col_wd_in_ctb_unit - 1) * MAX_CTB_SIZE;
5533*c83a76b0SSuyog Pawar             offset_x += last_hz_ctb_wd;
5534*c83a76b0SSuyog Pawar         }
5535*c83a76b0SSuyog Pawar         else
5536*c83a76b0SSuyog Pawar         { /* Not the last tile-row of frame */
5537*c83a76b0SSuyog Pawar             offset_x = (i4_tile_col_wd_in_ctb_unit)*MAX_CTB_SIZE;
5538*c83a76b0SSuyog Pawar         }
5539*c83a76b0SSuyog Pawar 
5540*c83a76b0SSuyog Pawar         offset_x /= 4;
5541*c83a76b0SSuyog Pawar         offset_x -= 1;
5542*c83a76b0SSuyog Pawar 
5543*c83a76b0SSuyog Pawar         ps_master_ctxt->ai4_offset_for_last_cu_qp[ctr] = offset_x;
5544*c83a76b0SSuyog Pawar     }
5545*c83a76b0SSuyog Pawar 
5546*c83a76b0SSuyog Pawar     n_tabs = NUM_ENC_LOOP_MEM_RECS;
5547*c83a76b0SSuyog Pawar 
5548*c83a76b0SSuyog Pawar     /*store num bit-rate instances in the master context */
5549*c83a76b0SSuyog Pawar     ps_master_ctxt->i4_num_bitrates = i4_num_bitrate_inst;
5550*c83a76b0SSuyog Pawar     ps_master_ctxt->i4_num_enc_loop_frm_pllel = i4_num_enc_loop_frm_pllel;
5551*c83a76b0SSuyog Pawar     /*************************************************************************/
5552*c83a76b0SSuyog Pawar     /* --- EncLoop Deblock and SAO sync Dep Mngr Mem init --                         */
5553*c83a76b0SSuyog Pawar     /*************************************************************************/
5554*c83a76b0SSuyog Pawar     {
5555*c83a76b0SSuyog Pawar         WORD32 count;
5556*c83a76b0SSuyog Pawar         WORD32 num_vert_units, num_blks_in_row;
5557*c83a76b0SSuyog Pawar         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
5558*c83a76b0SSuyog Pawar         WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
5559*c83a76b0SSuyog Pawar 
5560*c83a76b0SSuyog Pawar         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
5561*c83a76b0SSuyog Pawar         ihevce_enc_loop_dblk_get_prms_dep_mngr(wd, &num_blks_in_row);
5562*c83a76b0SSuyog Pawar         ASSERT(num_vert_units > 0);
5563*c83a76b0SSuyog Pawar         ASSERT(num_blks_in_row > 0);
5564*c83a76b0SSuyog Pawar 
5565*c83a76b0SSuyog Pawar         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5566*c83a76b0SSuyog Pawar         {
5567*c83a76b0SSuyog Pawar             for(i = 0; i < i4_num_bitrate_inst; i++)
5568*c83a76b0SSuyog Pawar             {
5569*c83a76b0SSuyog Pawar                 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[count][i] = ihevce_dmgr_init(
5570*c83a76b0SSuyog Pawar                     &ps_mem_tab[n_tabs],
5571*c83a76b0SSuyog Pawar                     pv_osal_handle,
5572*c83a76b0SSuyog Pawar                     DEP_MNGR_ROW_ROW_SYNC,
5573*c83a76b0SSuyog Pawar                     num_vert_units,
5574*c83a76b0SSuyog Pawar                     num_blks_in_row,
5575*c83a76b0SSuyog Pawar                     i4_num_tile_cols, /* Number of Col Tiles */
5576*c83a76b0SSuyog Pawar                     i4_num_proc_thrds,
5577*c83a76b0SSuyog Pawar                     0 /*Sem Disabled*/
5578*c83a76b0SSuyog Pawar                 );
5579*c83a76b0SSuyog Pawar 
5580*c83a76b0SSuyog Pawar                 n_tabs += ihevce_dmgr_get_num_mem_recs();
5581*c83a76b0SSuyog Pawar             }
5582*c83a76b0SSuyog Pawar         }
5583*c83a76b0SSuyog Pawar 
5584*c83a76b0SSuyog Pawar         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5585*c83a76b0SSuyog Pawar         {
5586*c83a76b0SSuyog Pawar             for(i = 0; i < i4_num_bitrate_inst; i++)
5587*c83a76b0SSuyog Pawar             {
5588*c83a76b0SSuyog Pawar                 ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[count][i] = ihevce_dmgr_init(
5589*c83a76b0SSuyog Pawar                     &ps_mem_tab[n_tabs],
5590*c83a76b0SSuyog Pawar                     pv_osal_handle,
5591*c83a76b0SSuyog Pawar                     DEP_MNGR_ROW_ROW_SYNC,
5592*c83a76b0SSuyog Pawar                     num_vert_units,
5593*c83a76b0SSuyog Pawar                     num_blks_in_row,
5594*c83a76b0SSuyog Pawar                     i4_num_tile_cols, /* Number of Col Tiles */
5595*c83a76b0SSuyog Pawar                     i4_num_proc_thrds,
5596*c83a76b0SSuyog Pawar                     0 /*Sem Disabled*/
5597*c83a76b0SSuyog Pawar                 );
5598*c83a76b0SSuyog Pawar 
5599*c83a76b0SSuyog Pawar                 n_tabs += ihevce_dmgr_get_num_mem_recs();
5600*c83a76b0SSuyog Pawar             }
5601*c83a76b0SSuyog Pawar         }
5602*c83a76b0SSuyog Pawar     }
5603*c83a76b0SSuyog Pawar     /*************************************************************************/
5604*c83a76b0SSuyog Pawar     /* --- EncLoop Top-Right CU synnc Dep Mngr Mem init --                   */
5605*c83a76b0SSuyog Pawar     /*************************************************************************/
5606*c83a76b0SSuyog Pawar     {
5607*c83a76b0SSuyog Pawar         WORD32 count;
5608*c83a76b0SSuyog Pawar         WORD32 num_vert_units, num_blks_in_row;
5609*c83a76b0SSuyog Pawar         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
5610*c83a76b0SSuyog Pawar         WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
5611*c83a76b0SSuyog Pawar 
5612*c83a76b0SSuyog Pawar         WORD32 i4_sem = 0;
5613*c83a76b0SSuyog Pawar 
5614*c83a76b0SSuyog Pawar         if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset >=
5615*c83a76b0SSuyog Pawar            IHEVCE_QUALITY_P4)
5616*c83a76b0SSuyog Pawar             i4_sem = 0;
5617*c83a76b0SSuyog Pawar         else
5618*c83a76b0SSuyog Pawar             i4_sem = 1;
5619*c83a76b0SSuyog Pawar         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
5620*c83a76b0SSuyog Pawar         /* For Top-Right CU sync, adding one more CTB since value updation */
5621*c83a76b0SSuyog Pawar         /* happens in that way for the last CTB in the row                 */
5622*c83a76b0SSuyog Pawar         num_blks_in_row = wd + SET_CTB_ALIGN(wd, MAX_CU_SIZE);
5623*c83a76b0SSuyog Pawar         num_blks_in_row += MAX_CTB_SIZE;
5624*c83a76b0SSuyog Pawar 
5625*c83a76b0SSuyog Pawar         ASSERT(num_vert_units > 0);
5626*c83a76b0SSuyog Pawar         ASSERT(num_blks_in_row > 0);
5627*c83a76b0SSuyog Pawar 
5628*c83a76b0SSuyog Pawar         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5629*c83a76b0SSuyog Pawar         {
5630*c83a76b0SSuyog Pawar             for(i = 0; i < i4_num_bitrate_inst; i++)
5631*c83a76b0SSuyog Pawar             {
5632*c83a76b0SSuyog Pawar                 /* For ES/HS, CU level updates uses spin-locks than semaphore */
5633*c83a76b0SSuyog Pawar                 {
5634*c83a76b0SSuyog Pawar                     ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[count][i] =
5635*c83a76b0SSuyog Pawar                         ihevce_dmgr_init(
5636*c83a76b0SSuyog Pawar                             &ps_mem_tab[n_tabs],
5637*c83a76b0SSuyog Pawar                             pv_osal_handle,
5638*c83a76b0SSuyog Pawar                             DEP_MNGR_ROW_ROW_SYNC,
5639*c83a76b0SSuyog Pawar                             num_vert_units,
5640*c83a76b0SSuyog Pawar                             num_blks_in_row,
5641*c83a76b0SSuyog Pawar                             i4_num_tile_cols, /* Number of Col Tiles */
5642*c83a76b0SSuyog Pawar                             i4_num_proc_thrds,
5643*c83a76b0SSuyog Pawar                             i4_sem /*Sem Disabled*/
5644*c83a76b0SSuyog Pawar                         );
5645*c83a76b0SSuyog Pawar                 }
5646*c83a76b0SSuyog Pawar                 n_tabs += ihevce_dmgr_get_num_mem_recs();
5647*c83a76b0SSuyog Pawar             }
5648*c83a76b0SSuyog Pawar         }
5649*c83a76b0SSuyog Pawar     }
5650*c83a76b0SSuyog Pawar 
5651*c83a76b0SSuyog Pawar     for(i = 1; i < 5; i++)
5652*c83a76b0SSuyog Pawar     {
5653*c83a76b0SSuyog Pawar         WORD32 i4_log2_trans_size = i + 1;
5654*c83a76b0SSuyog Pawar         WORD32 i4_bit_depth = ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
5655*c83a76b0SSuyog Pawar 
5656*c83a76b0SSuyog Pawar         ga_trans_shift[i] = (MAX_TR_DYNAMIC_RANGE - i4_bit_depth - i4_log2_trans_size) << 1;
5657*c83a76b0SSuyog Pawar     }
5658*c83a76b0SSuyog Pawar 
5659*c83a76b0SSuyog Pawar     ga_trans_shift[0] = ga_trans_shift[1];
5660*c83a76b0SSuyog Pawar 
5661*c83a76b0SSuyog Pawar     /* return the handle to caller */
5662*c83a76b0SSuyog Pawar     return ((void *)ps_master_ctxt);
5663*c83a76b0SSuyog Pawar }
5664*c83a76b0SSuyog Pawar 
5665*c83a76b0SSuyog Pawar /*!
5666*c83a76b0SSuyog Pawar ******************************************************************************
5667*c83a76b0SSuyog Pawar * \if Function name : ihevce_enc_loop_reg_sem_hdls \endif
5668*c83a76b0SSuyog Pawar *
5669*c83a76b0SSuyog Pawar * \brief
5670*c83a76b0SSuyog Pawar *    Intialization for ENC_LOOP context state structure .
5671*c83a76b0SSuyog Pawar *
5672*c83a76b0SSuyog Pawar * \param[in] ps_mem_tab : pointer to memory descriptors table
5673*c83a76b0SSuyog Pawar * \param[in] ppv_sem_hdls : Array of semaphore handles
5674*c83a76b0SSuyog Pawar * \param[in] i4_num_proc_thrds : Number of processing threads
5675*c83a76b0SSuyog Pawar *
5676*c83a76b0SSuyog Pawar * \return
5677*c83a76b0SSuyog Pawar *    None
5678*c83a76b0SSuyog Pawar *
5679*c83a76b0SSuyog Pawar * \author
5680*c83a76b0SSuyog Pawar *  Ittiam
5681*c83a76b0SSuyog Pawar *
5682*c83a76b0SSuyog Pawar *****************************************************************************
5683*c83a76b0SSuyog Pawar */
ihevce_enc_loop_reg_sem_hdls(void * pv_enc_loop_ctxt,void ** ppv_sem_hdls,WORD32 i4_num_proc_thrds)5684*c83a76b0SSuyog Pawar void ihevce_enc_loop_reg_sem_hdls(
5685*c83a76b0SSuyog Pawar     void *pv_enc_loop_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
5686*c83a76b0SSuyog Pawar {
5687*c83a76b0SSuyog Pawar     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5688*c83a76b0SSuyog Pawar     WORD32 i, enc_frm_id;
5689*c83a76b0SSuyog Pawar 
5690*c83a76b0SSuyog Pawar     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5691*c83a76b0SSuyog Pawar 
5692*c83a76b0SSuyog Pawar     /*************************************************************************/
5693*c83a76b0SSuyog Pawar     /* --- EncLoop Deblock and SAO sync Dep Mngr reg Semaphores --                   */
5694*c83a76b0SSuyog Pawar     /*************************************************************************/
5695*c83a76b0SSuyog Pawar     for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5696*c83a76b0SSuyog Pawar     {
5697*c83a76b0SSuyog Pawar         for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5698*c83a76b0SSuyog Pawar         {
5699*c83a76b0SSuyog Pawar             ihevce_dmgr_reg_sem_hdls(
5700*c83a76b0SSuyog Pawar                 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][i],
5701*c83a76b0SSuyog Pawar                 ppv_sem_hdls,
5702*c83a76b0SSuyog Pawar                 i4_num_proc_thrds);
5703*c83a76b0SSuyog Pawar         }
5704*c83a76b0SSuyog Pawar     }
5705*c83a76b0SSuyog Pawar 
5706*c83a76b0SSuyog Pawar     for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5707*c83a76b0SSuyog Pawar     {
5708*c83a76b0SSuyog Pawar         for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5709*c83a76b0SSuyog Pawar         {
5710*c83a76b0SSuyog Pawar             ihevce_dmgr_reg_sem_hdls(
5711*c83a76b0SSuyog Pawar                 ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[enc_frm_id][i],
5712*c83a76b0SSuyog Pawar                 ppv_sem_hdls,
5713*c83a76b0SSuyog Pawar                 i4_num_proc_thrds);
5714*c83a76b0SSuyog Pawar         }
5715*c83a76b0SSuyog Pawar     }
5716*c83a76b0SSuyog Pawar 
5717*c83a76b0SSuyog Pawar     /*************************************************************************/
5718*c83a76b0SSuyog Pawar     /* --- EncLoop Top-Right CU synnc Dep Mngr reg Semaphores --             */
5719*c83a76b0SSuyog Pawar     /*************************************************************************/
5720*c83a76b0SSuyog Pawar     for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5721*c83a76b0SSuyog Pawar     {
5722*c83a76b0SSuyog Pawar         for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5723*c83a76b0SSuyog Pawar         {
5724*c83a76b0SSuyog Pawar             ihevce_dmgr_reg_sem_hdls(
5725*c83a76b0SSuyog Pawar                 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][i],
5726*c83a76b0SSuyog Pawar                 ppv_sem_hdls,
5727*c83a76b0SSuyog Pawar                 i4_num_proc_thrds);
5728*c83a76b0SSuyog Pawar         }
5729*c83a76b0SSuyog Pawar     }
5730*c83a76b0SSuyog Pawar 
5731*c83a76b0SSuyog Pawar     return;
5732*c83a76b0SSuyog Pawar }
5733*c83a76b0SSuyog Pawar 
5734*c83a76b0SSuyog Pawar /*!
5735*c83a76b0SSuyog Pawar ******************************************************************************
5736*c83a76b0SSuyog Pawar * \if Function name : ihevce_enc_loop_delete \endif
5737*c83a76b0SSuyog Pawar *
5738*c83a76b0SSuyog Pawar * \brief
5739*c83a76b0SSuyog Pawar *    Destroy EncLoop module
5740*c83a76b0SSuyog Pawar * Note : Only Destroys the resources allocated in the module like
5741*c83a76b0SSuyog Pawar *   semaphore,etc. Memory free is done Separately using memtabs
5742*c83a76b0SSuyog Pawar *
5743*c83a76b0SSuyog Pawar * \param[in] pv_me_ctxt : pointer to EncLoop ctxt
5744*c83a76b0SSuyog Pawar *
5745*c83a76b0SSuyog Pawar * \return
5746*c83a76b0SSuyog Pawar *    None
5747*c83a76b0SSuyog Pawar *
5748*c83a76b0SSuyog Pawar * \author
5749*c83a76b0SSuyog Pawar *  Ittiam
5750*c83a76b0SSuyog Pawar *
5751*c83a76b0SSuyog Pawar *****************************************************************************
5752*c83a76b0SSuyog Pawar */
ihevce_enc_loop_delete(void * pv_enc_loop_ctxt)5753*c83a76b0SSuyog Pawar void ihevce_enc_loop_delete(void *pv_enc_loop_ctxt)
5754*c83a76b0SSuyog Pawar {
5755*c83a76b0SSuyog Pawar     ihevce_enc_loop_master_ctxt_t *ps_enc_loop_ctxt;
5756*c83a76b0SSuyog Pawar     WORD32 ctr, enc_frm_id;
5757*c83a76b0SSuyog Pawar 
5758*c83a76b0SSuyog Pawar     ps_enc_loop_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5759*c83a76b0SSuyog Pawar 
5760*c83a76b0SSuyog Pawar     for(enc_frm_id = 0; enc_frm_id < ps_enc_loop_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5761*c83a76b0SSuyog Pawar     {
5762*c83a76b0SSuyog Pawar         for(ctr = 0; ctr < ps_enc_loop_ctxt->i4_num_bitrates; ctr++)
5763*c83a76b0SSuyog Pawar         {
5764*c83a76b0SSuyog Pawar             /* --- EncLoop Deblock sync Dep Mngr Delete --*/
5765*c83a76b0SSuyog Pawar             ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][ctr]);
5766*c83a76b0SSuyog Pawar             /* --- EncLoop Sao sync Dep Mngr Delete --*/
5767*c83a76b0SSuyog Pawar             ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_sao[enc_frm_id][ctr]);
5768*c83a76b0SSuyog Pawar             /* --- EncLoop Top-Right CU sync Dep Mngr Delete --*/
5769*c83a76b0SSuyog Pawar             ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][ctr]);
5770*c83a76b0SSuyog Pawar         }
5771*c83a76b0SSuyog Pawar     }
5772*c83a76b0SSuyog Pawar }
5773*c83a76b0SSuyog Pawar 
5774*c83a76b0SSuyog Pawar /*!
5775*c83a76b0SSuyog Pawar ******************************************************************************
5776*c83a76b0SSuyog Pawar * \if Function name : ihevce_enc_loop_dep_mngr_frame_reset \endif
5777*c83a76b0SSuyog Pawar *
5778*c83a76b0SSuyog Pawar * \brief
5779*c83a76b0SSuyog Pawar *    Frame level Reset for the Dependency Mngrs local to EncLoop.,
5780*c83a76b0SSuyog Pawar *    ie CU_TopRight and Dblk
5781*c83a76b0SSuyog Pawar *
5782*c83a76b0SSuyog Pawar * \param[in] pv_enc_loop_ctxt       : Enc_loop context pointer
5783*c83a76b0SSuyog Pawar *
5784*c83a76b0SSuyog Pawar * \return
5785*c83a76b0SSuyog Pawar *    None
5786*c83a76b0SSuyog Pawar *
5787*c83a76b0SSuyog Pawar * \author
5788*c83a76b0SSuyog Pawar *  Ittiam
5789*c83a76b0SSuyog Pawar *
5790*c83a76b0SSuyog Pawar *****************************************************************************
5791*c83a76b0SSuyog Pawar */
ihevce_enc_loop_dep_mngr_frame_reset(void * pv_enc_loop_ctxt,WORD32 enc_frm_id)5792*c83a76b0SSuyog Pawar void ihevce_enc_loop_dep_mngr_frame_reset(void *pv_enc_loop_ctxt, WORD32 enc_frm_id)
5793*c83a76b0SSuyog Pawar {
5794*c83a76b0SSuyog Pawar     WORD32 ctr, frame_id;
5795*c83a76b0SSuyog Pawar     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5796*c83a76b0SSuyog Pawar 
5797*c83a76b0SSuyog Pawar     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5798*c83a76b0SSuyog Pawar 
5799*c83a76b0SSuyog Pawar     if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
5800*c83a76b0SSuyog Pawar     {
5801*c83a76b0SSuyog Pawar         frame_id = 0;
5802*c83a76b0SSuyog Pawar     }
5803*c83a76b0SSuyog Pawar     else
5804*c83a76b0SSuyog Pawar     {
5805*c83a76b0SSuyog Pawar         frame_id = enc_frm_id;
5806*c83a76b0SSuyog Pawar     }
5807*c83a76b0SSuyog Pawar 
5808*c83a76b0SSuyog Pawar     for(ctr = 0; ctr < ps_master_ctxt->i4_num_bitrates; ctr++)
5809*c83a76b0SSuyog Pawar     {
5810*c83a76b0SSuyog Pawar         /* Dep. Mngr : Reset the num ctb Deblocked in every row  for ENC sync */
5811*c83a76b0SSuyog Pawar         ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[frame_id][ctr]);
5812*c83a76b0SSuyog Pawar 
5813*c83a76b0SSuyog Pawar         /* Dep. Mngr : Reset the num SAO ctb in every row  for ENC sync */
5814*c83a76b0SSuyog Pawar         ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[frame_id][ctr]);
5815*c83a76b0SSuyog Pawar 
5816*c83a76b0SSuyog Pawar         /* Dep. Mngr : Reset the TopRight CU Processed in every row  for ENC sync */
5817*c83a76b0SSuyog Pawar         ihevce_dmgr_rst_row_row_sync(
5818*c83a76b0SSuyog Pawar             ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[frame_id][ctr]);
5819*c83a76b0SSuyog Pawar     }
5820*c83a76b0SSuyog Pawar }
5821*c83a76b0SSuyog Pawar 
5822*c83a76b0SSuyog Pawar /*!
5823*c83a76b0SSuyog Pawar ******************************************************************************
5824*c83a76b0SSuyog Pawar * \if Function name : ihevce_enc_loop_frame_init \endif
5825*c83a76b0SSuyog Pawar *
5826*c83a76b0SSuyog Pawar * \brief
5827*c83a76b0SSuyog Pawar *    Frame level init of enocde loop function .
5828*c83a76b0SSuyog Pawar *    Frame level init of encode loop function.
5829*c83a76b0SSuyog Pawar * \param[in] pv_enc_loop_ctxt           : Enc_loop context pointer
5830*c83a76b0SSuyog Pawar * \param[in] pi4_cu_processed           : ptr to cur frame cu process in pix.
5831*c83a76b0SSuyog Pawar * \param[in] aps_ref_list               : ref pic list for the current frame
5832*c83a76b0SSuyog Pawar * \param[in] ps_slice_hdr               : ptr to current slice header params
5833*c83a76b0SSuyog Pawar * \param[in] ps_pps                     : ptr to active pps params
5834*c83a76b0SSuyog Pawar * \param[in] ps_sps                     : ptr to active sps params
5835*c83a76b0SSuyog Pawar * \param[in] ps_vps                     : ptr to active vps params
5836*c83a76b0SSuyog Pawar 
5837*c83a76b0SSuyog Pawar 
5838*c83a76b0SSuyog Pawar * \param[in] i1_weighted_pred_flag      : weighted pred enable flag (unidir)
5839*c83a76b0SSuyog Pawar * \param[in] i1_weighted_bipred_flag    : weighted pred enable flag (bidir)
5840*c83a76b0SSuyog Pawar * \param[in] log2_luma_wght_denom       : down shift factor for weighted pred of luma
5841*c83a76b0SSuyog Pawar * \param[in] log2_chroma_wght_denom       : down shift factor for weighted pred of chroma
5842*c83a76b0SSuyog Pawar * \param[in] cur_poc                    : currennt frame poc
5843*c83a76b0SSuyog Pawar * \param[in] cur_poc                    : current frame poc
5844*c83a76b0SSuyog Pawar *
5845*c83a76b0SSuyog Pawar * \return
5846*c83a76b0SSuyog Pawar *    None
5847*c83a76b0SSuyog Pawar *
5848*c83a76b0SSuyog Pawar * \author
5849*c83a76b0SSuyog Pawar *  Ittiam
5850*c83a76b0SSuyog Pawar *
5851*c83a76b0SSuyog Pawar *****************************************************************************
5852*c83a76b0SSuyog Pawar */
ihevce_enc_loop_frame_init(void * pv_enc_loop_ctxt,WORD32 i4_frm_qp,recon_pic_buf_t * (* aps_ref_list)[HEVCE_MAX_REF_PICS * 2],recon_pic_buf_t * ps_frm_recon,slice_header_t * ps_slice_hdr,pps_t * ps_pps,sps_t * ps_sps,vps_t * ps_vps,WORD8 i1_weighted_pred_flag,WORD8 i1_weighted_bipred_flag,WORD32 log2_luma_wght_denom,WORD32 log2_chroma_wght_denom,WORD32 cur_poc,WORD32 i4_display_num,enc_ctxt_t * ps_enc_ctxt,me_enc_rdopt_ctxt_t * ps_curr_inp_prms,WORD32 i4_bitrate_instance_num,WORD32 i4_thrd_id,WORD32 i4_enc_frm_id,WORD32 i4_num_bitrates,WORD32 i4_quality_preset,void * pv_dep_mngr_encloop_dep_me)5853*c83a76b0SSuyog Pawar void ihevce_enc_loop_frame_init(
5854*c83a76b0SSuyog Pawar     void *pv_enc_loop_ctxt,
5855*c83a76b0SSuyog Pawar     WORD32 i4_frm_qp,
5856*c83a76b0SSuyog Pawar     recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2],
5857*c83a76b0SSuyog Pawar     recon_pic_buf_t *ps_frm_recon,
5858*c83a76b0SSuyog Pawar     slice_header_t *ps_slice_hdr,
5859*c83a76b0SSuyog Pawar     pps_t *ps_pps,
5860*c83a76b0SSuyog Pawar     sps_t *ps_sps,
5861*c83a76b0SSuyog Pawar     vps_t *ps_vps,
5862*c83a76b0SSuyog Pawar     WORD8 i1_weighted_pred_flag,
5863*c83a76b0SSuyog Pawar     WORD8 i1_weighted_bipred_flag,
5864*c83a76b0SSuyog Pawar     WORD32 log2_luma_wght_denom,
5865*c83a76b0SSuyog Pawar     WORD32 log2_chroma_wght_denom,
5866*c83a76b0SSuyog Pawar     WORD32 cur_poc,
5867*c83a76b0SSuyog Pawar     WORD32 i4_display_num,
5868*c83a76b0SSuyog Pawar     enc_ctxt_t *ps_enc_ctxt,
5869*c83a76b0SSuyog Pawar     me_enc_rdopt_ctxt_t *ps_curr_inp_prms,
5870*c83a76b0SSuyog Pawar     WORD32 i4_bitrate_instance_num,
5871*c83a76b0SSuyog Pawar     WORD32 i4_thrd_id,
5872*c83a76b0SSuyog Pawar     WORD32 i4_enc_frm_id,
5873*c83a76b0SSuyog Pawar     WORD32 i4_num_bitrates,
5874*c83a76b0SSuyog Pawar     WORD32 i4_quality_preset,
5875*c83a76b0SSuyog Pawar     void *pv_dep_mngr_encloop_dep_me)
5876*c83a76b0SSuyog Pawar {
5877*c83a76b0SSuyog Pawar     /* local variables */
5878*c83a76b0SSuyog Pawar     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5879*c83a76b0SSuyog Pawar     ihevce_enc_loop_ctxt_t *ps_ctxt;
5880*c83a76b0SSuyog Pawar     WORD32 chroma_qp_offset, i4_div_factor;
5881*c83a76b0SSuyog Pawar     WORD8 i1_slice_type = ps_slice_hdr->i1_slice_type;
5882*c83a76b0SSuyog Pawar     WORD8 i1_strong_intra_smoothing_enable_flag = ps_sps->i1_strong_intra_smoothing_enable_flag;
5883*c83a76b0SSuyog Pawar 
5884*c83a76b0SSuyog Pawar     /* ENC_LOOP master state structure */
5885*c83a76b0SSuyog Pawar     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5886*c83a76b0SSuyog Pawar 
5887*c83a76b0SSuyog Pawar     /* Nithya: Store the current POC in the slice header */
5888*c83a76b0SSuyog Pawar     ps_slice_hdr->i4_abs_pic_order_cnt = cur_poc;
5889*c83a76b0SSuyog Pawar 
5890*c83a76b0SSuyog Pawar     /* Update the POC list of the current frame to the recon buffer */
5891*c83a76b0SSuyog Pawar     if(ps_slice_hdr->i1_num_ref_idx_l0_active != 0)
5892*c83a76b0SSuyog Pawar     {
5893*c83a76b0SSuyog Pawar         int i4_i;
5894*c83a76b0SSuyog Pawar         for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l0_active; i4_i++)
5895*c83a76b0SSuyog Pawar         {
5896*c83a76b0SSuyog Pawar             ps_frm_recon->ai4_col_l0_poc[i4_i] = aps_ref_list[0][i4_i]->i4_poc;
5897*c83a76b0SSuyog Pawar         }
5898*c83a76b0SSuyog Pawar     }
5899*c83a76b0SSuyog Pawar     if(ps_slice_hdr->i1_num_ref_idx_l1_active != 0)
5900*c83a76b0SSuyog Pawar     {
5901*c83a76b0SSuyog Pawar         int i4_i;
5902*c83a76b0SSuyog Pawar         for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l1_active; i4_i++)
5903*c83a76b0SSuyog Pawar         {
5904*c83a76b0SSuyog Pawar             ps_frm_recon->ai4_col_l1_poc[i4_i] = aps_ref_list[1][i4_i]->i4_poc;
5905*c83a76b0SSuyog Pawar         }
5906*c83a76b0SSuyog Pawar     }
5907*c83a76b0SSuyog Pawar 
5908*c83a76b0SSuyog Pawar     /* loop over all the threads */
5909*c83a76b0SSuyog Pawar     // for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
5910*c83a76b0SSuyog Pawar     {
5911*c83a76b0SSuyog Pawar         /* ENC_LOOP state structure */
5912*c83a76b0SSuyog Pawar         ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i4_thrd_id];
5913*c83a76b0SSuyog Pawar 
5914*c83a76b0SSuyog Pawar         /* SAO ctxt structure initialization*/
5915*c83a76b0SSuyog Pawar         ps_ctxt->s_sao_ctxt_t.ps_pps = ps_pps;
5916*c83a76b0SSuyog Pawar         ps_ctxt->s_sao_ctxt_t.ps_sps = ps_sps;
5917*c83a76b0SSuyog Pawar         ps_ctxt->s_sao_ctxt_t.ps_slice_hdr = ps_slice_hdr;
5918*c83a76b0SSuyog Pawar 
5919*c83a76b0SSuyog Pawar         /*bit-rate instance number for Multi-bitrate (MBR) encode */
5920*c83a76b0SSuyog Pawar         ps_ctxt->i4_bitrate_instance_num = i4_bitrate_instance_num;
5921*c83a76b0SSuyog Pawar         ps_ctxt->i4_num_bitrates = i4_num_bitrates;
5922*c83a76b0SSuyog Pawar         ps_ctxt->i4_chroma_format = ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format;
5923*c83a76b0SSuyog Pawar         ps_ctxt->i4_is_first_query = 1;
5924*c83a76b0SSuyog Pawar         ps_ctxt->i4_is_ctb_qp_modified = 0;
5925*c83a76b0SSuyog Pawar 
5926*c83a76b0SSuyog Pawar         /* enc_frm_id for multiframe encode */
5927*c83a76b0SSuyog Pawar 
5928*c83a76b0SSuyog Pawar         if(1 == ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel)
5929*c83a76b0SSuyog Pawar         {
5930*c83a76b0SSuyog Pawar             ps_ctxt->i4_enc_frm_id = 0;
5931*c83a76b0SSuyog Pawar             i4_enc_frm_id = 0;
5932*c83a76b0SSuyog Pawar         }
5933*c83a76b0SSuyog Pawar         else
5934*c83a76b0SSuyog Pawar         {
5935*c83a76b0SSuyog Pawar             ps_ctxt->i4_enc_frm_id = i4_enc_frm_id;
5936*c83a76b0SSuyog Pawar         }
5937*c83a76b0SSuyog Pawar 
5938*c83a76b0SSuyog Pawar         /*Initialize the sub pic rc buf appropriately */
5939*c83a76b0SSuyog Pawar 
5940*c83a76b0SSuyog Pawar         /*Set the thrd id flag */
5941*c83a76b0SSuyog Pawar         ps_enc_ctxt->s_multi_thrd
5942*c83a76b0SSuyog Pawar             .ai4_thrd_id_valid_flag[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 1;
5943*c83a76b0SSuyog Pawar 
5944*c83a76b0SSuyog Pawar         ps_enc_ctxt->s_multi_thrd
5945*c83a76b0SSuyog Pawar             .ai8_nctb_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5946*c83a76b0SSuyog Pawar         ps_enc_ctxt->s_multi_thrd
5947*c83a76b0SSuyog Pawar             .ai8_nctb_me_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5948*c83a76b0SSuyog Pawar 
5949*c83a76b0SSuyog Pawar         ps_enc_ctxt->s_multi_thrd
5950*c83a76b0SSuyog Pawar             .ai8_nctb_l0_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5951*c83a76b0SSuyog Pawar         ps_enc_ctxt->s_multi_thrd
5952*c83a76b0SSuyog Pawar             .ai8_nctb_act_factor[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5953*c83a76b0SSuyog Pawar 
5954*c83a76b0SSuyog Pawar         ps_enc_ctxt->s_multi_thrd
5955*c83a76b0SSuyog Pawar             .ai8_nctb_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5956*c83a76b0SSuyog Pawar         ps_enc_ctxt->s_multi_thrd
5957*c83a76b0SSuyog Pawar             .ai8_acc_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5958*c83a76b0SSuyog Pawar         ps_enc_ctxt->s_multi_thrd
5959*c83a76b0SSuyog Pawar             .ai8_acc_bits_mul_qs_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5960*c83a76b0SSuyog Pawar         ps_enc_ctxt->s_multi_thrd
5961*c83a76b0SSuyog Pawar             .ai8_nctb_hdr_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5962*c83a76b0SSuyog Pawar         ps_enc_ctxt->s_multi_thrd
5963*c83a76b0SSuyog Pawar             .ai8_nctb_mpm_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5964*c83a76b0SSuyog Pawar         ps_enc_ctxt->s_multi_thrd.ai4_prev_chunk_qp[i4_enc_frm_id][i4_bitrate_instance_num] =
5965*c83a76b0SSuyog Pawar             i4_frm_qp;
5966*c83a76b0SSuyog Pawar 
5967*c83a76b0SSuyog Pawar         /*Frame level data for Sub Pic rc is initalized here */
5968*c83a76b0SSuyog Pawar         /*Can be sent once per frame*/
5969*c83a76b0SSuyog Pawar         {
5970*c83a76b0SSuyog Pawar             WORD32 i4_tot_frame_ctb = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert *
5971*c83a76b0SSuyog Pawar                                       ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz;
5972*c83a76b0SSuyog Pawar 
5973*c83a76b0SSuyog Pawar             /*Accumalated bits of all cu for required CTBS estimated during RDO evaluation*/
5974*c83a76b0SSuyog Pawar             ps_ctxt->u4_total_cu_bits = 0;
5975*c83a76b0SSuyog Pawar             ps_ctxt->u4_total_cu_hdr_bits = 0;
5976*c83a76b0SSuyog Pawar 
5977*c83a76b0SSuyog Pawar             ps_ctxt->u4_cu_tot_bits_into_qscale = 0;
5978*c83a76b0SSuyog Pawar             ps_ctxt->u4_cu_tot_bits = 0;
5979*c83a76b0SSuyog Pawar             ps_ctxt->u4_total_cu_bits_mul_qs = 0;
5980*c83a76b0SSuyog Pawar             ps_ctxt->i4_display_num = i4_display_num;
5981*c83a76b0SSuyog Pawar             ps_ctxt->i4_sub_pic_level_rc = ps_enc_ctxt->s_multi_thrd.i4_in_frame_rc_enabled;
5982*c83a76b0SSuyog Pawar             /*The Qscale is to be generated every 10th of total frame ctb is completed */
5983*c83a76b0SSuyog Pawar             //ps_ctxt->i4_num_ctb_for_out_scale = (10 * i4_tot_frame_ctb)/100 ;
5984*c83a76b0SSuyog Pawar             ps_ctxt->i4_num_ctb_for_out_scale = (UPDATE_QP_AT_CTB * i4_tot_frame_ctb) / 100;
5985*c83a76b0SSuyog Pawar 
5986*c83a76b0SSuyog Pawar             ps_ctxt->i4_cu_qp_sub_pic_rc = (1 << QP_LEVEL_MOD_ACT_FACTOR);
5987*c83a76b0SSuyog Pawar             /*Sub Pic RC frame level params */
5988*c83a76b0SSuyog Pawar             ps_ctxt->i8_frame_l1_ipe_sad =
5989*c83a76b0SSuyog Pawar                 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_pre_intra_sad;
5990*c83a76b0SSuyog Pawar             ps_ctxt->i8_frame_l0_ipe_satd =
5991*c83a76b0SSuyog Pawar                 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_l0_acc_satd;
5992*c83a76b0SSuyog Pawar             ps_ctxt->i8_frame_l1_me_sad =
5993*c83a76b0SSuyog Pawar                 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_l1_coarse_me_sad;
5994*c83a76b0SSuyog Pawar             ps_ctxt->i8_frame_l1_activity_fact =
5995*c83a76b0SSuyog Pawar                 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_level_activity_fact;
5996*c83a76b0SSuyog Pawar             if(ps_ctxt->i4_sub_pic_level_rc)
5997*c83a76b0SSuyog Pawar             {
5998*c83a76b0SSuyog Pawar                 ASSERT(
5999*c83a76b0SSuyog Pawar                     ps_curr_inp_prms->ps_curr_inp->s_lap_out
6000*c83a76b0SSuyog Pawar                         .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num] != 0);
6001*c83a76b0SSuyog Pawar 
6002*c83a76b0SSuyog Pawar                 ps_ctxt->ai4_frame_bits_estimated[ps_ctxt->i4_enc_frm_id]
6003*c83a76b0SSuyog Pawar                                                  [ps_ctxt->i4_bitrate_instance_num] =
6004*c83a76b0SSuyog Pawar                     ps_curr_inp_prms->ps_curr_inp->s_lap_out
6005*c83a76b0SSuyog Pawar                         .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num];
6006*c83a76b0SSuyog Pawar             }
6007*c83a76b0SSuyog Pawar             //ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type = 1;
6008*c83a76b0SSuyog Pawar 
6009*c83a76b0SSuyog Pawar             ps_ctxt->i4_is_I_scenecut =
6010*c83a76b0SSuyog Pawar                 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
6011*c83a76b0SSuyog Pawar                  (ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_IDR_FRAME ||
6012*c83a76b0SSuyog Pawar                   ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_I_FRAME));
6013*c83a76b0SSuyog Pawar 
6014*c83a76b0SSuyog Pawar             ps_ctxt->i4_is_non_I_scenecut =
6015*c83a76b0SSuyog Pawar                 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
6016*c83a76b0SSuyog Pawar                  (ps_ctxt->i4_is_I_scenecut == 0));
6017*c83a76b0SSuyog Pawar 
6018*c83a76b0SSuyog Pawar             /*ps_ctxt->i4_is_I_only_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_I_only_scd;
6019*c83a76b0SSuyog Pawar             ps_ctxt->i4_is_non_I_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_non_I_scd;*/
6020*c83a76b0SSuyog Pawar             ps_ctxt->i4_is_model_valid =
6021*c83a76b0SSuyog Pawar                 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i4_is_model_valid;
6022*c83a76b0SSuyog Pawar         }
6023*c83a76b0SSuyog Pawar         /* cb and cr offsets are assumed to be same */
6024*c83a76b0SSuyog Pawar         chroma_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset + ps_pps->i1_pic_cb_qp_offset;
6025*c83a76b0SSuyog Pawar 
6026*c83a76b0SSuyog Pawar         /* assumption of cb = cr qp */
6027*c83a76b0SSuyog Pawar         ASSERT(ps_slice_hdr->i1_slice_cb_qp_offset == ps_slice_hdr->i1_slice_cr_qp_offset);
6028*c83a76b0SSuyog Pawar         ASSERT(ps_pps->i1_pic_cb_qp_offset == ps_pps->i1_pic_cr_qp_offset);
6029*c83a76b0SSuyog Pawar 
6030*c83a76b0SSuyog Pawar         ps_ctxt->u1_is_input_data_hbd = (ps_sps->i1_bit_depth_luma_minus8 > 0);
6031*c83a76b0SSuyog Pawar 
6032*c83a76b0SSuyog Pawar         ps_ctxt->u1_bit_depth = ps_sps->i1_bit_depth_luma_minus8 + 8;
6033*c83a76b0SSuyog Pawar 
6034*c83a76b0SSuyog Pawar         ps_ctxt->s_mc_ctxt.i4_bit_depth = ps_ctxt->u1_bit_depth;
6035*c83a76b0SSuyog Pawar         ps_ctxt->s_mc_ctxt.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
6036*c83a76b0SSuyog Pawar 
6037*c83a76b0SSuyog Pawar         /*remember chroma qp offset as qp related parameters are calculated at CU level*/
6038*c83a76b0SSuyog Pawar         ps_ctxt->i4_chroma_qp_offset = chroma_qp_offset;
6039*c83a76b0SSuyog Pawar         ps_ctxt->i1_cu_qp_delta_enable = ps_pps->i1_cu_qp_delta_enabled_flag;
6040*c83a76b0SSuyog Pawar         ps_ctxt->i1_entropy_coding_sync_enabled_flag = ps_pps->i1_entropy_coding_sync_enabled_flag;
6041*c83a76b0SSuyog Pawar 
6042*c83a76b0SSuyog Pawar         ps_ctxt->i4_is_ref_pic = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_ref_pic;
6043*c83a76b0SSuyog Pawar         ps_ctxt->i4_temporal_layer = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_temporal_lyr_id;
6044*c83a76b0SSuyog Pawar         ps_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER;
6045*c83a76b0SSuyog Pawar         ps_ctxt->i4_use_const_lamda_modifier =
6046*c83a76b0SSuyog Pawar             ps_ctxt->i4_use_const_lamda_modifier ||
6047*c83a76b0SSuyog Pawar             ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6048*c83a76b0SSuyog Pawar               (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
6049*c83a76b0SSuyog Pawar              ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6050*c83a76b0SSuyog Pawar                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) ||
6051*c83a76b0SSuyog Pawar               (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6052*c83a76b0SSuyog Pawar                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) ||
6053*c83a76b0SSuyog Pawar               (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6054*c83a76b0SSuyog Pawar                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) ||
6055*c83a76b0SSuyog Pawar               (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6056*c83a76b0SSuyog Pawar                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3))));
6057*c83a76b0SSuyog Pawar 
6058*c83a76b0SSuyog Pawar         {
6059*c83a76b0SSuyog Pawar             ps_ctxt->f_i_pic_lamda_modifier =
6060*c83a76b0SSuyog Pawar                 ps_curr_inp_prms->ps_curr_inp->s_lap_out.f_i_pic_lamda_modifier;
6061*c83a76b0SSuyog Pawar         }
6062*c83a76b0SSuyog Pawar 
6063*c83a76b0SSuyog Pawar         ps_ctxt->i4_frame_qp = i4_frm_qp;
6064*c83a76b0SSuyog Pawar         ps_ctxt->i4_frame_mod_qp = i4_frm_qp;
6065*c83a76b0SSuyog Pawar         ps_ctxt->i4_cu_qp = i4_frm_qp;
6066*c83a76b0SSuyog Pawar         ps_ctxt->i4_prev_cu_qp = i4_frm_qp;
6067*c83a76b0SSuyog Pawar         ps_ctxt->i4_chrm_cu_qp =
6068*c83a76b0SSuyog Pawar             (ps_ctxt->u1_chroma_array_type == 2)
6069*c83a76b0SSuyog Pawar                 ? MIN(i4_frm_qp + chroma_qp_offset, 51)
6070*c83a76b0SSuyog Pawar                 : gai1_ihevc_chroma_qp_scale[i4_frm_qp + chroma_qp_offset + MAX_QP_BD_OFFSET];
6071*c83a76b0SSuyog Pawar 
6072*c83a76b0SSuyog Pawar         ps_ctxt->i4_cu_qp_div6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
6073*c83a76b0SSuyog Pawar         i4_div_factor = (i4_frm_qp + 3) / 6;
6074*c83a76b0SSuyog Pawar         i4_div_factor = CLIP3(i4_div_factor, 3, 6);
6075*c83a76b0SSuyog Pawar         ps_ctxt->i4_cu_qp_mod6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
6076*c83a76b0SSuyog Pawar 
6077*c83a76b0SSuyog Pawar         ps_ctxt->i4_chrm_cu_qp_div6 =
6078*c83a76b0SSuyog Pawar             (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
6079*c83a76b0SSuyog Pawar         ps_ctxt->i4_chrm_cu_qp_mod6 =
6080*c83a76b0SSuyog Pawar             (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
6081*c83a76b0SSuyog Pawar 
6082*c83a76b0SSuyog Pawar #define INTER_RND_QP_BY_6
6083*c83a76b0SSuyog Pawar #ifdef INTER_RND_QP_BY_6
6084*c83a76b0SSuyog Pawar 
6085*c83a76b0SSuyog Pawar         { /*1/6 rounding for 8 bit b frames*/
6086*c83a76b0SSuyog Pawar             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = 85
6087*c83a76b0SSuyog Pawar                 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
6088*c83a76b0SSuyog Pawar         }
6089*c83a76b0SSuyog Pawar #else
6090*c83a76b0SSuyog Pawar         /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
6091*c83a76b0SSuyog Pawar         ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
6092*c83a76b0SSuyog Pawar #endif
6093*c83a76b0SSuyog Pawar 
6094*c83a76b0SSuyog Pawar         if(ISLICE == i1_slice_type)
6095*c83a76b0SSuyog Pawar         {
6096*c83a76b0SSuyog Pawar             /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
6097*c83a76b0SSuyog Pawar             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = 171
6098*c83a76b0SSuyog Pawar                 /*((1 << QUANT_ROUND_FACTOR_Q) / 3)*/;
6099*c83a76b0SSuyog Pawar         }
6100*c83a76b0SSuyog Pawar         else
6101*c83a76b0SSuyog Pawar         {
6102*c83a76b0SSuyog Pawar             /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
6103*c83a76b0SSuyog Pawar             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
6104*c83a76b0SSuyog Pawar                 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
6105*c83a76b0SSuyog Pawar             /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
6106*c83a76b0SSuyog Pawar         }
6107*c83a76b0SSuyog Pawar 
6108*c83a76b0SSuyog Pawar         ps_ctxt->i1_strong_intra_smoothing_enable_flag = i1_strong_intra_smoothing_enable_flag;
6109*c83a76b0SSuyog Pawar 
6110*c83a76b0SSuyog Pawar         ps_ctxt->i1_slice_type = i1_slice_type;
6111*c83a76b0SSuyog Pawar 
6112*c83a76b0SSuyog Pawar         /* initialize the inter pred (MC) context at frame level */
6113*c83a76b0SSuyog Pawar         ps_ctxt->s_mc_ctxt.ps_ref_list = aps_ref_list;
6114*c83a76b0SSuyog Pawar         ps_ctxt->s_mc_ctxt.i1_weighted_pred_flag = i1_weighted_pred_flag;
6115*c83a76b0SSuyog Pawar         ps_ctxt->s_mc_ctxt.i1_weighted_bipred_flag = i1_weighted_bipred_flag;
6116*c83a76b0SSuyog Pawar         ps_ctxt->s_mc_ctxt.i4_log2_luma_wght_denom = log2_luma_wght_denom;
6117*c83a76b0SSuyog Pawar         ps_ctxt->s_mc_ctxt.i4_log2_chroma_wght_denom = log2_chroma_wght_denom;
6118*c83a76b0SSuyog Pawar 
6119*c83a76b0SSuyog Pawar         /* initialize the MV pred context at frame level */
6120*c83a76b0SSuyog Pawar         ps_ctxt->s_mv_pred_ctxt.ps_ref_list = aps_ref_list;
6121*c83a76b0SSuyog Pawar         ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr = ps_slice_hdr;
6122*c83a76b0SSuyog Pawar         ps_ctxt->s_mv_pred_ctxt.ps_sps = ps_sps;
6123*c83a76b0SSuyog Pawar         ps_ctxt->s_mv_pred_ctxt.i4_log2_parallel_merge_level_minus2 =
6124*c83a76b0SSuyog Pawar             ps_pps->i1_log2_parallel_merge_level - 2;
6125*c83a76b0SSuyog Pawar 
6126*c83a76b0SSuyog Pawar #if ADAPT_COLOCATED_FROM_L0_FLAG
6127*c83a76b0SSuyog Pawar         if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_temporal_mvp_enable_flag)
6128*c83a76b0SSuyog Pawar         {
6129*c83a76b0SSuyog Pawar             if((ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_num_ref_idx_l1_active > 0) &&
6130*c83a76b0SSuyog Pawar                (ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][0]->i4_frame_qp <
6131*c83a76b0SSuyog Pawar                 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][0]->i4_frame_qp))
6132*c83a76b0SSuyog Pawar             {
6133*c83a76b0SSuyog Pawar                 ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_collocated_from_l0_flag = 1;
6134*c83a76b0SSuyog Pawar             }
6135*c83a76b0SSuyog Pawar         }
6136*c83a76b0SSuyog Pawar #endif
6137*c83a76b0SSuyog Pawar         /* Initialization of deblocking params */
6138*c83a76b0SSuyog Pawar         ps_ctxt->s_deblk_prms.i4_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
6139*c83a76b0SSuyog Pawar         ps_ctxt->s_deblk_prms.i4_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
6140*c83a76b0SSuyog Pawar 
6141*c83a76b0SSuyog Pawar         ps_ctxt->s_deblk_prms.i4_cb_qp_indx_offset = ps_pps->i1_pic_cb_qp_offset;
6142*c83a76b0SSuyog Pawar 
6143*c83a76b0SSuyog Pawar         ps_ctxt->s_deblk_prms.i4_cr_qp_indx_offset = ps_pps->i1_pic_cr_qp_offset;
6144*c83a76b0SSuyog Pawar         /* init frame level stat accumulation parameters */
6145*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6146*c83a76b0SSuyog Pawar             ->u4_frame_sad_acc = 0;
6147*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6148*c83a76b0SSuyog Pawar             ->u4_frame_intra_sad_acc = 0;
6149*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6150*c83a76b0SSuyog Pawar             ->u4_frame_open_loop_intra_sad = 0;
6151*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6152*c83a76b0SSuyog Pawar             ->i8_frame_open_loop_ssd = 0;
6153*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6154*c83a76b0SSuyog Pawar             ->u4_frame_inter_sad_acc = 0;
6155*c83a76b0SSuyog Pawar 
6156*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6157*c83a76b0SSuyog Pawar             ->i8_frame_cost_acc = 0;
6158*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6159*c83a76b0SSuyog Pawar             ->i8_frame_intra_cost_acc = 0;
6160*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6161*c83a76b0SSuyog Pawar             ->i8_frame_inter_cost_acc = 0;
6162*c83a76b0SSuyog Pawar 
6163*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6164*c83a76b0SSuyog Pawar             ->u4_frame_intra_sad = 0;
6165*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6166*c83a76b0SSuyog Pawar             ->u4_frame_rdopt_bits = 0;
6167*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6168*c83a76b0SSuyog Pawar             ->u4_frame_rdopt_header_bits = 0;
6169*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6170*c83a76b0SSuyog Pawar             ->i4_qp_normalized_8x8_cu_sum[0] = 0;
6171*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6172*c83a76b0SSuyog Pawar             ->i4_qp_normalized_8x8_cu_sum[1] = 0;
6173*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6174*c83a76b0SSuyog Pawar             ->i4_8x8_cu_sum[0] = 0;
6175*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6176*c83a76b0SSuyog Pawar             ->i4_8x8_cu_sum[1] = 0;
6177*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6178*c83a76b0SSuyog Pawar             ->i8_sad_by_qscale[0] = 0;
6179*c83a76b0SSuyog Pawar         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6180*c83a76b0SSuyog Pawar             ->i8_sad_by_qscale[1] = 0;
6181*c83a76b0SSuyog Pawar         /* Compute the frame_qstep */
6182*c83a76b0SSuyog Pawar         GET_FRAME_QSTEP_FROM_QP(ps_ctxt->i4_frame_qp, ps_ctxt->i4_frame_qstep);
6183*c83a76b0SSuyog Pawar 
6184*c83a76b0SSuyog Pawar         ps_ctxt->u1_max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_inter;
6185*c83a76b0SSuyog Pawar 
6186*c83a76b0SSuyog Pawar         ps_ctxt->ps_rc_quant_ctxt = &ps_enc_ctxt->s_rc_quant;
6187*c83a76b0SSuyog Pawar         /* initialize the cabac rdopt context at frame level */
6188*c83a76b0SSuyog Pawar         ihevce_entropy_rdo_frame_init(
6189*c83a76b0SSuyog Pawar             &ps_ctxt->s_rdopt_entropy_ctxt,
6190*c83a76b0SSuyog Pawar             ps_slice_hdr,
6191*c83a76b0SSuyog Pawar             ps_pps,
6192*c83a76b0SSuyog Pawar             ps_sps,
6193*c83a76b0SSuyog Pawar             ps_vps,
6194*c83a76b0SSuyog Pawar             ps_master_ctxt->au1_cu_skip_top_row,
6195*c83a76b0SSuyog Pawar             &ps_enc_ctxt->s_rc_quant);
6196*c83a76b0SSuyog Pawar 
6197*c83a76b0SSuyog Pawar         /* register the dep mngr instance for forward ME sync */
6198*c83a76b0SSuyog Pawar         ps_ctxt->pv_dep_mngr_encloop_dep_me = pv_dep_mngr_encloop_dep_me;
6199*c83a76b0SSuyog Pawar     }
6200*c83a76b0SSuyog Pawar }
6201*c83a76b0SSuyog Pawar /*
6202*c83a76b0SSuyog Pawar ******************************************************************************
6203*c83a76b0SSuyog Pawar * \if Function name : ihevce_enc_loop_get_frame_rc_prms \endif
6204*c83a76b0SSuyog Pawar *
6205*c83a76b0SSuyog Pawar * \brief
6206*c83a76b0SSuyog Pawar *    returns Nil
6207*c83a76b0SSuyog Pawar *
6208*c83a76b0SSuyog Pawar * \param[in] pv_enc_loop_ctxt : pointer to encode loop context
6209*c83a76b0SSuyog Pawar * \param[out]ps_rc_prms       : ptr to frame level info structure
6210*c83a76b0SSuyog Pawar *
6211*c83a76b0SSuyog Pawar * \return
6212*c83a76b0SSuyog Pawar *    None
6213*c83a76b0SSuyog Pawar *
6214*c83a76b0SSuyog Pawar * \author
6215*c83a76b0SSuyog Pawar *  Ittiam
6216*c83a76b0SSuyog Pawar *
6217*c83a76b0SSuyog Pawar *****************************************************************************
6218*c83a76b0SSuyog Pawar */
ihevce_enc_loop_get_frame_rc_prms(void * pv_enc_loop_ctxt,rc_bits_sad_t * ps_rc_prms,WORD32 i4_br_id,WORD32 i4_enc_frm_id)6219*c83a76b0SSuyog Pawar void ihevce_enc_loop_get_frame_rc_prms(
6220*c83a76b0SSuyog Pawar     void *pv_enc_loop_ctxt,
6221*c83a76b0SSuyog Pawar     rc_bits_sad_t *ps_rc_prms,
6222*c83a76b0SSuyog Pawar     WORD32 i4_br_id,  //bitrate instance id
6223*c83a76b0SSuyog Pawar     WORD32 i4_enc_frm_id)  // frame id
6224*c83a76b0SSuyog Pawar {
6225*c83a76b0SSuyog Pawar     /*Get the master thread pointer*/
6226*c83a76b0SSuyog Pawar     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
6227*c83a76b0SSuyog Pawar     ihevce_enc_loop_ctxt_t *ps_ctxt;
6228*c83a76b0SSuyog Pawar     UWORD32 total_frame_intra_sad = 0, total_frame_open_loop_intra_sad = 0;
6229*c83a76b0SSuyog Pawar     LWORD64 i8_total_ssd_frame = 0;
6230*c83a76b0SSuyog Pawar     UWORD32 total_frame_sad = 0;
6231*c83a76b0SSuyog Pawar     UWORD32 total_frame_rdopt_bits = 0;
6232*c83a76b0SSuyog Pawar     UWORD32 total_frame_rdopt_header_bits = 0;
6233*c83a76b0SSuyog Pawar     WORD32 i4_qp_normalized_8x8_cu_sum[2] = { 0, 0 };
6234*c83a76b0SSuyog Pawar     WORD32 i4_8x8_cu_sum[2] = { 0, 0 };
6235*c83a76b0SSuyog Pawar     LWORD64 i8_sad_by_qscale[2] = { 0, 0 };
6236*c83a76b0SSuyog Pawar     WORD32 i4_curr_qp_acc = 0;
6237*c83a76b0SSuyog Pawar     WORD32 i;
6238*c83a76b0SSuyog Pawar 
6239*c83a76b0SSuyog Pawar     /* ENC_LOOP master state structure */
6240*c83a76b0SSuyog Pawar     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
6241*c83a76b0SSuyog Pawar 
6242*c83a76b0SSuyog Pawar     if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
6243*c83a76b0SSuyog Pawar     {
6244*c83a76b0SSuyog Pawar         i4_enc_frm_id = 0;
6245*c83a76b0SSuyog Pawar     }
6246*c83a76b0SSuyog Pawar     /*loop through all threads and accumulate intra sad across all threads*/
6247*c83a76b0SSuyog Pawar     for(i = 0; i < ps_master_ctxt->i4_num_proc_thrds; i++)
6248*c83a76b0SSuyog Pawar     {
6249*c83a76b0SSuyog Pawar         /* ENC_LOOP state structure */
6250*c83a76b0SSuyog Pawar         ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i];
6251*c83a76b0SSuyog Pawar         total_frame_open_loop_intra_sad +=
6252*c83a76b0SSuyog Pawar             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad;
6253*c83a76b0SSuyog Pawar         i8_total_ssd_frame +=
6254*c83a76b0SSuyog Pawar             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd;
6255*c83a76b0SSuyog Pawar         total_frame_intra_sad +=
6256*c83a76b0SSuyog Pawar             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad;
6257*c83a76b0SSuyog Pawar         total_frame_sad +=
6258*c83a76b0SSuyog Pawar             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc;
6259*c83a76b0SSuyog Pawar         total_frame_rdopt_bits +=
6260*c83a76b0SSuyog Pawar             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits;
6261*c83a76b0SSuyog Pawar         total_frame_rdopt_header_bits +=
6262*c83a76b0SSuyog Pawar             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits;
6263*c83a76b0SSuyog Pawar         i4_qp_normalized_8x8_cu_sum[0] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
6264*c83a76b0SSuyog Pawar                                               ->i4_qp_normalized_8x8_cu_sum[0];
6265*c83a76b0SSuyog Pawar         i4_qp_normalized_8x8_cu_sum[1] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
6266*c83a76b0SSuyog Pawar                                               ->i4_qp_normalized_8x8_cu_sum[1];
6267*c83a76b0SSuyog Pawar         i4_8x8_cu_sum[0] +=
6268*c83a76b0SSuyog Pawar             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[0];
6269*c83a76b0SSuyog Pawar         i4_8x8_cu_sum[1] +=
6270*c83a76b0SSuyog Pawar             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[1];
6271*c83a76b0SSuyog Pawar         i8_sad_by_qscale[0] +=
6272*c83a76b0SSuyog Pawar             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[0];
6273*c83a76b0SSuyog Pawar         i8_sad_by_qscale[1] +=
6274*c83a76b0SSuyog Pawar             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[1];
6275*c83a76b0SSuyog Pawar     }
6276*c83a76b0SSuyog Pawar 
6277*c83a76b0SSuyog Pawar     ps_rc_prms->u4_open_loop_intra_sad = total_frame_open_loop_intra_sad;
6278*c83a76b0SSuyog Pawar     ps_rc_prms->i8_total_ssd_frame = i8_total_ssd_frame;
6279*c83a76b0SSuyog Pawar     ps_rc_prms->u4_total_sad = total_frame_sad;
6280*c83a76b0SSuyog Pawar     ps_rc_prms->u4_total_texture_bits = total_frame_rdopt_bits - total_frame_rdopt_header_bits;
6281*c83a76b0SSuyog Pawar     ps_rc_prms->u4_total_header_bits = total_frame_rdopt_header_bits;
6282*c83a76b0SSuyog Pawar     /*This accumulation of intra frame sad is not intact. This can only be a temp change*/
6283*c83a76b0SSuyog Pawar     ps_rc_prms->u4_total_intra_sad = total_frame_intra_sad;
6284*c83a76b0SSuyog Pawar     ps_rc_prms->i4_qp_normalized_8x8_cu_sum[0] = i4_qp_normalized_8x8_cu_sum[0];
6285*c83a76b0SSuyog Pawar     ps_rc_prms->i4_qp_normalized_8x8_cu_sum[1] = i4_qp_normalized_8x8_cu_sum[1];
6286*c83a76b0SSuyog Pawar     ps_rc_prms->i4_8x8_cu_sum[0] = i4_8x8_cu_sum[0];
6287*c83a76b0SSuyog Pawar     ps_rc_prms->i4_8x8_cu_sum[1] = i4_8x8_cu_sum[1];
6288*c83a76b0SSuyog Pawar     ps_rc_prms->i8_sad_by_qscale[0] = i8_sad_by_qscale[0];
6289*c83a76b0SSuyog Pawar     ps_rc_prms->i8_sad_by_qscale[1] = i8_sad_by_qscale[1];
6290*c83a76b0SSuyog Pawar }
6291