xref: /aosp_15_r20/external/libhevc/encoder/ihevce_enc_loop_pass.c (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /*!
22 ******************************************************************************
23 * \file ihevce_enc_loop_pass.c
24 *
25 * \brief
26 *    This file contains Encoder normative loop pass related functions
27 *
28 * \date
29 *    18/09/2012
30 *
31 * \author
32 *    Ittiam
33 *
34 *
35 * List of Functions
36 *
37 *
38 ******************************************************************************
39 */
40 
41 /*****************************************************************************/
42 /* File Includes                                                             */
43 /*****************************************************************************/
44 /* System include files */
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <assert.h>
49 #include <stdarg.h>
50 #include <math.h>
51 #include <limits.h>
52 
53 /* User include files */
54 #include "ihevc_typedefs.h"
55 #include "itt_video_api.h"
56 #include "ihevce_api.h"
57 
58 #include "rc_cntrl_param.h"
59 #include "rc_frame_info_collector.h"
60 #include "rc_look_ahead_params.h"
61 
62 #include "ihevc_defs.h"
63 #include "ihevc_macros.h"
64 #include "ihevc_debug.h"
65 #include "ihevc_structs.h"
66 #include "ihevc_platform_macros.h"
67 #include "ihevc_deblk.h"
68 #include "ihevc_itrans_recon.h"
69 #include "ihevc_chroma_itrans_recon.h"
70 #include "ihevc_chroma_intra_pred.h"
71 #include "ihevc_intra_pred.h"
72 #include "ihevc_inter_pred.h"
73 #include "ihevc_mem_fns.h"
74 #include "ihevc_padding.h"
75 #include "ihevc_weighted_pred.h"
76 #include "ihevc_sao.h"
77 #include "ihevc_resi_trans.h"
78 #include "ihevc_quant_iquant_ssd.h"
79 #include "ihevc_cabac_tables.h"
80 #include "ihevc_common_tables.h"
81 #include "ihevc_quant_tables.h"
82 
83 #include "ihevce_defs.h"
84 #include "ihevce_hle_interface.h"
85 #include "ihevce_lap_enc_structs.h"
86 #include "ihevce_multi_thrd_structs.h"
87 #include "ihevce_multi_thrd_funcs.h"
88 #include "ihevce_me_common_defs.h"
89 #include "ihevce_had_satd.h"
90 #include "ihevce_error_codes.h"
91 #include "ihevce_bitstream.h"
92 #include "ihevce_cabac.h"
93 #include "ihevce_rdoq_macros.h"
94 #include "ihevce_function_selector.h"
95 #include "ihevce_enc_structs.h"
96 #include "ihevce_entropy_structs.h"
97 #include "ihevce_cmn_utils_instr_set_router.h"
98 #include "ihevce_ipe_instr_set_router.h"
99 #include "ihevce_decomp_pre_intra_structs.h"
100 #include "ihevce_decomp_pre_intra_pass.h"
101 #include "ihevce_enc_loop_structs.h"
102 #include "ihevce_nbr_avail.h"
103 #include "ihevce_enc_loop_utils.h"
104 #include "ihevce_sub_pic_rc.h"
105 #include "ihevce_global_tables.h"
106 #include "ihevce_bs_compute_ctb.h"
107 #include "ihevce_cabac_rdo.h"
108 #include "ihevce_deblk.h"
109 #include "ihevce_frame_process.h"
110 #include "ihevce_rc_enc_structs.h"
111 #include "hme_datatype.h"
112 #include "hme_interface.h"
113 #include "hme_common_defs.h"
114 #include "hme_defs.h"
115 #include "ihevce_me_instr_set_router.h"
116 #include "ihevce_enc_subpel_gen.h"
117 #include "ihevce_inter_pred.h"
118 #include "ihevce_mv_pred.h"
119 #include "ihevce_mv_pred_merge.h"
120 #include "ihevce_enc_loop_inter_mode_sifter.h"
121 #include "ihevce_enc_cu_recursion.h"
122 #include "ihevce_enc_loop_pass.h"
123 #include "ihevce_common_utils.h"
124 #include "ihevce_dep_mngr_interface.h"
125 #include "ihevce_sao.h"
126 #include "ihevce_tile_interface.h"
127 #include "ihevce_profile.h"
128 
129 #include "cast_types.h"
130 #include "osal.h"
131 #include "osal_defaults.h"
132 
133 /*****************************************************************************/
134 /* Globals                                                                   */
135 /*****************************************************************************/
136 extern PART_ID_T ge_part_type_to_part_id[MAX_PART_TYPES][MAX_NUM_PARTS];
137 
138 extern UWORD8 gau1_num_parts_in_part_type[MAX_PART_TYPES];
139 
140 /*****************************************************************************/
141 /* Constant Macros                                                           */
142 /*****************************************************************************/
143 #define UPDATE_QP_AT_CTB 6
144 #define INTRAPRED_SIMD_LEFT_PADDING 16
145 #define INTRAPRED_SIMD_RIGHT_PADDING 8
146 
147 /*****************************************************************************/
148 /* Function Definitions                                                      */
149 /*****************************************************************************/
150 
151 /*!
152 ******************************************************************************
153 * \if Function name : ihevce_enc_loop_ctb_left_copy \endif
154 *
155 * \brief
156 *    This function copy the right data of CTB to context buffers
157 *
158 * \date
159 *    18/09/2012
160 *
161 * \author
162 *    Ittiam
163 *
164 * \return
165 *
166 * List of Functions
167 *
168 *
169 ******************************************************************************
170 */
ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms)171 void ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms)
172 {
173     /* ------------------------------------------------------------------ */
174     /* copy the right coloum data to the context buffers                  */
175     /* ------------------------------------------------------------------ */
176 
177     nbr_4x4_t *ps_left_nbr;
178     nbr_4x4_t *ps_nbr;
179     UWORD8 *pu1_buff;
180     WORD32 num_pels;
181     UWORD8 *pu1_luma_left, *pu1_chrm_left;
182 
183     UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
184 
185     pu1_luma_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
186     pu1_chrm_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
187     ps_left_nbr = &ps_ctxt->as_left_col_nbr[0];
188 
189     /* copy right luma data */
190     pu1_buff = ps_cu_prms->pu1_luma_recon + ps_cu_prms->i4_ctb_size - 1;
191 
192     for(num_pels = 0; num_pels < ps_cu_prms->i4_ctb_size; num_pels++)
193     {
194         WORD32 i4_indx = ps_cu_prms->i4_luma_recon_stride * num_pels;
195 
196         pu1_luma_left[num_pels] = pu1_buff[i4_indx];
197     }
198 
199     /* copy right chroma data */
200     pu1_buff = ps_cu_prms->pu1_chrm_recon + ps_cu_prms->i4_ctb_size - 2;
201 
202     for(num_pels = 0; num_pels < (ps_cu_prms->i4_ctb_size >> (0 == u1_is_422)); num_pels++)
203     {
204         WORD32 i4_indx = ps_cu_prms->i4_chrm_recon_stride * num_pels;
205 
206         *pu1_chrm_left++ = pu1_buff[i4_indx];
207         *pu1_chrm_left++ = pu1_buff[i4_indx + 1];
208     }
209 
210     /* store the nbr 4x4 data at ctb level */
211     {
212         WORD32 ctr;
213         WORD32 nbr_strd;
214 
215         nbr_strd = ps_cu_prms->i4_ctb_size >> 2;
216 
217         /* copy right nbr data */
218         ps_nbr = &ps_ctxt->as_ctb_nbr_arr[0];
219         ps_nbr += ((ps_cu_prms->i4_ctb_size >> 2) - 1);
220 
221         for(ctr = 0; ctr < (ps_cu_prms->i4_ctb_size >> 2); ctr++)
222         {
223             WORD32 i4_indx = nbr_strd * ctr;
224 
225             ps_left_nbr[ctr] = ps_nbr[i4_indx];
226         }
227     }
228     return;
229 }
230 
231 /*!
232 ******************************************************************************
233 * \if Function name : ihevce_mark_all_modes_to_evaluate \endif
234 *
235 * \brief
236 *   Mark all modes for inter/intra for evaluation. This function will be
237 *   called by ref instance
238 *
239 * \param[in] pv_ctxt : pointer to enc_loop module
240 * \param[in] ps_cu_analyse : pointer to cu analyse
241 *
242 * \return
243 *    None
244 *
245 * \author
246 *  Ittiam
247 *
248 *****************************************************************************
249 */
ihevce_mark_all_modes_to_evaluate(void * pv_ctxt,cu_analyse_t * ps_cu_analyse)250 void ihevce_mark_all_modes_to_evaluate(void *pv_ctxt, cu_analyse_t *ps_cu_analyse)
251 {
252     UWORD8 ctr;
253     WORD32 i4_part;
254 
255     (void)pv_ctxt;
256     /* run a loop over all Inter cands */
257     for(ctr = 0; ctr < MAX_INTER_CU_CANDIDATES; ctr++)
258     {
259         ps_cu_analyse->as_cu_inter_cand[ctr].b1_eval_mark = 1;
260     }
261 
262     /* run a loop over all intra candidates */
263     if(0 != ps_cu_analyse->u1_num_intra_rdopt_cands)
264     {
265         for(ctr = 0; ctr < MAX_INTRA_CU_CANDIDATES + 1; ctr++)
266         {
267             ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr] = 1;
268             ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr] = 1;
269 
270             for(i4_part = 0; i4_part < NUM_PU_PARTS; i4_part++)
271             {
272                 ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[i4_part][ctr] = 1;
273             }
274         }
275     }
276 }
277 
278 /*!
279 ******************************************************************************
280 * \if Function name : ihevce_cu_mode_decide \endif
281 *
282 * \brief
283 *    Coding Unit mode decide function. Performs RD opt and decides the best mode
284 *
285 * \param[in] ps_ctxt : pointer to enc_loop module
286 * \param[in] ps_cu_prms  : pointer to coding unit params (position, buffer pointers)
287 * \param[in] ps_cu_analyse : pointer to cu analyse
288 * \param[out] ps_final_mode_state : pointer to final mode state
289 * \param[out] pu1_ecd_data : pointer to store coeff data for ECD
290 * \param[out] ps_col_pu : colocated pu buffer pointer
291 * \param[out] pu1_col_pu_map : colocated pu map buffer pointer
292 * \param[in] col_start_pu_idx : pu index start value
293 *
294 * \return
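295 *    LWORD64 value; presumably the RD-opt cost of the selected mode (TODO confirm against the return statement)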
296 *
297 *
298 * \author
299 *  Ittiam
300 *
301 *****************************************************************************
302 */
ihevce_cu_mode_decide(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,cu_analyse_t * ps_cu_analyse,final_mode_state_t * ps_final_mode_state,UWORD8 * pu1_ecd_data,pu_col_mv_t * ps_col_pu,UWORD8 * pu1_col_pu_map,WORD32 col_start_pu_idx)303 LWORD64 ihevce_cu_mode_decide(
304     ihevce_enc_loop_ctxt_t *ps_ctxt,
305     enc_loop_cu_prms_t *ps_cu_prms,
306     cu_analyse_t *ps_cu_analyse,
307     final_mode_state_t *ps_final_mode_state,
308     UWORD8 *pu1_ecd_data,
309     pu_col_mv_t *ps_col_pu,
310     UWORD8 *pu1_col_pu_map,
311     WORD32 col_start_pu_idx)
312 {
313     enc_loop_chrm_cu_buf_prms_t s_chrm_cu_buf_prms;
314     cu_nbr_prms_t s_cu_nbr_prms;
315     inter_cu_mode_info_t s_inter_cu_mode_info;
316     cu_inter_cand_t *ps_best_inter_cand = NULL;
317     UWORD8 *pu1_cu_top;
318     UWORD8 *pu1_cu_top_left;
319     UWORD8 *pu1_cu_left;
320     UWORD8 *pu1_final_recon = NULL;
321     UWORD8 *pu1_curr_src = NULL;
322     void *pv_curr_src = NULL;
323     void *pv_cu_left = NULL;
324     void *pv_cu_top = NULL;
325     void *pv_cu_top_left = NULL;
326 
327     WORD32 cu_left_stride = 0;
328     WORD32 ctr;
329     WORD32 rd_opt_best_idx;
330     LWORD64 rd_opt_least_cost;
331     WORD32 rd_opt_curr_idx;
332     WORD32 num_4x4_in_ctb;
333     WORD32 nbr_4x4_left_strd = 0;
334 
335     nbr_4x4_t *ps_topleft_nbr_4x4;
336     nbr_4x4_t *ps_left_nbr_4x4 = NULL;
337     nbr_4x4_t *ps_top_nbr_4x4 = NULL;
338     nbr_4x4_t *ps_curr_nbr_4x4;
339     WORD32 enable_intra_eval_flag;
340     WORD32 i4_best_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1;
341     WORD32 curr_cu_pos_in_row;
342     WORD32 cu_top_right_offset;
343     WORD32 cu_top_right_dep_pos;
344     WORD32 i4_ctb_x_off, i4_ctb_y_off;
345 
346     UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
347     (void)ps_final_mode_state;
348     /* default init */
349     rd_opt_least_cost = MAX_COST_64;
350     ps_ctxt->as_cu_prms[0].i8_best_rdopt_cost = MAX_COST_64;
351     ps_ctxt->as_cu_prms[1].i8_best_rdopt_cost = MAX_COST_64;
352 
353     /* Zero cbf tool is enabled by default for all presets */
354     ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
355 
356     rd_opt_best_idx = 1;
357     rd_opt_curr_idx = 0;
358     enable_intra_eval_flag = 1;
359 
360     /* CU params in enc ctxt*/
361     ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
362     ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
363     ps_ctxt->ps_enc_out_ctxt->u1_cu_size = ps_cu_analyse->u1_cu_size;
364 
365     num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
366     ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
367     ps_curr_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
368     ps_curr_nbr_4x4 += ((ps_cu_analyse->b3_cu_pos_y << 1) * num_4x4_in_ctb);
369 
370     /* CB and Cr are pixel interleaved */
371     s_chrm_cu_buf_prms.i4_chrm_recon_stride = ps_cu_prms->i4_chrm_recon_stride;
372 
373     s_chrm_cu_buf_prms.i4_chrm_src_stride = ps_cu_prms->i4_chrm_src_stride;
374 
375     if(!ps_ctxt->u1_is_input_data_hbd)
376     {
377         /* --------------------------------------- */
378         /* ----- Luma Pointers Derivation -------- */
379         /* --------------------------------------- */
380 
381         /* based on CU position derive the pointers */
382         pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
383 
384         pu1_curr_src = ps_cu_prms->pu1_luma_src + (ps_cu_analyse->b3_cu_pos_x << 3);
385 
386         pu1_final_recon += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
387 
388         pu1_curr_src += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_src_stride);
389 
390         pv_curr_src = pu1_curr_src;
391 
392         /* CU left */
393         if(0 == ps_cu_analyse->b3_cu_pos_x)
394         {
395             /* CTB boundary */
396             pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
397             pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << 3);
398             cu_left_stride = 1;
399 
400             ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0];
401             ps_left_nbr_4x4 += ps_cu_analyse->b3_cu_pos_y << 1;
402             nbr_4x4_left_strd = 1;
403         }
404         else
405         {
406             /* inside CTB */
407             pu1_cu_left = pu1_final_recon - 1;
408             cu_left_stride = ps_cu_prms->i4_luma_recon_stride;
409 
410             ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1;
411             nbr_4x4_left_strd = num_4x4_in_ctb;
412         }
413 
414         pv_cu_left = pu1_cu_left;
415 
416         /* CU top */
417         if(0 == ps_cu_analyse->b3_cu_pos_y)
418         {
419             /* CTB boundary */
420             pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_luma;
421             pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
422             pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
423 
424             ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr;
425             ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2));
426             ps_top_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
427         }
428         else
429         {
430             /* inside CTB */
431             pu1_cu_top = pu1_final_recon - ps_cu_prms->i4_luma_recon_stride;
432 
433             ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb;
434         }
435 
436         pv_cu_top = pu1_cu_top;
437 
438         /* CU top left */
439         if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
440         {
441             /* left ctb boundary but not first row */
442             pu1_cu_top_left = pu1_cu_left - 1; /* stride is 1 */
443             ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - 1; /* stride is 1 */
444         }
445         else
446         {
447             /* rest all cases topleft is top -1 */
448             pu1_cu_top_left = pu1_cu_top - 1;
449             ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
450         }
451 
452         pv_cu_top_left = pu1_cu_top_left;
453 
454         /* Store the CU nbr information in the ctxt for final reconstruction fun. */
455         s_cu_nbr_prms.nbr_4x4_left_strd = nbr_4x4_left_strd;
456         s_cu_nbr_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
457         s_cu_nbr_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
458         s_cu_nbr_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
459         s_cu_nbr_prms.pu1_cu_left = pu1_cu_left;
460         s_cu_nbr_prms.pu1_cu_top = pu1_cu_top;
461         s_cu_nbr_prms.pu1_cu_top_left = pu1_cu_top_left;
462         s_cu_nbr_prms.cu_left_stride = cu_left_stride;
463 
464         /* ------------------------------------------------------------ */
465         /* -- Initialize the number of neigbour skip cu count for rdo --*/
466         /* ------------------------------------------------------------ */
467         {
468             nbr_avail_flags_t s_nbr;
469             WORD32 i4_num_nbr_skip_cus = 0;
470 
471             /* get the neighbour availability flags for current cu  */
472             ihevce_get_nbr_intra(
473                 &s_nbr,
474                 ps_ctxt->pu1_ctb_nbr_map,
475                 ps_ctxt->i4_nbr_map_strd,
476                 (ps_cu_analyse->b3_cu_pos_x << 1),
477                 (ps_cu_analyse->b3_cu_pos_y << 1),
478                 (ps_cu_analyse->u1_cu_size >> 2));
479             if(s_nbr.u1_top_avail)
480             {
481                 i4_num_nbr_skip_cus += ps_top_nbr_4x4->b1_skip_flag;
482             }
483 
484             if(s_nbr.u1_left_avail)
485             {
486                 i4_num_nbr_skip_cus += ps_left_nbr_4x4->b1_skip_flag;
487             }
488             ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0].i4_num_nbr_skip_cus =
489                 i4_num_nbr_skip_cus;
490             ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1].i4_num_nbr_skip_cus =
491                 i4_num_nbr_skip_cus;
492         }
493 
494         /* --------------------------------------- */
495         /* --- Chroma Pointers Derivation -------- */
496         /* --------------------------------------- */
497 
498         /* based on CU position derive the pointers */
499         s_chrm_cu_buf_prms.pu1_final_recon =
500             ps_cu_prms->pu1_chrm_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
501 
502         s_chrm_cu_buf_prms.pu1_curr_src =
503             ps_cu_prms->pu1_chrm_src + (ps_cu_analyse->b3_cu_pos_x << 3);
504 
505         s_chrm_cu_buf_prms.pu1_final_recon +=
506             ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_recon_stride);
507 
508         s_chrm_cu_buf_prms.pu1_curr_src +=
509             ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_src_stride);
510 
511         /* CU left */
512         if(0 == ps_cu_analyse->b3_cu_pos_x)
513         {
514             /* CTB boundary */
515             s_chrm_cu_buf_prms.pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
516             s_chrm_cu_buf_prms.pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 3));
517             s_chrm_cu_buf_prms.i4_cu_left_stride = 2;
518         }
519         else
520         {
521             /* inside CTB */
522             s_chrm_cu_buf_prms.pu1_cu_left = s_chrm_cu_buf_prms.pu1_final_recon - 2;
523             s_chrm_cu_buf_prms.i4_cu_left_stride = ps_cu_prms->i4_chrm_recon_stride;
524         }
525 
526         /* CU top */
527         if(0 == ps_cu_analyse->b3_cu_pos_y)
528         {
529             /* CTB boundary */
530             s_chrm_cu_buf_prms.pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_chroma;
531             s_chrm_cu_buf_prms.pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
532             s_chrm_cu_buf_prms.pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
533         }
534         else
535         {
536             /* inside CTB */
537             s_chrm_cu_buf_prms.pu1_cu_top =
538                 s_chrm_cu_buf_prms.pu1_final_recon - ps_cu_prms->i4_chrm_recon_stride;
539         }
540 
541         /* CU top left */
542         if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
543         {
544             /* left ctb boundary but not first row */
545             s_chrm_cu_buf_prms.pu1_cu_top_left =
546                 s_chrm_cu_buf_prms.pu1_cu_left - 2; /* stride is 1 (2 pixels) */
547         }
548         else
549         {
550             /* rest all cases topleft is top -2 */
551             s_chrm_cu_buf_prms.pu1_cu_top_left = s_chrm_cu_buf_prms.pu1_cu_top - 2;
552         }
553     }
554 
555     /* Set Variables for Dep. Checking and Setting */
556     i4_ctb_x_off = (ps_cu_prms->i4_ctb_pos << 6);
557 
558     i4_ctb_y_off = ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y;
559     ps_ctxt->i4_satd_buf_idx = rd_opt_curr_idx;
560 
561     /* Set the pred pointer count for ME/intra to 0 to start */
562     ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count = 0;
563 
564     ASSERT(
565         (ps_cu_analyse->u1_num_inter_cands > 0) || (ps_cu_analyse->u1_num_intra_rdopt_cands > 0));
566 
567     ASSERT(ps_cu_analyse->u1_num_inter_cands <= MAX_INTER_CU_CANDIDATES);
568     s_inter_cu_mode_info.u1_num_inter_cands = 0;
569     s_inter_cu_mode_info.u1_idx_of_worst_cost_in_cost_array = 0;
570     s_inter_cu_mode_info.u1_idx_of_worst_cost_in_pred_buf_array = 0;
571 
572     ps_ctxt->s_cu_inter_merge_skip.u1_num_merge_cands = 0;
573     ps_ctxt->s_cu_inter_merge_skip.u1_num_skip_cands = 0;
574     ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type0_cands = 0;
575     ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type1_cands = 0;
576     ps_ctxt->s_pred_buf_data.i4_pred_stride = ps_cu_analyse->u1_cu_size;
577     if(0 != ps_cu_analyse->u1_num_inter_cands)
578     {
579         ihevce_inter_cand_sifter_prms_t s_prms;
580 
581         UWORD8 u1_enable_top_row_sync;
582 
583         if(ps_ctxt->u1_disable_intra_eval)
584         {
585             u1_enable_top_row_sync = !DISABLE_TOP_SYNC;
586         }
587         else
588         {
589             u1_enable_top_row_sync = 1;
590         }
591 
592         if((!ps_ctxt->u1_use_top_at_ctb_boundary) && u1_enable_top_row_sync)
593         {
594             /* Wait till top data is ready          */
595             /* Currently checking till top right CU */
596             curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
597 
598             if(i4_ctb_y_off == 0)
599             {
600                 /* No wait for 1st row */
601                 cu_top_right_offset = -(MAX_CTB_SIZE);
602                 {
603                     ihevce_tile_params_t *ps_col_tile_params =
604                         ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
605                          ps_ctxt->i4_tile_col_idx);
606                     /* No wait for 1st row */
607                     cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
608                 }
609                 cu_top_right_dep_pos = 0;
610             }
611             else
612             {
613                 cu_top_right_offset = (ps_cu_analyse->u1_cu_size) + 4;
614                 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
615             }
616 
617             if(0 == ps_cu_analyse->b3_cu_pos_y)
618             {
619                 ihevce_dmgr_chk_row_row_sync(
620                     ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
621                     curr_cu_pos_in_row,
622                     cu_top_right_offset,
623                     cu_top_right_dep_pos,
624                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
625                     ps_ctxt->thrd_id);
626             }
627         }
628 
629         if(ps_ctxt->i1_cu_qp_delta_enable)
630         {
631             ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, 4, 0);
632         }
633 
634         s_prms.i4_ctb_nbr_map_stride = ps_ctxt->i4_nbr_map_strd;
635         s_prms.i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands;
636         s_prms.i4_nbr_4x4_left_strd = nbr_4x4_left_strd;
637         s_prms.i4_src_strd = ps_cu_prms->i4_luma_src_stride;
638         s_prms.ps_cu_inter_merge_skip = &ps_ctxt->s_cu_inter_merge_skip;
639         s_prms.aps_cu_nbr_buf[0] = &ps_ctxt->as_cu_nbr[ps_ctxt->i4_satd_buf_idx][0];
640         s_prms.aps_cu_nbr_buf[1] = &ps_ctxt->as_cu_nbr[!ps_ctxt->i4_satd_buf_idx][0];
641         s_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
642         s_prms.ps_mc_ctxt = &ps_ctxt->s_mc_ctxt;
643         s_prms.ps_me_cands = ps_cu_analyse->as_cu_inter_cand;
644         s_prms.ps_mixed_modes_datastore = &ps_ctxt->s_mixed_mode_inter_cu;
645         s_prms.ps_mv_pred_ctxt = &ps_ctxt->s_mv_pred_ctxt;
646         s_prms.ps_pred_buf_data = &ps_ctxt->s_pred_buf_data;
647         s_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
648         s_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
649         s_prms.pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map;
650         s_prms.pv_src = pv_curr_src;
651         s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 3;
652         s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 3;
653         s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
654         s_prms.u1_max_merge_candidates = ps_ctxt->i4_max_merge_candidates;
655         s_prms.u1_num_me_cands = ps_cu_analyse->u1_num_inter_cands;
656         s_prms.u1_use_satd_for_merge_eval = ps_ctxt->i4_use_satd_for_merge_eval;
657         s_prms.u1_quality_preset = ps_ctxt->i4_quality_preset;
658         s_prms.i1_slice_type = ps_ctxt->i1_slice_type;
659         s_prms.ps_cu_me_intra_pred_prms = &ps_ctxt->s_cu_me_intra_pred_prms;
660         s_prms.u1_is_hbd = (ps_ctxt->u1_bit_depth > 8);
661         s_prms.ps_inter_cu_mode_info = &s_inter_cu_mode_info;
662         s_prms.pai4_mv_cost = ps_cu_analyse->ai4_mv_cost;
663         s_prms.i4_lambda_qf = ps_ctxt->i4_sad_lamda;
664         s_prms.u1_use_merge_cand_from_top_row =
665             (u1_enable_top_row_sync || (s_prms.u1_cu_pos_y > 0));
666         s_prms.u1_merge_idx_cabac_model =
667             ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[IHEVC_CAB_MERGE_IDX_EXT];
668 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
669         s_prms.pai4_me_err_metric = ps_cu_analyse->ai4_err_metric;
670         s_prms.u1_reuse_me_sad = 1;
671 #else
672         s_prms.u1_reuse_me_sad = 0;
673 #endif
674 
675         if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_type != PSLICE)
676         {
677             if(ps_ctxt->i4_temporal_layer == 1)
678             {
679                 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_BREF;
680             }
681             else
682             {
683                 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
684             }
685         }
686         else
687         {
688             s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_P;
689         }
690         s_prms.u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
691 
692         if(s_prms.u1_is_cu_noisy)
693         {
694             s_prms.i4_lambda_qf =
695                 ((float)s_prms.i4_lambda_qf) * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f;
696         }
697         s_prms.pf_luma_inter_pred_pu = ihevce_luma_inter_pred_pu;
698 
699         s_prms.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
700 
701         s_prms.pf_evalsad_pt_npu_mxn_8bit = (FT_SAD_EVALUATOR *)ps_ctxt->pv_evalsad_pt_npu_mxn_8bit;
702         ihevce_inter_cand_sifter(&s_prms);
703     }
704     if(u1_is_422)
705     {
706         UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX - 1];
707         UWORD8 u1_num_bufs_allocated;
708 
709         u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
710             au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX - 1);
711 
712         ASSERT(u1_num_bufs_allocated == (NUM_CU_ME_INTRA_PRED_IDX - 1));
713 
714         for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
715             ctr++)
716         {
717             {
718                 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
719                     (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
720             }
721 
722             ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
723 
724             ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
725         }
726 
727         {
728             ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
729                 (UWORD8 *)ps_ctxt->pv_422_chroma_intra_pred_buf;
730         }
731 
732         ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
733 
734         ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
735     }
736     else
737     {
738         UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX];
739         UWORD8 u1_num_bufs_allocated;
740 
741         u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
742             au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX);
743 
744         ASSERT(u1_num_bufs_allocated == NUM_CU_ME_INTRA_PRED_IDX);
745 
746         for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
747             ctr++)
748         {
749             {
750                 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
751                     (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
752             }
753 
754             ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
755 
756             ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
757         }
758     }
759 
760     ihevce_mark_all_modes_to_evaluate(ps_ctxt, ps_cu_analyse);
761 
762     ps_ctxt->as_cu_prms[0].s_recon_datastore.u1_is_lumaRecon_available = 0;
763     ps_ctxt->as_cu_prms[1].s_recon_datastore.u1_is_lumaRecon_available = 0;
764     ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
765     ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
766     ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
767     ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
768     ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
769     ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
770     /* --------------------------------------- */
771     /* ------ Inter RD OPT stage ------------- */
772     /* --------------------------------------- */
773     if(0 != s_inter_cu_mode_info.u1_num_inter_cands)
774     {
775         UWORD8 u1_ssd_bit_info_ctr = 0;
776 
777         /* -- run a loop over all Inter rd opt cands ------ */
778         for(ctr = 0; ctr < s_inter_cu_mode_info.u1_num_inter_cands; ctr++)
779         {
780             cu_inter_cand_t *ps_inter_cand;
781 
782             LWORD64 rd_opt_cost = 0;
783 
784             ps_inter_cand = s_inter_cu_mode_info.aps_cu_data[ctr];
785 
786             if((ps_inter_cand->b1_skip_flag) || (ps_inter_cand->as_inter_pu[0].b1_merge_flag) ||
787                (ps_inter_cand->b3_part_size && ps_inter_cand->as_inter_pu[1].b1_merge_flag))
788             {
789                 ps_inter_cand->b1_eval_mark = 1;
790             }
791 
792             /****************************************************************/
793             /* This check is only valid for derived instances.              */
794             /* check if this mode needs to be evaluated or not.             */
795             /* if it is a skip candidate, go ahead and evaluate it even if  */
796             /* it has not been marked while sorting.                        */
797             /****************************************************************/
798             if((0 == ps_inter_cand->b1_eval_mark) && (0 == ps_inter_cand->b1_skip_flag))
799             {
800                 continue;
801             }
802 
803             /* RDOPT related copies and settings */
804             ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
805 
806             /* RDOPT copy States : Prev Cu best to current init */
807             COPY_CABAC_STATES(
808                 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
809                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
810                 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
811             /* MVP ,MVD calc and Motion compensation */
812             rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
813                 ps_ctxt,
814                 ps_inter_cand,
815                 ps_cu_analyse->u1_cu_size,
816                 ps_cu_analyse->b3_cu_pos_x,
817                 ps_cu_analyse->b3_cu_pos_y,
818                 ps_left_nbr_4x4,
819                 ps_top_nbr_4x4,
820                 ps_topleft_nbr_4x4,
821                 nbr_4x4_left_strd,
822                 rd_opt_curr_idx);
823 
824 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
825             if((ps_ctxt->u1_bit_depth == 8) && (!ps_inter_cand->b1_skip_flag))
826             {
827                 ihevce_determine_tu_tree_distribution(
828                     ps_inter_cand,
829                     (me_func_selector_t *)ps_ctxt->pv_err_func_selector,
830                     ps_ctxt->ai2_scratch,
831                     (UWORD8 *)pv_curr_src,
832                     ps_cu_prms->i4_luma_src_stride,
833                     ps_ctxt->i4_satd_lamda,
834                     LAMBDA_Q_SHIFT,
835                     ps_cu_analyse->u1_cu_size,
836                     ps_ctxt->u1_max_tr_depth);
837             }
838 #endif
839 #if DISABLE_ZERO_ZBF_IN_INTER
840             ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
841 #else
842             ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
843 #endif
844             /* Recon loop with different TUs based on partition type*/
845             rd_opt_cost += ((pf_inter_rdopt_cu_ntu)ps_ctxt->pv_inter_rdopt_cu_ntu)(
846                 ps_ctxt,
847                 ps_cu_prms,
848                 pv_curr_src,
849                 ps_cu_analyse->u1_cu_size,
850                 ps_cu_analyse->b3_cu_pos_x,
851                 ps_cu_analyse->b3_cu_pos_y,
852                 rd_opt_curr_idx,
853                 &s_chrm_cu_buf_prms,
854                 ps_inter_cand,
855                 ps_cu_analyse,
856                 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
857                                        : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
858                                           (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
859                                              100.0);
860 
861 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
862             if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
863             {
864                 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
865                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
866                     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
867             }
868 #endif
869 
870             /* based on the rd opt cost choose the best and current index */
871             if(rd_opt_cost < rd_opt_least_cost)
872             {
873                 /* swap the best and current indx */
874                 rd_opt_best_idx = !rd_opt_best_idx;
875                 rd_opt_curr_idx = !rd_opt_curr_idx;
876 
877                 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
878                 rd_opt_least_cost = rd_opt_cost;
879                 i4_best_cu_qp = ps_ctxt->i4_cu_qp;
880 
881                 /* Store the best Inter cand. for final_recon function */
882                 ps_best_inter_cand = ps_inter_cand;
883             }
884 
885             /* set the neighbour map to 0 */
886             ihevce_set_nbr_map(
887                 ps_ctxt->pu1_ctb_nbr_map,
888                 ps_ctxt->i4_nbr_map_strd,
889                 (ps_cu_analyse->b3_cu_pos_x << 1),
890                 (ps_cu_analyse->b3_cu_pos_y << 1),
891                 (ps_cu_analyse->u1_cu_size >> 2),
892                 0);
893 
894         } /* end of loop for all the Inter RD OPT cand */
895     }
896     /* --------------------------------------- */
897     /* ---- Conditional Eval of Intra -------- */
898     /* --------------------------------------- */
899     {
900         enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
901         ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
902 
903         /* check if inter candidates are valid */
904         if(0 != ps_cu_analyse->u1_num_inter_cands)
905         {
906             /* if skip or no residual inter candidates has won then */
907             /* evaluation of intra candidates is disabled           */
908             if((1 == ps_enc_loop_bestprms->u1_skip_flag) ||
909                (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
910             {
911                 enable_intra_eval_flag = 0;
912             }
913         }
914         /* Disable Intra Gating for HIGH QUALITY PRESET */
915 #if !ENABLE_INTRA_GATING_FOR_HQ
916         if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
917         {
918             enable_intra_eval_flag = 1;
919 
920 #if DISABLE_LARGE_INTRA_PQ
921             if((IHEVCE_QUALITY_P0 == ps_ctxt->i4_quality_preset) && (ps_cu_prms->u1_is_cu_noisy) &&
922                (ps_ctxt->i1_slice_type != ISLICE) && (0 != s_inter_cu_mode_info.u1_num_inter_cands))
923             {
924                 if(ps_cu_analyse->u1_cu_size > 16)
925                 {
926                     /* Disable 32x32 / 64x64 Intra in PQ P and B pics */
927                     enable_intra_eval_flag = 0;
928                 }
929                 else if(ps_cu_analyse->u1_cu_size == 16)
930                 {
931                     /* Disable tu equal to cu mode in 16x16 Intra in PQ P and B pics */
932                     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
933                 }
934             }
935 #endif
936         }
937 #endif
938     }
939 
940     /* --------------------------------------- */
941     /* ------ Intra RD OPT stage ------------- */
942     /* --------------------------------------- */
943 
944     /* -- run a loop over all Intra rd opt cands ------ */
945     if((0 != ps_cu_analyse->u1_num_intra_rdopt_cands) && (1 == enable_intra_eval_flag))
946     {
947         LWORD64 rd_opt_cost;
948         WORD32 end_flag = 0;
949         WORD32 cu_eval_done = 0;
950         WORD32 subcu_eval_done = 0;
951         WORD32 subpu_eval_done = 0;
952         WORD32 max_trans_size;
953         WORD32 sync_wait_stride;
954         max_trans_size = MIN(MAX_TU_SIZE, (ps_cu_analyse->u1_cu_size));
955         sync_wait_stride = (ps_cu_analyse->u1_cu_size) + max_trans_size;
956 
957         if(!ps_ctxt->u1_use_top_at_ctb_boundary)
958         {
959             /* Wait till top data is ready          */
960             /* Currently checking till top right CU */
961             curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
962 
963             if(i4_ctb_y_off == 0)
964             {
965                 /* No wait for 1st row */
966                 cu_top_right_offset = -(MAX_CTB_SIZE);
967                 {
968                     ihevce_tile_params_t *ps_col_tile_params =
969                         ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
970                          ps_ctxt->i4_tile_col_idx);
971                     /* No wait for 1st row */
972                     cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
973                 }
974                 cu_top_right_dep_pos = 0;
975             }
976             else
977             {
978                 cu_top_right_offset = sync_wait_stride;
979                 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
980             }
981 
982             if(0 == ps_cu_analyse->b3_cu_pos_y)
983             {
984                 ihevce_dmgr_chk_row_row_sync(
985                     ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
986                     curr_cu_pos_in_row,
987                     cu_top_right_offset,
988                     cu_top_right_dep_pos,
989                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
990                     ps_ctxt->thrd_id);
991             }
992         }
993         ctr = 0;
994 
995         /* Zero cbf tool is disabled for intra CUs */
996 #if ENABLE_ZERO_CBF_IN_INTRA
997         ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
998 #else
999         ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
1000 #endif
1001 
1002         /* Intra Mode gating based on MPM cand list and encoder quality preset */
1003         if((ps_ctxt->i1_slice_type != ISLICE) && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
1004         {
1005             ihevce_mpm_idx_based_filter_RDOPT_cand(
1006                 ps_ctxt,
1007                 ps_cu_analyse,
1008                 ps_left_nbr_4x4,
1009                 ps_top_nbr_4x4,
1010                 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0],
1011                 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[0]);
1012 
1013             ihevce_mpm_idx_based_filter_RDOPT_cand(
1014                 ps_ctxt,
1015                 ps_cu_analyse,
1016                 ps_left_nbr_4x4,
1017                 ps_top_nbr_4x4,
1018                 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0],
1019                 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[0]);
1020         }
1021 
1022         /* Call Chroma SATD function for curr_func_mode in HIGH QUALITY mode */
1023         if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd)
1024         {
1025             /* For cu_size = 64, there won't be any TU_EQ_CU case */
1026             if(64 != ps_cu_analyse->u1_cu_size)
1027             {
1028                 /* RDOPT copy States : Prev Cu best to current init */
1029                 COPY_CABAC_STATES(
1030                     &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1031                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1032                     IHEVC_CAB_CTXT_END);
1033 
1034                 /* RDOPT related copies and settings */
1035                 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1036 
1037                 /* Calc. best SATD mode for TU_EQ_CU case */
1038                 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
1039                     ps_ctxt,
1040                     &s_chrm_cu_buf_prms,
1041                     ps_cu_analyse,
1042                     rd_opt_curr_idx,
1043                     TU_EQ_CU,
1044                     !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1045                                            : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1046                                               (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1047                                                  100.0,
1048                     ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
1049 
1050 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1051                 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1052                 {
1053                     ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1054                     ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1055                         ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1056                 }
1057 #endif
1058             }
1059 
1060             /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
1061             TU_EQ_CU_DIV2 case */
1062 
1063             if((ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] !=
1064                 255) &&
1065                (8 != ps_cu_analyse->u1_cu_size))
1066             {
1067                 /* RDOPT copy States : Prev Cu best to current init */
1068                 COPY_CABAC_STATES(
1069                     &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1070                     &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1071                     IHEVC_CAB_CTXT_END);
1072 
1073                 /* RDOPT related copies and settings */
1074                 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1075 
1076                 /* Calc. best SATD mode for TU_EQ_CU_DIV2 case */
1077                 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
1078                     ps_ctxt,
1079                     &s_chrm_cu_buf_prms,
1080                     ps_cu_analyse,
1081                     rd_opt_curr_idx,
1082                     TU_EQ_CU_DIV2,
1083                     !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1084                                            : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1085                                               (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1086                                                  100.0,
1087                     ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
1088 
1089 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1090                 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1091                 {
1092                     ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1093                     ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1094                         ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1095                 }
1096 #endif
1097             }
1098         }
1099 
1100         while(0 == end_flag)
1101         {
1102             UWORD8 *pu1_mode = NULL;
1103             WORD32 curr_func_mode = 0;
1104             void *pv_pred;
1105 
1106             ASSERT(ctr < 36);
1107 
1108             /* TU equal to CU size evaluation of different modes */
1109             if(0 == cu_eval_done)
1110             {
1111                 /* check if all the modes have been evaluated */
1112                 if(255 == ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr])
1113                 {
1114                     cu_eval_done = 1;
1115                     ctr = 0;
1116                 }
1117                 else if(
1118                     (1 == ctr) &&
1119                     ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
1120                      (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
1121                     (ps_ctxt->i1_slice_type != ISLICE))
1122                 {
1123                     ctr = 0;
1124                     cu_eval_done = 1;
1125                     subcu_eval_done = 1;
1126                     subpu_eval_done = 1;
1127                 }
1128                 else
1129                 {
1130                     if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr])
1131                     {
1132                         ctr++;
1133                         continue;
1134                     }
1135 
1136                     pu1_mode =
1137                         &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr];
1138                     ctr++;
1139                     curr_func_mode = TU_EQ_CU;
1140                 }
1141             }
1142             /* Sub CU (NXN) mode evaluation of different pred modes */
1143             if((0 == subpu_eval_done) && (1 == cu_eval_done))
1144             {
1145                 /*For NxN modes evaluation all candidates for all PU parts are evaluated */
1146                 /*inside the ihevce_intra_rdopt_cu_ntu function, so the subpu_eval_done is set to 1 */
1147                 {
1148                     pu1_mode = &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][ctr];
1149 
1150                     curr_func_mode = TU_EQ_SUBCU;
1151                     /* check if any modes have to be evaluated */
1152                     if(255 == *pu1_mode)
1153                     {
1154                         subpu_eval_done = 1;
1155                         ctr = 0;
1156                     }
1157                     else if(ctr != 0) /* If the modes have to be evaluated, then terminate, as all modes are already evaluated */
1158                     {
1159                         subpu_eval_done = 1;
1160                         ctr = 0;
1161                     }
1162                     else
1163                     {
1164                         ctr++;
1165                     }
1166                 }
1167             }
1168 
1169             /* TU size equal to CU div2 mode evaluation of different pred modes */
1170             if((0 == subcu_eval_done) && (1 == subpu_eval_done) && (1 == cu_eval_done))
1171             {
1172                 /* check if all the modes have been evaluated */
1173                 if(255 ==
1174                    ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr])
1175                 {
1176                     subcu_eval_done = 1;
1177                 }
1178                 else if(
1179                     (1 == ctr) &&
1180                     ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
1181                      (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
1182                     (ps_ctxt->i1_slice_type != ISLICE) && (ps_cu_analyse->u1_cu_size == 64))
1183                 {
1184                     subcu_eval_done = 1;
1185                 }
1186                 else
1187                 {
1188                     if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr])
1189                     {
1190                         ctr++;
1191                         continue;
1192                     }
1193 
1194                     pu1_mode = &ps_cu_analyse->s_cu_intra_cand
1195                                     .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr];
1196 
1197                     ctr++;
1198                     curr_func_mode = TU_EQ_CU_DIV2;
1199                 }
1200             }
1201 
1202             /* check if all CU options have been evaluated */
1203             if((1 == cu_eval_done) && (1 == subcu_eval_done) && (1 == subpu_eval_done))
1204             {
1205                 break;
1206             }
1207 
1208             /* RDOPT related copies and settings */
1209             ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1210 
1211             /* Assign ME/Intra pred buf. to the current intra cand. since we
1212             are storing pred data for the final_recon function */
1213             {
1214                 pv_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_curr_idx];
1215             }
1216 
1217             /* RDOPT copy States : Prev Cu best to current init */
1218             COPY_CABAC_STATES(
1219                 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1220                 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1221                 IHEVC_CAB_CTXT_END);
1222 
1223             /* call the function which performs the normative Intra encode */
1224             rd_opt_cost = ((pf_intra_rdopt_cu_ntu)ps_ctxt->pv_intra_rdopt_cu_ntu)(
1225                 ps_ctxt,
1226                 ps_cu_prms,
1227                 pv_pred,
1228                 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_curr_idx],
1229                 &s_chrm_cu_buf_prms,
1230                 pu1_mode,
1231                 ps_cu_analyse,
1232                 pv_curr_src,
1233                 pv_cu_left,
1234                 pv_cu_top,
1235                 pv_cu_top_left,
1236                 ps_left_nbr_4x4,
1237                 ps_top_nbr_4x4,
1238                 nbr_4x4_left_strd,
1239                 cu_left_stride,
1240                 rd_opt_curr_idx,
1241                 curr_func_mode,
1242                 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1243                                        : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1244                                           (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1245                                              100.0);
1246 
1247 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1248             if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1249             {
1250                 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1251                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1252                     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1253             }
1254 #endif
1255 
1256             /* based on the rd opt cost choose the best and current index */
1257             if(rd_opt_cost < rd_opt_least_cost)
1258             {
1259                 /* swap the best and current indx */
1260                 rd_opt_best_idx = !rd_opt_best_idx;
1261                 rd_opt_curr_idx = !rd_opt_curr_idx;
1262                 i4_best_cu_qp = ps_ctxt->i4_cu_qp;
1263 
1264                 rd_opt_least_cost = rd_opt_cost;
1265                 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
1266             }
1267 
1268             if((TU_EQ_SUBCU == curr_func_mode) &&
1269                (ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_intra_flag) &&
1270                (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P2) && !FORCE_INTRA_TU_DEPTH_TO_0)
1271             {
1272                 UWORD8 au1_tu_eq_cu_div2_modes[4];
1273                 UWORD8 au1_freq_of_mode[4];
1274 
1275                 if(ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_part_mode == SIZE_2Nx2N)
1276                 {
1277                     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1278                         255;  //ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode[0];
1279                     ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
1280                         255;
1281                 }
1282                 else
1283                 {
1284                     WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
1285                         ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode,
1286                         au1_tu_eq_cu_div2_modes,
1287                         au1_freq_of_mode,
1288                         4);
1289 
1290                     if(2 == i4_num_clusters)
1291                     {
1292                         if(au1_freq_of_mode[0] == 3)
1293                         {
1294                             ps_cu_analyse->s_cu_intra_cand
1295                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1296                                 au1_tu_eq_cu_div2_modes[0];
1297                             ps_cu_analyse->s_cu_intra_cand
1298                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
1299                         }
1300                         else if(au1_freq_of_mode[1] == 3)
1301                         {
1302                             ps_cu_analyse->s_cu_intra_cand
1303                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1304                                 au1_tu_eq_cu_div2_modes[1];
1305                             ps_cu_analyse->s_cu_intra_cand
1306                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
1307                         }
1308                         else
1309                         {
1310                             ps_cu_analyse->s_cu_intra_cand
1311                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1312                                 au1_tu_eq_cu_div2_modes[0];
1313                             ps_cu_analyse->s_cu_intra_cand
1314                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
1315                                 au1_tu_eq_cu_div2_modes[1];
1316                             ps_cu_analyse->s_cu_intra_cand
1317                                 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[2] = 255;
1318                         }
1319                     }
1320                 }
1321             }
1322 
1323             /* set the neighbour map to 0 */
1324             ihevce_set_nbr_map(
1325                 ps_ctxt->pu1_ctb_nbr_map,
1326                 ps_ctxt->i4_nbr_map_strd,
1327                 (ps_cu_analyse->b3_cu_pos_x << 1),
1328                 (ps_cu_analyse->b3_cu_pos_y << 1),
1329                 (ps_cu_analyse->u1_cu_size >> 2),
1330                 0);
1331         }
1332 
1333     } /* end of Intra RD OPT cand evaluation */
1334 
1335     ASSERT(i4_best_cu_qp > (ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1));
1336     ps_ctxt->i4_cu_qp = i4_best_cu_qp;
1337     ps_cu_analyse->i1_cu_qp = i4_best_cu_qp;
1338 
1339     /* --------------------------------------- */
1340     /* --------Final mode Recon ---------- */
1341     /* --------------------------------------- */
1342     {
1343         enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
1344         void *pv_final_pred = NULL;
1345         WORD32 final_pred_strd = 0;
1346         void *pv_final_pred_chrm = NULL;
1347         WORD32 final_pred_strd_chrm = 0;
1348         WORD32 packed_pred_mode;
1349 
1350 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1351         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
1352         {
1353             pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
1354         }
1355 #else
1356         pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
1357 #endif
1358 
1359         ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1360         packed_pred_mode =
1361             ps_enc_loop_bestprms->u1_intra_flag + (ps_enc_loop_bestprms->u1_skip_flag) * 2;
1362 
1363         if(!ps_ctxt->u1_is_input_data_hbd)
1364         {
1365             if(ps_enc_loop_bestprms->u1_intra_flag)
1366             {
1367                 pv_final_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_best_idx];
1368                 final_pred_strd =
1369                     ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_best_idx];
1370             }
1371             else
1372             {
1373                 pv_final_pred = ps_best_inter_cand->pu1_pred_data;
1374                 final_pred_strd = ps_best_inter_cand->i4_pred_data_stride;
1375             }
1376 
1377             pv_final_pred_chrm =
1378                 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
1379                 rd_opt_best_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
1380                                    (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
1381             final_pred_strd_chrm =
1382                 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
1383         }
1384 
1385         ihevce_set_eval_flags(ps_ctxt, ps_enc_loop_bestprms);
1386 
1387         {
1388             final_mode_process_prms_t s_prms;
1389 
1390             void *pv_cu_luma_recon;
1391             void *pv_cu_chroma_recon;
1392             WORD32 luma_stride, chroma_stride;
1393 
1394             if(!ps_ctxt->u1_is_input_data_hbd)
1395             {
1396 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1397                 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
1398                 {
1399                     pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
1400                     pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
1401                     luma_stride = ps_cu_analyse->u1_cu_size;
1402                     chroma_stride = ps_cu_analyse->u1_cu_size;
1403                 }
1404                 else
1405                 {
1406                     /* based on CU position derive the luma pointers */
1407                     pv_cu_luma_recon = pu1_final_recon;
1408 
1409                     /* based on CU position derive the chroma pointers */
1410                     pv_cu_chroma_recon = s_chrm_cu_buf_prms.pu1_final_recon;
1411 
1412                     luma_stride = ps_cu_prms->i4_luma_recon_stride;
1413 
1414                     chroma_stride = ps_cu_prms->i4_chrm_recon_stride;
1415                 }
1416 #else
1417                 pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
1418                 pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
1419                 luma_stride = ps_cu_analyse->u1_cu_size;
1420                 chroma_stride = ps_cu_analyse->u1_cu_size;
1421 #endif
1422 
1423                 s_prms.ps_cu_nbr_prms = &s_cu_nbr_prms;
1424                 s_prms.ps_best_inter_cand = ps_best_inter_cand;
1425                 s_prms.ps_chrm_cu_buf_prms = &s_chrm_cu_buf_prms;
1426                 s_prms.packed_pred_mode = packed_pred_mode;
1427                 s_prms.rd_opt_best_idx = rd_opt_best_idx;
1428                 s_prms.pv_src = pu1_curr_src;
1429                 s_prms.src_strd = ps_cu_prms->i4_luma_src_stride;
1430                 s_prms.pv_pred = pv_final_pred;
1431                 s_prms.pred_strd = final_pred_strd;
1432                 s_prms.pv_pred_chrm = pv_final_pred_chrm;
1433                 s_prms.pred_chrm_strd = final_pred_strd_chrm;
1434                 s_prms.pu1_final_ecd_data = pu1_ecd_data;
1435                 s_prms.pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
1436                 s_prms.csbf_strd = ps_ctxt->i4_cu_csbf_strd;
1437                 s_prms.pv_luma_recon = pv_cu_luma_recon;
1438                 s_prms.recon_luma_strd = luma_stride;
1439                 s_prms.pv_chrm_recon = pv_cu_chroma_recon;
1440                 s_prms.recon_chrma_strd = chroma_stride;
1441                 s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
1442                 s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
1443                 s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
1444                 s_prms.i1_cu_qp = ps_cu_analyse->i1_cu_qp;
1445                 s_prms.u1_will_cabac_state_change = 1;
1446                 s_prms.u1_recompute_sbh_and_rdoq = 0;
1447                 s_prms.u1_is_first_pass = 1;
1448             }
1449 
1450 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
1451             s_prms.u1_is_cu_noisy = !ps_enc_loop_bestprms->u1_intra_flag
1452                                         ? ps_cu_prms->u1_is_cu_noisy
1453                                         : ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
1454 #endif
1455 
1456             ((pf_final_rdopt_mode_prcs)ps_ctxt->pv_final_rdopt_mode_prcs)(ps_ctxt, &s_prms);
1457 
1458 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1459             if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1460             {
1461                 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1462                 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1463                     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1464             }
1465 #endif
1466         }
1467     }
1468 
1469     /* --------------------------------------- */
1470     /* --------Populate CU out prms ---------- */
1471     /* --------------------------------------- */
1472     {
1473         enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
1474         UWORD8 *pu1_pu_map;
1475         ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1476 
1477         /* Corner case : If Part is 2Nx2N and Merge has all TU with zero cbf */
1478         /* then it has to be coded as skip CU */
1479         if((SIZE_2Nx2N == ps_enc_loop_bestprms->u1_part_mode) &&
1480            (1 == ps_enc_loop_bestprms->as_pu_enc_loop[0].b1_merge_flag) &&
1481            (0 == ps_enc_loop_bestprms->u1_skip_flag) && (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
1482         {
1483             ps_enc_loop_bestprms->u1_skip_flag = 1;
1484         }
1485 
1486         /* update number PUs in CU */
1487         ps_cu_prms->i4_num_pus_in_cu = ps_enc_loop_bestprms->u2_num_pus_in_cu;
1488 
1489         /* ---- populate the colocated pu map index --- */
1490         for(ctr = 0; ctr < ps_enc_loop_bestprms->u2_num_pus_in_cu; ctr++)
1491         {
1492             WORD32 i;
1493             WORD32 vert_ht;
1494             WORD32 horz_wd;
1495 
1496             if(ps_enc_loop_bestprms->u1_intra_flag)
1497             {
1498                 ps_enc_loop_bestprms->as_col_pu_enc_loop[ctr].b1_intra_flag = 1;
1499                 vert_ht = ps_cu_analyse->u1_cu_size >> 2;
1500                 horz_wd = ps_cu_analyse->u1_cu_size >> 2;
1501             }
1502             else
1503             {
1504                 vert_ht = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_ht + 1) << 2) >> 2);
1505                 horz_wd = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_wd + 1) << 2) >> 2);
1506             }
1507 
1508             pu1_pu_map = pu1_col_pu_map + ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_x;
1509             pu1_pu_map += (ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_y * num_4x4_in_ctb);
1510 
1511             for(i = 0; i < vert_ht; i++)
1512             {
1513                 memset(pu1_pu_map, col_start_pu_idx, horz_wd);
1514                 pu1_pu_map += num_4x4_in_ctb;
1515             }
1516             /* increment the index */
1517             col_start_pu_idx++;
1518         }
1519         /* ---- copy the colocated PUs to frm pu ----- */
1520         memcpy(
1521             ps_col_pu,
1522             &ps_enc_loop_bestprms->as_col_pu_enc_loop[0],
1523             ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_col_mv_t));
1524 
1525         /*---populate qp for 4x4 nbr array based on skip and cbf zero flag---*/
1526         {
1527             entropy_context_t *ps_entropy_ctxt;
1528 
1529             WORD32 diff_cu_qp_delta_depth, log2_ctb_size;
1530 
1531             WORD32 log2_min_cu_qp_delta_size;
1532             UWORD32 block_addr_align;
1533             ps_entropy_ctxt = ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt;
1534 
1535             log2_ctb_size = ps_entropy_ctxt->i1_log2_ctb_size;
1536             diff_cu_qp_delta_depth = ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth;
1537 
1538             log2_min_cu_qp_delta_size = log2_ctb_size - diff_cu_qp_delta_depth;
1539             block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3);
1540 
1541             ps_entropy_ctxt->i4_qg_pos_x = ps_cu_analyse->b3_cu_pos_x & block_addr_align;
1542             ps_entropy_ctxt->i4_qg_pos_y = ps_cu_analyse->b3_cu_pos_y & block_addr_align;
1543             /*Update the Qp value used. It will not have a valid value iff
1544             current CU is (skipped/no_cbf). In that case the Qp needed for
1545             deblocking is calculated from top/left/previous coded CU*/
1546 
1547             ps_ctxt->ps_enc_out_ctxt->i1_cu_qp = ps_cu_analyse->i1_cu_qp;
1548 
1549             if(ps_entropy_ctxt->i4_qg_pos_x == ps_cu_analyse->b3_cu_pos_x &&
1550                ps_entropy_ctxt->i4_qg_pos_y == ps_cu_analyse->b3_cu_pos_y)
1551             {
1552                 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 1;
1553             }
1554             else
1555             {
1556                 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 0;
1557             }
1558         }
1559 
1560         /* -- at the end of CU set the neighbour map to 1 -- */
1561         ihevce_set_nbr_map(
1562             ps_ctxt->pu1_ctb_nbr_map,
1563             ps_ctxt->i4_nbr_map_strd,
1564             (ps_cu_analyse->b3_cu_pos_x << 1),
1565             (ps_cu_analyse->b3_cu_pos_y << 1),
1566             (ps_cu_analyse->u1_cu_size >> 2),
1567             1);
1568 
1569         /* -- at the end of CU update best cabac rdopt states -- */
1570         /* -- and also set the top row skip flags  ------------- */
1571         ihevce_entropy_update_best_cu_states(
1572             &ps_ctxt->s_rdopt_entropy_ctxt,
1573             ps_cu_analyse->b3_cu_pos_x,
1574             ps_cu_analyse->b3_cu_pos_y,
1575             ps_cu_analyse->u1_cu_size,
1576             0,
1577             rd_opt_best_idx);
1578     }
1579 
1580     /* Store Output struct */
1581 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1582     {
1583         {
1584             memcpy(
1585                 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
1586                 &ps_ctxt->as_cu_prms[rd_opt_best_idx],
1587                 sizeof(enc_loop_cu_final_prms_t));
1588         }
1589 
1590         memcpy(
1591             &ps_ctxt->as_cu_recur_nbr[0],
1592             &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
1593             sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
1594                 (ps_cu_analyse->u1_cu_size >> 2));
1595 
1596         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
1597 
1598         ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
1599     }
1600 #else
1601     if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
1602     {
1603         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1604 
1605         ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
1606 
1607         if(ps_ctxt->u1_disable_intra_eval && ps_ctxt->i4_deblk_pad_hpel_cur_pic)
1608         {
1609             /* Wait till top data is ready          */
1610             /* Currently checking till top right CU */
1611             curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1612 
1613             if(i4_ctb_y_off == 0)
1614             {
1615                 /* No wait for 1st row */
1616                 cu_top_right_offset = -(MAX_CTB_SIZE);
1617                 {
1618                     ihevce_tile_params_t *ps_col_tile_params =
1619                         ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
1620                          ps_ctxt->i4_tile_col_idx);
1621 
1622                     /* No wait for 1st row */
1623                     cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
1624                 }
1625                 cu_top_right_dep_pos = 0;
1626             }
1627             else
1628             {
1629                 cu_top_right_offset = (ps_cu_analyse->u1_cu_size);
1630                 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
1631             }
1632 
1633             if(0 == ps_cu_analyse->b3_cu_pos_y)
1634             {
1635                 ihevce_dmgr_chk_row_row_sync(
1636                     ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1637                     curr_cu_pos_in_row,
1638                     cu_top_right_offset,
1639                     cu_top_right_dep_pos,
1640                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1641                     ps_ctxt->thrd_id);
1642             }
1643         }
1644     }
1645     else
1646     {
1647         {
1648             memcpy(
1649                 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
1650                 &ps_ctxt->as_cu_prms[rd_opt_best_idx],
1651                 sizeof(enc_loop_cu_final_prms_t));
1652         }
1653 
1654         memcpy(
1655             &ps_ctxt->as_cu_recur_nbr[0],
1656             &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
1657             sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
1658                 (ps_cu_analyse->u1_cu_size >> 2));
1659 
1660         ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
1661 
1662         ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
1663     }
1664 #endif
1665 
1666     ps_ctxt->s_pred_buf_data.u4_is_buf_in_use &=
1667         ~((1 << (ps_ctxt->i4_max_num_inter_rdopt_cands + 4)) - 1);
1668 
1669     return rd_opt_least_cost;
1670 }
1671 
1672 /*!
1673 ******************************************************************************
1674 * \if Function name : ihevce_enc_loop_process_row \endif
1675 *
1676 * \brief
1677 *    Row level enc_loop pass function
1678 *
1679 * \param[in] ps_ctxt : pointer to the enc_loop module context
1680 * \param[in] ps_curr_src_bufs : pointer to the input yuv buffer (row buffer)
1681 * \param[out] ps_curr_recon_bufs : pointer to the recon picture structure (row buffer)
1682 * \param[in] ps_ctb_in : pointer to the CTB structure (output of ME/IPE) (row buffer)
1683 * \param[out] ps_ctb_out : pointer to the CTB output structure (row buffer)
1684 * \param[out] ps_row_cu : pointer to the CU output structure (row buffer)
1685 * \param[out] ps_row_tu : pointer to the TU output structure (row buffer)
1686 * \param[out] pi2_frm_coeffs : pointer coeff output (row buffer)
1687 * \param[in] i4_poc : current poc. Needed to send recon in dist-client mode
1688 *
1689 * \return
1690 *    None
1691 *
1692 * Note : Currently the frame-level calculations assume that the
1693 *        frame width of the input/recon is an exact multiple of the CTB size
1694 *
1695 * \author
1696 *  Ittiam
1697 *
1698 *****************************************************************************
1699 */
1700 void ihevce_enc_loop_process_row(
1701     ihevce_enc_loop_ctxt_t *ps_ctxt,
1702     iv_enc_yuv_buf_t *ps_curr_src_bufs,
1703     iv_enc_yuv_buf_t *ps_curr_recon_bufs,
1704     iv_enc_yuv_buf_src_t *ps_curr_recon_bufs_src,
1705     UWORD8 **ppu1_y_subpel_planes,
1706     ctb_analyse_t *ps_ctb_in,
1707     ctb_enc_loop_out_t *ps_ctb_out,
1708     ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse,
1709     cur_ctb_cu_tree_t *ps_row_cu_tree,
1710     cu_enc_loop_out_t *ps_row_cu,
1711     tu_enc_loop_out_t *ps_row_tu,
1712     pu_t *ps_row_pu,
1713     pu_col_mv_t *ps_row_col_pu,
1714     UWORD16 *pu2_num_pu_map,
1715     UWORD8 *pu1_row_pu_map,
1716     UWORD8 *pu1_row_ecd_data,
1717     UWORD32 *pu4_pu_offsets,
1718     frm_ctb_ctxt_t *ps_frm_ctb_prms,
1719     WORD32 vert_ctr,
1720     recon_pic_buf_t *ps_frm_recon,
1721     void *pv_dep_mngr_encloop_dep_me,
1722     pad_interp_recon_frm_t *ps_pad_interp_recon,
1723     WORD32 i4_pass,
1724     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
1725     ihevce_tile_params_t *ps_tile_params)
1726 {
1727     enc_loop_cu_prms_t s_cu_prms;
1728     ctb_enc_loop_out_t *ps_ctb_out_dblk;
1729 
1730     WORD32 ctb_ctr, ctb_start, ctb_end;
1731     WORD32 col_pu_map_idx;
1732     WORD32 num_ctbs_horz_pic;
1733     WORD32 ctb_size;
1734     WORD32 last_ctb_row_flag;
1735     WORD32 last_ctb_col_flag;
1736     WORD32 last_hz_ctb_wd;
1737     WORD32 last_vt_ctb_ht;
1738     void *pv_dep_mngr_enc_loop_dblk = ps_ctxt->pv_dep_mngr_enc_loop_dblk;
1739     void *pv_dep_mngr_enc_loop_sao = ps_ctxt->pv_dep_mngr_enc_loop_sao;
1740     void *pv_dep_mngr_enc_loop_cu_top_right = ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right;
1741     WORD32 dblk_offset, dblk_check_dep_pos;
1742     WORD32 sao_offset, sao_check_dep_pos;
1743     WORD32 aux_offset, aux_check_dep_pos;
1744     void *pv_dep_mngr_me_dep_encloop;
1745     ctb_enc_loop_out_t *ps_ctb_out_sao;
1746     /*Structure to store deblocking parameters at CTB-row level*/
1747     deblk_ctbrow_prms_t s_deblk_ctb_row_params;
1748     UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
1749 
1750     pv_dep_mngr_me_dep_encloop = (void *)ps_frm_recon->pv_dep_mngr_recon;
1751     num_ctbs_horz_pic = ps_frm_ctb_prms->i4_num_ctbs_horz;
1752     ctb_size = ps_frm_ctb_prms->i4_ctb_size;
1753 
1754     /* Store the num_ctb_horz in sao context*/
1755     ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_horz = ps_frm_ctb_prms->i4_num_ctbs_horz;
1756     ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_vert = ps_frm_ctb_prms->i4_num_ctbs_vert;
1757 
1758     /* Set Variables for Dep. Checking and Setting */
1759     aux_check_dep_pos = vert_ctr;
1760     aux_offset = 2; /* Should be there for 0th row also */
1761     if(vert_ctr > 0)
1762     {
1763         dblk_check_dep_pos = vert_ctr - 1;
1764         dblk_offset = 2;
1765     }
1766     else
1767     {
1768         /* First row should run without waiting */
1769         dblk_check_dep_pos = 0;
1770         dblk_offset = -(ps_tile_params->i4_first_sample_x + 1);
1771     }
1772 
1773     /* Set sao_offset and sao_check_dep_pos */
1774     if(vert_ctr > 1)
1775     {
1776         sao_check_dep_pos = vert_ctr - 2;
1777         sao_offset = 2;
1778     }
1779     else
1780     {
1781         /* First row should run without waiting */
1782         sao_check_dep_pos = 0;
1783         sao_offset = -(ps_tile_params->i4_first_sample_x + 1);
1784     }
1785 
1786     /* check if the current row processed in last CTb row */
1787     last_ctb_row_flag = (vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1));
1788 
1789     /* Valid Width (pixels) in the last CTB in every row (padding cases) */
1790     last_hz_ctb_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd - ((num_ctbs_horz_pic - 1) * ctb_size);
1791 
1792     /* Valid Height (pixels) in the last CTB row (padding cases) */
1793     last_vt_ctb_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht -
1794                      ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ctb_size);
1795     /* reset the states copied flag */
1796     ps_ctxt->u1_cabac_states_next_row_copied_flag = 0;
1797     ps_ctxt->u1_cabac_states_first_cu_copied_flag = 0;
1798 
1799     /* populate the cu prms which are common for entire ctb row */
1800     s_cu_prms.i4_luma_src_stride = ps_curr_src_bufs->i4_y_strd;
1801     s_cu_prms.i4_chrm_src_stride = ps_curr_src_bufs->i4_uv_strd;
1802     s_cu_prms.i4_luma_recon_stride = ps_curr_recon_bufs->i4_y_strd;
1803     s_cu_prms.i4_chrm_recon_stride = ps_curr_recon_bufs->i4_uv_strd;
1804     s_cu_prms.i4_ctb_size = ctb_size;
1805 
1806     ps_ctxt->i4_is_first_cu_qg_coded = 0;
1807 
1808     /* Initialize the number of PUs for the first CTB to 0 */
1809     *pu2_num_pu_map = 0;
1810 
1811     /*Getting the address of BS and Qp arrays and other info*/
1812     memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
1813     {
1814         WORD32 num_ctbs_horz_tile;
1815         /* Update the pointers which are accessed not by using ctb_ctr
1816         to the tile start here! */
1817         ps_ctb_in += ps_tile_params->i4_first_ctb_x;
1818         ps_ctb_out += ps_tile_params->i4_first_ctb_x;
1819 
1820         ps_row_cu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_cus_in_ctb);
1821         ps_row_tu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_tus_in_ctb);
1822         ps_row_pu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
1823         pu1_row_pu_map += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
1824         pu1_row_ecd_data +=
1825             (ps_tile_params->i4_first_ctb_x *
1826              ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_num_tus_in_ctb << 1)
1827                                 : ((ps_frm_ctb_prms->i4_num_tus_in_ctb * 3) >> 1)) *
1828              MAX_SCAN_COEFFS_BYTES_4x4);
1829 
1830         /* Update the pointers to the tile start */
1831         s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
1832             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one vertical edge per 8x8 block
1833         s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
1834             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one horizontal edge per 8x8 block
1835         s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
1836 
1837         num_ctbs_horz_tile = ps_tile_params->i4_curr_tile_wd_in_ctb_unit;
1838 
1839         ctb_start = ps_tile_params->i4_first_ctb_x;
1840         ctb_end = ps_tile_params->i4_first_ctb_x + num_ctbs_horz_tile;
1841     }
1842     ps_ctb_out_dblk = ps_ctb_out;
1843 
1844     ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_frame_qp;
1845 
1846     /* --------- Loop over all the CTBs in a row --------------- */
1847     for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
1848     {
1849         cu_final_update_prms s_cu_update_prms;
1850 
1851         cur_ctb_cu_tree_t *ps_cu_tree_analyse;
1852         me_ctb_data_t *ps_cu_me_data;
1853         ipe_l0_ctb_analyse_for_me_t *ps_ctb_ipe_analyse;
1854         cu_enc_loop_out_t *ps_cu_final;
1855         pu_col_mv_t *ps_ctb_col_pu;
1856 
1857         WORD32 cur_ctb_ht, cur_ctb_wd;
1858         WORD32 last_cu_pos_in_ctb;
1859         WORD32 last_cu_size;
1860         WORD32 num_pus_in_ctb;
1861         UWORD8 u1_is_ctb_noisy;
1862         ps_ctb_col_pu = ps_row_col_pu + ctb_ctr * ps_frm_ctb_prms->i4_num_pus_in_ctb;
1863 
1864         if(ctb_ctr)
1865         {
1866             ps_ctxt->i4_prev_QP = ps_ctxt->i4_last_cu_qp_from_prev_ctb;
1867         }
1868         /*If Sup pic rc is enabled*/
1869         if(ps_ctxt->i4_sub_pic_level_rc)
1870         {
1871             ihevce_sub_pic_rc_scale_query((void *)ps_multi_thrd_ctxt, (void *)ps_ctxt);
1872         }
1873         /* check if the current row processed in last CTb row */
1874         last_ctb_col_flag = (ctb_ctr == (num_ctbs_horz_pic - 1));
1875         if(1 == last_ctb_col_flag)
1876         {
1877             cur_ctb_wd = last_hz_ctb_wd;
1878         }
1879         else
1880         {
1881             cur_ctb_wd = ctb_size;
1882         }
1883 
1884         /* If it's the last CTB, get the actual ht of CTB */
1885         if(1 == last_ctb_row_flag)
1886         {
1887             cur_ctb_ht = last_vt_ctb_ht;
1888         }
1889         else
1890         {
1891             cur_ctb_ht = ctb_size;
1892         }
1893 
1894         ps_ctxt->u4_cur_ctb_ht = cur_ctb_ht;
1895         ps_ctxt->u4_cur_ctb_wd = cur_ctb_wd;
1896 
1897         /* Wait till reference frame recon is available */
1898 
1899         /* ------------ Wait till current data is ready from ME -------------- */
1900 
1901         /*only for ref instance and Non I pics */
1902         if((ps_ctxt->i4_bitrate_instance_num == 0) &&
1903            ((ISLICE != ps_ctxt->i1_slice_type) || L0ME_IN_OPENLOOP_MODE))
1904         {
1905             if(ctb_ctr < (num_ctbs_horz_pic))
1906             {
1907                 ihevce_dmgr_chk_row_row_sync(
1908                     pv_dep_mngr_encloop_dep_me,
1909                     ctb_ctr,
1910                     1,
1911                     vert_ctr,
1912                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1913                     ps_ctxt->thrd_id);
1914             }
1915         }
1916 
1917         /* store the cu pointer for current ctb out */
1918         ps_ctb_out->ps_enc_cu = ps_row_cu;
1919         ps_cu_final = ps_row_cu;
1920 
1921         /* Get the base point of CU recursion tree */
1922         if(ISLICE != ps_ctxt->i1_slice_type)
1923         {
1924             ps_cu_tree_analyse = ps_ctb_in->ps_cu_tree;
1925             ASSERT(ps_ctb_in->ps_cu_tree == (ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE)));
1926         }
1927         else
1928         {
1929             /* Initialize ptr to current CTB */
1930             ps_cu_tree_analyse = ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE);
1931         }
1932 
1933         /* Get the ME data pointer for 16x16 block data in ctb */
1934         ps_cu_me_data = ps_ctb_in->ps_me_ctb_data;
1935         u1_is_ctb_noisy = ps_ctb_in->s_ctb_noise_params.i4_noise_present;
1936         s_cu_prms.u1_is_cu_noisy = u1_is_ctb_noisy;
1937         s_cu_prms.pu1_is_8x8Blk_noisy = ps_ctb_in->s_ctb_noise_params.au1_is_8x8Blk_noisy;
1938 
1939         /* store the ctb level prms in cu prms */
1940         s_cu_prms.i4_ctb_pos = ctb_ctr;
1941 
1942         s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
1943         s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
1944 
1945         {
1946             s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
1947             s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
1948         }
1949 
1950         s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
1951 
1952         s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
1953 
1954         s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
1955 
1956         /* Initialize ptr to current CTB */
1957         ps_ctb_ipe_analyse = ps_row_ipe_analyse + ctb_ctr;  // * ctb_size;
1958 
1959         /* reset the map idx for current ctb */
1960         col_pu_map_idx = 0;
1961         num_pus_in_ctb = 0;
1962 
1963         /* reset the map buffer to 0*/
1964 
1965         memset(
1966             &ps_ctxt->au1_nbr_ctb_map[0][0],
1967             0,
1968             (MAX_PU_IN_CTB_ROW + 1 + 8) * (MAX_PU_IN_CTB_ROW + 1 + 8));
1969 
1970         /* set the CTB neighbour availability flags */
1971         ihevce_set_ctb_nbr(
1972             &ps_ctb_out->s_ctb_nbr_avail_flags,
1973             ps_ctxt->pu1_ctb_nbr_map,
1974             ps_ctxt->i4_nbr_map_strd,
1975             ctb_ctr,
1976             vert_ctr,
1977             ps_frm_ctb_prms);
1978 
1979         /* -------- update the cur CTB offsets for inter prediction-------- */
1980         ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = ctb_ctr * ctb_size;
1981         ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = vert_ctr * ctb_size;
1982 
1983         /* -------- update the cur CTB offsets for MV prediction-------- */
1984         ps_ctxt->s_mv_pred_ctxt.i4_ctb_x = ctb_ctr;
1985         ps_ctxt->s_mv_pred_ctxt.i4_ctb_y = vert_ctr;
1986 
1987         /* -------------- Boundary Strength Initialization ----------- */
1988         if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
1989         {
1990             ihevce_bs_init_ctb(&ps_ctxt->s_deblk_bs_prms, ps_frm_ctb_prms, ctb_ctr, vert_ctr);
1991         }
1992 
1993         /* -------- update cur CTB offsets for entropy rdopt context------- */
1994         ihevce_entropy_rdo_ctb_init(&ps_ctxt->s_rdopt_entropy_ctxt, ctb_ctr, vert_ctr);
1995 
1996         /* --------- CU Recursion --------------- */
1997 
1998         {
1999 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2000             WORD32 i4_max_tree_depth = 4;
2001 #endif
2002             WORD32 i4_tree_depth = 0;
2003             /* Init no. of CU in CTB to 0*/
2004             ps_ctb_out->u1_num_cus_in_ctb = 0;
2005 
2006 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2007             if(ps_ctxt->i4_bitrate_instance_num == 0)
2008             {
2009                 WORD32 i4_max_tree_depth = 4;
2010                 WORD32 i;
2011                 for(i = 0; i < i4_max_tree_depth; i++)
2012                 {
2013                     COPY_CABAC_STATES(
2014                         &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2015                         &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2016                         IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2017                 }
2018             }
2019 #else
2020             if(ps_ctxt->i4_bitrate_instance_num == 0)
2021             {
2022                 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2023                 {
2024                     WORD32 i4_max_tree_depth = 4;
2025                     WORD32 i;
2026                     for(i = 0; i < i4_max_tree_depth; i++)
2027                     {
2028                         COPY_CABAC_STATES(
2029                             &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2030                             &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2031                             IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2032                     }
2033                 }
2034             }
2035 
2036 #endif
2037             if(ps_ctxt->i4_bitrate_instance_num == 0)
2038             {
2039                 /* FOR I- PIC populate the curr_ctb accordingly */
2040                 if(ISLICE == ps_ctxt->i1_slice_type)
2041                 {
2042                     ps_ctb_ipe_analyse->ps_cu_tree_root = ps_cu_tree_analyse;
2043                     ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
2044 
2045                     ihevce_populate_cu_tree(
2046                         ps_ctb_ipe_analyse,
2047                         ps_cu_tree_analyse,
2048                         0,
2049                         (IHEVCE_QUALITY_CONFIG_T)ps_ctxt->i4_quality_preset,
2050                         POS_NA,
2051                         POS_NA,
2052                         POS_NA);
2053                 }
2054             }
2055             ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
2056             ps_ctxt->ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2057             ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2058 
2059             s_cu_update_prms.ppu1_row_ecd_data = &pu1_row_ecd_data;
2060             s_cu_update_prms.pi4_last_cu_pos_in_ctb = &last_cu_pos_in_ctb;
2061             s_cu_update_prms.pi4_last_cu_size = &last_cu_size;
2062             s_cu_update_prms.pi4_num_pus_in_ctb = &num_pus_in_ctb;
2063             s_cu_update_prms.pps_cu_final = &ps_cu_final;
2064             s_cu_update_prms.pps_row_pu = &ps_row_pu;
2065             s_cu_update_prms.pps_row_tu = &ps_row_tu;
2066             s_cu_update_prms.pu1_num_cus_in_ctb_out = &ps_ctb_out->u1_num_cus_in_ctb;
2067 
2068             // source satd computation
2069             /* compute the source 8x8 SATD for the current CTB */
2070             /* populate  pui4_source_satd in some structure and pass it inside */
2071             if(ps_ctxt->u1_enable_psyRDOPT)
2072             {
2073                 /* declare local variables */
2074                 WORD32 i;
2075                 WORD32 ctb_size;
2076                 WORD32 num_comp_had_blocks;
2077                 UWORD8 *pu1_l0_block;
2078                 WORD32 block_ht;
2079                 WORD32 block_wd;
2080                 WORD32 ht_offset;
2081                 WORD32 wd_offset;
2082 
2083                 WORD32 num_horz_blocks;
2084                 WORD32 had_block_size;
2085                 WORD32 total_had_block_size;
2086                 WORD16 pi2_residue_had_zscan[64];
2087                 UWORD8 ai1_zeros_buffer[64];
2088 
2089                 WORD32 index_satd;
2090                 WORD32 is_hbd;
2091                 /* initialize the variables */
2092                 block_ht = cur_ctb_ht;
2093                 block_wd = cur_ctb_wd;
2094 
2095                 is_hbd = ps_ctxt->u1_is_input_data_hbd;
2096 
2097                 had_block_size = 8;
2098                 total_had_block_size = had_block_size * had_block_size;
2099 
2100                 for(i = 0; i < total_had_block_size; i++)
2101                 {
2102                     ai1_zeros_buffer[i] = 0;
2103                 }
2104 
2105                 ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
2106                 num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
2107 
2108                 num_horz_blocks = block_wd / had_block_size;  //ctb_width / had_block_size;
2109                 ht_offset = -had_block_size;
2110                 wd_offset = -had_block_size;
2111 
2112                 index_satd = 0;
2113                 /*Loop over all 8x8 blocsk in the CTB*/
2114                 for(i = 0; i < num_comp_had_blocks; i++)
2115                 {
2116                     if(i % num_horz_blocks == 0)
2117                     {
2118                         wd_offset = -had_block_size;
2119                         ht_offset += had_block_size;
2120                     }
2121                     wd_offset += had_block_size;
2122 
2123                     if(!is_hbd)
2124                     {
2125                         /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
2126                         pu1_l0_block = s_cu_prms.pu1_luma_src +
2127                                        ps_curr_src_bufs->i4_y_strd * ht_offset + wd_offset;
2128 
2129                         ps_ctxt->ai4_source_satd_8x8[index_satd] =
2130 
2131                             ps_ctxt->s_cmn_opt_func.pf_AC_HAD_8x8_8bit(
2132                                 pu1_l0_block,
2133                                 ps_curr_src_bufs->i4_y_strd,
2134                                 ai1_zeros_buffer,
2135                                 had_block_size,
2136                                 pi2_residue_had_zscan,
2137                                 had_block_size);
2138                     }
2139                     index_satd++;
2140                 }
2141             }
2142 
2143             if(ps_ctxt->u1_enable_psyRDOPT)
2144             {
2145                 /* declare local variables */
2146                 WORD32 i;
2147                 WORD32 ctb_size;
2148                 WORD32 num_comp_had_blocks;
2149                 UWORD8 *pu1_l0_block;
2150                 UWORD8 *pu1_l0_block_prev = NULL;
2151                 WORD32 block_ht;
2152                 WORD32 block_wd;
2153                 WORD32 ht_offset;
2154                 WORD32 wd_offset;
2155 
2156                 WORD32 num_horz_blocks;
2157                 WORD32 had_block_size;
2158                 WORD16 pi2_residue_had[64];
2159                 UWORD8 ai1_zeros_buffer[64];
2160                 WORD32 index_satd = 0;
2161 
2162                 WORD32 is_hbd;
2163                 is_hbd = ps_ctxt->u1_is_input_data_hbd;  // 8 bit
2164 
2165                 /* initialize the variables */
2166                 /* change this based ont he bit depth */
2167                 // ps_ctxt->u1_chroma_array_type
2168                 if(ps_ctxt->u1_chroma_array_type == 1)
2169                 {
2170                     block_ht = cur_ctb_ht / 2;
2171                     block_wd = cur_ctb_wd / 2;
2172                 }
2173                 else
2174                 {
2175                     block_ht = cur_ctb_ht;
2176                     block_wd = cur_ctb_wd / 2;
2177                 }
2178 
2179                 had_block_size = 4;
2180                 memset(ai1_zeros_buffer, 0, 64 * sizeof(UWORD8));
2181 
2182                 ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
2183                 num_comp_had_blocks = 2 * ctb_size / (had_block_size * had_block_size);
2184 
2185                 num_horz_blocks = 2 * block_wd / had_block_size;  //ctb_width / had_block_size;
2186                 ht_offset = -had_block_size;
2187                 wd_offset = -had_block_size;
2188 
2189                 if(!is_hbd)
2190                 {
2191                     /* loop over for every 4x4 blocks in the CU for Cb */
2192                     for(i = 0; i < num_comp_had_blocks; i++)
2193                     {
2194                         if(i % num_horz_blocks == 0)
2195                         {
2196                             wd_offset = -had_block_size;
2197                             ht_offset += had_block_size;
2198                         }
2199                         wd_offset += had_block_size;
2200 
2201                         /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
2202                         if(i % 2 != 0)
2203                         {
2204                             if(!is_hbd)
2205                             {
2206                                 pu1_l0_block = pu1_l0_block_prev + 1;
2207                             }
2208                         }
2209                         else
2210                         {
2211                             if(!is_hbd)
2212                             {
2213                                 pu1_l0_block = s_cu_prms.pu1_chrm_src +
2214                                                s_cu_prms.i4_chrm_src_stride * ht_offset + wd_offset;
2215                                 pu1_l0_block_prev = pu1_l0_block;
2216                             }
2217                         }
2218 
2219                         if(had_block_size == 4)
2220                         {
2221                             if(!is_hbd)
2222                             {
2223                                 ps_ctxt->ai4_source_chroma_satd[index_satd] =
2224                                     ps_ctxt->s_cmn_opt_func.pf_chroma_AC_HAD_4x4_8bit(
2225                                         pu1_l0_block,
2226                                         s_cu_prms.i4_chrm_src_stride,
2227                                         ai1_zeros_buffer,
2228                                         had_block_size,
2229                                         pi2_residue_had,
2230                                         had_block_size);
2231                             }
2232 
2233                             index_satd++;
2234 
2235                         }  // block size of 4x4
2236 
2237                     }  // for all blocks
2238 
2239                 }  // is hbd check
2240             }
2241 
2242             ihevce_cu_recurse_decide(
2243                 ps_ctxt,
2244                 &s_cu_prms,
2245                 ps_cu_tree_analyse,
2246                 ps_cu_tree_analyse,
2247                 ps_ctb_ipe_analyse,
2248                 ps_cu_me_data,
2249                 &ps_ctb_col_pu,
2250                 &s_cu_update_prms,
2251                 pu1_row_pu_map,
2252                 &col_pu_map_idx,
2253                 i4_tree_depth,
2254                 ctb_ctr << 6,
2255                 vert_ctr << 6,
2256                 cur_ctb_ht);
2257 
2258             if(ps_ctxt->i1_slice_type != ISLICE)
2259             {
2260                 ASSERT(
2261                     (cur_ctb_wd * cur_ctb_ht) <=
2262                     ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree_analyse));
2263             }
2264             /*If Sub pic rc is enabled*/
2265             if(1 == ps_ctxt->i4_sub_pic_level_rc)
2266             {
2267                 /*In a row, after the required CTB is reached, send data and query scale from Bit Control thread */
2268                 ihevce_sub_pic_rc_in_data(
2269                     (void *)ps_multi_thrd_ctxt,
2270                     (void *)ps_ctxt,
2271                     (void *)ps_ctb_ipe_analyse,
2272                     (void *)ps_frm_ctb_prms);
2273             }
2274 
2275             ps_ctxt->ps_enc_out_ctxt->u1_cu_size = 128;
2276 
2277         } /* End of CU recursion block */
2278 
2279 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2280         {
2281             ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2282             enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
2283             ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2284 
2285             do
2286             {
2287                 ihevce_update_final_cu_results(
2288                     ps_ctxt,
2289                     ps_enc_out_ctxt,
2290                     ps_cu_prms,
2291                     NULL, /* &ps_ctb_col_pu */
2292                     NULL, /* &col_pu_map_idx */
2293                     &s_cu_update_prms,
2294                     ctb_ctr,
2295                     vert_ctr);
2296 
2297                 ps_enc_out_ctxt++;
2298 
2299                 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
2300 
2301             } while(ps_enc_out_ctxt->u1_cu_size != 128);
2302         }
2303 #else
2304         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2305         {
2306             ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2307             enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
2308             ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2309 
2310             do
2311             {
2312                 ihevce_update_final_cu_results(
2313                     ps_ctxt,
2314                     ps_enc_out_ctxt,
2315                     ps_cu_prms,
2316                     NULL, /* &ps_ctb_col_pu */
2317                     NULL, /* &col_pu_map_idx */
2318                     &s_cu_update_prms,
2319                     ctb_ctr,
2320                     vert_ctr);
2321 
2322                 ps_enc_out_ctxt++;
2323 
2324                 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
2325 
2326             } while(ps_enc_out_ctxt->u1_cu_size != 128);
2327         }
2328 #endif
2329 
2330         /* --- ctb level copy of data to left buffers--*/
2331         ((pf_enc_loop_ctb_left_copy)ps_ctxt->pv_enc_loop_ctb_left_copy)(ps_ctxt, &s_cu_prms);
2332 
2333         if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2334         {
2335             /* For the Unaligned CTB, make the invalid edge boundary strength 0 */
2336             ihevce_bs_clear_invalid(
2337                 &ps_ctxt->s_deblk_bs_prms,
2338                 last_ctb_row_flag,
2339                 (ctb_ctr == (num_ctbs_horz_pic - 1)),
2340                 last_hz_ctb_wd,
2341                 last_vt_ctb_ht);
2342 
2343             /* -----------------Read boundary strengths for current CTB------------- */
2344 
2345             if((0 == ps_ctxt->i4_deblock_type) && (ps_ctxt->i4_deblk_pad_hpel_cur_pic))
2346             {
2347                 /*Storing boundary strengths of current CTB*/
2348                 UWORD32 *pu4_bs_horz = &ps_ctxt->s_deblk_bs_prms.au4_horz_bs[0];
2349                 UWORD32 *pu4_bs_vert = &ps_ctxt->s_deblk_bs_prms.au4_vert_bs[0];
2350 
2351                 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_vert, pu4_bs_vert, (ctb_size * 4) / 8);
2352                 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_horz, pu4_bs_horz, (ctb_size * 4) / 8);
2353             }
2354             //Increment for storing next CTB info
2355             s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2356                 (ctb_size >> 3);  //one vertical edge per 8x8 block
2357             s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2358                 (ctb_size >> 3);  //one horizontal edge per 8x8 block
2359         }
2360 
2361         /* -------------- ctb level updates ----------------- */
2362         ps_row_cu += ps_ctb_out->u1_num_cus_in_ctb;
2363 
2364         pu1_row_pu_map += (ctb_size >> 2) * (ctb_size >> 2);
2365 
2366         /* first ctb offset will be populated by the caller */
2367         if(0 != ctb_ctr)
2368         {
2369             pu4_pu_offsets[ctb_ctr] = pu4_pu_offsets[ctb_ctr - 1] + num_pus_in_ctb;
2370         }
2371         pu2_num_pu_map[ctb_ctr] = num_pus_in_ctb;
2372         ASSERT(ps_ctb_out->u1_num_cus_in_ctb != 0);
2373 
2374         ps_ctb_in++;
2375         ps_ctb_out++;
2376     }
2377 
2378     /* ---------- Encloop end of row updates ----------------- */
2379 
2380     /* at the end of row processing cu pixel counter is set to */
2381     /* (num ctb * ctbsize) + ctb size                          */
2382     /* this is to set the dependency for right most cu of last */
2383     /* ctb's top right data dependency                         */
2384     /* this even takes care of entropy dependency for          */
2385     /* incomplete ctb as well                                  */
2386     ihevce_dmgr_set_row_row_sync(
2387         pv_dep_mngr_enc_loop_cu_top_right,
2388         (ctb_ctr * ctb_size + ctb_size),
2389         vert_ctr,
2390         ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2391 
2392     ps_ctxt->s_sao_ctxt_t.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
2393 
2394     /* Restore structure.
2395     Getting the address of stored-BS and Qp-map and other info */
2396     memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
2397     {
2398         /* Update the pointers to the tile start */
2399         s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2400             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one vertical edge per 8x8 block
2401         s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2402             (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one horizontal edge per 8x8 block
2403         s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
2404     }
2405 
2406 #if PROFILE_ENC_REG_DATA
2407     s_profile.u8_enc_reg_data[vert_ctr] = 0;
2408 #endif
2409 
2410     /* -- Loop over all the CTBs in a row for Deblocking and Subpel gen --- */
2411     if(!ps_ctxt->u1_is_input_data_hbd)
2412     {
2413         WORD32 last_col_pic, last_col_tile;
2414 
2415         for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2416         {
2417             /* store the ctb level prms in cu prms */
2418             s_cu_prms.i4_ctb_pos = ctb_ctr;
2419             s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
2420             s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
2421 
2422             s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
2423             s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
2424             s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
2425 
2426             s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
2427 
2428             s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
2429 
2430             /* If last ctb in the horizontal row */
2431             if(ctb_ctr == (num_ctbs_horz_pic - 1))
2432             {
2433                 last_col_pic = 1;
2434             }
2435             else
2436             {
2437                 last_col_pic = 0;
2438             }
2439 
2440             /* If last ctb in the tile row */
2441             if(ctb_ctr == (ctb_end - 1))
2442             {
2443                 last_col_tile = 1;
2444             }
2445             else
2446             {
2447                 last_col_tile = 0;
2448             }
2449 
2450             if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2451             {
2452                 /* for last ctb of a row check top instead of top right */
2453                 if(((ctb_ctr + 1) == ctb_end) && (vert_ctr > 0))
2454                 {
2455                     dblk_offset = 1;
2456                 }
2457                 /* Wait till top neighbour CTB has done its deblocking*/
2458                 ihevce_dmgr_chk_row_row_sync(
2459                     pv_dep_mngr_enc_loop_dblk,
2460                     ctb_ctr,
2461                     dblk_offset,
2462                     dblk_check_dep_pos,
2463                     ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2464                     ps_ctxt->thrd_id);
2465 
2466                 if((0 == ps_ctxt->i4_deblock_type))
2467                 {
2468                     /* Populate Qp-map */
2469                     if(ctb_start == ctb_ctr)
2470                     {
2471                         ihevce_deblk_populate_qp_map(
2472                             ps_ctxt,
2473                             &s_deblk_ctb_row_params,
2474                             ps_ctb_out_dblk,
2475                             vert_ctr,
2476                             ps_frm_ctb_prms,
2477                             ps_tile_params);
2478                     }
2479                     ps_ctxt->s_deblk_prms.i4_ctb_size = ctb_size;
2480 
2481                     /* recon pointers and stride */
2482                     ps_ctxt->s_deblk_prms.pu1_ctb_y = s_cu_prms.pu1_luma_recon;
2483                     ps_ctxt->s_deblk_prms.pu1_ctb_uv = s_cu_prms.pu1_chrm_recon;
2484                     ps_ctxt->s_deblk_prms.i4_luma_pic_stride = s_cu_prms.i4_luma_recon_stride;
2485                     ps_ctxt->s_deblk_prms.i4_chroma_pic_stride = s_cu_prms.i4_chrm_recon_stride;
2486 
2487                     ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge = (0 == vert_ctr) ? 0 : 1;
2488                     {
2489                         ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge =
2490                             (ps_tile_params->i4_first_ctb_y == vert_ctr) ? 0 : 1;
2491                     }
2492                     ps_ctxt->s_deblk_prms.i4_deblock_left_ctb_edge = (ctb_start == ctb_ctr) ? 0 : 1;
2493                     //or according to slice boundary. Support yet to be added !!!!
2494 
2495                     ihevce_deblk_ctb(
2496                         &ps_ctxt->s_deblk_prms, last_col_tile, &s_deblk_ctb_row_params);
2497 
2498                     //Increment for storing next CTB info
2499                     s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2500                         (ctb_size >> 3);  //one vertical edge per 8x8 block
2501                     s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2502                         (ctb_size >> 3);  //one horizontal edge per 8x8 block
2503                     s_deblk_ctb_row_params.pi1_ctb_row_qp +=
2504                         (ctb_size >> 2);  //one qp per 4x4 block.
2505                 }
2506             }  // end of if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2507 
2508             /* update the number of ctbs deblocked for this row */
2509             ihevce_dmgr_set_row_row_sync(
2510                 pv_dep_mngr_enc_loop_dblk,
2511                 (ctb_ctr + 1),
2512                 vert_ctr,
2513                 ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2514 
2515         }  //end of loop over CTBs in current CTB-row
2516 
2517         /* Apply SAO over the previous CTB-row */
2518         for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2519         {
2520             if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2521                ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2522             {
2523                 sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2524 
2525                 if(vert_ctr > ps_tile_params->i4_first_ctb_y)
2526                 {
2527                     /*For last ctb check top dep only*/
2528                     if((vert_ctr > 1) && ((ctb_ctr + 1) == ctb_end))
2529                     {
2530                         sao_offset = 1;
2531                     }
2532 
2533                     ihevce_dmgr_chk_row_row_sync(
2534                         pv_dep_mngr_enc_loop_sao,
2535                         ctb_ctr,
2536                         sao_offset,
2537                         sao_check_dep_pos,
2538                         ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2539                         ps_ctxt->thrd_id);
2540 
2541                     /* Call the sao function to do sao for the current ctb*/
2542 
2543                     /* Register the curr ctb's x pos in sao context*/
2544                     ps_sao_ctxt->i4_ctb_x = ctb_ctr;
2545 
2546                     /* Register the curr ctb's y pos in sao context*/
2547                     ps_sao_ctxt->i4_ctb_y = vert_ctr - 1;
2548 
2549                     ps_ctb_out_sao = ps_sao_ctxt->ps_ctb_out +
2550                                      (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz + ctb_ctr;
2551                     ps_sao_ctxt->ps_sao = &ps_ctb_out_sao->s_sao;
2552                     ps_sao_ctxt->i4_sao_blk_wd = ctb_size;
2553                     ps_sao_ctxt->i4_sao_blk_ht = ctb_size;
2554 
2555                     ps_sao_ctxt->i4_is_last_ctb_row = 0;
2556                     ps_sao_ctxt->i4_is_last_ctb_col = 0;
2557 
2558                     if((ctb_ctr + 1) == ctb_end)
2559                     {
2560                         ps_sao_ctxt->i4_is_last_ctb_col = 1;
2561                         ps_sao_ctxt->i4_sao_blk_wd =
2562                             ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
2563                                         ps_tile_params->i4_curr_tile_width);
2564                     }
2565 
2566                     /* Calculate the recon buf pointer and stride for the current ctb */
2567                     ps_sao_ctxt->pu1_cur_luma_recon_buf =
2568                         ps_sao_ctxt->pu1_frm_luma_recon_buf +
2569                         (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2570                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2571 
2572                     ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2573 
2574                     ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2575                         ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2576                         (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2577                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2578                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2579 
2580                     ps_sao_ctxt->i4_cur_chroma_recon_stride =
2581                         ps_sao_ctxt->i4_frm_chroma_recon_stride;
2582 
2583                     ps_sao_ctxt->pu1_cur_luma_src_buf =
2584                         ps_sao_ctxt->pu1_frm_luma_src_buf +
2585                         (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2586                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2587 
2588                     ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2589 
2590                     ps_sao_ctxt->pu1_cur_chroma_src_buf =
2591                         ps_sao_ctxt->pu1_frm_chroma_src_buf +
2592                         (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2593                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2594                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2595 
2596                     ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2597 
2598                     /* Calculate the pointer to buff to store the (x,y)th sao
2599                     * for the top merge of (x,y+1)th ctb
2600                     */
2601                     ps_sao_ctxt->ps_top_ctb_sao =
2602                         &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2603                                                          [ps_sao_ctxt->i4_ctb_x +
2604                                                           (ps_sao_ctxt->i4_ctb_y) *
2605                                                               ps_frm_ctb_prms->i4_num_ctbs_horz +
2606                                                           (ps_ctxt->i4_bitrate_instance_num *
2607                                                            ps_sao_ctxt->i4_num_ctb_units)];
2608 
2609                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2610                     ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2611                         ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2612                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2613                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2614                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2615                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2616 
2617                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2618                     ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2619                         ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2620                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2621                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2622                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2623                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2624 
2625                     {
2626                         UWORD32 u4_ctb_sao_bits;
2627 
2628                         ihevce_sao_analyse(
2629                             &ps_ctxt->s_sao_ctxt_t,
2630                             ps_ctb_out_sao,
2631                             &u4_ctb_sao_bits,
2632                             ps_tile_params);
2633                         ps_ctxt
2634                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2635                                                      [ps_ctxt->i4_bitrate_instance_num]
2636                             ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2637                         ps_ctxt
2638                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2639                                                      [ps_ctxt->i4_bitrate_instance_num]
2640                             ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2641                     }
2642                     /** Subpel generation not done for non-ref picture **/
2643                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2644                     {
2645                         /* Recon Padding */
2646                         ihevce_recon_padding(
2647                             ps_pad_interp_recon,
2648                             ctb_ctr,
2649                             vert_ctr - 1,
2650                             ps_frm_ctb_prms,
2651                             ps_ctxt->ps_func_selector);
2652                     }
2653                     /* update the number of SAO ctbs for this row */
2654                     ihevce_dmgr_set_row_row_sync(
2655                         pv_dep_mngr_enc_loop_sao,
2656                         ctb_ctr + 1,
2657                         vert_ctr - 1,
2658                         ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2659                 }
2660             }
2661             else  //SAO Disabled
2662             {
2663                 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2664                 {
2665                     /* Recon Padding */
2666                     ihevce_recon_padding(
2667                         ps_pad_interp_recon,
2668                         ctb_ctr,
2669                         vert_ctr,
2670                         ps_frm_ctb_prms,
2671                         ps_ctxt->ps_func_selector);
2672                 }
2673             }
2674         }  // end of SAO for loop
2675 
2676         /* Call the sao function again for the last ctb row of frame */
2677         if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2678            ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2679         {
2680             sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2681 
2682             if(vert_ctr ==
2683                (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1))
2684             {
2685                 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2686                 {
2687                     /* Register the curr ctb's x pos in sao context*/
2688                     ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr;
2689 
2690                     /* Register the curr ctb's y pos in sao context*/
2691                     ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr;
2692 
2693                     ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
2694                                      vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz + ctb_ctr;
2695 
2696                     ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
2697 
2698                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd = ps_ctxt->s_sao_ctxt_t.i4_ctb_size;
2699                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 0;
2700 
2701                     if((ctb_ctr + 1) == ctb_end)
2702                     {
2703                         ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1;
2704                         ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd =
2705                             ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
2706                                         ps_tile_params->i4_curr_tile_width);
2707                     }
2708 
2709                     ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht =
2710                         ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) -
2711                                     ps_tile_params->i4_curr_tile_height);
2712 
2713                     ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1;
2714 
2715                     /* Calculate the recon buf pointer and stride for the current ctb */
2716                     ps_sao_ctxt->pu1_cur_luma_recon_buf =
2717                         ps_sao_ctxt->pu1_frm_luma_recon_buf +
2718                         (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2719                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2720 
2721                     ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2722 
2723                     ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2724                         ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2725                         (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2726                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2727                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2728 
2729                     ps_sao_ctxt->i4_cur_chroma_recon_stride =
2730                         ps_sao_ctxt->i4_frm_chroma_recon_stride;
2731 
2732                     ps_sao_ctxt->pu1_cur_luma_src_buf =
2733                         ps_sao_ctxt->pu1_frm_luma_src_buf +
2734                         (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2735                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2736 
2737                     ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2738 
2739                     ps_sao_ctxt->pu1_cur_chroma_src_buf =
2740                         ps_sao_ctxt->pu1_frm_chroma_src_buf +
2741                         (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2742                          (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2743                         (ps_sao_ctxt->i4_ctb_x * ctb_size);
2744 
2745                     ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2746 
2747                     /* Calculate the pointer to buff to store the (x,y)th sao
2748                     * for the top merge of (x,y+1)th ctb
2749                     */
2750                     ps_sao_ctxt->ps_top_ctb_sao =
2751                         &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2752                                                          [ps_sao_ctxt->i4_ctb_x +
2753                                                           (ps_sao_ctxt->i4_ctb_y) *
2754                                                               ps_frm_ctb_prms->i4_num_ctbs_horz +
2755                                                           (ps_ctxt->i4_bitrate_instance_num *
2756                                                            ps_sao_ctxt->i4_num_ctb_units)];
2757 
2758                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2759                     ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2760                         ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2761                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2762                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2763                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2764                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2765 
2766                     /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2767                     ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2768                         ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2769                         (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2770                         ps_sao_ctxt->i4_ctb_x * ctb_size +
2771                         ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2772                                                             ps_sao_ctxt->i4_top_chroma_buf_size);
2773 
2774                     {
2775                         UWORD32 u4_ctb_sao_bits;
2776                         ihevce_sao_analyse(
2777                             &ps_ctxt->s_sao_ctxt_t,
2778                             ps_ctb_out_sao,
2779                             &u4_ctb_sao_bits,
2780                             ps_tile_params);
2781                         ps_ctxt
2782                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2783                                                      [ps_ctxt->i4_bitrate_instance_num]
2784                             ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2785                         ps_ctxt
2786                             ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2787                                                      [ps_ctxt->i4_bitrate_instance_num]
2788                             ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2789                     }
2790                     /** Subpel generation not done for non-ref picture **/
2791                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2792                     {
2793                         /* Recon Padding */
2794                         ihevce_recon_padding(
2795                             ps_pad_interp_recon,
2796                             ctb_ctr,
2797                             vert_ctr,
2798                             ps_frm_ctb_prms,
2799                             ps_ctxt->ps_func_selector);
2800                     }
2801                 }
2802             }  //end of loop over CTBs in current CTB-row
2803         }
2804 
2805         /* Subpel Plane Generation*/
2806         for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2807         {
2808             if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2809                ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2810             {
2811                 if(0 != vert_ctr)
2812                 {
2813                     /** Subpel generation not done for non-ref picture **/
2814                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2815                     {
2816                         /* Padding and Subpel Plane Generation */
2817                         ihevce_pad_interp_recon_ctb(
2818                             ps_pad_interp_recon,
2819                             ctb_ctr,
2820                             vert_ctr - 1,
2821                             ps_ctxt->i4_quality_preset,
2822                             ps_frm_ctb_prms,
2823                             ps_ctxt->ai2_scratch,
2824                             ps_ctxt->i4_bitrate_instance_num,
2825                             ps_ctxt->ps_func_selector);
2826                     }
2827                 }
2828             }
2829             else
2830             {  // SAO Disabled
2831                 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2832                 {
2833                     /* Padding and Subpel Plane Generation */
2834                     ihevce_pad_interp_recon_ctb(
2835                         ps_pad_interp_recon,
2836                         ctb_ctr,
2837                         vert_ctr,
2838                         ps_ctxt->i4_quality_preset,
2839                         ps_frm_ctb_prms,
2840                         ps_ctxt->ai2_scratch,
2841                         ps_ctxt->i4_bitrate_instance_num,
2842                         ps_ctxt->ps_func_selector);
2843                 }
2844             }
2845         }
2846 
2847         {
2848             if(!ps_ctxt->i4_bitrate_instance_num)
2849             {
2850                 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2851                    ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2852                 {
2853                     /* If SAO is on, then signal completion of previous CTB row */
2854                     if(0 != vert_ctr)
2855                     {
2856                         {
2857                             WORD32 post_ctb_ctr;
2858 
2859                             for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2860                             {
2861                                 ihevce_dmgr_map_set_sync(
2862                                     pv_dep_mngr_me_dep_encloop,
2863                                     post_ctb_ctr,
2864                                     (vert_ctr - 1),
2865                                     MAP_CTB_COMPLETE);
2866                             }
2867                         }
2868                     }
2869                 }
2870                 else
2871                 {
2872                     {
2873                         WORD32 post_ctb_ctr;
2874 
2875                         for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2876                         {
2877                             ihevce_dmgr_map_set_sync(
2878                                 pv_dep_mngr_me_dep_encloop,
2879                                 post_ctb_ctr,
2880                                 vert_ctr,
2881                                 MAP_CTB_COMPLETE);
2882                         }
2883                     }
2884                 }
2885             }
2886         }
2887 
2888         /*process last ctb row*/
2889         if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2890            ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2891         {
2892             sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2893 
2894             if(vert_ctr ==
2895                (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1))
2896             {
2897                 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2898                 {
2899                     if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2900                     {
2901                         /* Padding and Subpel Plane Generation */
2902                         ihevce_pad_interp_recon_ctb(
2903                             ps_pad_interp_recon,
2904                             ctb_ctr,
2905                             vert_ctr,
2906                             ps_ctxt->i4_quality_preset,
2907                             ps_frm_ctb_prms,
2908                             ps_ctxt->ai2_scratch,
2909                             ps_ctxt->i4_bitrate_instance_num,
2910                             ps_ctxt->ps_func_selector);
2911                     }
2912                 }
2913             }
2914             /* If SAO is on, then signal completion of the last CTB row of frame */
2915             {
2916                 if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
2917                 {
2918                     if(!ps_ctxt->i4_bitrate_instance_num)
2919                     {
2920                         {
2921                             WORD32 post_ctb_ctr;
2922 
2923                             for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2924                             {
2925                                 ihevce_dmgr_map_set_sync(
2926                                     pv_dep_mngr_me_dep_encloop,
2927                                     post_ctb_ctr,
2928                                     vert_ctr,
2929                                     MAP_CTB_COMPLETE);
2930                             }
2931                         }
2932                     }
2933                 }
2934             }
2935         }
2936     }
2937 
2938     return;
2939 }
2940 
2941 /*!
******************************************************************************
* \if Function name : ihevce_enc_loop_process \endif
*
* \brief
*    Frame level enc_loop function. Repeatedly fetches a CTB-row job for the
*    given frame from the job queue, derives the row level buffer pointers
*    from the frame level buffers and calls ihevce_enc_loop_process_row().
*    Returns once the job queue hands back no more jobs.
*
* \param[in] pv_ctxt : pointer to enc_loop module (master context)
* \param[in] ps_curr_inp : current input buffer; its s_lap_out supplies the
*                          source yuv buffer, ref-pic flag and temporal layer id
* \param[in] ps_ctb_in : pointer CTB structure (output of ME/IPE) (frame buffer)
* \param[in] ps_ipe_analyse : pointer IPE analyse structure (frame buffer)
* \param[out] ps_frm_recon : pointer recon picture structure pointer (frame buffer)
* \param ps_cu_tree_out : pointer CU tree buffer (frame buffer),
*                         MAX_NUM_NODES_CU_TREE entries per CTB
* \param[out] ps_ctb_out : pointer CTB output structure (frame buffer)
* \param[out] ps_cu_out : pointer CU output structure (frame buffer)
* \param[out] ps_tu_out : pointer TU output structure (frame buffer)
* \param ps_pu_out : pointer PU structure (frame buffer)
* \param[out] pu1_frm_ecd_data : pointer coeff output (frame buffer)
* \param[in] ps_frm_ctb_prms : frame level CTB parameters (ctb size, number of
*                              ctbs, per row maximum CU/TU/PU counts, tile id map)
* \param[in] ps_frm_lamda : Frame level Lambda params
* \param[in] ps_multi_thrd_ctxt : multi thread context used for job queue access
* \param[in] thrd_id : index selecting the per thread enc_loop context
* \param[in] i4_enc_frm_id : frame id passed on to the job queue functions
* \param[in] i4_pass : forwarded unchanged to ihevce_enc_loop_process_row()
*
* \return
*    None
*
* Note : Currently the frame level calculations done assume that
*        frame width of the input / recon are exact multiple of ctb size
*
* \author
*  Ittiam
*
*****************************************************************************
*/
ihevce_enc_loop_process(void * pv_ctxt,ihevce_lap_enc_buf_t * ps_curr_inp,ctb_analyse_t * ps_ctb_in,ipe_l0_ctb_analyse_for_me_t * ps_ipe_analyse,recon_pic_buf_t * ps_frm_recon,cur_ctb_cu_tree_t * ps_cu_tree_out,ctb_enc_loop_out_t * ps_ctb_out,cu_enc_loop_out_t * ps_cu_out,tu_enc_loop_out_t * ps_tu_out,pu_t * ps_pu_out,UWORD8 * pu1_frm_ecd_data,frm_ctb_ctxt_t * ps_frm_ctb_prms,frm_lambda_ctxt_t * ps_frm_lamda,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,WORD32 thrd_id,WORD32 i4_enc_frm_id,WORD32 i4_pass)2969 void ihevce_enc_loop_process(
2970     void *pv_ctxt,
2971     ihevce_lap_enc_buf_t *ps_curr_inp,
2972     ctb_analyse_t *ps_ctb_in,
2973     ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse,
2974     recon_pic_buf_t *ps_frm_recon,
2975     cur_ctb_cu_tree_t *ps_cu_tree_out,
2976     ctb_enc_loop_out_t *ps_ctb_out,
2977     cu_enc_loop_out_t *ps_cu_out,
2978     tu_enc_loop_out_t *ps_tu_out,
2979     pu_t *ps_pu_out,
2980     UWORD8 *pu1_frm_ecd_data,
2981     frm_ctb_ctxt_t *ps_frm_ctb_prms,
2982     frm_lambda_ctxt_t *ps_frm_lamda,
2983     multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
2984     WORD32 thrd_id,
2985     WORD32 i4_enc_frm_id,
2986     WORD32 i4_pass)
2987 {
2988     WORD32 vert_ctr;
2989     WORD32 tile_col_idx;
2990     iv_enc_yuv_buf_t s_curr_src_bufs;
2991     iv_enc_yuv_buf_t s_curr_recon_bufs;
2992     iv_enc_yuv_buf_src_t s_curr_recon_bufs_src;
2993     UWORD32 *pu4_pu_offsets;
2994     WORD32 end_of_frame;
2995     UWORD8 *apu1_y_sub_pel_planes[3];
2996     pad_interp_recon_frm_t s_pad_interp_recon;
2997     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_ctxt;
2998 
         /* per thread context: all frame / row state below is written into it */
2999     ihevce_enc_loop_ctxt_t *ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[thrd_id];
3000 
3001     WORD32 i4_bitrate_instance_num = ps_ctxt->i4_bitrate_instance_num;
3002 
3003     /* initialize the closed loop lambdas, thread id and ref-pic flag for the current frame */
3004     ps_ctxt->i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
3005     ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
3006     ps_ctxt->u4_chroma_cost_weighing_factor = ps_frm_lamda->u4_chroma_cost_weighing_factor;
3007     ps_ctxt->i4_satd_lamda = ps_frm_lamda->i4_cl_satd_lambda_qf;
3008     ps_ctxt->i4_sad_lamda = ps_frm_lamda->i4_cl_sad_type2_lambda_qf;
3009     ps_ctxt->thrd_id = thrd_id;
3010     ps_ctxt->u1_is_refPic = ps_curr_inp->s_lap_out.i4_is_ref_pic;
3011 
3012 #if DISABLE_SAO_WHEN_NOISY
3013     ps_ctxt->s_sao_ctxt_t.ps_ctb_data = ps_ctb_in;
3014     ps_ctxt->s_sao_ctxt_t.i4_ctb_data_stride = ps_frm_ctb_prms->i4_num_ctbs_horz;
3015 #endif
3016 
         /* NOTE(review): ps_func_selector is neither a parameter nor a local of   */
         /* this function; confirm it is visible at file scope before enabling     */
         /* ENABLE_TU_TREE_DETERMINATION_IN_RDOPT                                  */
3017 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
3018     ps_ctxt->pv_err_func_selector = ps_func_selector;
3019 #endif
3020 
         /* deblk / pad / hpel flag for the current picture is forced on whenever  */
         /* either slice level SAO flag is set, irrespective of the recon flag     */
3021     ps_ctxt->i4_deblk_pad_hpel_cur_pic =
3022         ps_frm_recon->i4_deblk_pad_hpel_cur_pic ||
3023         ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
3024         ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag;
3025 
3026     /* Share all reference pictures with nbr clients. This flag will be used only
3027     in case of dist-enc mode */
3028     ps_ctxt->i4_share_flag = (ps_frm_recon->i4_is_reference != 0);
3029     ps_ctxt->pv_frm_recon = (void *)ps_frm_recon;
3030 
3031     /* Register the frame level ssd lambda for both luma and chroma in the SAO context */
3032     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
3033     ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
3034 
3035     ihevce_populate_cl_cu_lambda_prms(
3036         ps_ctxt,
3037         ps_frm_lamda,
3038         (WORD32)ps_ctxt->i1_slice_type,
3039         ps_curr_inp->s_lap_out.i4_temporal_lyr_id,
3040         ENC_LOOP_LAMBDA_TYPE);
3041 
         /* intra evaluation is disabled only when DISABLE_INTRA_IN_BPICS is set,  */
         /* the quality preset is IHEVCE_QUALITY_P6 and the temporal layer id is   */
         /* above TEMPORAL_LAYER_DISABLE                                           */
3042     ps_ctxt->u1_disable_intra_eval = DISABLE_INTRA_IN_BPICS &&
3043                                      (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
3044                                      (ps_ctxt->i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE);
3045 
3046     end_of_frame = 0;
3047 
3048     /* ----------------------------------------------------- */
3049     /* store the stride and dimensions of source and recon   */
3050     /* buffer pointers will be over written at every CTB row */
3051     /* ----------------------------------------------------- */
3052     memcpy(&s_curr_src_bufs, &ps_curr_inp->s_lap_out.s_input_buf, sizeof(iv_enc_yuv_buf_t));
3053 
3054     memcpy(&s_curr_recon_bufs, &ps_frm_recon->s_yuv_buf_desc, sizeof(iv_enc_yuv_buf_t));
3055 
3056     memcpy(&s_curr_recon_bufs_src, &ps_frm_recon->s_yuv_buf_desc_src, sizeof(iv_enc_yuv_buf_src_t));
3057 
3058     /* get the frame level pu offset pointer*/
3059     pu4_pu_offsets = ps_frm_recon->pu4_pu_off;
3060 
3061     s_pad_interp_recon.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
3062 
3063     /* ------------ Loop over all the CTB rows --------------- */
         /* one job (CTB row of one tile column) is handled per iteration; the     */
         /* loop ends when the job queue returns NULL                              */
3064     while(0 == end_of_frame)
3065     {
3066         UWORD8 *pu1_tmp;
3067         UWORD8 *pu1_row_pu_map;
3068         UWORD8 *pu1_row_ecd_data;
3069         ctb_analyse_t *ps_ctb_row_in;
3070         ctb_enc_loop_out_t *ps_ctb_row_out;
3071         cu_enc_loop_out_t *ps_row_cu;
3072         tu_enc_loop_out_t *ps_row_tu;
3073         pu_t *ps_row_pu;
3074         pu_col_mv_t *ps_row_col_pu;
3075         job_queue_t *ps_job;
3076         UWORD32 *pu4_pu_row_offsets;
3077         UWORD16 *pu2_num_pu_row;
3078 
3079         ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse;
3080         cur_ctb_cu_tree_t *ps_row_cu_tree;
             /* chroma array type 2 selects the is_inp_422 row / coeff size formulas below */
3081         UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
3082 
3083         /* Get the current row from the job queue; the job type is offset by the bitrate instance */
3084         ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
3085             ps_multi_thrd_ctxt, ENC_LOOP_JOB + i4_bitrate_instance_num, 1, i4_enc_frm_id);
3086 
3087         /* Register the pointer to ctb out of the current frame*/
3088         ps_ctxt->s_sao_ctxt_t.ps_ctb_out = ps_ctb_out;
3089 
3090         /* If all rows are done, set the end of process flag to 1, */
3091         /* and the current row to -1 */
3092         if(NULL == ps_job)
3093         {
3094             vert_ctr = -1;
3095             tile_col_idx = -1;
3096             end_of_frame = 1;
3097         }
3098         else
3099         {
3100             ihevce_tile_params_t *ps_col_tile_params_temp;
3101             ihevce_tile_params_t *ps_tile_params;
3102             WORD32 i4_tile_id;
3103 
3104             ASSERT((ENC_LOOP_JOB + i4_bitrate_instance_num) == ps_job->i4_task_type);
3105             /* set the output dependency */
3106             ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_enc_frm_id);
3107 
3108             /* Obtain the current row's details from the job */
3109             vert_ctr = ps_job->s_job_info.s_enc_loop_job_info.i4_ctb_row_no;
3110             {
3111                 /* Obtain the current column tile index from the job */
3112                 tile_col_idx = ps_job->s_job_info.s_enc_loop_job_info.i4_tile_col_idx;
3113 
3114                 /* The tile parameter for the col. idx. Use only the properties
3115                 which is same for all the bottom tiles like width, start_x, etc.
3116                 Don't use height, start_y, etc.                                  */
3117                 ps_col_tile_params_temp =
3118                     ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + tile_col_idx);
3119 
3120                 /* Derive actual tile_id based on vert_ctr */
                     /* (looked up in the tile id map at row vert_ctr, column = first ctb x of the tile column) */
3121                 i4_tile_id =
3122                     *(ps_frm_ctb_prms->pi4_tile_id_map +
3123                       vert_ctr * ps_frm_ctb_prms->i4_tile_id_ctb_map_stride +
3124                       ps_col_tile_params_temp->i4_first_ctb_x);
3125                 /* Derive pointer to current tile prms */
3126                 ps_tile_params =
3127                     ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + i4_tile_id);
3128             }
3129 
3130             ps_ctxt->i4_tile_col_idx = tile_col_idx;
3131             /* derive the current ctb row pointers */
3132 
3133             /* luma src */
                 /* frame base adjusted by the start offsets, then advanced by vert_ctr CTB rows */
3134             pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
3135                       (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
3136                        ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
3137                       ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
3138 
3139             pu1_tmp +=
3140                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size *
3141                  ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd);
3142 
3143             s_curr_src_bufs.pv_y_buf = pu1_tmp;
3144 
                 /* chroma source row pointer is derived only when u1_is_input_data_hbd is not set */
3145             if(!ps_ctxt->u1_is_input_data_hbd)
3146             {
3147                 /* cb src */
                     /* chroma rows per CTB row: ctb_size when is_inp_422, ctb_size / 2 otherwise; */
                     /* unlike luma, no start offset is applied to the chroma base pointer here    */
3148                 pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
3149                 pu1_tmp +=
3150                     (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
3151                      ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd);
3152 
3153                 s_curr_src_bufs.pv_u_buf = pu1_tmp;
3154             }
3155 
3156             /* luma recon */
3157             pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3158             pu1_tmp +=
3159                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3160 
3161             s_curr_recon_bufs.pv_y_buf = pu1_tmp;
                 /* the pad / interp context receives the frame base pointer, not the row pointer */
3162             s_pad_interp_recon.pu1_luma_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3163             s_pad_interp_recon.i4_luma_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
                 /* NOTE(review): the chroma fields and i4_ctb_size of s_pad_interp_recon are only */
                 /* written inside this branch, so they stay uninitialised in this function when   */
                 /* u1_is_input_data_hbd is set - confirm that path is never taken                 */
3164             if(!ps_ctxt->u1_is_input_data_hbd)
3165             {
3166                 /* cb recon */
3167                 pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3168                 pu1_tmp +=
3169                     (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
3170                      ps_frm_recon->s_yuv_buf_desc.i4_uv_strd);
3171 
3172                 s_curr_recon_bufs.pv_u_buf = pu1_tmp;
3173                 s_pad_interp_recon.pu1_chrm_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3174                 s_pad_interp_recon.i4_chrm_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
3175 
3176                 s_pad_interp_recon.i4_ctb_size = ps_frm_ctb_prms->i4_ctb_size;
3177 
3178                 /* Register the source buffer pointers in sao context*/
3179                 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_src_buf =
3180                     (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
3181                     (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
3182                      ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
3183                     ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
3184 
3185                 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_src_stride =
3186                     ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd;
3187 
3188                 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_src_buf =
3189                     (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
3190 
3191                 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_src_stride =
3192                     ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd;
3193             }
3194 
3195             /* Subpel planes hxfy, fxhy, hxhy*/
                 /* each plane: row pointer uses the luma recon stride; pad / interp context gets the frame base */
3196             pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[0];
3197             pu1_tmp +=
3198                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3199             apu1_y_sub_pel_planes[0] = pu1_tmp;
3200             s_pad_interp_recon.pu1_sbpel_hxfy = ps_frm_recon->apu1_y_sub_pel_planes[0];
3201 
3202             pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[1];
3203             pu1_tmp +=
3204                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3205             apu1_y_sub_pel_planes[1] = pu1_tmp;
3206             s_pad_interp_recon.pu1_sbpel_fxhy = ps_frm_recon->apu1_y_sub_pel_planes[1];
3207 
3208             pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[2];
3209             pu1_tmp +=
3210                 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3211             apu1_y_sub_pel_planes[2] = pu1_tmp;
3212             s_pad_interp_recon.pu1_sbpel_hxhy = ps_frm_recon->apu1_y_sub_pel_planes[2];
3213 
3214             /* row level coeffs buffer */
                 /* per row size: (2 * max_tus_in_row) units when is_inp_422, else           */
                 /* (1.5 * max_tus_in_row) units, each of MAX_SCAN_COEFFS_BYTES_4x4 bytes     */
3215             pu1_row_ecd_data =
3216                 pu1_frm_ecd_data +
3217                 (vert_ctr *
3218                  ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_max_tus_in_row << 1)
3219                                     : ((ps_frm_ctb_prms->i4_max_tus_in_row * 3) >> 1)) *
3220                  MAX_SCAN_COEFFS_BYTES_4x4);
3221 
3222             /* Row level CU buffer */
3223             ps_row_cu = ps_cu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_cus_in_row);
3224 
3225             /* Row level TU buffer */
3226             ps_row_tu = ps_tu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_tus_in_row);
3227 
3228             /* Row level PU buffer */
3229             ps_row_pu = ps_pu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row);
3230 
3231             /* Row level colocated PU buffer */
3232             /* ps_frm_col_mv has (i4_num_ctbs_horz + 1) CTBs for stride */
3233             ps_row_col_pu =
3234                 ps_frm_recon->ps_frm_col_mv + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
3235                                                ps_frm_ctb_prms->i4_num_pus_in_ctb);
3236             /* Row level col PU map buffer */
3237             /* pu1_frm_pu_map has (i4_num_ctbs_horz + 1) CTBs for stride */
3238             pu1_row_pu_map =
3239                 ps_frm_recon->pu1_frm_pu_map + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
3240                                                 ps_frm_ctb_prms->i4_num_pus_in_ctb);
3241             /* row ctb in pointer  */
3242             ps_ctb_row_in = ps_ctb_in + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3243 
3244             /* row ctb out pointer  */
3245             ps_ctb_row_out = ps_ctb_out + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3246 
3247             /* row number of PUs map pointer */
3248             pu2_num_pu_row =
3249                 ps_frm_recon->pu2_num_pu_map + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3250 
3251             /* row pu offsets pointer  */
3252             pu4_pu_row_offsets = pu4_pu_offsets + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3253             /* store the first CTB pu offset pointer */
3254             *pu4_pu_row_offsets = vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row;
3255             /* Initialize ptr to current IPE row */
3256             ps_row_ipe_analyse = ps_ipe_analyse + (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz);
3257 
3258             /* Initialize ptr to current row */
3259             ps_row_cu_tree = ps_cu_tree_out +
3260                              (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE);
3261 
3262             /* Get the EncLoop Top-Right CU Dep Mngr */
3263             ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right =
3264                 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[ps_ctxt->i4_enc_frm_id]
3265                                                                    [i4_bitrate_instance_num];
3266             /* Get the EncLoop Deblock Dep Mngr */
3267             ps_ctxt->pv_dep_mngr_enc_loop_dblk =
3268                 ps_master_ctxt
3269                     ->aapv_dep_mngr_enc_loop_dblk[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num];
3270             /* Get the EncLoop Sao Dep Mngr */
3271             ps_ctxt->pv_dep_mngr_enc_loop_sao =
3272                 ps_master_ctxt
3273                     ->aapv_dep_mngr_enc_loop_sao[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num];
3274 
                 /* cabac state storage of the current row inside the master context */
3275             ps_ctxt->pu1_curr_row_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr][0];
3276 
3277             {
3278                 /* derive the pointers of top row buffers */
                     /* NOTE(review): for vert_ctr == 0 the (vert_ctr - 1) offset places these   */
                     /* pointers one stride before the bitrate instance's region; presumably     */
                     /* they are not dereferenced for the first row - confirm                    */
3279                 ps_ctxt->pv_top_row_luma =
3280                     (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
3281                     (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
3282                     (vert_ctr - 1) * ps_ctxt->i4_top_row_luma_stride;
3283 
3284                 ps_ctxt->pv_top_row_chroma =
3285                     (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
3286                     (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
3287                     (vert_ctr - 1) * ps_ctxt->i4_top_row_chroma_stride;
3288 
3289                 /* derive the pointers of bottom row buffers to update current row data */
3290                 ps_ctxt->pv_bot_row_luma =
3291                     (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
3292                     (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
3293                     (vert_ctr)*ps_ctxt->i4_top_row_luma_stride;
3294 
3295                 ps_ctxt->pv_bot_row_chroma =
3296                     (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
3297                     (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
3298                     (vert_ctr)*ps_ctxt->i4_top_row_chroma_stride;
3299 
3300                 /* Register the buffer pointers in sao context*/
3301                 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_recon_buf =
3302                     (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3303                 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_recon_stride =
3304                     ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
3305 
3306                 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_recon_buf =
3307                     (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3308                 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_recon_stride =
3309                     ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
3310 
3311                 ps_ctxt->s_sao_ctxt_t.ps_rdopt_entropy_ctxt = &ps_ctxt->s_rdopt_entropy_ctxt;
3312 
                     /* SAO top buffer strides: ctb aligned width plus 1 (luma) / plus 2 (chroma) */
3313                 ps_ctxt->s_sao_ctxt_t.i4_frm_top_luma_buf_stride =
3314                     ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 1;
3315 
3316                 ps_ctxt->s_sao_ctxt_t.i4_frm_top_chroma_buf_stride =
3317                     ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 2;
3318             }
3319 
                 /* top / bottom neighbour rows, indexed the same way as the top row pixel buffers above */
3320             ps_ctxt->ps_top_row_nbr =
3321                 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
3322                 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
3323                 (vert_ctr - 1) * ps_ctxt->i4_top_row_nbr_stride;
3324 
3325             ps_ctxt->ps_bot_row_nbr =
3326                 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
3327                 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
3328                 (vert_ctr)*ps_ctxt->i4_top_row_nbr_stride;
3329 
                 /* cabac state of the row above; NULL for the first row */
3330             if(vert_ctr > 0)
3331             {
3332                 ps_ctxt->pu1_top_rt_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr - 1][0];
3333             }
3334             else
3335             {
3336                 ps_ctxt->pu1_top_rt_cabac_state = NULL;
3337             }
3338 
                 /* both rdopt CU entropy contexts must agree on sign data hiding */
3339             ASSERT(
3340                 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0]
3341                     .ps_pps->i1_sign_data_hiding_flag ==
3342                 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1]
3343                     .ps_pps->i1_sign_data_hiding_flag);
3344 
3345             /* call the row level processing function */
3346             ihevce_enc_loop_process_row(
3347                 ps_ctxt,
3348                 &s_curr_src_bufs,
3349                 &s_curr_recon_bufs,
3350                 &s_curr_recon_bufs_src,
3351                 &apu1_y_sub_pel_planes[0],
3352                 ps_ctb_row_in,
3353                 ps_ctb_row_out,
3354                 ps_row_ipe_analyse,
3355                 ps_row_cu_tree,
3356                 ps_row_cu,
3357                 ps_row_tu,
3358                 ps_row_pu,
3359                 ps_row_col_pu,
3360                 pu2_num_pu_row,
3361                 pu1_row_pu_map,
3362                 pu1_row_ecd_data,
3363                 pu4_pu_row_offsets,
3364                 ps_frm_ctb_prms,
3365                 vert_ctr,
3366                 ps_frm_recon,
3367                 ps_ctxt->pv_dep_mngr_encloop_dep_me,
3368                 &s_pad_interp_recon,
3369                 i4_pass,
3370                 ps_multi_thrd_ctxt,
3371                 ps_tile_params);
3372         }
3373     }
3374 }
3375 
3376 /*!
3377 ******************************************************************************
3378 * \if Function name : ihevce_enc_loop_dblk_get_prms_dep_mngr \endif
3379 *
3380 * \brief Returns to the caller key attributes relevant for dependency manager,
3381 *        ie, the number of vertical units in l0 layer
3382 *
3383 * \par Description:
3384 *
3385 * \param[in] pai4_ht    : ht
3386 * \param[out] pi4_num_vert_units_in_lyr : Pointer to store num vertical units
3387 *                                         for deblocking
3388 *
3389 * \return
3390 *    None
3391 *
3392 * \author
3393 *  Ittiam
3394 *
3395 *****************************************************************************
3396 */
ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht,WORD32 * pi4_num_vert_units_in_lyr)3397 void ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht, WORD32 *pi4_num_vert_units_in_lyr)
3398 {
3399     /* Blk ht at a given layer*/
3400     WORD32 unit_ht_c;
3401     WORD32 ctb_size = 64;
3402 
3403     /* compute blk ht and unit ht */
3404     unit_ht_c = ctb_size;
3405 
3406     /* set the numebr of vertical units */
3407     *pi4_num_vert_units_in_lyr = (i4_ht + unit_ht_c - 1) / unit_ht_c;
3408 }
3409 
3410 /*!
3411 ******************************************************************************
3412 * \if Function name : ihevce_enc_loop_get_num_mem_recs \endif
3413 *
3414 * \brief
3415 *    Number of memory records are returned for enc_loop module
3416 * Note : Include TOT MEM. req. for ENC.LOOP + TOT MEM. req. for Dep Mngr for Dblk
3417 *
3418 * \return
3419 *    None
3420 *
3421 * \author
3422 *  Ittiam
3423 *
3424 *****************************************************************************
3425 */
3426 WORD32
ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst,WORD32 i4_num_enc_loop_frm_pllel)3427     ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst, WORD32 i4_num_enc_loop_frm_pllel)
3428 {
3429     WORD32 enc_loop_mem_recs = NUM_ENC_LOOP_MEM_RECS;
3430     WORD32 enc_loop_dblk_dep_mngr_mem_recs =
3431         i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3432     WORD32 enc_loop_sao_dep_mngr_mem_recs =
3433         i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3434     WORD32 enc_loop_cu_top_right_dep_mngr_mem_recs =
3435         i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3436     WORD32 enc_loop_aux_br_dep_mngr_mem_recs =
3437         i4_num_enc_loop_frm_pllel * (i4_num_bitrate_inst - 1) * ihevce_dmgr_get_num_mem_recs();
3438 
3439     return (
3440         (enc_loop_mem_recs + enc_loop_dblk_dep_mngr_mem_recs + enc_loop_sao_dep_mngr_mem_recs +
3441          enc_loop_cu_top_right_dep_mngr_mem_recs + enc_loop_aux_br_dep_mngr_mem_recs));
3442 }
3443 /*!
3444 ******************************************************************************
3445 * \if Function name : ihevce_enc_loop_get_mem_recs \endif
3446 *
3447 * \brief
3448 *    Memory requirements are returned for ENC_LOOP.
3449 *
3450 * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
3451 * \param[in] ps_init_prms : Create time static parameters
3452 * \param[in] i4_num_proc_thrds : Number of processing threads for this module
3453 * \param[in] i4_mem_space : memspace in which memory request should be done
3454 *
3455 * \return
3456 *    None
3457 *
3458 * \author
3459 *  Ittiam
3460 *
3461 *****************************************************************************
3462 */
ihevce_enc_loop_get_mem_recs(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,WORD32 i4_num_bitrate_inst,WORD32 i4_num_enc_loop_frm_pllel,WORD32 i4_mem_space,WORD32 i4_resolution_id)3463 WORD32 ihevce_enc_loop_get_mem_recs(
3464     iv_mem_rec_t *ps_mem_tab,
3465     ihevce_static_cfg_params_t *ps_init_prms,
3466     WORD32 i4_num_proc_thrds,
3467     WORD32 i4_num_bitrate_inst,
3468     WORD32 i4_num_enc_loop_frm_pllel,
3469     WORD32 i4_mem_space,
3470     WORD32 i4_resolution_id)
3471 {
3472     UWORD32 u4_width, u4_height, n_tabs;
3473     UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
3474     WORD32 ctr;
3475     WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
3476 
3477     /* derive frame dimensions */
3478     /*width of the input YUV to be encoded */
3479     u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
3480     /*making the width a multiple of CTB size*/
3481     u4_width += SET_CTB_ALIGN(
3482         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
3483 
3484     /*height of the input YUV to be encoded */
3485     u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
3486     /*making the height a multiple of CTB size*/
3487     u4_height += SET_CTB_ALIGN(
3488         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
3489     u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
3490     u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
3491     /* memories should be requested assuming worst case requirements */
3492 
3493     /* Module context structure */
3494     ps_mem_tab[ENC_LOOP_CTXT].i4_mem_size = sizeof(ihevce_enc_loop_master_ctxt_t);
3495 
3496     ps_mem_tab[ENC_LOOP_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3497 
3498     ps_mem_tab[ENC_LOOP_CTXT].i4_mem_alignment = 8;
3499 
3500     /* Thread context structure */
3501     ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_size =
3502         i4_num_proc_thrds * sizeof(ihevce_enc_loop_ctxt_t);
3503 
3504     ps_mem_tab[ENC_LOOP_THRDS_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3505 
3506     ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_alignment = 16;
3507 
3508     /* Scale matrices */
3509     ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
3510 
3511     ps_mem_tab[ENC_LOOP_SCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3512 
3513     ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_alignment = 8;
3514 
3515     /* Rescale matrices */
3516     ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
3517 
3518     ps_mem_tab[ENC_LOOP_RESCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3519 
3520     ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_alignment = 8;
3521 
3522     /* top row luma one row of pixel data per CTB row */
3523     if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
3524     {
3525         ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
3526                                                     (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD16) *
3527                                                     i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3528     }
3529     else
3530     {
3531         ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
3532                                                     (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD8) *
3533                                                     i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3534     }
3535 
3536     ps_mem_tab[ENC_LOOP_TOP_LUMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3537 
3538     ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_alignment = 8;
3539 
3540     /* top row chroma */
3541     if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
3542     {
3543         ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
3544             (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD16) *
3545             i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3546     }
3547     else
3548     {
3549         ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
3550             (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD8) *
3551             i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3552     }
3553 
3554     ps_mem_tab[ENC_LOOP_TOP_CHROMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3555 
3556     ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_alignment = 8;
3557 
3558     /* top row neighbour 4x4 */
3559     ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_size =
3560         (u4_ctb_rows_in_a_frame + 1) * (((u4_width + MAX_CU_SIZE) >> 2) + 1) * sizeof(nbr_4x4_t) *
3561         i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3562 
3563     ps_mem_tab[ENC_LOOP_TOP_NBR4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3564 
3565     ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_alignment = 8;
3566 
3567     /* memory to dump rate control parameters by each thread for each bit-rate instance */
3568     /* RC params collated by each thread for each bit-rate instance separately */
3569     ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_size = i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel *
3570                                                  i4_num_proc_thrds * sizeof(enc_loop_rc_params_t);
3571 
3572     ps_mem_tab[ENC_LOOP_RC_PARAMS].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3573 
3574     ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_alignment = 8;
3575     /* Memory required for deblocking */
3576     {
3577         /* Memory to store Qp of top4x4 blocks for each CTB row.
3578         This memory is allocated at frame level and shared across
3579         all cores. The Qp values are needed to form Qp-map(described
3580         in the ENC_LOOP_DEBLOCKING section below)*/
3581 
3582         UWORD32 u4_size_bs_memory, u4_size_qp_memory;
3583         UWORD32 u4_size_top_4x4_qp_memory;
3584 
3585         /*Memory required to store Qp of top4x4 blocks for a CTB row for entire frame*/
3586         /*Space required per CTB*/
3587         u4_size_top_4x4_qp_memory = (MAX_CTB_SIZE / 4);
3588         /*Space required for entire CTB row*/
3589         u4_size_top_4x4_qp_memory *= u4_ctb_in_a_row;
3590         /*Space required for entire frame*/
3591         u4_size_top_4x4_qp_memory *= u4_ctb_rows_in_a_frame;
3592         /*Space required for multiple bitrate*/
3593         u4_size_top_4x4_qp_memory *= i4_num_bitrate_inst;
3594         /*Space required for multiple frames in parallel*/
3595         u4_size_top_4x4_qp_memory *= i4_num_enc_loop_frm_pllel;
3596 
3597         ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_size = u4_size_top_4x4_qp_memory;
3598         ps_mem_tab[ENC_LOOP_QP_TOP_4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3599         ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_alignment = 8;
3600 
3601         /* Memory allocation of BS and Qp-map for deblocking at CTB-row level:
3602         ## Boundary Strength(Vertical):
3603         BS stored per CTB at one stretch i.e. for a 64x CTB first 8 entries belongs to first CTB
3604         of the row followed by 8 entries of second CTB and so on.
3605         8 entries: Includes left edge of current CTB and excludes right edge.
3606         ## Boundary Strength(Horizontal):
3607         Same as Vertical.
3608         8 entries:  Includes top edge of current CTB and excludes bottom edge.
3609 
3610         ## Qp-map storage:
3611         T0 T1 T2 T3 T4 T5 ..........to the end of the CTB row
3612         00 01 02 03 04 05 ..........to the end of the CTB row
3613         10 11 12 13 14 15 ..........to the end of the CTB row
3614         20 21 22 23 24 25 ..........to the end of the CTB row
3615         30 31 32 33 34 35 ..........to the end of the CTB row
3616         40 41 42 43 44 45 ..........to the end of the CTB row
3617         ............................to the end of the CTB row
3618         upto height_of_CTB..........to the end of the CTB row
3619 
3620         Qp is stored for each "4x4 block" in a proper 2-D array format (One entry for each 4x4).
3621         A 2-D array of height= (height_of_CTB +1), and width = (width_of_CTB).
3622         where,
3623         => height_of_CTB = number of 4x4 blocks in a CTB  vertically,
3624         => +1 is done to store Qp of lowest 4x4-block layer of top-CTB
3625         in order to deblock top edge of current CTB.
3626         => width_of_CTB  = number of 4x4 blocks in a CTB  horizontally,
3627         */
3628 
3629         /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
3630         /*1 vertical edge per 8 pixel*/
3631         u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
3632         /*Vertical edges for entire width of CTB row*/
3633         u4_size_bs_memory *= u4_ctb_in_a_row;
3634         /*Each vertical edge of CTB row is 4 bytes*/
3635         u4_size_bs_memory = u4_size_bs_memory << 2;
3636         /*Adding Memory required for storing horizontal BS by doubling*/
3637         u4_size_bs_memory = u4_size_bs_memory << 1;
3638 
3639         /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
3640         /*Number of 4x4 blocks in the width of a CTB*/
3641         u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
3642         /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
3643         4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
3644         u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
3645         /*Storage for entire CTB row*/
3646         u4_size_qp_memory *= u4_ctb_in_a_row;
3647 
3648         /*Multiplying by i4_num_proc_thrds to assign memory for each core*/
3649         ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_size =
3650             i4_num_proc_thrds * (u4_size_bs_memory + u4_size_qp_memory);
3651 
3652         ps_mem_tab[ENC_LOOP_DEBLOCKING].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3653 
3654         ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_alignment = 8;
3655     }
3656 
3657     /* Memory required to store pred for 422 chroma */
3658     ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_size =
3659         i4_num_proc_thrds * MAX_CTB_SIZE * MAX_CTB_SIZE * 2 *
3660         (i4_chroma_format == IV_YUV_422SP_UV) *
3661         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3662 
3663     ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3664 
3665     ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_alignment = 8;
3666 
3667     /* Memory for inter pred buffers */
3668     {
3669         WORD32 i4_num_bufs_per_thread = 0;
3670 
3671         WORD32 i4_buf_size_per_cand =
3672             (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
3673             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3674         WORD32 i4_quality_preset =
3675             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
3676         switch(i4_quality_preset)
3677         {
3678         case IHEVCE_QUALITY_P0:
3679         {
3680             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_PQ;
3681             break;
3682         }
3683         case IHEVCE_QUALITY_P2:
3684         {
3685             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HQ;
3686             break;
3687         }
3688         case IHEVCE_QUALITY_P3:
3689         {
3690             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_MS;
3691             break;
3692         }
3693         case IHEVCE_QUALITY_P4:
3694         {
3695             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HS;
3696             break;
3697         }
3698         case IHEVCE_QUALITY_P5:
3699         case IHEVCE_QUALITY_P6:
3700         case IHEVCE_QUALITY_P7:
3701         {
3702             i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_ES;
3703             break;
3704         }
3705         default:
3706         {
3707             ASSERT(0);
3708         }
3709         }
3710 
3711         i4_num_bufs_per_thread += 4;
3712 
3713         ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size =
3714             i4_num_bufs_per_thread * i4_num_proc_thrds * i4_buf_size_per_cand;
3715 
3716         ps_mem_tab[ENC_LOOP_INTER_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3717 
3718         ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_alignment = 8;
3719     }
3720 
3721     /* Memory required to store chroma intra pred */
3722     ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_size =
3723         i4_num_proc_thrds * (MAX_TU_SIZE) * (MAX_TU_SIZE)*2 * NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD *
3724         ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3725         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3726 
3727     ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3728 
3729     ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_alignment = 8;
3730 
3731     /* Memory required to store pred for reference substitution output */
3732     /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
3733        allocate 16 bytes to the left and 7 bytes to the right to facilitate
3734        SIMD access */
3735     ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_size =
3736         i4_num_proc_thrds * (((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING)
3737         + INTRAPRED_SIMD_LEFT_PADDING)*
3738         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3739 
3740     ps_mem_tab[ENC_LOOP_REF_SUB_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3741 
3742     ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_alignment = 8;
3743 
3744     /* Memory required to store pred for reference filtering output */
3745     /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
3746        allocate 16 bytes to the left and 7 bytes to the right to facilitate
3747        SIMD access */
3748     ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_size =
3749         i4_num_proc_thrds * (((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING)
3750         + INTRAPRED_SIMD_LEFT_PADDING)*
3751         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3752 
3753     ps_mem_tab[ENC_LOOP_REF_FILT_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3754 
3755     ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_alignment = 8;
3756 
3757 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3758     if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
3759 #endif
3760     {
3761         /* Memory assignments for recon storage during CU Recursion */
3762         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size =
3763             i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
3764             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3765 
3766         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3767 
3768         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
3769 
3770         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size =
3771             i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
3772             ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3773             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3774 
3775         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3776 
3777         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
3778     }
3779 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3780     else
3781     {
3782         /* Memory assignments for recon storage during CU Recursion */
3783         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size = 0;
3784 
3785         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3786 
3787         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
3788 
3789         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size = 0;
3790 
3791         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3792 
3793         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
3794     }
3795 #endif
3796 
3797 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3798     if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
3799 #endif
3800     {
3801         /* Memory assignments for pred storage during CU Recursion */
3802         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size =
3803             i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
3804             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3805 
3806         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3807 
3808         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
3809 
3810         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size =
3811             i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
3812             ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3813             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3814 
3815         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3816 
3817         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
3818     }
3819 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3820     else
3821     {
3822         /* Memory assignments for pred storage during CU Recursion */
3823         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size = 0;
3824 
3825         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3826 
3827         ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
3828 
3829         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size = 0;
3830 
3831         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3832 
3833         ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
3834     }
3835 #endif
3836 
3837     /* Memory assignments for CTB left luma data storage */
3838     ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_size =
3839         i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
3840         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3841 
3842     ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3843 
3844     ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_alignment = 8;
3845 
3846     /* Memory assignments for CTB left chroma data storage */
3847     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size =
3848         i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
3849         ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3850     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size <<=
3851         ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0);
3852 
3853     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3854 
3855     ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_alignment = 8;
3856 
3857     /* Memory required for SAO */
3858     {
3859         WORD32 num_vert_units;
3860         WORD32 num_horz_units;
3861         WORD32 ctb_aligned_ht, ctb_aligned_wd;
3862         WORD32 luma_buf, chroma_buf;
3863 
3864         num_vert_units = u4_height / MAX_CTB_SIZE;
3865         num_horz_units = u4_width / MAX_CTB_SIZE;
3866 
3867         ctb_aligned_ht = u4_height;
3868         ctb_aligned_wd = u4_width;
3869 
3870         /* Memory for top buffer. 1 extra width is required for top buf ptr for row 0
3871         * and 1 extra location is required for top left buf ptr for row 0
3872         * Also 1 extra byte is required for every row for top left pixel if
3873         * the top left ptr is to be passed to leaf level unconditionally
3874         */
3875         luma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 1) * (num_vert_units + 1)) *
3876                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3877         chroma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 2) * (num_vert_units + 1)) *
3878                      ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3879 
3880         ps_mem_tab[ENC_LOOP_SAO].i4_mem_size =
3881             (luma_buf + chroma_buf) * (i4_num_bitrate_inst) * (i4_num_enc_loop_frm_pllel);
3882 
3883         /* Add the memory required to store the sao information of top ctb for top merge
3884         * This is frame level buffer.
3885         */
3886         ps_mem_tab[ENC_LOOP_SAO].i4_mem_size +=
3887             ((num_horz_units * sizeof(sao_enc_t)) * num_vert_units) * (i4_num_bitrate_inst) *
3888             (i4_num_enc_loop_frm_pllel);
3889 
3890         ps_mem_tab[ENC_LOOP_SAO].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3891 
3892         ps_mem_tab[ENC_LOOP_SAO].i4_mem_alignment = 8;
3893     }
3894 
3895     /* Memory for CU level Coeff data buffer */
3896     {
3897         /* 16 additional bytes are required to ensure alignment */
3898         {
3899             ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_size =
3900                 i4_num_proc_thrds *
3901                 (((MAX_LUMA_COEFFS_CTB +
3902                    (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
3903                   16) *
3904                  (2) * sizeof(UWORD8));
3905         }
3906 
3907         ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3908 
3909         ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_alignment = 16;
3910 
3911         ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_size =
3912             i4_num_proc_thrds *
3913             (MAX_LUMA_COEFFS_CTB +
3914              (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) *
3915             sizeof(UWORD8);
3916 
3917         ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3918 
3919         ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_alignment = 16;
3920     }
3921 
3922     /* Memory for CU dequant data buffer */
3923     {
3924         /* 16 additional bytes are required to ensure alignment */
3925         {
3926             ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_size =
3927                 i4_num_proc_thrds *
3928                 (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
3929                                                         : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
3930                  8) *
3931                 (2) * sizeof(WORD16);
3932         }
3933 
3934         ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3935 
3936         ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_alignment = 16;
3937     }
3938 
3939     /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
3940     {
3941         WORD32 i4_memSize_perThread;
3942 
3943         WORD32 i4_chroma_memSize_perThread = 0;
3944         /* 2 bufs each allocated to the two 'enc_loop_cu_final_prms_t' structs */
3945         /* used in RDOPT to store cur and best modes' data */
3946         WORD32 i4_luma_memSize_perThread =
3947             4 * MAX_CU_SIZE * MAX_CU_SIZE *
3948             ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3949 
3950         /* 'Glossary' for comments in the following codeBlock */
3951         /* 1 - 2 Bufs for storing recons of the best modes determined in the */
3952         /* function 'ihevce_intra_chroma_pred_mode_selector' */
3953         /* 2 - 1 buf each allocated to the two 'enc_loop_cu_final_prms_t' structs */
3954         /* used in RDOPT to store cur and best modes' data */
3955         if(i4_chroma_format == IV_YUV_422SP_UV)
3956         {
3957             WORD32 i4_quality_preset =
3958                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
3959             switch(i4_quality_preset)
3960             {
3961             case IHEVCE_QUALITY_P0:
3962             {
3963                 /* 1 */
3964                 i4_chroma_memSize_perThread +=
3965                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
3966                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3967 
3968                 /* 2 */
3969                 i4_chroma_memSize_perThread +=
3970                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
3971                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3972 
3973                 break;
3974             }
3975             case IHEVCE_QUALITY_P2:
3976             {
3977                 /* 1 */
3978                 i4_chroma_memSize_perThread +=
3979                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
3980                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3981 
3982                 /* 2 */
3983                 i4_chroma_memSize_perThread +=
3984                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
3985                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3986 
3987                 break;
3988             }
3989             case IHEVCE_QUALITY_P3:
3990             {
3991                 /* 1 */
3992                 i4_chroma_memSize_perThread +=
3993                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
3994                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3995 
3996                 /* 2 */
3997                 i4_chroma_memSize_perThread +=
3998                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
3999                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4000 
4001                 break;
4002             }
4003             case IHEVCE_QUALITY_P4:
4004             {
4005                 /* 1 */
4006                 i4_chroma_memSize_perThread +=
4007                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
4008                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4009 
4010                 /* 2 */
4011                 i4_chroma_memSize_perThread +=
4012                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
4013                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4014 
4015                 break;
4016             }
4017             case IHEVCE_QUALITY_P5:
4018             {
4019                 /* 1 */
4020                 i4_chroma_memSize_perThread +=
4021                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
4022                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4023 
4024                 /* 2 */
4025                 i4_chroma_memSize_perThread +=
4026                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
4027                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4028 
4029                 break;
4030             }
4031             case IHEVCE_QUALITY_P6:
4032             case IHEVCE_QUALITY_P7:
4033             {
4034                 /* 1 */
4035                 i4_chroma_memSize_perThread +=
4036                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
4037                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4038 
4039                 /* 2 */
4040                 i4_chroma_memSize_perThread +=
4041                     2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
4042                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4043 
4044                 break;
4045             }
4046             }
4047         }
4048         else
4049         {
4050             WORD32 i4_quality_preset =
4051                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
4052             switch(i4_quality_preset)
4053             {
4054             case IHEVCE_QUALITY_P0:
4055             {
4056                 /* 1 */
4057                 i4_chroma_memSize_perThread +=
4058                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
4059                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4060 
4061                 /* 2 */
4062                 i4_chroma_memSize_perThread +=
4063                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4064                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
4065                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4066 
4067                 break;
4068             }
4069             case IHEVCE_QUALITY_P2:
4070             {
4071                 /* 1 */
4072                 i4_chroma_memSize_perThread +=
4073                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
4074                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4075 
4076                 /* 2 */
4077                 i4_chroma_memSize_perThread +=
4078                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4079                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
4080                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4081 
4082                 break;
4083             }
4084             case IHEVCE_QUALITY_P3:
4085             {
4086                 /* 1 */
4087                 i4_chroma_memSize_perThread +=
4088                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
4089                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4090 
4091                 /* 2 */
4092                 i4_chroma_memSize_perThread +=
4093                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4094                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
4095                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4096 
4097                 break;
4098             }
4099             case IHEVCE_QUALITY_P4:
4100             {
4101                 /* 1 */
4102                 i4_chroma_memSize_perThread +=
4103                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
4104                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4105 
4106                 /* 2 */
4107                 i4_chroma_memSize_perThread +=
4108                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4109                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
4110                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4111 
4112                 break;
4113             }
4114             case IHEVCE_QUALITY_P5:
4115             {
4116                 /* 1 */
4117                 i4_chroma_memSize_perThread +=
4118                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
4119                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4120 
4121                 /* 2 */
4122                 i4_chroma_memSize_perThread +=
4123                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4124                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
4125                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4126 
4127                 break;
4128             }
4129             case IHEVCE_QUALITY_P6:
4130             case IHEVCE_QUALITY_P7:
4131             {
4132                 /* 1 */
4133                 i4_chroma_memSize_perThread +=
4134                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
4135                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4136 
4137                 /* 2 */
4138                 i4_chroma_memSize_perThread +=
4139                     2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4140                     ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
4141                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4142 
4143                 break;
4144             }
4145             }
4146         }
4147 
4148         i4_memSize_perThread = i4_luma_memSize_perThread + i4_chroma_memSize_perThread;
4149 
4150         ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size =
4151             i4_num_proc_thrds * i4_memSize_perThread * sizeof(UWORD8);
4152 
4153         ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
4154 
4155         ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_alignment = 16;
4156     }
4157 
4158     n_tabs = NUM_ENC_LOOP_MEM_RECS;
4159 
4160     /*************************************************************************/
4161     /* --- EncLoop Deblock and SAO sync Dep Mngr Mem requests --                     */
4162     /*************************************************************************/
4163 
4164     /* Fill the memtabs for  EncLoop Deblock Dep Mngr */
4165     {
4166         WORD32 count;
4167         WORD32 num_vert_units;
4168         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4169 
4170         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4171         ASSERT(num_vert_units > 0);
4172         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4173         {
4174             for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4175             {
4176                 n_tabs += ihevce_dmgr_get_mem_recs(
4177                     &ps_mem_tab[n_tabs],
4178                     DEP_MNGR_ROW_ROW_SYNC,
4179                     num_vert_units,
4180                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4181                     i4_num_proc_thrds,
4182                     i4_mem_space);
4183             }
4184         }
4185 
4186         /* Fill the memtabs for  EncLoop SAO Dep Mngr */
4187         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4188         {
4189             for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4190             {
4191                 n_tabs += ihevce_dmgr_get_mem_recs(
4192                     &ps_mem_tab[n_tabs],
4193                     DEP_MNGR_ROW_ROW_SYNC,
4194                     num_vert_units,
4195                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4196                     i4_num_proc_thrds,
4197                     i4_mem_space);
4198             }
4199         }
4200     }
4201 
4202     /*************************************************************************/
4203     /* --- EncLoop Top-Right CU sync Dep Mngr Mem requests --                */
4204     /*************************************************************************/
4205 
4206     /* Fill the memtabs for  Top-Right CU sync Dep Mngr */
4207     {
4208         WORD32 count;
4209         WORD32 num_vert_units;
4210         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4211         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4212         ASSERT(num_vert_units > 0);
4213 
4214         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4215         {
4216             for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4217             {
4218                 n_tabs += ihevce_dmgr_get_mem_recs(
4219                     &ps_mem_tab[n_tabs],
4220                     DEP_MNGR_ROW_ROW_SYNC,
4221                     num_vert_units,
4222                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4223                     i4_num_proc_thrds,
4224                     i4_mem_space);
4225             }
4226         }
4227     }
4228 
4229     /*************************************************************************/
4230     /* --- EncLoop Aux. on Ref. bitrate sync Dep Mngr Mem requests --        */
4231     /*************************************************************************/
4232 
4233     /* Fill the memtabs for  EncLoop Aux. on Ref. bitrate Dep Mngr */
4234     {
4235         WORD32 count;
4236         WORD32 num_vert_units;
4237         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4238 
4239         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4240         ASSERT(num_vert_units > 0);
4241 
4242         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4243         {
4244             for(ctr = 1; ctr < i4_num_bitrate_inst; ctr++)
4245             {
4246                 n_tabs += ihevce_dmgr_get_mem_recs(
4247                     &ps_mem_tab[n_tabs],
4248                     DEP_MNGR_ROW_ROW_SYNC,
4249                     num_vert_units,
4250                     ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4251                     i4_num_proc_thrds,
4252                     i4_mem_space);
4253             }
4254         }
4255     }
4256 
4257     return (n_tabs);
4258 }
4259 
4260 /*!
4261 ******************************************************************************
4262 * \if Function name : ihevce_enc_loop_init \endif
4263 *
4264 * \brief
4265 *    Initialization for ENC_LOOP context state structure.
4266 *
4267 * \param[in] ps_mem_tab : pointer to memory descriptors table
4268 * \param[in] ps_init_prms : Create time static parameters
4269 * \param[in] pv_osal_handle : Osal handle
4270 *
4271 * \return
4272 *    void * (declared return type); NOTE(review): confirm what the returned pointer refers to
4273 *
4274 * \author
4275 *  Ittiam
4276 *
4277 *****************************************************************************
4278 */
4279 void *ihevce_enc_loop_init(
4280     iv_mem_rec_t *ps_mem_tab,
4281     ihevce_static_cfg_params_t *ps_init_prms,
4282     WORD32 i4_num_proc_thrds,
4283     void *pv_osal_handle,
4284     func_selector_t *ps_func_selector,
4285     rc_quant_t *ps_rc_quant_ctxt,
4286     ihevce_tile_params_t *ps_tile_params_base,
4287     WORD32 i4_resolution_id,
4288     WORD32 i4_num_enc_loop_frm_pllel,
4289     UWORD8 u1_is_popcnt_available)
4290 {
4291     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
4292     ihevce_enc_loop_ctxt_t *ps_ctxt;
4293     WORD32 ctr, n_tabs;
4294     UWORD32 u4_width, u4_height;
4295     UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
4296     UWORD32 u4_size_bs_memory, u4_size_qp_memory;
4297     UWORD8 *pu1_deblk_base; /*Store the base address of deblcoking memory*/
4298     WORD32 i;
4299     WORD32 i4_num_bitrate_inst =
4300         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_num_bitrate_instances;
4301     enc_loop_rc_params_t *ps_enc_loop_rc_params;
4302     UWORD8 *pu1_sao_base; /* store the base address of sao*/
4303     UWORD32 u4_ctb_aligned_wd, ctb_size, u4_ctb_aligned_ht, num_vert_units;
4304     WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
4305     WORD32 is_hbd_mode = (ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8);
4306     WORD32 i4_enc_frm_id;
4307     WORD32 num_cu_in_ctb;
4308     WORD32 i4_num_tile_cols = 1;  //Default value is 1
4309 
4310     /* ENC_LOOP state structure */
4311     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)ps_mem_tab[ENC_LOOP_CTXT].pv_base;
4312 
4313     ps_master_ctxt->i4_num_proc_thrds = i4_num_proc_thrds;
4314 
4315     ps_ctxt = (ihevce_enc_loop_ctxt_t *)ps_mem_tab[ENC_LOOP_THRDS_CTXT].pv_base;
4316     ps_enc_loop_rc_params = (enc_loop_rc_params_t *)ps_mem_tab[ENC_LOOP_RC_PARAMS].pv_base;
4317     ps_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
4318     /*Calculation of memory sizes for deblocking*/
4319     {
4320         /*width of the input YUV to be encoded. */
4321         u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
4322         /*making the width a multiple of CTB size*/
4323         u4_width += SET_CTB_ALIGN(
4324             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
4325 
4326         u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
4327 
4328         /*height of the input YUV to be encoded */
4329         u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4330         /*making the height a multiple of CTB size*/
4331         u4_height += SET_CTB_ALIGN(
4332             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
4333 
4334         u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
4335 
4336         /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
4337         /*1 vertical edge per 8 pixel*/
4338         u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
4339         /*Vertical edges for entire width of CTB row*/
4340         u4_size_bs_memory *= u4_ctb_in_a_row;
4341         /*Each vertical edge of CTB row is 4 bytes*/
4342         u4_size_bs_memory = u4_size_bs_memory << 2;
4343         /*Adding Memory required for storing horizontal BS by doubling*/
4344         u4_size_bs_memory = u4_size_bs_memory << 1;
4345 
4346         /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
4347         /*Number of 4x4 blocks in the width of a CTB*/
4348         u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
4349         /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
4350         4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
4351         u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
4352         /*Storage for entire CTB row*/
4353         u4_size_qp_memory *= u4_ctb_in_a_row;
4354 
4355         pu1_deblk_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_DEBLOCKING].pv_base;
4356     }
4357 
4358     /*Derive the base pointer of sao*/
4359     pu1_sao_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_SAO].pv_base;
4360     ctb_size = (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
4361     u4_ctb_aligned_wd = u4_width;
4362     u4_ctb_aligned_ht = u4_height;
4363     num_vert_units = (u4_height) / ctb_size;
4364 
4365     for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
4366     {
4367         ps_master_ctxt->aps_enc_loop_thrd_ctxt[ctr] = ps_ctxt;
4368         /* Store Tile params base into EncLoop context */
4369         ps_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
4370         ihevce_cmn_utils_instr_set_router(
4371             &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
4372         ihevce_sifter_sad_fxn_assigner(
4373             (FT_SAD_EVALUATOR **)(&ps_ctxt->pv_evalsad_pt_npu_mxn_8bit), ps_init_prms->e_arch_type);
4374         ps_ctxt->i4_max_search_range_horizontal =
4375             ps_init_prms->s_config_prms.i4_max_search_range_horz;
4376         ps_ctxt->i4_max_search_range_vertical =
4377             ps_init_prms->s_config_prms.i4_max_search_range_vert;
4378 
4379         ps_ctxt->i4_quality_preset =
4380             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
4381 
4382         if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7)
4383         {
4384             ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6;
4385         }
4386 
4387         ps_ctxt->i4_num_proc_thrds = ps_master_ctxt->i4_num_proc_thrds;
4388 
4389         ps_ctxt->i4_rc_pass = ps_init_prms->s_pass_prms.i4_pass;
4390 
4391         ps_ctxt->u1_chroma_array_type = (i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1;
4392 
4393         ps_ctxt->s_deblk_prms.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
4394 
4395         ps_ctxt->pi2_scal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_SCALE_MAT].pv_base;
4396 
4397         ps_ctxt->pi2_rescal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_RESCALE_MAT].pv_base;
4398 
4399         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
4400         {
4401             ps_ctxt->i4_use_ctb_level_lamda = 0;
4402         }
4403         else
4404         {
4405             ps_ctxt->i4_use_ctb_level_lamda = 0;
4406         }
4407 
4408         /** Register the function selector pointer*/
4409         ps_ctxt->ps_func_selector = ps_func_selector;
4410 
4411         ps_ctxt->s_mc_ctxt.ps_func_selector = ps_func_selector;
4412 
4413         /* Initialization for non-distributed mode */
4414         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[0] = 0;
4415         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[1] = 0;
4416         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[2] = 0;
4417         ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[3] = 0;
4418 
4419         ps_ctxt->s_deblk_prms.ps_func_selector = ps_func_selector;
4420         ps_ctxt->i4_top_row_luma_stride = (u4_width + MAX_CU_SIZE + 1);
4421 
4422         ps_ctxt->i4_frm_top_row_luma_size =
4423             ps_ctxt->i4_top_row_luma_stride * (u4_ctb_rows_in_a_frame + 1);
4424 
4425         ps_ctxt->i4_top_row_chroma_stride = (u4_width + MAX_CU_SIZE + 2);
4426 
4427         ps_ctxt->i4_frm_top_row_chroma_size =
4428             ps_ctxt->i4_top_row_chroma_stride * (u4_ctb_rows_in_a_frame + 1);
4429 
4430         {
4431             for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4432             {
4433                 /* +1 is to provision top left pel */
4434                 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
4435                     (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_LUMA].pv_base + 1 +
4436                     (ps_ctxt->i4_frm_top_row_luma_size * i4_enc_frm_id * i4_num_bitrate_inst);
4437 
4438                 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4439                 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
4440                     (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] +
4441                     ps_ctxt->i4_top_row_luma_stride;
4442 
4443                 /* +2 is to provision top left pel */
4444                 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
4445                     (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_CHROMA].pv_base + 2 +
4446                     (ps_ctxt->i4_frm_top_row_chroma_size * i4_enc_frm_id * i4_num_bitrate_inst);
4447 
4448                 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4449                 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
4450                     (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] +
4451                     ps_ctxt->i4_top_row_chroma_stride;
4452             }
4453         }
4454 
4455         /* +1 is to provision top left nbr */
4456         ps_ctxt->i4_top_row_nbr_stride = (((u4_width + MAX_CU_SIZE) >> 2) + 1);
4457         ps_ctxt->i4_frm_top_row_nbr_size =
4458             ps_ctxt->i4_top_row_nbr_stride * (u4_ctb_rows_in_a_frame + 1);
4459         for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4460         {
4461             ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] =
4462                 (nbr_4x4_t *)ps_mem_tab[ENC_LOOP_TOP_NBR4X4].pv_base + 1 +
4463                 (ps_ctxt->i4_frm_top_row_nbr_size * i4_enc_frm_id * i4_num_bitrate_inst);
4464             ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] += ps_ctxt->i4_top_row_nbr_stride;
4465         }
4466 
4467         num_cu_in_ctb = ctb_size / MIN_CU_SIZE;
4468         num_cu_in_ctb *= num_cu_in_ctb;
4469 
4470         /* pointer incremented by 1 row to avoid OOB access in 0th row */
4471 
4472         /* Memory for CU level Coeff data buffer */
4473         {
4474             WORD32 i4_16byte_boundary_overshoot;
4475             WORD32 buf_size_per_cu;
4476             WORD32 buf_size_per_thread_wo_alignment_req;
4477             WORD32 buf_size_per_thread;
4478 
4479             buf_size_per_cu =
4480                 ((MAX_LUMA_COEFFS_CTB +
4481                   (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
4482                  16) *
4483                 sizeof(UWORD8);
4484             buf_size_per_thread_wo_alignment_req = buf_size_per_cu - 16 * sizeof(UWORD8);
4485 
4486             {
4487                 buf_size_per_thread = buf_size_per_cu * (2);
4488 
4489                 for(i = 0; i < 2; i++)
4490                 {
4491                     ps_ctxt->as_cu_prms[i].pu1_cu_coeffs =
4492                         (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].pv_base +
4493                         (ctr * buf_size_per_thread) + (i * buf_size_per_cu);
4494 
4495                     i4_16byte_boundary_overshoot =
4496                         ((LWORD64)ps_ctxt->as_cu_prms[i].pu1_cu_coeffs & 0xf);
4497 
4498                     ps_ctxt->as_cu_prms[i].pu1_cu_coeffs += (16 - i4_16byte_boundary_overshoot);
4499                 }
4500             }
4501 
4502             ps_ctxt->pu1_cu_recur_coeffs =
4503                 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].pv_base +
4504                 (ctr * buf_size_per_thread_wo_alignment_req);
4505         }
4506 
4507         /* Memory for CU dequant data buffer */
4508         {
4509             WORD32 buf_size_per_thread;
4510             WORD32 i4_16byte_boundary_overshoot;
4511 
4512             WORD32 buf_size_per_cu =
4513                 (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
4514                                                         : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
4515                  8) *
4516                 sizeof(WORD16);
4517 
4518             {
4519                 buf_size_per_thread = buf_size_per_cu * 2;
4520 
4521                 for(i = 0; i < 2; i++)
4522                 {
4523                     ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
4524                         (WORD16
4525                              *)((UWORD8 *)ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].pv_base + (ctr * buf_size_per_thread) + (i * buf_size_per_cu));
4526 
4527                     i4_16byte_boundary_overshoot =
4528                         ((LWORD64)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs & 0xf);
4529 
4530                     ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
4531                         (WORD16
4532                              *)((UWORD8 *)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs + (16 - i4_16byte_boundary_overshoot));
4533                 }
4534             }
4535         }
4536 
4537         /*------ Deblocking memory pointer assignments start ------*/
4538 
4539         /*Assign stride = 4x4 blocks in horizontal edge*/
4540         ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
4541 
4542         ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size =
4543             ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd * u4_ctb_rows_in_a_frame;
4544 
4545         /*Assign frame level memory to store the Qp of
4546         top 4x4 neighbours of each CTB row*/
4547         for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4548         {
4549             ps_ctxt->s_deblk_ctbrow_prms.api1_qp_top_4x4_ctb_row[i4_enc_frm_id] =
4550                 (WORD8 *)ps_mem_tab[ENC_LOOP_QP_TOP_4X4].pv_base +
4551                 (ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size * i4_num_bitrate_inst *
4552                  i4_enc_frm_id);
4553         }
4554 
4555         ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_vert = (UWORD32 *)pu1_deblk_base;
4556 
4557         ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_horz =
4558             (UWORD32 *)(pu1_deblk_base + (u4_size_bs_memory >> 1));
4559 
4560         ps_ctxt->s_deblk_ctbrow_prms.pi1_ctb_row_qp = (WORD8 *)pu1_deblk_base + u4_size_bs_memory;
4561 
4562         /*Assign stride = 4x4 blocks in horizontal edge*/
4563         ps_ctxt->s_deblk_ctbrow_prms.u4_qp_buffer_stride = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
4564 
4565         pu1_deblk_base += (u4_size_bs_memory + u4_size_qp_memory);
4566 
4567         /*------ Deblocking memory pointer assignments end ------*/
4568 
4569         /*------SAO memory's pointer assignment starts------------*/
4570         if(!is_hbd_mode)
4571         {
4572             /* 2 is added to allocate top left pixel */
4573             ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size =
4574                 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1);
4575             ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size =
4576                 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 2) * (num_vert_units + 1);
4577             ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units =
4578                 num_vert_units * (u4_ctb_aligned_wd / MAX_CTB_SIZE);
4579 
4580             for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4581             {
4582                 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_luma[i4_enc_frm_id] =
4583                     pu1_sao_base +
4584                     ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
4585                       ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
4586                      i4_num_bitrate_inst * i4_enc_frm_id) +  // move to the next frame_id
4587                     u4_ctb_aligned_wd +
4588                     2;
4589 
4590                 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_chroma[i4_enc_frm_id] =
4591                     pu1_sao_base +
4592                     ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
4593                       ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
4594                      i4_num_bitrate_inst * i4_enc_frm_id) +
4595                     +u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1) +
4596                     u4_ctb_aligned_wd + 4;
4597 
4598                 ps_ctxt->s_sao_ctxt_t.aps_frm_top_ctb_sao[i4_enc_frm_id] = (sao_enc_t *) (pu1_sao_base +
4599                     ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size + ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size)
4600                     *i4_num_bitrate_inst*i4_num_enc_loop_frm_pllel) +
4601                     (ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units * sizeof(sao_enc_t) *i4_num_bitrate_inst * i4_enc_frm_id));
4602             }
4603             ps_ctxt->s_sao_ctxt_t.i4_ctb_size =
4604                 (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
4605             ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd = u4_ctb_aligned_wd;
4606         }
4607 
4608         /*------SAO memory's pointer assignment ends------------*/
4609 
4610         /* perform all one time initialisation here */
4611         ps_ctxt->i4_nbr_map_strd = MAX_PU_IN_CTB_ROW + 1 + 8;
4612 
4613         ps_ctxt->pu1_ctb_nbr_map = ps_ctxt->au1_nbr_ctb_map[0];
4614 
4615         ps_ctxt->i4_deblock_type = ps_init_prms->s_coding_tools_prms.i4_deblocking_type;
4616 
4617         /* move the pointer to 1,2 location */
4618         ps_ctxt->pu1_ctb_nbr_map += ps_ctxt->i4_nbr_map_strd;
4619         ps_ctxt->pu1_ctb_nbr_map++;
4620 
4621         ps_ctxt->i4_cu_csbf_strd = MAX_TU_IN_CTB_ROW;
4622 
4623         CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map4x4TU, 1, 4, ps_ctxt->i4_cu_csbf_strd);
4624 
4625         CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map8x8TU, 4, 8, ps_ctxt->i4_cu_csbf_strd);
4626 
4627         CREATE_SUBBLOCK2CSBFID_MAP(
4628             gai4_subBlock2csbfId_map16x16TU, 16, 16, ps_ctxt->i4_cu_csbf_strd);
4629 
4630         CREATE_SUBBLOCK2CSBFID_MAP(
4631             gai4_subBlock2csbfId_map32x32TU, 64, 32, ps_ctxt->i4_cu_csbf_strd);
4632 
4633         /* For both instance initialise the chroma dequant start idx */
4634         ps_ctxt->as_cu_prms[0].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
4635         ps_ctxt->as_cu_prms[1].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
4636 
4637         /* initialise all the function pointer tables */
4638         {
4639             ps_ctxt->pv_inter_rdopt_cu_mc_mvp =
4640                 (pf_inter_rdopt_cu_mc_mvp)ihevce_inter_rdopt_cu_mc_mvp;
4641 
4642             ps_ctxt->pv_inter_rdopt_cu_ntu = (pf_inter_rdopt_cu_ntu)ihevce_inter_rdopt_cu_ntu;
4643 
4644 #if ENABLE_RDO_BASED_TU_RECURSION
4645             if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4646             {
4647                 ps_ctxt->pv_inter_rdopt_cu_ntu =
4648                     (pf_inter_rdopt_cu_ntu)ihevce_inter_tu_tree_selector_and_rdopt_cost_computer;
4649             }
4650 #endif
4651             ps_ctxt->pv_intra_chroma_pred_mode_selector =
4652                 (pf_intra_chroma_pred_mode_selector)ihevce_intra_chroma_pred_mode_selector;
4653             ps_ctxt->pv_intra_rdopt_cu_ntu = (pf_intra_rdopt_cu_ntu)ihevce_intra_rdopt_cu_ntu;
4654             ps_ctxt->pv_final_rdopt_mode_prcs =
4655                 (pf_final_rdopt_mode_prcs)ihevce_final_rdopt_mode_prcs;
4656             ps_ctxt->pv_store_cu_results = (pf_store_cu_results)ihevce_store_cu_results;
4657             ps_ctxt->pv_enc_loop_cu_bot_copy = (pf_enc_loop_cu_bot_copy)ihevce_enc_loop_cu_bot_copy;
4658             ps_ctxt->pv_enc_loop_ctb_left_copy =
4659                 (pf_enc_loop_ctb_left_copy)ihevce_enc_loop_ctb_left_copy;
4660 
4661             /* Memory assignments for chroma intra pred buffer */
4662             {
4663                 WORD32 pred_buf_size =
4664                     MAX_TU_SIZE * MAX_TU_SIZE * 2 * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4665                 WORD32 pred_buf_size_per_thread =
4666                     NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD * pred_buf_size;
4667                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].pv_base +
4668                                    (ctr * pred_buf_size_per_thread);
4669 
4670                 for(i = 0; i < NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD; i++)
4671                 {
4672                     ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[i].pv_pred_data = pu1_base;
4673                     pu1_base += pred_buf_size;
4674                 }
4675             }
4676 
4677             /* Memory assignments for reference substitution output */
4678             {
4679                 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING
4680                                        + INTRAPRED_SIMD_LEFT_PADDING);
4681                 WORD32 pred_buf_size_per_thread = pred_buf_size;
4682                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_SUB_OUT].pv_base +
4683                                    (ctr * pred_buf_size_per_thread);
4684 
4685                 ps_ctxt->pv_ref_sub_out = pu1_base + INTRAPRED_SIMD_LEFT_PADDING;
4686             }
4687 
4688             /* Memory assignments for reference filtering output */
4689             {
4690                 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING
4691                                        + INTRAPRED_SIMD_LEFT_PADDING);
4692                 WORD32 pred_buf_size_per_thread = pred_buf_size;
4693                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_FILT_OUT].pv_base +
4694                                    (ctr * pred_buf_size_per_thread);
4695 
4696                 ps_ctxt->pv_ref_filt_out = pu1_base + INTRAPRED_SIMD_LEFT_PADDING;
4697             }
4698 
4699             /* Memory assignments for recon storage during CU Recursion */
4700 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4701             if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4702 #endif
4703             {
4704                 {
4705                     WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
4706                     WORD32 pred_buf_size_per_thread = pred_buf_size;
4707                     UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].pv_base +
4708                                        (ctr * pred_buf_size_per_thread);
4709 
4710                     ps_ctxt->pv_cu_luma_recon = pu1_base;
4711                 }
4712 
4713                 {
4714                     WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
4715                                            ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4716                     WORD32 pred_buf_size_per_thread = pred_buf_size;
4717                     UWORD8 *pu1_base =
4718                         (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].pv_base +
4719                         (ctr * pred_buf_size_per_thread);
4720 
4721                     ps_ctxt->pv_cu_chrma_recon = pu1_base;
4722                 }
4723             }
4724 
4725             /* Memory assignments for pred storage during CU Recursion */
4726 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4727             if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4728 #endif
4729             {
4730                 {
4731                     WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
4732                     WORD32 pred_buf_size_per_thread = pred_buf_size;
4733                     UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].pv_base +
4734                                        (ctr * pred_buf_size_per_thread);
4735 
4736                     ps_ctxt->pv_CTB_pred_luma = pu1_base;
4737                 }
4738 
4739                 {
4740                     WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
4741                                            ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4742                     WORD32 pred_buf_size_per_thread = pred_buf_size;
4743                     UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].pv_base +
4744                                        (ctr * pred_buf_size_per_thread);
4745 
4746                     ps_ctxt->pv_CTB_pred_chroma = pu1_base;
4747                 }
4748             }
4749 
4750             /* Memory assignments for CTB left luma data storage */
4751             {
4752                 WORD32 pred_buf_size = (MAX_CTB_SIZE + MAX_TU_SIZE);
4753                 WORD32 pred_buf_size_per_thread = pred_buf_size;
4754                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].pv_base +
4755                                    (ctr * pred_buf_size_per_thread);
4756 
4757                 ps_ctxt->pv_left_luma_data = pu1_base;
4758             }
4759 
4760             /* Memory assignments for CTB left chroma data storage */
4761             {
4762                 WORD32 pred_buf_size =
4763                     (MAX_CTB_SIZE + MAX_TU_SIZE) * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4764                 WORD32 pred_buf_size_per_thread = pred_buf_size;
4765                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].pv_base +
4766                                    (ctr * pred_buf_size_per_thread);
4767 
4768                 ps_ctxt->pv_left_chrm_data = pu1_base;
4769             }
4770         }
4771 
4772         /* Memory for inter pred buffers */
4773         {
4774             WORD32 i4_num_bufs_per_thread;
4775 
4776             WORD32 i4_buf_size_per_cand =
4777                 (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
4778                 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
4779 
4780             i4_num_bufs_per_thread =
4781                 (ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size / i4_num_proc_thrds) /
4782                 i4_buf_size_per_cand;
4783 
4784             ps_ctxt->i4_max_num_inter_rdopt_cands = i4_num_bufs_per_thread - 4;
4785 
4786             ps_ctxt->s_pred_buf_data.u4_is_buf_in_use = UINT_MAX;
4787 
4788             {
4789                 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_INTER_PRED].pv_base +
4790                                    +(ctr * i4_buf_size_per_cand * i4_num_bufs_per_thread);
4791 
4792                 for(i = 0; i < i4_num_bufs_per_thread; i++)
4793                 {
4794                     ps_ctxt->s_pred_buf_data.apv_inter_pred_data[i] =
4795                         pu1_base + i * i4_buf_size_per_cand;
4796                     ps_ctxt->s_pred_buf_data.u4_is_buf_in_use ^= (1 << i);
4797                 }
4798             }
4799         }
4800 
4801         /* Memory required to store pred for 422 chroma */
4802         if(i4_chroma_format == IV_YUV_422SP_UV)
4803         {
4804             WORD32 pred_buf_size = MAX_CTB_SIZE * MAX_CTB_SIZE * 2;
4805             WORD32 pred_buf_size_per_thread =
4806                 pred_buf_size * ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) *
4807                 sizeof(UWORD8);
4808             void *pv_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].pv_base +
4809                             (ctr * pred_buf_size_per_thread);
4810 
4811             ps_ctxt->pv_422_chroma_intra_pred_buf = pv_base;
4812         }
4813         else
4814         {
4815             ps_ctxt->pv_422_chroma_intra_pred_buf = NULL;
4816         }
4817 
4818         /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
4819         {
4820             WORD32 i4_lumaBufSize = MAX_CU_SIZE * MAX_CU_SIZE;
4821             WORD32 i4_chromaBufSize =
4822                 MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ((i4_chroma_format == IV_YUV_422SP_UV) + 1);
4823             WORD32 i4_memSize_perThread = ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size /
4824                                           (i4_num_proc_thrds * sizeof(UWORD8) * (is_hbd_mode + 1));
4825             WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset;
4826             {
4827                 UWORD8 *pu1_mem_base =
4828                     (((UWORD8 *)ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].pv_base) +
4829                      ctr * i4_memSize_perThread);
4830 
4831                 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[0] =
4832                     pu1_mem_base + i4_lumaBufSize * 0;
4833                 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[1] =
4834                     pu1_mem_base + i4_lumaBufSize * 1;
4835                 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[0] =
4836                     pu1_mem_base + i4_lumaBufSize * 2;
4837                 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[1] =
4838                     pu1_mem_base + i4_lumaBufSize * 3;
4839 
4840                 pu1_mem_base += i4_lumaBufSize * 4;
4841 
4842                 switch(i4_quality_preset)
4843                 {
4844                 case IHEVCE_QUALITY_P0:
4845                 {
4846 #if ENABLE_CHROMA_RDOPT_EVAL_IN_PQ
4847                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4848                         pu1_mem_base + i4_chromaBufSize * 0;
4849                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4850                         pu1_mem_base + i4_chromaBufSize * 1;
4851 #else
4852                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4853                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4854 #endif
4855 
4856 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ
4857                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4858                         pu1_mem_base + i4_chromaBufSize * 2;
4859                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4860                         pu1_mem_base + i4_chromaBufSize * 3;
4861                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4862                         pu1_mem_base + i4_chromaBufSize * 2;
4863                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4864                         pu1_mem_base + i4_chromaBufSize * 3;
4865 #else
4866                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4867                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4868                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4869                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4870 #endif
4871 
4872                     break;
4873                 }
4874                 case IHEVCE_QUALITY_P2:
4875                 {
4876 #if ENABLE_CHROMA_RDOPT_EVAL_IN_HQ
4877                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4878                         pu1_mem_base + i4_chromaBufSize * 0;
4879                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4880                         pu1_mem_base + i4_chromaBufSize * 1;
4881 #else
4882                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4883                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4884 #endif
4885 
4886 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ
4887                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4888                         pu1_mem_base + i4_chromaBufSize * 2;
4889                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4890                         pu1_mem_base + i4_chromaBufSize * 3;
4891                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4892                         pu1_mem_base + i4_chromaBufSize * 2;
4893                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4894                         pu1_mem_base + i4_chromaBufSize * 3;
4895 #else
4896                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4897                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4898                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4899                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4900 #endif
4901 
4902                     break;
4903                 }
4904                 case IHEVCE_QUALITY_P3:
4905                 {
4906 #if ENABLE_CHROMA_RDOPT_EVAL_IN_MS
4907                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4908                         pu1_mem_base + i4_chromaBufSize * 0;
4909                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4910                         pu1_mem_base + i4_chromaBufSize * 1;
4911 #else
4912                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4913                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4914 #endif
4915 
4916 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS
4917                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4918                         pu1_mem_base + i4_chromaBufSize * 2;
4919                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4920                         pu1_mem_base + i4_chromaBufSize * 3;
4921                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4922                         pu1_mem_base + i4_chromaBufSize * 2;
4923                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4924                         pu1_mem_base + i4_chromaBufSize * 3;
4925 #else
4926                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4927                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4928                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4929                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4930 #endif
4931 
4932                     break;
4933                 }
4934                 case IHEVCE_QUALITY_P4:
4935                 {
4936 #if ENABLE_CHROMA_RDOPT_EVAL_IN_HS
4937                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4938                         pu1_mem_base + i4_chromaBufSize * 0;
4939                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4940                         pu1_mem_base + i4_chromaBufSize * 1;
4941 #else
4942                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4943                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4944 #endif
4945 
4946 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS
4947                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4948                         pu1_mem_base + i4_chromaBufSize * 2;
4949                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4950                         pu1_mem_base + i4_chromaBufSize * 3;
4951                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4952                         pu1_mem_base + i4_chromaBufSize * 2;
4953                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4954                         pu1_mem_base + i4_chromaBufSize * 3;
4955 #else
4956                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4957                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4958                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4959                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4960 #endif
4961 
4962                     break;
4963                 }
4964                 case IHEVCE_QUALITY_P5:
4965                 {
4966 #if ENABLE_CHROMA_RDOPT_EVAL_IN_XS
4967                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4968                         pu1_mem_base + i4_chromaBufSize * 0;
4969                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4970                         pu1_mem_base + i4_chromaBufSize * 1;
4971 #else
4972                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4973                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4974 #endif
4975 
4976 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS
4977                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4978                         pu1_mem_base + i4_chromaBufSize * 2;
4979                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4980                         pu1_mem_base + i4_chromaBufSize * 3;
4981                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4982                         pu1_mem_base + i4_chromaBufSize * 2;
4983                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4984                         pu1_mem_base + i4_chromaBufSize * 3;
4985 #else
4986                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4987                     ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4988                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4989                     ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4990 #endif
4991 
4992                     break;
4993                 }
4994                 }
4995             }
4996 
4997             ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
4998             ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
4999             ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
5000             ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
5001 
5002         } /* Recon Datastore */
5003 
5004         /****************************************************/
5005         /****************************************************/
5006         /* ps_pps->i1_sign_data_hiding_flag  == UNHIDDEN    */
5007         /* when NO_SBH. else HIDDEN                         */
5008         /****************************************************/
5009         /****************************************************/
5010         /* Zero cbf tool is enabled by default for all presets */
5011         ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
5012 
5013         if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3)
5014         {
5015             ps_ctxt->i4_quant_rounding_level = CU_LEVEL_QUANT_ROUNDING;
5016             ps_ctxt->i4_chroma_quant_rounding_level = CHROMA_QUANT_ROUNDING;
5017             ps_ctxt->i4_rdoq_level = ALL_CAND_RDOQ;
5018             ps_ctxt->i4_sbh_level = ALL_CAND_SBH;
5019         }
5020         else if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P3)
5021         {
5022             ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5023             ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5024             ps_ctxt->i4_rdoq_level = NO_RDOQ;
5025             ps_ctxt->i4_sbh_level = NO_SBH;
5026         }
5027         else
5028         {
5029             ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5030             ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5031             ps_ctxt->i4_rdoq_level = NO_RDOQ;
5032             ps_ctxt->i4_sbh_level = NO_SBH;
5033         }
5034 
5035 #if DISABLE_QUANT_ROUNDING
5036         ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5037         ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5038 #endif
5039         /*Disabling RDOQ only when spatial modulation is enabled
5040                 as RDOQ degrades visual quality*/
5041         if(ps_init_prms->s_config_prms.i4_cu_level_rc & 1)
5042         {
5043             ps_ctxt->i4_rdoq_level = NO_RDOQ;
5044         }
5045 
5046 #if DISABLE_RDOQ
5047         ps_ctxt->i4_rdoq_level = NO_RDOQ;
5048 #endif
5049 
5050 #if DISABLE_SBH
5051         ps_ctxt->i4_sbh_level = NO_SBH;
5052 #endif
5053 
5054         /*Rounding factor calc based on previous cabac states */
5055 
5056         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_4x4[0][0];
5057         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_8x8[0][0];
5058         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_16x16[0][0];
5059         ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[4] = &ps_ctxt->i4_quant_round_32x32[0][0];
5060 
5061         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_4x4[1][0];
5062         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_8x8[1][0];
5063         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_16x16[1][0];
5064         ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[4] = &ps_ctxt->i4_quant_round_32x32[1][0];
5065 
5066         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_cr_4x4[0][0];
5067         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_cr_8x8[0][0];
5068         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_cr_16x16[0][0];
5069 
5070         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_cr_4x4[1][0];
5071         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_cr_8x8[1][0];
5072         ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_cr_16x16[1][0];
5073 
5074         /****************************************************************************************/
5075         /* Setting the perform rdoq and sbh flags appropriately                                 */
5076         /****************************************************************************************/
5077         {
5078             /******************************************/
5079             /* For best cand rdoq and/or sbh          */
5080             /******************************************/
5081             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
5082                 (ps_ctxt->i4_rdoq_level == BEST_CAND_RDOQ);
5083             /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
5084             we would have to do RDOQ again.*/
5085             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
5086                 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq ||
5087                 ((BEST_CAND_SBH == ps_ctxt->i4_sbh_level) &&
5088                  (ALL_CAND_RDOQ == ps_ctxt->i4_rdoq_level));
5089 
5090             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
5091                 (ps_ctxt->i4_sbh_level == BEST_CAND_SBH);
5092 
5093             /* SBH should be performed if
5094             a) i4_sbh_level is BEST_CAND_SBH.
5095             b) For all quality presets above medium speed(i.e. high speed and extreme speed) and
5096             if SBH has to be done because for these presets the quant, iquant and scan coeff
5097             data are calculated in this function and not during the RDOPT stage*/
5098 
5099             /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
5100             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
5101                 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh ||
5102                 ((BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level) &&
5103                  (ALL_CAND_SBH == ps_ctxt->i4_sbh_level));
5104 
5105             /******************************************/
5106             /* For all cand rdoq and/or sbh          */
5107             /******************************************/
5108             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq =
5109                 (ps_ctxt->i4_rdoq_level == ALL_CAND_RDOQ);
5110             ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh =
5111                 (ps_ctxt->i4_sbh_level == ALL_CAND_SBH);
5112             ps_ctxt->s_rdoq_sbh_ctxt.i4_bit_depth =
5113                 ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
5114         }
5115 
5116         if(!is_hbd_mode)
5117         {
5118             if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
5119             {
5120                 if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
5121                 {
5122                     ps_ctxt->apf_quant_iquant_ssd[0] =
5123                         ps_func_selector->ihevc_quant_iquant_ssd_fptr;
5124                     ps_ctxt->apf_quant_iquant_ssd[2] = ps_func_selector->ihevc_quant_iquant_fptr;
5125                 }
5126                 else
5127                 {
5128                     ps_ctxt->apf_quant_iquant_ssd[0] =
5129                         ps_func_selector->ihevc_quant_iquant_ssd_rdoq_fptr;
5130                     ps_ctxt->apf_quant_iquant_ssd[2] =
5131                         ps_func_selector->ihevc_quant_iquant_rdoq_fptr;
5132                 }
5133 
5134                 /*If coef level RDOQ is enabled, quantization based on corr. error to be done */
5135                 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
5136                 {
5137                     ps_ctxt->apf_quant_iquant_ssd[1] =
5138                         ps_func_selector->ihevc_q_iq_ssd_var_rnd_fact_fptr;
5139                     ps_ctxt->apf_quant_iquant_ssd[3] =
5140                         ps_func_selector->ihevc_q_iq_var_rnd_fact_fptr;
5141                 }
5142                 else
5143                 {
5144                     ps_ctxt->apf_quant_iquant_ssd[1] =
5145                         ps_func_selector->ihevc_quant_iquant_ssd_fptr;
5146                     ps_ctxt->apf_quant_iquant_ssd[3] = ps_func_selector->ihevc_quant_iquant_fptr;
5147                 }
5148             }
5149             else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
5150             {
5151                 if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
5152                 {
5153                     ps_ctxt->apf_quant_iquant_ssd[0] =
5154                         ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
5155                     ps_ctxt->apf_quant_iquant_ssd[2] =
5156                         ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
5157                 }
5158                 else
5159                 {
5160                     ps_ctxt->apf_quant_iquant_ssd[0] =
5161                         ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_fptr;
5162                     ps_ctxt->apf_quant_iquant_ssd[2] =
5163                         ps_func_selector->ihevc_quant_iquant_flat_scale_mat_rdoq_fptr;
5164                 }
5165 
5166                 /*If coef level RDOQ is enabled, quantization based on corr. error to be done */
5167                 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
5168                 {
5169                     ps_ctxt->apf_quant_iquant_ssd[1] =
5170                         ps_func_selector->ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_fptr;
5171                     ps_ctxt->apf_quant_iquant_ssd[3] =
5172                         ps_func_selector->ihevc_q_iq_flat_scale_mat_var_rnd_fact_fptr;
5173                 }
5174                 else
5175                 {
5176                     ps_ctxt->apf_quant_iquant_ssd[1] =
5177                         ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
5178                     ps_ctxt->apf_quant_iquant_ssd[3] =
5179                         ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
5180                 }
5181             }
5182 
5183             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[0] =
5184                 ps_func_selector->ihevc_sao_edge_offset_class0_fptr;
5185             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[1] =
5186                 ps_func_selector->ihevc_sao_edge_offset_class1_fptr;
5187             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[2] =
5188                 ps_func_selector->ihevc_sao_edge_offset_class2_fptr;
5189             ps_ctxt->s_sao_ctxt_t.apf_sao_luma[3] =
5190                 ps_func_selector->ihevc_sao_edge_offset_class3_fptr;
5191 
5192             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[0] =
5193                 ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr;
5194             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[1] =
5195                 ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr;
5196             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[2] =
5197                 ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr;
5198             ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[3] =
5199                 ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr;
5200 
5201             ps_ctxt->apf_it_recon[0] = ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr;
5202             ps_ctxt->apf_it_recon[1] = ps_func_selector->ihevc_itrans_recon_4x4_fptr;
5203             ps_ctxt->apf_it_recon[2] = ps_func_selector->ihevc_itrans_recon_8x8_fptr;
5204             ps_ctxt->apf_it_recon[3] = ps_func_selector->ihevc_itrans_recon_16x16_fptr;
5205             ps_ctxt->apf_it_recon[4] = ps_func_selector->ihevc_itrans_recon_32x32_fptr;
5206 
5207             ps_ctxt->apf_chrm_it_recon[0] = ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr;
5208             ps_ctxt->apf_chrm_it_recon[1] = ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr;
5209             ps_ctxt->apf_chrm_it_recon[2] = ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr;
5210 
5211             ps_ctxt->apf_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr;
5212             ps_ctxt->apf_resd_trns[1] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
5213             ps_ctxt->apf_resd_trns[2] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
5214             ps_ctxt->apf_resd_trns[3] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
5215             ps_ctxt->apf_resd_trns[4] = ps_func_selector->ihevc_resi_trans_32x32_fptr;
5216 
5217             ps_ctxt->apf_chrm_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
5218             ps_ctxt->apf_chrm_resd_trns[1] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
5219             ps_ctxt->apf_chrm_resd_trns[2] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
5220 
5221             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_0] =
5222                 ps_func_selector->ihevc_intra_pred_luma_planar_fptr;
5223             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_1] = ps_func_selector->ihevc_intra_pred_luma_dc_fptr;
5224             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_2] =
5225                 ps_func_selector->ihevc_intra_pred_luma_mode2_fptr;
5226             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_3TO9] =
5227                 ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr;
5228             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_10] =
5229                 ps_func_selector->ihevc_intra_pred_luma_horz_fptr;
5230             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_11TO17] =
5231                 ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr;
5232             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_18_34] =
5233                 ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr;
5234             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_19TO25] =
5235                 ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr;
5236             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_26] = ps_func_selector->ihevc_intra_pred_luma_ver_fptr;
5237             ps_ctxt->apf_lum_ip[IP_FUNC_MODE_27TO33] =
5238                 ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr;
5239 
5240             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_0] =
5241                 ps_func_selector->ihevc_intra_pred_chroma_planar_fptr;
5242             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_1] =
5243                 ps_func_selector->ihevc_intra_pred_chroma_dc_fptr;
5244             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_2] =
5245                 ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr;
5246             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_3TO9] =
5247                 ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr;
5248             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_10] =
5249                 ps_func_selector->ihevc_intra_pred_chroma_horz_fptr;
5250             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_11TO17] =
5251                 ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr;
5252             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_18_34] =
5253                 ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr;
5254             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_19TO25] =
5255                 ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr;
5256             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_26] =
5257                 ps_func_selector->ihevc_intra_pred_chroma_ver_fptr;
5258             ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_27TO33] =
5259                 ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr;
5260 
5261             ps_ctxt->apf_chrm_resd_trns_had[0] =
5262                 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_4x4_8bit;
5263             ps_ctxt->apf_chrm_resd_trns_had[1] =
5264                 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_8x8_8bit;
5265             ps_ctxt->apf_chrm_resd_trns_had[2] =
5266                 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_16x16_8bit;
5267         }
5268 
5269         if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
5270         {
5271             /* initialise the scale & rescale matrices */
5272             ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5273             ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5274             ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
5275             ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
5276             ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
5277             /*init for inter matrix*/
5278             ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5279             ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5280             ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
5281             ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
5282             ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
5283 
5284             /*init for rescale matrix*/
5285             ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5286             ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5287             ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
5288             ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
5289             ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
5290             /*init for rescale inter matrix*/
5291             ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5292             ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5293             ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
5294             ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
5295             ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
5296         }
5297         else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
5298         {
5299             /* initialise the scale & rescale matrices */
5300             ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5301             ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5302             ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_intra_default_scale_mat_8x8[0];
5303             ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_intra_default_scale_mat_16x16[0];
5304             ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_intra_default_scale_mat_32x32[0];
5305             /*init for inter matrix*/
5306             ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5307             ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5308             ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_inter_default_scale_mat_8x8[0];
5309             ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_inter_default_scale_mat_16x16[0];
5310             ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_inter_default_scale_mat_32x32[0];
5311 
5312             /*init for rescale matrix*/
5313             ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5314             ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5315             ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_intra_default_rescale_mat_8x8[0];
5316             ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_intra_default_rescale_mat_16x16[0];
5317             ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_intra_default_rescale_mat_32x32[0];
5318             /*init for rescale inter matrix*/
5319             ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5320             ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5321             ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_inter_default_rescale_mat_8x8[0];
5322             ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_inter_default_rescale_mat_16x16[0];
5323             ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_inter_default_rescale_mat_32x32[0];
5324         }
5325         else
5326         {
5327             ASSERT(0);
5328         }
5329 
5330         /* Not recomputing Luma pred-data and header data for any preset now */
5331         ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
5332         ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
5333         ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
5334 
5335         switch(ps_ctxt->i4_quality_preset)
5336         {
5337         case IHEVCE_QUALITY_P0:
5338         {
5339             ps_ctxt->i4_max_merge_candidates = 5;
5340             ps_ctxt->i4_use_satd_for_merge_eval = 1;
5341             ps_ctxt->u1_use_top_at_ctb_boundary = 1;
5342             ps_ctxt->u1_use_early_cbf_data = 0;
5343             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_PQ;
5344             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5345                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ;
5346 
5347             break;
5348         }
5349         case IHEVCE_QUALITY_P2:
5350         {
5351             ps_ctxt->i4_max_merge_candidates = 5;
5352             ps_ctxt->i4_use_satd_for_merge_eval = 1;
5353             ps_ctxt->u1_use_top_at_ctb_boundary = 1;
5354             ps_ctxt->u1_use_early_cbf_data = 0;
5355 
5356             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HQ;
5357             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5358                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ;
5359 
5360             break;
5361         }
5362         case IHEVCE_QUALITY_P3:
5363         {
5364             ps_ctxt->i4_max_merge_candidates = 3;
5365             ps_ctxt->i4_use_satd_for_merge_eval = 1;
5366             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5367 
5368             ps_ctxt->u1_use_early_cbf_data = 0;
5369             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_MS;
5370             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5371                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS;
5372 
5373             break;
5374         }
5375         case IHEVCE_QUALITY_P4:
5376         {
5377             ps_ctxt->i4_max_merge_candidates = 2;
5378             ps_ctxt->i4_use_satd_for_merge_eval = 1;
5379             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5380             ps_ctxt->u1_use_early_cbf_data = 0;
5381             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HS;
5382             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5383                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS;
5384 
5385             break;
5386         }
5387         case IHEVCE_QUALITY_P5:
5388         {
5389             ps_ctxt->i4_max_merge_candidates = 2;
5390             ps_ctxt->i4_use_satd_for_merge_eval = 0;
5391             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5392             ps_ctxt->u1_use_early_cbf_data = 0;
5393             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_XS;
5394             ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5395                 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS;
5396 
5397             break;
5398         }
5399         case IHEVCE_QUALITY_P6:
5400         {
5401             ps_ctxt->i4_max_merge_candidates = 2;
5402             ps_ctxt->i4_use_satd_for_merge_eval = 0;
5403             ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5404             ps_ctxt->u1_use_early_cbf_data = EARLY_CBF_ON;
5405             break;
5406         }
5407         default:
5408         {
5409             ASSERT(0);
5410         }
5411         }
5412 
5413 #if DISABLE_SKIP_AND_MERGE_EVAL
5414         ps_ctxt->i4_max_merge_candidates = 0;
5415 #endif
5416 
5417         ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
5418             !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
5419 
5420         /*initialize memory for RC related parameters required/populated by enc_loop */
5421         /* the allocated memory is distributed as follows assuming encoder is running for 3 bit-rate instances
5422         |-------|-> Thread 0, instance 0
5423         |       |
5424         |       |
5425         |       |
5426         |-------|-> thread 0, instance 1
5427         |       |
5428         |       |
5429         |       |
5430         |-------|-> thread 0, instance 2
5431         |       |
5432         |       |
5433         |       |
5434         |-------|-> thread 1, instance 0
5435         |       |
5436         |       |
5437         |       |
5438         |-------|-> thread 1, instance 1
5439         |       |
5440         |       |
5441         |       |
5442         |-------|-> thread 1, instance 2
5443         ...         ...
5444 
5445         Each thread will collate the data corresponding to the bit-rate instance it's running at the appropriate place.
5446         Finally, one thread will become master and collate the data from all the threads */
5447         for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
5448         {
5449             for(i = 0; i < i4_num_bitrate_inst; i++)
5450             {
5451                 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i] = ps_enc_loop_rc_params;
5452                 ps_enc_loop_rc_params++;
5453             }
5454         }
5455         /* Non-Luma modes for Chroma are evaluated only in HIGH QUALITY preset */
5456 
5457 #if !ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE
5458         ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 0;
5459 #endif
5460 
5461         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_iq_buff_stride =
5462             MAX_TU_SIZE;
5463         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_iq_buff_stride =
5464             MAX_TU_SIZE;
5465         /*Multiplying by two to account for interleaving of cb and cr*/
5466         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_pred_stride = MAX_TU_SIZE
5467                                                                                        << 1;
5468         ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_pred_stride =
5469             MAX_TU_SIZE << 1;
5470 
5471         /*     Memory for a frame level memory to store tile-id                  */
5472         /*              corresponding to each CTB of frame                       */
5473         ps_ctxt->pi4_offset_for_last_cu_qp = &ps_master_ctxt->ai4_offset_for_last_cu_qp[0];
5474 
5475         ps_ctxt->i4_qp_mod = ps_init_prms->s_config_prms.i4_cu_level_rc & 1;
5476         /* psy rd strength is a run time parameter controlled by bit fields 5-7 in the VQET field.*/
5477         /* we disable psyrd if the psy strength is zero or the BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER field is not set */
5478         if(ps_init_prms->s_coding_tools_prms.i4_vqet &
5479            (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER))
5480         {
5481             UWORD32 psy_strength;
5482             UWORD32 psy_strength_mask =
5483                 224;  // only bits 5,6,7 are ones. These three bits represent the psy strength
5484             psy_strength = ps_init_prms->s_coding_tools_prms.i4_vqet & psy_strength_mask;
5485             ps_ctxt->u1_enable_psyRDOPT = 1;
5486             ps_ctxt->u4_psy_strength = psy_strength >> BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1;
5487             if(psy_strength == 0)
5488             {
5489                 ps_ctxt->u1_enable_psyRDOPT = 0;
5490                 ps_ctxt->u4_psy_strength = 0;
5491             }
5492         }
5493 
5494         ps_ctxt->u1_is_stasino_enabled =
5495             ((ps_init_prms->s_coding_tools_prms.i4_vqet &
5496               (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
5497              (ps_init_prms->s_coding_tools_prms.i4_vqet &
5498               (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)));
5499 
5500         ps_ctxt->u1_max_inter_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_nI;
5501         ps_ctxt->u1_max_intra_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_I;
5502         ps_ctxt++;
5503     }
5504     /* Store Tile params base into EncLoop Master context */
5505     ps_master_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
5506 
5507     if(1 == ps_tile_params_base->i4_tiles_enabled_flag)
5508     {
5509         i4_num_tile_cols = ps_tile_params_base->i4_num_tile_cols;
5510     }
5511 
5512     /* Updating  ai4_offset_for_last_cu_qp[] array for all tile-colums of frame */
5513     /* Loop over all tile-cols in frame */
5514     for(ctr = 0; ctr < i4_num_tile_cols; ctr++)
5515     {
5516         WORD32 i4_tile_col_wd_in_ctb_unit =
5517             (ps_tile_params_base + ctr)->i4_curr_tile_wd_in_ctb_unit;
5518         WORD32 offset_x;
5519 
5520         if(ctr == (i4_num_tile_cols - 1))
5521         { /* Last tile-row of frame */
5522             WORD32 min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
5523 
5524             WORD32 cu_aligned_pic_wd =
5525                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
5526                 SET_CTB_ALIGN(
5527                     ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width,
5528                     min_cu_size);
5529 
5530             WORD32 last_hz_ctb_wd = MAX_CTB_SIZE - (u4_width - cu_aligned_pic_wd);
5531 
5532             offset_x = (i4_tile_col_wd_in_ctb_unit - 1) * MAX_CTB_SIZE;
5533             offset_x += last_hz_ctb_wd;
5534         }
5535         else
5536         { /* Not the last tile-row of frame */
5537             offset_x = (i4_tile_col_wd_in_ctb_unit)*MAX_CTB_SIZE;
5538         }
5539 
5540         offset_x /= 4;
5541         offset_x -= 1;
5542 
5543         ps_master_ctxt->ai4_offset_for_last_cu_qp[ctr] = offset_x;
5544     }
5545 
5546     n_tabs = NUM_ENC_LOOP_MEM_RECS;
5547 
5548     /*store num bit-rate instances in the master context */
5549     ps_master_ctxt->i4_num_bitrates = i4_num_bitrate_inst;
5550     ps_master_ctxt->i4_num_enc_loop_frm_pllel = i4_num_enc_loop_frm_pllel;
5551     /*************************************************************************/
5552     /* --- EncLoop Deblock and SAO sync Dep Mngr Mem init --                         */
5553     /*************************************************************************/
5554     {
5555         WORD32 count;
5556         WORD32 num_vert_units, num_blks_in_row;
5557         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
5558         WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
5559 
5560         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
5561         ihevce_enc_loop_dblk_get_prms_dep_mngr(wd, &num_blks_in_row);
5562         ASSERT(num_vert_units > 0);
5563         ASSERT(num_blks_in_row > 0);
5564 
5565         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5566         {
5567             for(i = 0; i < i4_num_bitrate_inst; i++)
5568             {
5569                 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[count][i] = ihevce_dmgr_init(
5570                     &ps_mem_tab[n_tabs],
5571                     pv_osal_handle,
5572                     DEP_MNGR_ROW_ROW_SYNC,
5573                     num_vert_units,
5574                     num_blks_in_row,
5575                     i4_num_tile_cols, /* Number of Col Tiles */
5576                     i4_num_proc_thrds,
5577                     0 /*Sem Disabled*/
5578                 );
5579 
5580                 n_tabs += ihevce_dmgr_get_num_mem_recs();
5581             }
5582         }
5583 
5584         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5585         {
5586             for(i = 0; i < i4_num_bitrate_inst; i++)
5587             {
5588                 ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[count][i] = ihevce_dmgr_init(
5589                     &ps_mem_tab[n_tabs],
5590                     pv_osal_handle,
5591                     DEP_MNGR_ROW_ROW_SYNC,
5592                     num_vert_units,
5593                     num_blks_in_row,
5594                     i4_num_tile_cols, /* Number of Col Tiles */
5595                     i4_num_proc_thrds,
5596                     0 /*Sem Disabled*/
5597                 );
5598 
5599                 n_tabs += ihevce_dmgr_get_num_mem_recs();
5600             }
5601         }
5602     }
5603     /*************************************************************************/
5604     /* --- EncLoop Top-Right CU sync Dep Mngr Mem init --                    */
5605     /*************************************************************************/
5606     {
5607         WORD32 count;
5608         WORD32 num_vert_units, num_blks_in_row;
5609         WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
5610         WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
5611 
5612         WORD32 i4_sem = 0;
5613 
5614         if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset >=
5615            IHEVCE_QUALITY_P4)
5616             i4_sem = 0;
5617         else
5618             i4_sem = 1;
5619         ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
5620         /* For Top-Right CU sync, adding one more CTB since value updation */
5621         /* happens in that way for the last CTB in the row                 */
5622         num_blks_in_row = wd + SET_CTB_ALIGN(wd, MAX_CU_SIZE);
5623         num_blks_in_row += MAX_CTB_SIZE;
5624 
5625         ASSERT(num_vert_units > 0);
5626         ASSERT(num_blks_in_row > 0);
5627 
5628         for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5629         {
5630             for(i = 0; i < i4_num_bitrate_inst; i++)
5631             {
5632                 /* For ES/HS, CU level updates uses spin-locks than semaphore */
5633                 {
5634                     ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[count][i] =
5635                         ihevce_dmgr_init(
5636                             &ps_mem_tab[n_tabs],
5637                             pv_osal_handle,
5638                             DEP_MNGR_ROW_ROW_SYNC,
5639                             num_vert_units,
5640                             num_blks_in_row,
5641                             i4_num_tile_cols, /* Number of Col Tiles */
5642                             i4_num_proc_thrds,
5643                             i4_sem /*Sem Disabled*/
5644                         );
5645                 }
5646                 n_tabs += ihevce_dmgr_get_num_mem_recs();
5647             }
5648         }
5649     }
5650 
5651     for(i = 1; i < 5; i++)
5652     {
5653         WORD32 i4_log2_trans_size = i + 1;
5654         WORD32 i4_bit_depth = ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
5655 
5656         ga_trans_shift[i] = (MAX_TR_DYNAMIC_RANGE - i4_bit_depth - i4_log2_trans_size) << 1;
5657     }
5658 
5659     ga_trans_shift[0] = ga_trans_shift[1];
5660 
5661     /* return the handle to caller */
5662     return ((void *)ps_master_ctxt);
5663 }
5664 
5665 /*!
5666 ******************************************************************************
5667 * \if Function name : ihevce_enc_loop_reg_sem_hdls \endif
5668 *
5669 * \brief
5670 *    Intialization for ENC_LOOP context state structure .
5671 *
5672 * \param[in] ps_mem_tab : pointer to memory descriptors table
5673 * \param[in] ppv_sem_hdls : Array of semaphore handles
5674 * \param[in] i4_num_proc_thrds : Number of processing threads
5675 *
5676 * \return
5677 *    None
5678 *
5679 * \author
5680 *  Ittiam
5681 *
5682 *****************************************************************************
5683 */
ihevce_enc_loop_reg_sem_hdls(void * pv_enc_loop_ctxt,void ** ppv_sem_hdls,WORD32 i4_num_proc_thrds)5684 void ihevce_enc_loop_reg_sem_hdls(
5685     void *pv_enc_loop_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
5686 {
5687     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5688     WORD32 i, enc_frm_id;
5689 
5690     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5691 
5692     /*************************************************************************/
5693     /* --- EncLoop Deblock and SAO sync Dep Mngr reg Semaphores --                   */
5694     /*************************************************************************/
5695     for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5696     {
5697         for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5698         {
5699             ihevce_dmgr_reg_sem_hdls(
5700                 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][i],
5701                 ppv_sem_hdls,
5702                 i4_num_proc_thrds);
5703         }
5704     }
5705 
5706     for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5707     {
5708         for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5709         {
5710             ihevce_dmgr_reg_sem_hdls(
5711                 ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[enc_frm_id][i],
5712                 ppv_sem_hdls,
5713                 i4_num_proc_thrds);
5714         }
5715     }
5716 
5717     /*************************************************************************/
5718     /* --- EncLoop Top-Right CU synnc Dep Mngr reg Semaphores --             */
5719     /*************************************************************************/
5720     for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5721     {
5722         for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5723         {
5724             ihevce_dmgr_reg_sem_hdls(
5725                 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][i],
5726                 ppv_sem_hdls,
5727                 i4_num_proc_thrds);
5728         }
5729     }
5730 
5731     return;
5732 }
5733 
5734 /*!
5735 ******************************************************************************
5736 * \if Function name : ihevce_enc_loop_delete \endif
5737 *
5738 * \brief
5739 *    Destroy EncLoop module
5740 * Note : Only Destroys the resources allocated in the module like
5741 *   semaphore,etc. Memory free is done Separately using memtabs
5742 *
5743 * \param[in] pv_me_ctxt : pointer to EncLoop ctxt
5744 *
5745 * \return
5746 *    None
5747 *
5748 * \author
5749 *  Ittiam
5750 *
5751 *****************************************************************************
5752 */
ihevce_enc_loop_delete(void * pv_enc_loop_ctxt)5753 void ihevce_enc_loop_delete(void *pv_enc_loop_ctxt)
5754 {
5755     ihevce_enc_loop_master_ctxt_t *ps_enc_loop_ctxt;
5756     WORD32 ctr, enc_frm_id;
5757 
5758     ps_enc_loop_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5759 
5760     for(enc_frm_id = 0; enc_frm_id < ps_enc_loop_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5761     {
5762         for(ctr = 0; ctr < ps_enc_loop_ctxt->i4_num_bitrates; ctr++)
5763         {
5764             /* --- EncLoop Deblock sync Dep Mngr Delete --*/
5765             ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][ctr]);
5766             /* --- EncLoop Sao sync Dep Mngr Delete --*/
5767             ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_sao[enc_frm_id][ctr]);
5768             /* --- EncLoop Top-Right CU sync Dep Mngr Delete --*/
5769             ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][ctr]);
5770         }
5771     }
5772 }
5773 
5774 /*!
5775 ******************************************************************************
5776 * \if Function name : ihevce_enc_loop_dep_mngr_frame_reset \endif
5777 *
5778 * \brief
5779 *    Frame level Reset for the Dependency Mngrs local to EncLoop.,
5780 *    ie CU_TopRight and Dblk
5781 *
5782 * \param[in] pv_enc_loop_ctxt       : Enc_loop context pointer
5783 *
5784 * \return
5785 *    None
5786 *
5787 * \author
5788 *  Ittiam
5789 *
5790 *****************************************************************************
5791 */
ihevce_enc_loop_dep_mngr_frame_reset(void * pv_enc_loop_ctxt,WORD32 enc_frm_id)5792 void ihevce_enc_loop_dep_mngr_frame_reset(void *pv_enc_loop_ctxt, WORD32 enc_frm_id)
5793 {
5794     WORD32 ctr, frame_id;
5795     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5796 
5797     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5798 
5799     if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
5800     {
5801         frame_id = 0;
5802     }
5803     else
5804     {
5805         frame_id = enc_frm_id;
5806     }
5807 
5808     for(ctr = 0; ctr < ps_master_ctxt->i4_num_bitrates; ctr++)
5809     {
5810         /* Dep. Mngr : Reset the num ctb Deblocked in every row  for ENC sync */
5811         ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[frame_id][ctr]);
5812 
5813         /* Dep. Mngr : Reset the num SAO ctb in every row  for ENC sync */
5814         ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[frame_id][ctr]);
5815 
5816         /* Dep. Mngr : Reset the TopRight CU Processed in every row  for ENC sync */
5817         ihevce_dmgr_rst_row_row_sync(
5818             ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[frame_id][ctr]);
5819     }
5820 }
5821 
5822 /*!
5823 ******************************************************************************
5824 * \if Function name : ihevce_enc_loop_frame_init \endif
5825 *
5826 * \brief
5827 *    Frame level init of encode loop function .
5828 *
5829 * \param[in] pv_enc_loop_ctxt           : Enc_loop context pointer
5830 * \param[in] pi4_cu_processed           : ptr to cur frame cu process in pix.
5831 * \param[in] aps_ref_list               : ref pic list for the current frame
5832 * \param[in] ps_slice_hdr               : ptr to current slice header params
5833 * \param[in] ps_pps                     : ptr to active pps params
5834 * \param[in] ps_sps                     : ptr to active sps params
5835 * \param[in] ps_vps                     : ptr to active vps params
5836 
5837 
5838 * \param[in] i1_weighted_pred_flag      : weighted pred enable flag (unidir)
5839 * \param[in] i1_weighted_bipred_flag    : weighted pred enable flag (bidir)
5840 * \param[in] log2_luma_wght_denom       : down shift factor for weighted pred of luma
5841 * \param[in] log2_chroma_wght_denom       : down shift factor for weighted pred of chroma
5842 * \param[in] cur_poc                    : current frame poc
5843 * \param[in] i4_bitrate_instance_num    : number indicating the instance of bit-rate for multi-rate encoder
5844 *
5845 * \return
5846 *    None
5847 *
5848 * \author
5849 *  Ittiam
5850 *
5851 *****************************************************************************
5852 */
ihevce_enc_loop_frame_init(void * pv_enc_loop_ctxt,WORD32 i4_frm_qp,recon_pic_buf_t * (* aps_ref_list)[HEVCE_MAX_REF_PICS * 2],recon_pic_buf_t * ps_frm_recon,slice_header_t * ps_slice_hdr,pps_t * ps_pps,sps_t * ps_sps,vps_t * ps_vps,WORD8 i1_weighted_pred_flag,WORD8 i1_weighted_bipred_flag,WORD32 log2_luma_wght_denom,WORD32 log2_chroma_wght_denom,WORD32 cur_poc,WORD32 i4_display_num,enc_ctxt_t * ps_enc_ctxt,me_enc_rdopt_ctxt_t * ps_curr_inp_prms,WORD32 i4_bitrate_instance_num,WORD32 i4_thrd_id,WORD32 i4_enc_frm_id,WORD32 i4_num_bitrates,WORD32 i4_quality_preset,void * pv_dep_mngr_encloop_dep_me)5853 void ihevce_enc_loop_frame_init(
5854     void *pv_enc_loop_ctxt,
5855     WORD32 i4_frm_qp,
5856     recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2],
5857     recon_pic_buf_t *ps_frm_recon,
5858     slice_header_t *ps_slice_hdr,
5859     pps_t *ps_pps,
5860     sps_t *ps_sps,
5861     vps_t *ps_vps,
5862     WORD8 i1_weighted_pred_flag,
5863     WORD8 i1_weighted_bipred_flag,
5864     WORD32 log2_luma_wght_denom,
5865     WORD32 log2_chroma_wght_denom,
5866     WORD32 cur_poc,
5867     WORD32 i4_display_num,
5868     enc_ctxt_t *ps_enc_ctxt,
5869     me_enc_rdopt_ctxt_t *ps_curr_inp_prms,
5870     WORD32 i4_bitrate_instance_num,
5871     WORD32 i4_thrd_id,
5872     WORD32 i4_enc_frm_id,
5873     WORD32 i4_num_bitrates,
5874     WORD32 i4_quality_preset,
5875     void *pv_dep_mngr_encloop_dep_me)
5876 {
5877     /* local variables */
5878     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5879     ihevce_enc_loop_ctxt_t *ps_ctxt;
5880     WORD32 chroma_qp_offset, i4_div_factor;
5881     WORD8 i1_slice_type = ps_slice_hdr->i1_slice_type;
5882     WORD8 i1_strong_intra_smoothing_enable_flag = ps_sps->i1_strong_intra_smoothing_enable_flag;
5883 
5884     /* ENC_LOOP master state structure */
5885     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5886 
5887     /* Nithya: Store the current POC in the slice header */
5888     ps_slice_hdr->i4_abs_pic_order_cnt = cur_poc;
5889 
5890     /* Update the POC list of the current frame to the recon buffer */
5891     if(ps_slice_hdr->i1_num_ref_idx_l0_active != 0)
5892     {
5893         int i4_i;
5894         for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l0_active; i4_i++)
5895         {
5896             ps_frm_recon->ai4_col_l0_poc[i4_i] = aps_ref_list[0][i4_i]->i4_poc;
5897         }
5898     }
5899     if(ps_slice_hdr->i1_num_ref_idx_l1_active != 0)
5900     {
5901         int i4_i;
5902         for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l1_active; i4_i++)
5903         {
5904             ps_frm_recon->ai4_col_l1_poc[i4_i] = aps_ref_list[1][i4_i]->i4_poc;
5905         }
5906     }
5907 
5908     /* loop over all the threads */
5909     // for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
5910     {
5911         /* ENC_LOOP state structure */
5912         ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i4_thrd_id];
5913 
5914         /* SAO ctxt structure initialization*/
5915         ps_ctxt->s_sao_ctxt_t.ps_pps = ps_pps;
5916         ps_ctxt->s_sao_ctxt_t.ps_sps = ps_sps;
5917         ps_ctxt->s_sao_ctxt_t.ps_slice_hdr = ps_slice_hdr;
5918 
5919         /*bit-rate instance number for Multi-bitrate (MBR) encode */
5920         ps_ctxt->i4_bitrate_instance_num = i4_bitrate_instance_num;
5921         ps_ctxt->i4_num_bitrates = i4_num_bitrates;
5922         ps_ctxt->i4_chroma_format = ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format;
5923         ps_ctxt->i4_is_first_query = 1;
5924         ps_ctxt->i4_is_ctb_qp_modified = 0;
5925 
5926         /* enc_frm_id for multiframe encode */
5927 
5928         if(1 == ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel)
5929         {
5930             ps_ctxt->i4_enc_frm_id = 0;
5931             i4_enc_frm_id = 0;
5932         }
5933         else
5934         {
5935             ps_ctxt->i4_enc_frm_id = i4_enc_frm_id;
5936         }
5937 
5938         /*Initialize the sub pic rc buf appropriately */
5939 
5940         /*Set the thrd id flag */
5941         ps_enc_ctxt->s_multi_thrd
5942             .ai4_thrd_id_valid_flag[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 1;
5943 
5944         ps_enc_ctxt->s_multi_thrd
5945             .ai8_nctb_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5946         ps_enc_ctxt->s_multi_thrd
5947             .ai8_nctb_me_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5948 
5949         ps_enc_ctxt->s_multi_thrd
5950             .ai8_nctb_l0_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5951         ps_enc_ctxt->s_multi_thrd
5952             .ai8_nctb_act_factor[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5953 
5954         ps_enc_ctxt->s_multi_thrd
5955             .ai8_nctb_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5956         ps_enc_ctxt->s_multi_thrd
5957             .ai8_acc_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5958         ps_enc_ctxt->s_multi_thrd
5959             .ai8_acc_bits_mul_qs_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5960         ps_enc_ctxt->s_multi_thrd
5961             .ai8_nctb_hdr_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5962         ps_enc_ctxt->s_multi_thrd
5963             .ai8_nctb_mpm_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5964         ps_enc_ctxt->s_multi_thrd.ai4_prev_chunk_qp[i4_enc_frm_id][i4_bitrate_instance_num] =
5965             i4_frm_qp;
5966 
5967         /*Frame level data for Sub Pic rc is initalized here */
5968         /*Can be sent once per frame*/
5969         {
5970             WORD32 i4_tot_frame_ctb = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert *
5971                                       ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz;
5972 
5973             /*Accumalated bits of all cu for required CTBS estimated during RDO evaluation*/
5974             ps_ctxt->u4_total_cu_bits = 0;
5975             ps_ctxt->u4_total_cu_hdr_bits = 0;
5976 
5977             ps_ctxt->u4_cu_tot_bits_into_qscale = 0;
5978             ps_ctxt->u4_cu_tot_bits = 0;
5979             ps_ctxt->u4_total_cu_bits_mul_qs = 0;
5980             ps_ctxt->i4_display_num = i4_display_num;
5981             ps_ctxt->i4_sub_pic_level_rc = ps_enc_ctxt->s_multi_thrd.i4_in_frame_rc_enabled;
5982             /*The Qscale is to be generated every 10th of total frame ctb is completed */
5983             //ps_ctxt->i4_num_ctb_for_out_scale = (10 * i4_tot_frame_ctb)/100 ;
5984             ps_ctxt->i4_num_ctb_for_out_scale = (UPDATE_QP_AT_CTB * i4_tot_frame_ctb) / 100;
5985 
5986             ps_ctxt->i4_cu_qp_sub_pic_rc = (1 << QP_LEVEL_MOD_ACT_FACTOR);
5987             /*Sub Pic RC frame level params */
5988             ps_ctxt->i8_frame_l1_ipe_sad =
5989                 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_pre_intra_sad;
5990             ps_ctxt->i8_frame_l0_ipe_satd =
5991                 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_l0_acc_satd;
5992             ps_ctxt->i8_frame_l1_me_sad =
5993                 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_l1_coarse_me_sad;
5994             ps_ctxt->i8_frame_l1_activity_fact =
5995                 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_level_activity_fact;
5996             if(ps_ctxt->i4_sub_pic_level_rc)
5997             {
5998                 ASSERT(
5999                     ps_curr_inp_prms->ps_curr_inp->s_lap_out
6000                         .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num] != 0);
6001 
6002                 ps_ctxt->ai4_frame_bits_estimated[ps_ctxt->i4_enc_frm_id]
6003                                                  [ps_ctxt->i4_bitrate_instance_num] =
6004                     ps_curr_inp_prms->ps_curr_inp->s_lap_out
6005                         .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num];
6006             }
6007             //ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type = 1;
6008 
6009             ps_ctxt->i4_is_I_scenecut =
6010                 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
6011                  (ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_IDR_FRAME ||
6012                   ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_I_FRAME));
6013 
6014             ps_ctxt->i4_is_non_I_scenecut =
6015                 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
6016                  (ps_ctxt->i4_is_I_scenecut == 0));
6017 
6018             /*ps_ctxt->i4_is_I_only_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_I_only_scd;
6019             ps_ctxt->i4_is_non_I_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_non_I_scd;*/
6020             ps_ctxt->i4_is_model_valid =
6021                 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i4_is_model_valid;
6022         }
6023         /* cb and cr offsets are assumed to be same */
6024         chroma_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset + ps_pps->i1_pic_cb_qp_offset;
6025 
6026         /* assumption of cb = cr qp */
6027         ASSERT(ps_slice_hdr->i1_slice_cb_qp_offset == ps_slice_hdr->i1_slice_cr_qp_offset);
6028         ASSERT(ps_pps->i1_pic_cb_qp_offset == ps_pps->i1_pic_cr_qp_offset);
6029 
6030         ps_ctxt->u1_is_input_data_hbd = (ps_sps->i1_bit_depth_luma_minus8 > 0);
6031 
6032         ps_ctxt->u1_bit_depth = ps_sps->i1_bit_depth_luma_minus8 + 8;
6033 
6034         ps_ctxt->s_mc_ctxt.i4_bit_depth = ps_ctxt->u1_bit_depth;
6035         ps_ctxt->s_mc_ctxt.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
6036 
6037         /*remember chroma qp offset as qp related parameters are calculated at CU level*/
6038         ps_ctxt->i4_chroma_qp_offset = chroma_qp_offset;
6039         ps_ctxt->i1_cu_qp_delta_enable = ps_pps->i1_cu_qp_delta_enabled_flag;
6040         ps_ctxt->i1_entropy_coding_sync_enabled_flag = ps_pps->i1_entropy_coding_sync_enabled_flag;
6041 
6042         ps_ctxt->i4_is_ref_pic = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_ref_pic;
6043         ps_ctxt->i4_temporal_layer = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_temporal_lyr_id;
6044         ps_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER;
6045         ps_ctxt->i4_use_const_lamda_modifier =
6046             ps_ctxt->i4_use_const_lamda_modifier ||
6047             ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6048               (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
6049              ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6050                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) ||
6051               (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6052                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) ||
6053               (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6054                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) ||
6055               (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6056                (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3))));
6057 
6058         {
6059             ps_ctxt->f_i_pic_lamda_modifier =
6060                 ps_curr_inp_prms->ps_curr_inp->s_lap_out.f_i_pic_lamda_modifier;
6061         }
6062 
6063         ps_ctxt->i4_frame_qp = i4_frm_qp;
6064         ps_ctxt->i4_frame_mod_qp = i4_frm_qp;
6065         ps_ctxt->i4_cu_qp = i4_frm_qp;
6066         ps_ctxt->i4_prev_cu_qp = i4_frm_qp;
6067         ps_ctxt->i4_chrm_cu_qp =
6068             (ps_ctxt->u1_chroma_array_type == 2)
6069                 ? MIN(i4_frm_qp + chroma_qp_offset, 51)
6070                 : gai1_ihevc_chroma_qp_scale[i4_frm_qp + chroma_qp_offset + MAX_QP_BD_OFFSET];
6071 
6072         ps_ctxt->i4_cu_qp_div6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
6073         i4_div_factor = (i4_frm_qp + 3) / 6;
6074         i4_div_factor = CLIP3(i4_div_factor, 3, 6);
6075         ps_ctxt->i4_cu_qp_mod6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
6076 
6077         ps_ctxt->i4_chrm_cu_qp_div6 =
6078             (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
6079         ps_ctxt->i4_chrm_cu_qp_mod6 =
6080             (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
6081 
6082 #define INTER_RND_QP_BY_6
6083 #ifdef INTER_RND_QP_BY_6
6084 
6085         { /*1/6 rounding for 8 bit b frames*/
6086             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = 85
6087                 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
6088         }
6089 #else
6090         /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
6091         ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
6092 #endif
6093 
6094         if(ISLICE == i1_slice_type)
6095         {
6096             /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
6097             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = 171
6098                 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
6099         }
6100         else
6101         {
6102             /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
6103             ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
6104                 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
6105             /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
6106         }
6107 
6108         ps_ctxt->i1_strong_intra_smoothing_enable_flag = i1_strong_intra_smoothing_enable_flag;
6109 
6110         ps_ctxt->i1_slice_type = i1_slice_type;
6111 
6112         /* intialize the inter pred (MC) context at frame level */
6113         ps_ctxt->s_mc_ctxt.ps_ref_list = aps_ref_list;
6114         ps_ctxt->s_mc_ctxt.i1_weighted_pred_flag = i1_weighted_pred_flag;
6115         ps_ctxt->s_mc_ctxt.i1_weighted_bipred_flag = i1_weighted_bipred_flag;
6116         ps_ctxt->s_mc_ctxt.i4_log2_luma_wght_denom = log2_luma_wght_denom;
6117         ps_ctxt->s_mc_ctxt.i4_log2_chroma_wght_denom = log2_chroma_wght_denom;
6118 
6119         /* intialize the MV pred context at frame level */
6120         ps_ctxt->s_mv_pred_ctxt.ps_ref_list = aps_ref_list;
6121         ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr = ps_slice_hdr;
6122         ps_ctxt->s_mv_pred_ctxt.ps_sps = ps_sps;
6123         ps_ctxt->s_mv_pred_ctxt.i4_log2_parallel_merge_level_minus2 =
6124             ps_pps->i1_log2_parallel_merge_level - 2;
6125 
6126 #if ADAPT_COLOCATED_FROM_L0_FLAG
6127         if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_temporal_mvp_enable_flag)
6128         {
6129             if((ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_num_ref_idx_l1_active > 0) &&
6130                (ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][0]->i4_frame_qp <
6131                 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][0]->i4_frame_qp))
6132             {
6133                 ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_collocated_from_l0_flag = 1;
6134             }
6135         }
6136 #endif
6137         /* Initialization of deblocking params */
6138         ps_ctxt->s_deblk_prms.i4_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
6139         ps_ctxt->s_deblk_prms.i4_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
6140 
6141         ps_ctxt->s_deblk_prms.i4_cb_qp_indx_offset = ps_pps->i1_pic_cb_qp_offset;
6142 
6143         ps_ctxt->s_deblk_prms.i4_cr_qp_indx_offset = ps_pps->i1_pic_cr_qp_offset;
6144         /*init frame level stat accumualtion parameters */
6145         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6146             ->u4_frame_sad_acc = 0;
6147         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6148             ->u4_frame_intra_sad_acc = 0;
6149         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6150             ->u4_frame_open_loop_intra_sad = 0;
6151         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6152             ->i8_frame_open_loop_ssd = 0;
6153         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6154             ->u4_frame_inter_sad_acc = 0;
6155 
6156         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6157             ->i8_frame_cost_acc = 0;
6158         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6159             ->i8_frame_intra_cost_acc = 0;
6160         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6161             ->i8_frame_inter_cost_acc = 0;
6162 
6163         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6164             ->u4_frame_intra_sad = 0;
6165         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6166             ->u4_frame_rdopt_bits = 0;
6167         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6168             ->u4_frame_rdopt_header_bits = 0;
6169         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6170             ->i4_qp_normalized_8x8_cu_sum[0] = 0;
6171         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6172             ->i4_qp_normalized_8x8_cu_sum[1] = 0;
6173         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6174             ->i4_8x8_cu_sum[0] = 0;
6175         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6176             ->i4_8x8_cu_sum[1] = 0;
6177         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6178             ->i8_sad_by_qscale[0] = 0;
6179         ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6180             ->i8_sad_by_qscale[1] = 0;
6181         /* Compute the frame_qstep */
6182         GET_FRAME_QSTEP_FROM_QP(ps_ctxt->i4_frame_qp, ps_ctxt->i4_frame_qstep);
6183 
6184         ps_ctxt->u1_max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_inter;
6185 
6186         ps_ctxt->ps_rc_quant_ctxt = &ps_enc_ctxt->s_rc_quant;
6187         /* intialize the cabac rdopt context at frame level */
6188         ihevce_entropy_rdo_frame_init(
6189             &ps_ctxt->s_rdopt_entropy_ctxt,
6190             ps_slice_hdr,
6191             ps_pps,
6192             ps_sps,
6193             ps_vps,
6194             ps_master_ctxt->au1_cu_skip_top_row,
6195             &ps_enc_ctxt->s_rc_quant);
6196 
6197         /* register the dep mngr instance for forward ME sync */
6198         ps_ctxt->pv_dep_mngr_encloop_dep_me = pv_dep_mngr_encloop_dep_me;
6199     }
6200 }
6201 /*
6202 ******************************************************************************
6203 * \if Function name : ihevce_enc_loop_get_frame_rc_prms \endif
6204 *
6205 * \brief
6206 *    returns Nil
6207 *
6208 * \param[in] pv_enc_loop_ctxt : pointer to encode loop context
6209 * \param[out]ps_rc_prms       : ptr to frame level info structure
6210 *
6211 * \return
6212 *    None
6213 *
6214 * \author
6215 *  Ittiam
6216 *
6217 *****************************************************************************
6218 */
ihevce_enc_loop_get_frame_rc_prms(void * pv_enc_loop_ctxt,rc_bits_sad_t * ps_rc_prms,WORD32 i4_br_id,WORD32 i4_enc_frm_id)6219 void ihevce_enc_loop_get_frame_rc_prms(
6220     void *pv_enc_loop_ctxt,
6221     rc_bits_sad_t *ps_rc_prms,
6222     WORD32 i4_br_id,  //bitrate instance id
6223     WORD32 i4_enc_frm_id)  // frame id
6224 {
6225     /*Get the master thread pointer*/
6226     ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
6227     ihevce_enc_loop_ctxt_t *ps_ctxt;
6228     UWORD32 total_frame_intra_sad = 0, total_frame_open_loop_intra_sad = 0;
6229     LWORD64 i8_total_ssd_frame = 0;
6230     UWORD32 total_frame_sad = 0;
6231     UWORD32 total_frame_rdopt_bits = 0;
6232     UWORD32 total_frame_rdopt_header_bits = 0;
6233     WORD32 i4_qp_normalized_8x8_cu_sum[2] = { 0, 0 };
6234     WORD32 i4_8x8_cu_sum[2] = { 0, 0 };
6235     LWORD64 i8_sad_by_qscale[2] = { 0, 0 };
6236     WORD32 i4_curr_qp_acc = 0;
6237     WORD32 i;
6238 
6239     /* ENC_LOOP master state structure */
6240     ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
6241 
6242     if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
6243     {
6244         i4_enc_frm_id = 0;
6245     }
6246     /*loop through all threads and accumulate intra sad across all threads*/
6247     for(i = 0; i < ps_master_ctxt->i4_num_proc_thrds; i++)
6248     {
6249         /* ENC_LOOP state structure */
6250         ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i];
6251         total_frame_open_loop_intra_sad +=
6252             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad;
6253         i8_total_ssd_frame +=
6254             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd;
6255         total_frame_intra_sad +=
6256             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad;
6257         total_frame_sad +=
6258             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc;
6259         total_frame_rdopt_bits +=
6260             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits;
6261         total_frame_rdopt_header_bits +=
6262             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits;
6263         i4_qp_normalized_8x8_cu_sum[0] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
6264                                               ->i4_qp_normalized_8x8_cu_sum[0];
6265         i4_qp_normalized_8x8_cu_sum[1] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
6266                                               ->i4_qp_normalized_8x8_cu_sum[1];
6267         i4_8x8_cu_sum[0] +=
6268             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[0];
6269         i4_8x8_cu_sum[1] +=
6270             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[1];
6271         i8_sad_by_qscale[0] +=
6272             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[0];
6273         i8_sad_by_qscale[1] +=
6274             ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[1];
6275     }
6276 
6277     ps_rc_prms->u4_open_loop_intra_sad = total_frame_open_loop_intra_sad;
6278     ps_rc_prms->i8_total_ssd_frame = i8_total_ssd_frame;
6279     ps_rc_prms->u4_total_sad = total_frame_sad;
6280     ps_rc_prms->u4_total_texture_bits = total_frame_rdopt_bits - total_frame_rdopt_header_bits;
6281     ps_rc_prms->u4_total_header_bits = total_frame_rdopt_header_bits;
6282     /*This accumulation of intra frame sad is not intact. This can only be a temp change*/
6283     ps_rc_prms->u4_total_intra_sad = total_frame_intra_sad;
6284     ps_rc_prms->i4_qp_normalized_8x8_cu_sum[0] = i4_qp_normalized_8x8_cu_sum[0];
6285     ps_rc_prms->i4_qp_normalized_8x8_cu_sum[1] = i4_qp_normalized_8x8_cu_sum[1];
6286     ps_rc_prms->i4_8x8_cu_sum[0] = i4_8x8_cu_sum[0];
6287     ps_rc_prms->i4_8x8_cu_sum[1] = i4_8x8_cu_sum[1];
6288     ps_rc_prms->i8_sad_by_qscale[0] = i8_sad_by_qscale[0];
6289     ps_rc_prms->i8_sad_by_qscale[1] = i8_sad_by_qscale[1];
6290 }
6291