1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /*!
22 ******************************************************************************
23 * \file ihevce_enc_loop_pass.c
24 *
25 * \brief
26 * This file contains Encoder normative loop pass related functions
27 *
28 * \date
29 * 18/09/2012
30 *
31 * \author
32 * Ittiam
33 *
34 *
35 * List of Functions
36 *
37 *
38 ******************************************************************************
39 */
40
41 /*****************************************************************************/
42 /* File Includes */
43 /*****************************************************************************/
44 /* System include files */
45 #include <stdio.h>
46 #include <string.h>
47 #include <stdlib.h>
48 #include <assert.h>
49 #include <stdarg.h>
50 #include <math.h>
51 #include <limits.h>
52
53 /* User include files */
54 #include "ihevc_typedefs.h"
55 #include "itt_video_api.h"
56 #include "ihevce_api.h"
57
58 #include "rc_cntrl_param.h"
59 #include "rc_frame_info_collector.h"
60 #include "rc_look_ahead_params.h"
61
62 #include "ihevc_defs.h"
63 #include "ihevc_macros.h"
64 #include "ihevc_debug.h"
65 #include "ihevc_structs.h"
66 #include "ihevc_platform_macros.h"
67 #include "ihevc_deblk.h"
68 #include "ihevc_itrans_recon.h"
69 #include "ihevc_chroma_itrans_recon.h"
70 #include "ihevc_chroma_intra_pred.h"
71 #include "ihevc_intra_pred.h"
72 #include "ihevc_inter_pred.h"
73 #include "ihevc_mem_fns.h"
74 #include "ihevc_padding.h"
75 #include "ihevc_weighted_pred.h"
76 #include "ihevc_sao.h"
77 #include "ihevc_resi_trans.h"
78 #include "ihevc_quant_iquant_ssd.h"
79 #include "ihevc_cabac_tables.h"
80 #include "ihevc_common_tables.h"
81 #include "ihevc_quant_tables.h"
82
83 #include "ihevce_defs.h"
84 #include "ihevce_hle_interface.h"
85 #include "ihevce_lap_enc_structs.h"
86 #include "ihevce_multi_thrd_structs.h"
87 #include "ihevce_multi_thrd_funcs.h"
88 #include "ihevce_me_common_defs.h"
89 #include "ihevce_had_satd.h"
90 #include "ihevce_error_codes.h"
91 #include "ihevce_bitstream.h"
92 #include "ihevce_cabac.h"
93 #include "ihevce_rdoq_macros.h"
94 #include "ihevce_function_selector.h"
95 #include "ihevce_enc_structs.h"
96 #include "ihevce_entropy_structs.h"
97 #include "ihevce_cmn_utils_instr_set_router.h"
98 #include "ihevce_ipe_instr_set_router.h"
99 #include "ihevce_decomp_pre_intra_structs.h"
100 #include "ihevce_decomp_pre_intra_pass.h"
101 #include "ihevce_enc_loop_structs.h"
102 #include "ihevce_nbr_avail.h"
103 #include "ihevce_enc_loop_utils.h"
104 #include "ihevce_sub_pic_rc.h"
105 #include "ihevce_global_tables.h"
106 #include "ihevce_bs_compute_ctb.h"
107 #include "ihevce_cabac_rdo.h"
108 #include "ihevce_deblk.h"
109 #include "ihevce_frame_process.h"
110 #include "ihevce_rc_enc_structs.h"
111 #include "hme_datatype.h"
112 #include "hme_interface.h"
113 #include "hme_common_defs.h"
114 #include "hme_defs.h"
115 #include "ihevce_me_instr_set_router.h"
116 #include "ihevce_enc_subpel_gen.h"
117 #include "ihevce_inter_pred.h"
118 #include "ihevce_mv_pred.h"
119 #include "ihevce_mv_pred_merge.h"
120 #include "ihevce_enc_loop_inter_mode_sifter.h"
121 #include "ihevce_enc_cu_recursion.h"
122 #include "ihevce_enc_loop_pass.h"
123 #include "ihevce_common_utils.h"
124 #include "ihevce_dep_mngr_interface.h"
125 #include "ihevce_sao.h"
126 #include "ihevce_tile_interface.h"
127 #include "ihevce_profile.h"
128
129 #include "cast_types.h"
130 #include "osal.h"
131 #include "osal_defaults.h"
132
133 /*****************************************************************************/
134 /* Globals */
135 /*****************************************************************************/
136 extern PART_ID_T ge_part_type_to_part_id[MAX_PART_TYPES][MAX_NUM_PARTS];
137
138 extern UWORD8 gau1_num_parts_in_part_type[MAX_PART_TYPES];
139
140 /*****************************************************************************/
141 /* Constant Macros */
142 /*****************************************************************************/
143 #define UPDATE_QP_AT_CTB 6
144 #define INTRAPRED_SIMD_LEFT_PADDING 16
145 #define INTRAPRED_SIMD_RIGHT_PADDING 8
146
147 /*****************************************************************************/
148 /* Function Definitions */
149 /*****************************************************************************/
150
151 /*!
152 ******************************************************************************
153 * \if Function name : ihevce_enc_loop_ctb_left_copy \endif
154 *
155 * \brief
156 * This function copy the right data of CTB to context buffers
157 *
158 * \date
159 * 18/09/2012
160 *
161 * \author
162 * Ittiam
163 *
164 * \return
165 *
166 * List of Functions
167 *
168 *
169 ******************************************************************************
170 */
ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms)171 void ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms)
172 {
173 /* ------------------------------------------------------------------ */
174 /* copy the right coloum data to the context buffers */
175 /* ------------------------------------------------------------------ */
176
177 nbr_4x4_t *ps_left_nbr;
178 nbr_4x4_t *ps_nbr;
179 UWORD8 *pu1_buff;
180 WORD32 num_pels;
181 UWORD8 *pu1_luma_left, *pu1_chrm_left;
182
183 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
184
185 pu1_luma_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
186 pu1_chrm_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
187 ps_left_nbr = &ps_ctxt->as_left_col_nbr[0];
188
189 /* copy right luma data */
190 pu1_buff = ps_cu_prms->pu1_luma_recon + ps_cu_prms->i4_ctb_size - 1;
191
192 for(num_pels = 0; num_pels < ps_cu_prms->i4_ctb_size; num_pels++)
193 {
194 WORD32 i4_indx = ps_cu_prms->i4_luma_recon_stride * num_pels;
195
196 pu1_luma_left[num_pels] = pu1_buff[i4_indx];
197 }
198
199 /* copy right chroma data */
200 pu1_buff = ps_cu_prms->pu1_chrm_recon + ps_cu_prms->i4_ctb_size - 2;
201
202 for(num_pels = 0; num_pels < (ps_cu_prms->i4_ctb_size >> (0 == u1_is_422)); num_pels++)
203 {
204 WORD32 i4_indx = ps_cu_prms->i4_chrm_recon_stride * num_pels;
205
206 *pu1_chrm_left++ = pu1_buff[i4_indx];
207 *pu1_chrm_left++ = pu1_buff[i4_indx + 1];
208 }
209
210 /* store the nbr 4x4 data at ctb level */
211 {
212 WORD32 ctr;
213 WORD32 nbr_strd;
214
215 nbr_strd = ps_cu_prms->i4_ctb_size >> 2;
216
217 /* copy right nbr data */
218 ps_nbr = &ps_ctxt->as_ctb_nbr_arr[0];
219 ps_nbr += ((ps_cu_prms->i4_ctb_size >> 2) - 1);
220
221 for(ctr = 0; ctr < (ps_cu_prms->i4_ctb_size >> 2); ctr++)
222 {
223 WORD32 i4_indx = nbr_strd * ctr;
224
225 ps_left_nbr[ctr] = ps_nbr[i4_indx];
226 }
227 }
228 return;
229 }
230
231 /*!
232 ******************************************************************************
233 * \if Function name : ihevce_mark_all_modes_to_evaluate \endif
234 *
235 * \brief
236 * Mark all modes for inter/intra for evaluation. This function will be
237 * called by ref instance
238 *
239 * \param[in] pv_ctxt : pointer to enc_loop module
240 * \param[in] ps_cu_analyse : pointer to cu analyse
241 *
242 * \return
243 * None
244 *
245 * \author
246 * Ittiam
247 *
248 *****************************************************************************
249 */
ihevce_mark_all_modes_to_evaluate(void * pv_ctxt,cu_analyse_t * ps_cu_analyse)250 void ihevce_mark_all_modes_to_evaluate(void *pv_ctxt, cu_analyse_t *ps_cu_analyse)
251 {
252 UWORD8 ctr;
253 WORD32 i4_part;
254
255 (void)pv_ctxt;
256 /* run a loop over all Inter cands */
257 for(ctr = 0; ctr < MAX_INTER_CU_CANDIDATES; ctr++)
258 {
259 ps_cu_analyse->as_cu_inter_cand[ctr].b1_eval_mark = 1;
260 }
261
262 /* run a loop over all intra candidates */
263 if(0 != ps_cu_analyse->u1_num_intra_rdopt_cands)
264 {
265 for(ctr = 0; ctr < MAX_INTRA_CU_CANDIDATES + 1; ctr++)
266 {
267 ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr] = 1;
268 ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr] = 1;
269
270 for(i4_part = 0; i4_part < NUM_PU_PARTS; i4_part++)
271 {
272 ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[i4_part][ctr] = 1;
273 }
274 }
275 }
276 }
277
278 /*!
279 ******************************************************************************
280 * \if Function name : ihevce_cu_mode_decide \endif
281 *
282 * \brief
283 * Coding Unit mode decide function. Performs RD opt and decides the best mode
284 *
* \param[in] ps_ctxt : pointer to enc_loop module
* \param[in] ps_cu_prms : pointer to coding unit params (position, buffer pointers)
* \param[in] ps_cu_analyse : pointer to cu analyse
* \param[out] ps_final_mode_state : pointer to final mode state
* \param[out] pu1_ecd_data : pointer to store coeff data for ECD
* \param[out] ps_col_pu : colocated pu buffer pointer
* \param[out] pu1_col_pu_map : colocated pu map buffer pointer
* \param[in] col_start_pu_idx : pu index start value
293 *
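* \return
* LWORD64 value (the function is declared LWORD64, not void; exact meaning
* to be confirmed against the function's return statement)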
296 *
297 *
298 * \author
299 * Ittiam
300 *
301 *****************************************************************************
302 */
ihevce_cu_mode_decide(ihevce_enc_loop_ctxt_t * ps_ctxt,enc_loop_cu_prms_t * ps_cu_prms,cu_analyse_t * ps_cu_analyse,final_mode_state_t * ps_final_mode_state,UWORD8 * pu1_ecd_data,pu_col_mv_t * ps_col_pu,UWORD8 * pu1_col_pu_map,WORD32 col_start_pu_idx)303 LWORD64 ihevce_cu_mode_decide(
304 ihevce_enc_loop_ctxt_t *ps_ctxt,
305 enc_loop_cu_prms_t *ps_cu_prms,
306 cu_analyse_t *ps_cu_analyse,
307 final_mode_state_t *ps_final_mode_state,
308 UWORD8 *pu1_ecd_data,
309 pu_col_mv_t *ps_col_pu,
310 UWORD8 *pu1_col_pu_map,
311 WORD32 col_start_pu_idx)
312 {
313 enc_loop_chrm_cu_buf_prms_t s_chrm_cu_buf_prms;
314 cu_nbr_prms_t s_cu_nbr_prms;
315 inter_cu_mode_info_t s_inter_cu_mode_info;
316 cu_inter_cand_t *ps_best_inter_cand = NULL;
317 UWORD8 *pu1_cu_top;
318 UWORD8 *pu1_cu_top_left;
319 UWORD8 *pu1_cu_left;
320 UWORD8 *pu1_final_recon = NULL;
321 UWORD8 *pu1_curr_src = NULL;
322 void *pv_curr_src = NULL;
323 void *pv_cu_left = NULL;
324 void *pv_cu_top = NULL;
325 void *pv_cu_top_left = NULL;
326
327 WORD32 cu_left_stride = 0;
328 WORD32 ctr;
329 WORD32 rd_opt_best_idx;
330 LWORD64 rd_opt_least_cost;
331 WORD32 rd_opt_curr_idx;
332 WORD32 num_4x4_in_ctb;
333 WORD32 nbr_4x4_left_strd = 0;
334
335 nbr_4x4_t *ps_topleft_nbr_4x4;
336 nbr_4x4_t *ps_left_nbr_4x4 = NULL;
337 nbr_4x4_t *ps_top_nbr_4x4 = NULL;
338 nbr_4x4_t *ps_curr_nbr_4x4;
339 WORD32 enable_intra_eval_flag;
340 WORD32 i4_best_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1;
341 WORD32 curr_cu_pos_in_row;
342 WORD32 cu_top_right_offset;
343 WORD32 cu_top_right_dep_pos;
344 WORD32 i4_ctb_x_off, i4_ctb_y_off;
345
346 UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
347 (void)ps_final_mode_state;
348 /* default init */
349 rd_opt_least_cost = MAX_COST_64;
350 ps_ctxt->as_cu_prms[0].i8_best_rdopt_cost = MAX_COST_64;
351 ps_ctxt->as_cu_prms[1].i8_best_rdopt_cost = MAX_COST_64;
352
353 /* Zero cbf tool is enabled by default for all presets */
354 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
355
356 rd_opt_best_idx = 1;
357 rd_opt_curr_idx = 0;
358 enable_intra_eval_flag = 1;
359
360 /* CU params in enc ctxt*/
361 ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
362 ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
363 ps_ctxt->ps_enc_out_ctxt->u1_cu_size = ps_cu_analyse->u1_cu_size;
364
365 num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
366 ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
367 ps_curr_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
368 ps_curr_nbr_4x4 += ((ps_cu_analyse->b3_cu_pos_y << 1) * num_4x4_in_ctb);
369
370 /* CB and Cr are pixel interleaved */
371 s_chrm_cu_buf_prms.i4_chrm_recon_stride = ps_cu_prms->i4_chrm_recon_stride;
372
373 s_chrm_cu_buf_prms.i4_chrm_src_stride = ps_cu_prms->i4_chrm_src_stride;
374
375 if(!ps_ctxt->u1_is_input_data_hbd)
376 {
377 /* --------------------------------------- */
378 /* ----- Luma Pointers Derivation -------- */
379 /* --------------------------------------- */
380
381 /* based on CU position derive the pointers */
382 pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
383
384 pu1_curr_src = ps_cu_prms->pu1_luma_src + (ps_cu_analyse->b3_cu_pos_x << 3);
385
386 pu1_final_recon += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
387
388 pu1_curr_src += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_src_stride);
389
390 pv_curr_src = pu1_curr_src;
391
392 /* CU left */
393 if(0 == ps_cu_analyse->b3_cu_pos_x)
394 {
395 /* CTB boundary */
396 pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
397 pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << 3);
398 cu_left_stride = 1;
399
400 ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0];
401 ps_left_nbr_4x4 += ps_cu_analyse->b3_cu_pos_y << 1;
402 nbr_4x4_left_strd = 1;
403 }
404 else
405 {
406 /* inside CTB */
407 pu1_cu_left = pu1_final_recon - 1;
408 cu_left_stride = ps_cu_prms->i4_luma_recon_stride;
409
410 ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1;
411 nbr_4x4_left_strd = num_4x4_in_ctb;
412 }
413
414 pv_cu_left = pu1_cu_left;
415
416 /* CU top */
417 if(0 == ps_cu_analyse->b3_cu_pos_y)
418 {
419 /* CTB boundary */
420 pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_luma;
421 pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
422 pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
423
424 ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr;
425 ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2));
426 ps_top_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
427 }
428 else
429 {
430 /* inside CTB */
431 pu1_cu_top = pu1_final_recon - ps_cu_prms->i4_luma_recon_stride;
432
433 ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb;
434 }
435
436 pv_cu_top = pu1_cu_top;
437
438 /* CU top left */
439 if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
440 {
441 /* left ctb boundary but not first row */
442 pu1_cu_top_left = pu1_cu_left - 1; /* stride is 1 */
443 ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - 1; /* stride is 1 */
444 }
445 else
446 {
447 /* rest all cases topleft is top -1 */
448 pu1_cu_top_left = pu1_cu_top - 1;
449 ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
450 }
451
452 pv_cu_top_left = pu1_cu_top_left;
453
454 /* Store the CU nbr information in the ctxt for final reconstruction fun. */
455 s_cu_nbr_prms.nbr_4x4_left_strd = nbr_4x4_left_strd;
456 s_cu_nbr_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
457 s_cu_nbr_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
458 s_cu_nbr_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
459 s_cu_nbr_prms.pu1_cu_left = pu1_cu_left;
460 s_cu_nbr_prms.pu1_cu_top = pu1_cu_top;
461 s_cu_nbr_prms.pu1_cu_top_left = pu1_cu_top_left;
462 s_cu_nbr_prms.cu_left_stride = cu_left_stride;
463
464 /* ------------------------------------------------------------ */
465 /* -- Initialize the number of neigbour skip cu count for rdo --*/
466 /* ------------------------------------------------------------ */
467 {
468 nbr_avail_flags_t s_nbr;
469 WORD32 i4_num_nbr_skip_cus = 0;
470
471 /* get the neighbour availability flags for current cu */
472 ihevce_get_nbr_intra(
473 &s_nbr,
474 ps_ctxt->pu1_ctb_nbr_map,
475 ps_ctxt->i4_nbr_map_strd,
476 (ps_cu_analyse->b3_cu_pos_x << 1),
477 (ps_cu_analyse->b3_cu_pos_y << 1),
478 (ps_cu_analyse->u1_cu_size >> 2));
479 if(s_nbr.u1_top_avail)
480 {
481 i4_num_nbr_skip_cus += ps_top_nbr_4x4->b1_skip_flag;
482 }
483
484 if(s_nbr.u1_left_avail)
485 {
486 i4_num_nbr_skip_cus += ps_left_nbr_4x4->b1_skip_flag;
487 }
488 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0].i4_num_nbr_skip_cus =
489 i4_num_nbr_skip_cus;
490 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1].i4_num_nbr_skip_cus =
491 i4_num_nbr_skip_cus;
492 }
493
494 /* --------------------------------------- */
495 /* --- Chroma Pointers Derivation -------- */
496 /* --------------------------------------- */
497
498 /* based on CU position derive the pointers */
499 s_chrm_cu_buf_prms.pu1_final_recon =
500 ps_cu_prms->pu1_chrm_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
501
502 s_chrm_cu_buf_prms.pu1_curr_src =
503 ps_cu_prms->pu1_chrm_src + (ps_cu_analyse->b3_cu_pos_x << 3);
504
505 s_chrm_cu_buf_prms.pu1_final_recon +=
506 ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_recon_stride);
507
508 s_chrm_cu_buf_prms.pu1_curr_src +=
509 ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_src_stride);
510
511 /* CU left */
512 if(0 == ps_cu_analyse->b3_cu_pos_x)
513 {
514 /* CTB boundary */
515 s_chrm_cu_buf_prms.pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
516 s_chrm_cu_buf_prms.pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 3));
517 s_chrm_cu_buf_prms.i4_cu_left_stride = 2;
518 }
519 else
520 {
521 /* inside CTB */
522 s_chrm_cu_buf_prms.pu1_cu_left = s_chrm_cu_buf_prms.pu1_final_recon - 2;
523 s_chrm_cu_buf_prms.i4_cu_left_stride = ps_cu_prms->i4_chrm_recon_stride;
524 }
525
526 /* CU top */
527 if(0 == ps_cu_analyse->b3_cu_pos_y)
528 {
529 /* CTB boundary */
530 s_chrm_cu_buf_prms.pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_chroma;
531 s_chrm_cu_buf_prms.pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
532 s_chrm_cu_buf_prms.pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
533 }
534 else
535 {
536 /* inside CTB */
537 s_chrm_cu_buf_prms.pu1_cu_top =
538 s_chrm_cu_buf_prms.pu1_final_recon - ps_cu_prms->i4_chrm_recon_stride;
539 }
540
541 /* CU top left */
542 if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
543 {
544 /* left ctb boundary but not first row */
545 s_chrm_cu_buf_prms.pu1_cu_top_left =
546 s_chrm_cu_buf_prms.pu1_cu_left - 2; /* stride is 1 (2 pixels) */
547 }
548 else
549 {
550 /* rest all cases topleft is top -2 */
551 s_chrm_cu_buf_prms.pu1_cu_top_left = s_chrm_cu_buf_prms.pu1_cu_top - 2;
552 }
553 }
554
555 /* Set Variables for Dep. Checking and Setting */
556 i4_ctb_x_off = (ps_cu_prms->i4_ctb_pos << 6);
557
558 i4_ctb_y_off = ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y;
559 ps_ctxt->i4_satd_buf_idx = rd_opt_curr_idx;
560
561 /* Set the pred pointer count for ME/intra to 0 to start */
562 ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count = 0;
563
564 ASSERT(
565 (ps_cu_analyse->u1_num_inter_cands > 0) || (ps_cu_analyse->u1_num_intra_rdopt_cands > 0));
566
567 ASSERT(ps_cu_analyse->u1_num_inter_cands <= MAX_INTER_CU_CANDIDATES);
568 s_inter_cu_mode_info.u1_num_inter_cands = 0;
569 s_inter_cu_mode_info.u1_idx_of_worst_cost_in_cost_array = 0;
570 s_inter_cu_mode_info.u1_idx_of_worst_cost_in_pred_buf_array = 0;
571
572 ps_ctxt->s_cu_inter_merge_skip.u1_num_merge_cands = 0;
573 ps_ctxt->s_cu_inter_merge_skip.u1_num_skip_cands = 0;
574 ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type0_cands = 0;
575 ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type1_cands = 0;
576 ps_ctxt->s_pred_buf_data.i4_pred_stride = ps_cu_analyse->u1_cu_size;
577 if(0 != ps_cu_analyse->u1_num_inter_cands)
578 {
579 ihevce_inter_cand_sifter_prms_t s_prms;
580
581 UWORD8 u1_enable_top_row_sync;
582
583 if(ps_ctxt->u1_disable_intra_eval)
584 {
585 u1_enable_top_row_sync = !DISABLE_TOP_SYNC;
586 }
587 else
588 {
589 u1_enable_top_row_sync = 1;
590 }
591
592 if((!ps_ctxt->u1_use_top_at_ctb_boundary) && u1_enable_top_row_sync)
593 {
594 /* Wait till top data is ready */
595 /* Currently checking till top right CU */
596 curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
597
598 if(i4_ctb_y_off == 0)
599 {
600 /* No wait for 1st row */
601 cu_top_right_offset = -(MAX_CTB_SIZE);
602 {
603 ihevce_tile_params_t *ps_col_tile_params =
604 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
605 ps_ctxt->i4_tile_col_idx);
606 /* No wait for 1st row */
607 cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
608 }
609 cu_top_right_dep_pos = 0;
610 }
611 else
612 {
613 cu_top_right_offset = (ps_cu_analyse->u1_cu_size) + 4;
614 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
615 }
616
617 if(0 == ps_cu_analyse->b3_cu_pos_y)
618 {
619 ihevce_dmgr_chk_row_row_sync(
620 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
621 curr_cu_pos_in_row,
622 cu_top_right_offset,
623 cu_top_right_dep_pos,
624 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
625 ps_ctxt->thrd_id);
626 }
627 }
628
629 if(ps_ctxt->i1_cu_qp_delta_enable)
630 {
631 ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, 4, 0);
632 }
633
634 s_prms.i4_ctb_nbr_map_stride = ps_ctxt->i4_nbr_map_strd;
635 s_prms.i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands;
636 s_prms.i4_nbr_4x4_left_strd = nbr_4x4_left_strd;
637 s_prms.i4_src_strd = ps_cu_prms->i4_luma_src_stride;
638 s_prms.ps_cu_inter_merge_skip = &ps_ctxt->s_cu_inter_merge_skip;
639 s_prms.aps_cu_nbr_buf[0] = &ps_ctxt->as_cu_nbr[ps_ctxt->i4_satd_buf_idx][0];
640 s_prms.aps_cu_nbr_buf[1] = &ps_ctxt->as_cu_nbr[!ps_ctxt->i4_satd_buf_idx][0];
641 s_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
642 s_prms.ps_mc_ctxt = &ps_ctxt->s_mc_ctxt;
643 s_prms.ps_me_cands = ps_cu_analyse->as_cu_inter_cand;
644 s_prms.ps_mixed_modes_datastore = &ps_ctxt->s_mixed_mode_inter_cu;
645 s_prms.ps_mv_pred_ctxt = &ps_ctxt->s_mv_pred_ctxt;
646 s_prms.ps_pred_buf_data = &ps_ctxt->s_pred_buf_data;
647 s_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
648 s_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
649 s_prms.pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map;
650 s_prms.pv_src = pv_curr_src;
651 s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 3;
652 s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 3;
653 s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
654 s_prms.u1_max_merge_candidates = ps_ctxt->i4_max_merge_candidates;
655 s_prms.u1_num_me_cands = ps_cu_analyse->u1_num_inter_cands;
656 s_prms.u1_use_satd_for_merge_eval = ps_ctxt->i4_use_satd_for_merge_eval;
657 s_prms.u1_quality_preset = ps_ctxt->i4_quality_preset;
658 s_prms.i1_slice_type = ps_ctxt->i1_slice_type;
659 s_prms.ps_cu_me_intra_pred_prms = &ps_ctxt->s_cu_me_intra_pred_prms;
660 s_prms.u1_is_hbd = (ps_ctxt->u1_bit_depth > 8);
661 s_prms.ps_inter_cu_mode_info = &s_inter_cu_mode_info;
662 s_prms.pai4_mv_cost = ps_cu_analyse->ai4_mv_cost;
663 s_prms.i4_lambda_qf = ps_ctxt->i4_sad_lamda;
664 s_prms.u1_use_merge_cand_from_top_row =
665 (u1_enable_top_row_sync || (s_prms.u1_cu_pos_y > 0));
666 s_prms.u1_merge_idx_cabac_model =
667 ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[IHEVC_CAB_MERGE_IDX_EXT];
668 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
669 s_prms.pai4_me_err_metric = ps_cu_analyse->ai4_err_metric;
670 s_prms.u1_reuse_me_sad = 1;
671 #else
672 s_prms.u1_reuse_me_sad = 0;
673 #endif
674
675 if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_type != PSLICE)
676 {
677 if(ps_ctxt->i4_temporal_layer == 1)
678 {
679 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_BREF;
680 }
681 else
682 {
683 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
684 }
685 }
686 else
687 {
688 s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_P;
689 }
690 s_prms.u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
691
692 if(s_prms.u1_is_cu_noisy)
693 {
694 s_prms.i4_lambda_qf =
695 ((float)s_prms.i4_lambda_qf) * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f;
696 }
697 s_prms.pf_luma_inter_pred_pu = ihevce_luma_inter_pred_pu;
698
699 s_prms.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
700
701 s_prms.pf_evalsad_pt_npu_mxn_8bit = (FT_SAD_EVALUATOR *)ps_ctxt->pv_evalsad_pt_npu_mxn_8bit;
702 ihevce_inter_cand_sifter(&s_prms);
703 }
704 if(u1_is_422)
705 {
706 UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX - 1];
707 UWORD8 u1_num_bufs_allocated;
708
709 u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
710 au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX - 1);
711
712 ASSERT(u1_num_bufs_allocated == (NUM_CU_ME_INTRA_PRED_IDX - 1));
713
714 for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
715 ctr++)
716 {
717 {
718 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
719 (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
720 }
721
722 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
723
724 ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
725 }
726
727 {
728 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
729 (UWORD8 *)ps_ctxt->pv_422_chroma_intra_pred_buf;
730 }
731
732 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
733
734 ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
735 }
736 else
737 {
738 UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX];
739 UWORD8 u1_num_bufs_allocated;
740
741 u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
742 au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX);
743
744 ASSERT(u1_num_bufs_allocated == NUM_CU_ME_INTRA_PRED_IDX);
745
746 for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
747 ctr++)
748 {
749 {
750 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
751 (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
752 }
753
754 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
755
756 ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
757 }
758 }
759
760 ihevce_mark_all_modes_to_evaluate(ps_ctxt, ps_cu_analyse);
761
762 ps_ctxt->as_cu_prms[0].s_recon_datastore.u1_is_lumaRecon_available = 0;
763 ps_ctxt->as_cu_prms[1].s_recon_datastore.u1_is_lumaRecon_available = 0;
764 ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
765 ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
766 ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
767 ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
768 ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
769 ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
770 /* --------------------------------------- */
771 /* ------ Inter RD OPT stage ------------- */
772 /* --------------------------------------- */
773 if(0 != s_inter_cu_mode_info.u1_num_inter_cands)
774 {
775 UWORD8 u1_ssd_bit_info_ctr = 0;
776
777 /* -- run a loop over all Inter rd opt cands ------ */
778 for(ctr = 0; ctr < s_inter_cu_mode_info.u1_num_inter_cands; ctr++)
779 {
780 cu_inter_cand_t *ps_inter_cand;
781
782 LWORD64 rd_opt_cost = 0;
783
784 ps_inter_cand = s_inter_cu_mode_info.aps_cu_data[ctr];
785
786 if((ps_inter_cand->b1_skip_flag) || (ps_inter_cand->as_inter_pu[0].b1_merge_flag) ||
787 (ps_inter_cand->b3_part_size && ps_inter_cand->as_inter_pu[1].b1_merge_flag))
788 {
789 ps_inter_cand->b1_eval_mark = 1;
790 }
791
792 /****************************************************************/
793 /* This check is only valid for derived instances. */
794 /* check if this mode needs to be evaluated or not. */
795 /* if it is a skip candidate, go ahead and evaluate it even if */
796 /* it has not been marked while sorting. */
797 /****************************************************************/
798 if((0 == ps_inter_cand->b1_eval_mark) && (0 == ps_inter_cand->b1_skip_flag))
799 {
800 continue;
801 }
802
803 /* RDOPT related copies and settings */
804 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
805
806 /* RDOPT copy States : Prev Cu best to current init */
807 COPY_CABAC_STATES(
808 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
809 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
810 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
811 /* MVP ,MVD calc and Motion compensation */
812 rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
813 ps_ctxt,
814 ps_inter_cand,
815 ps_cu_analyse->u1_cu_size,
816 ps_cu_analyse->b3_cu_pos_x,
817 ps_cu_analyse->b3_cu_pos_y,
818 ps_left_nbr_4x4,
819 ps_top_nbr_4x4,
820 ps_topleft_nbr_4x4,
821 nbr_4x4_left_strd,
822 rd_opt_curr_idx);
823
824 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
825 if((ps_ctxt->u1_bit_depth == 8) && (!ps_inter_cand->b1_skip_flag))
826 {
827 ihevce_determine_tu_tree_distribution(
828 ps_inter_cand,
829 (me_func_selector_t *)ps_ctxt->pv_err_func_selector,
830 ps_ctxt->ai2_scratch,
831 (UWORD8 *)pv_curr_src,
832 ps_cu_prms->i4_luma_src_stride,
833 ps_ctxt->i4_satd_lamda,
834 LAMBDA_Q_SHIFT,
835 ps_cu_analyse->u1_cu_size,
836 ps_ctxt->u1_max_tr_depth);
837 }
838 #endif
839 #if DISABLE_ZERO_ZBF_IN_INTER
840 ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
841 #else
842 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
843 #endif
844 /* Recon loop with different TUs based on partition type*/
845 rd_opt_cost += ((pf_inter_rdopt_cu_ntu)ps_ctxt->pv_inter_rdopt_cu_ntu)(
846 ps_ctxt,
847 ps_cu_prms,
848 pv_curr_src,
849 ps_cu_analyse->u1_cu_size,
850 ps_cu_analyse->b3_cu_pos_x,
851 ps_cu_analyse->b3_cu_pos_y,
852 rd_opt_curr_idx,
853 &s_chrm_cu_buf_prms,
854 ps_inter_cand,
855 ps_cu_analyse,
856 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
857 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
858 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
859 100.0);
860
861 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
862 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
863 {
864 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
865 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
866 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
867 }
868 #endif
869
870 /* based on the rd opt cost choose the best and current index */
871 if(rd_opt_cost < rd_opt_least_cost)
872 {
873 /* swap the best and current indx */
874 rd_opt_best_idx = !rd_opt_best_idx;
875 rd_opt_curr_idx = !rd_opt_curr_idx;
876
877 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
878 rd_opt_least_cost = rd_opt_cost;
879 i4_best_cu_qp = ps_ctxt->i4_cu_qp;
880
881 /* Store the best Inter cand. for final_recon function */
882 ps_best_inter_cand = ps_inter_cand;
883 }
884
885 /* set the neighbour map to 0 */
886 ihevce_set_nbr_map(
887 ps_ctxt->pu1_ctb_nbr_map,
888 ps_ctxt->i4_nbr_map_strd,
889 (ps_cu_analyse->b3_cu_pos_x << 1),
890 (ps_cu_analyse->b3_cu_pos_y << 1),
891 (ps_cu_analyse->u1_cu_size >> 2),
892 0);
893
894 } /* end of loop for all the Inter RD OPT cand */
895 }
896 /* --------------------------------------- */
897 /* ---- Conditional Eval of Intra -------- */
898 /* --------------------------------------- */
899 {
900 enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
901 ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
902
903 /* check if inter candidates are valid */
904 if(0 != ps_cu_analyse->u1_num_inter_cands)
905 {
906 /* if skip or no residual inter candidates has won then */
907 /* evaluation of intra candidates is disabled */
908 if((1 == ps_enc_loop_bestprms->u1_skip_flag) ||
909 (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
910 {
911 enable_intra_eval_flag = 0;
912 }
913 }
914 /* Disable Intra Gating for HIGH QUALITY PRESET */
915 #if !ENABLE_INTRA_GATING_FOR_HQ
916 if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
917 {
918 enable_intra_eval_flag = 1;
919
920 #if DISABLE_LARGE_INTRA_PQ
921 if((IHEVCE_QUALITY_P0 == ps_ctxt->i4_quality_preset) && (ps_cu_prms->u1_is_cu_noisy) &&
922 (ps_ctxt->i1_slice_type != ISLICE) && (0 != s_inter_cu_mode_info.u1_num_inter_cands))
923 {
924 if(ps_cu_analyse->u1_cu_size > 16)
925 {
926 /* Disable 32x32 / 64x64 Intra in PQ P and B pics */
927 enable_intra_eval_flag = 0;
928 }
929 else if(ps_cu_analyse->u1_cu_size == 16)
930 {
931 /* Disable tu equal to cu mode in 16x16 Intra in PQ P and B pics */
932 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
933 }
934 }
935 #endif
936 }
937 #endif
938 }
939
940 /* --------------------------------------- */
941 /* ------ Intra RD OPT stage ------------- */
942 /* --------------------------------------- */
943
944 /* -- run a loop over all Intra rd opt cands ------ */
945 if((0 != ps_cu_analyse->u1_num_intra_rdopt_cands) && (1 == enable_intra_eval_flag))
946 {
947 LWORD64 rd_opt_cost;
948 WORD32 end_flag = 0;
949 WORD32 cu_eval_done = 0;
950 WORD32 subcu_eval_done = 0;
951 WORD32 subpu_eval_done = 0;
952 WORD32 max_trans_size;
953 WORD32 sync_wait_stride;
954 max_trans_size = MIN(MAX_TU_SIZE, (ps_cu_analyse->u1_cu_size));
955 sync_wait_stride = (ps_cu_analyse->u1_cu_size) + max_trans_size;
956
957 if(!ps_ctxt->u1_use_top_at_ctb_boundary)
958 {
959 /* Wait till top data is ready */
960 /* Currently checking till top right CU */
961 curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
962
963 if(i4_ctb_y_off == 0)
964 {
965 /* No wait for 1st row */
966 cu_top_right_offset = -(MAX_CTB_SIZE);
967 {
968 ihevce_tile_params_t *ps_col_tile_params =
969 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
970 ps_ctxt->i4_tile_col_idx);
971 /* No wait for 1st row */
972 cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
973 }
974 cu_top_right_dep_pos = 0;
975 }
976 else
977 {
978 cu_top_right_offset = sync_wait_stride;
979 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
980 }
981
982 if(0 == ps_cu_analyse->b3_cu_pos_y)
983 {
984 ihevce_dmgr_chk_row_row_sync(
985 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
986 curr_cu_pos_in_row,
987 cu_top_right_offset,
988 cu_top_right_dep_pos,
989 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
990 ps_ctxt->thrd_id);
991 }
992 }
993 ctr = 0;
994
995 /* Zero cbf tool is disabled for intra CUs */
996 #if ENABLE_ZERO_CBF_IN_INTRA
997 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
998 #else
999 ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
1000 #endif
1001
1002 /* Intra Mode gating based on MPM cand list and encoder quality preset */
1003 if((ps_ctxt->i1_slice_type != ISLICE) && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
1004 {
1005 ihevce_mpm_idx_based_filter_RDOPT_cand(
1006 ps_ctxt,
1007 ps_cu_analyse,
1008 ps_left_nbr_4x4,
1009 ps_top_nbr_4x4,
1010 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0],
1011 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[0]);
1012
1013 ihevce_mpm_idx_based_filter_RDOPT_cand(
1014 ps_ctxt,
1015 ps_cu_analyse,
1016 ps_left_nbr_4x4,
1017 ps_top_nbr_4x4,
1018 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0],
1019 &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[0]);
1020 }
1021
1022 /* Call Chroma SATD function for curr_func_mode in HIGH QUALITY mode */
1023 if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd)
1024 {
1025 /* For cu_size = 64, there won't be any TU_EQ_CU case */
1026 if(64 != ps_cu_analyse->u1_cu_size)
1027 {
1028 /* RDOPT copy States : Prev Cu best to current init */
1029 COPY_CABAC_STATES(
1030 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1031 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1032 IHEVC_CAB_CTXT_END);
1033
1034 /* RDOPT related copies and settings */
1035 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1036
1037 /* Calc. best SATD mode for TU_EQ_CU case */
1038 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
1039 ps_ctxt,
1040 &s_chrm_cu_buf_prms,
1041 ps_cu_analyse,
1042 rd_opt_curr_idx,
1043 TU_EQ_CU,
1044 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1045 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1046 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1047 100.0,
1048 ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
1049
1050 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1051 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1052 {
1053 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1054 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1055 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1056 }
1057 #endif
1058 }
1059
1060 /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
1061 TU_EQ_CU_DIV2 case */
1062
1063 if((ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] !=
1064 255) &&
1065 (8 != ps_cu_analyse->u1_cu_size))
1066 {
1067 /* RDOPT copy States : Prev Cu best to current init */
1068 COPY_CABAC_STATES(
1069 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1070 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1071 IHEVC_CAB_CTXT_END);
1072
1073 /* RDOPT related copies and settings */
1074 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1075
1076 /* Calc. best SATD mode for TU_EQ_CU_DIV2 case */
1077 ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
1078 ps_ctxt,
1079 &s_chrm_cu_buf_prms,
1080 ps_cu_analyse,
1081 rd_opt_curr_idx,
1082 TU_EQ_CU_DIV2,
1083 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1084 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1085 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1086 100.0,
1087 ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
1088
1089 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1090 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1091 {
1092 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1093 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1094 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1095 }
1096 #endif
1097 }
1098 }
1099
1100 while(0 == end_flag)
1101 {
1102 UWORD8 *pu1_mode = NULL;
1103 WORD32 curr_func_mode = 0;
1104 void *pv_pred;
1105
1106 ASSERT(ctr < 36);
1107
1108 /* TU equal to CU size evaluation of different modes */
1109 if(0 == cu_eval_done)
1110 {
1111 /* check if the all the modes have been evaluated */
1112 if(255 == ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr])
1113 {
1114 cu_eval_done = 1;
1115 ctr = 0;
1116 }
1117 else if(
1118 (1 == ctr) &&
1119 ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
1120 (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
1121 (ps_ctxt->i1_slice_type != ISLICE))
1122 {
1123 ctr = 0;
1124 cu_eval_done = 1;
1125 subcu_eval_done = 1;
1126 subpu_eval_done = 1;
1127 }
1128 else
1129 {
1130 if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr])
1131 {
1132 ctr++;
1133 continue;
1134 }
1135
1136 pu1_mode =
1137 &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr];
1138 ctr++;
1139 curr_func_mode = TU_EQ_CU;
1140 }
1141 }
1142 /* Sub CU (NXN) mode evaluation of different pred modes */
1143 if((0 == subpu_eval_done) && (1 == cu_eval_done))
1144 {
1145 /*For NxN modes evaluation all candidates for all PU parts are evaluated */
1146 /*inside the ihevce_intra_rdopt_cu_ntu function, so the subpu_eval_done is set to 1 */
1147 {
1148 pu1_mode = &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][ctr];
1149
1150 curr_func_mode = TU_EQ_SUBCU;
1151 /* check if the any modes have to be evaluated */
1152 if(255 == *pu1_mode)
1153 {
1154 subpu_eval_done = 1;
1155 ctr = 0;
1156 }
1157 else if(ctr != 0) /* If the modes have to be evaluated, then terminate, as all modes are already evaluated */
1158 {
1159 subpu_eval_done = 1;
1160 ctr = 0;
1161 }
1162 else
1163 {
1164 ctr++;
1165 }
1166 }
1167 }
1168
1169 /* TU size equal to CU div2 mode evaluation of different pred modes */
1170 if((0 == subcu_eval_done) && (1 == subpu_eval_done) && (1 == cu_eval_done))
1171 {
1172 /* check if the all the modes have been evaluated */
1173 if(255 ==
1174 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr])
1175 {
1176 subcu_eval_done = 1;
1177 }
1178 else if(
1179 (1 == ctr) &&
1180 ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
1181 (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
1182 (ps_ctxt->i1_slice_type != ISLICE) && (ps_cu_analyse->u1_cu_size == 64))
1183 {
1184 subcu_eval_done = 1;
1185 }
1186 else
1187 {
1188 if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr])
1189 {
1190 ctr++;
1191 continue;
1192 }
1193
1194 pu1_mode = &ps_cu_analyse->s_cu_intra_cand
1195 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr];
1196
1197 ctr++;
1198 curr_func_mode = TU_EQ_CU_DIV2;
1199 }
1200 }
1201
1202 /* check if all CU option have been evalueted */
1203 if((1 == cu_eval_done) && (1 == subcu_eval_done) && (1 == subpu_eval_done))
1204 {
1205 break;
1206 }
1207
1208 /* RDOPT related copies and settings */
1209 ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1210
1211 /* Assign ME/Intra pred buf. to the current intra cand. since we
1212 are storing pred data for final_reon function */
1213 {
1214 pv_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_curr_idx];
1215 }
1216
1217 /* RDOPT copy States : Prev Cu best to current init */
1218 COPY_CABAC_STATES(
1219 &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1220 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1221 IHEVC_CAB_CTXT_END);
1222
1223 /* call the function which performs the normative Intra encode */
1224 rd_opt_cost = ((pf_intra_rdopt_cu_ntu)ps_ctxt->pv_intra_rdopt_cu_ntu)(
1225 ps_ctxt,
1226 ps_cu_prms,
1227 pv_pred,
1228 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_curr_idx],
1229 &s_chrm_cu_buf_prms,
1230 pu1_mode,
1231 ps_cu_analyse,
1232 pv_curr_src,
1233 pv_cu_left,
1234 pv_cu_top,
1235 pv_cu_top_left,
1236 ps_left_nbr_4x4,
1237 ps_top_nbr_4x4,
1238 nbr_4x4_left_strd,
1239 cu_left_stride,
1240 rd_opt_curr_idx,
1241 curr_func_mode,
1242 !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1243 : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1244 (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1245 100.0);
1246
1247 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1248 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1249 {
1250 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1251 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1252 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1253 }
1254 #endif
1255
1256 /* based on the rd opt cost choose the best and current index */
1257 if(rd_opt_cost < rd_opt_least_cost)
1258 {
1259 /* swap the best and current indx */
1260 rd_opt_best_idx = !rd_opt_best_idx;
1261 rd_opt_curr_idx = !rd_opt_curr_idx;
1262 i4_best_cu_qp = ps_ctxt->i4_cu_qp;
1263
1264 rd_opt_least_cost = rd_opt_cost;
1265 ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
1266 }
1267
1268 if((TU_EQ_SUBCU == curr_func_mode) &&
1269 (ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_intra_flag) &&
1270 (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P2) && !FORCE_INTRA_TU_DEPTH_TO_0)
1271 {
1272 UWORD8 au1_tu_eq_cu_div2_modes[4];
1273 UWORD8 au1_freq_of_mode[4];
1274
1275 if(ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_part_mode == SIZE_2Nx2N)
1276 {
1277 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1278 255; //ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode[0];
1279 ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
1280 255;
1281 }
1282 else
1283 {
1284 WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
1285 ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode,
1286 au1_tu_eq_cu_div2_modes,
1287 au1_freq_of_mode,
1288 4);
1289
1290 if(2 == i4_num_clusters)
1291 {
1292 if(au1_freq_of_mode[0] == 3)
1293 {
1294 ps_cu_analyse->s_cu_intra_cand
1295 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1296 au1_tu_eq_cu_div2_modes[0];
1297 ps_cu_analyse->s_cu_intra_cand
1298 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
1299 }
1300 else if(au1_freq_of_mode[1] == 3)
1301 {
1302 ps_cu_analyse->s_cu_intra_cand
1303 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1304 au1_tu_eq_cu_div2_modes[1];
1305 ps_cu_analyse->s_cu_intra_cand
1306 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
1307 }
1308 else
1309 {
1310 ps_cu_analyse->s_cu_intra_cand
1311 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1312 au1_tu_eq_cu_div2_modes[0];
1313 ps_cu_analyse->s_cu_intra_cand
1314 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
1315 au1_tu_eq_cu_div2_modes[1];
1316 ps_cu_analyse->s_cu_intra_cand
1317 .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[2] = 255;
1318 }
1319 }
1320 }
1321 }
1322
1323 /* set the neighbour map to 0 */
1324 ihevce_set_nbr_map(
1325 ps_ctxt->pu1_ctb_nbr_map,
1326 ps_ctxt->i4_nbr_map_strd,
1327 (ps_cu_analyse->b3_cu_pos_x << 1),
1328 (ps_cu_analyse->b3_cu_pos_y << 1),
1329 (ps_cu_analyse->u1_cu_size >> 2),
1330 0);
1331 }
1332
1333 } /* end of Intra RD OPT cand evaluation */
1334
1335 ASSERT(i4_best_cu_qp > (ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1));
1336 ps_ctxt->i4_cu_qp = i4_best_cu_qp;
1337 ps_cu_analyse->i1_cu_qp = i4_best_cu_qp;
1338
1339 /* --------------------------------------- */
1340 /* --------Final mode Recon ---------- */
1341 /* --------------------------------------- */
1342 {
1343 enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
1344 void *pv_final_pred = NULL;
1345 WORD32 final_pred_strd = 0;
1346 void *pv_final_pred_chrm = NULL;
1347 WORD32 final_pred_strd_chrm = 0;
1348 WORD32 packed_pred_mode;
1349
1350 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1351 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
1352 {
1353 pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
1354 }
1355 #else
1356 pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
1357 #endif
1358
1359 ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1360 packed_pred_mode =
1361 ps_enc_loop_bestprms->u1_intra_flag + (ps_enc_loop_bestprms->u1_skip_flag) * 2;
1362
1363 if(!ps_ctxt->u1_is_input_data_hbd)
1364 {
1365 if(ps_enc_loop_bestprms->u1_intra_flag)
1366 {
1367 pv_final_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_best_idx];
1368 final_pred_strd =
1369 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_best_idx];
1370 }
1371 else
1372 {
1373 pv_final_pred = ps_best_inter_cand->pu1_pred_data;
1374 final_pred_strd = ps_best_inter_cand->i4_pred_data_stride;
1375 }
1376
1377 pv_final_pred_chrm =
1378 ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
1379 rd_opt_best_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
1380 (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
1381 final_pred_strd_chrm =
1382 ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
1383 }
1384
1385 ihevce_set_eval_flags(ps_ctxt, ps_enc_loop_bestprms);
1386
1387 {
1388 final_mode_process_prms_t s_prms;
1389
1390 void *pv_cu_luma_recon;
1391 void *pv_cu_chroma_recon;
1392 WORD32 luma_stride, chroma_stride;
1393
1394 if(!ps_ctxt->u1_is_input_data_hbd)
1395 {
1396 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1397 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
1398 {
1399 pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
1400 pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
1401 luma_stride = ps_cu_analyse->u1_cu_size;
1402 chroma_stride = ps_cu_analyse->u1_cu_size;
1403 }
1404 else
1405 {
1406 /* based on CU position derive the luma pointers */
1407 pv_cu_luma_recon = pu1_final_recon;
1408
1409 /* based on CU position derive the chroma pointers */
1410 pv_cu_chroma_recon = s_chrm_cu_buf_prms.pu1_final_recon;
1411
1412 luma_stride = ps_cu_prms->i4_luma_recon_stride;
1413
1414 chroma_stride = ps_cu_prms->i4_chrm_recon_stride;
1415 }
1416 #else
1417 pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
1418 pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
1419 luma_stride = ps_cu_analyse->u1_cu_size;
1420 chroma_stride = ps_cu_analyse->u1_cu_size;
1421 #endif
1422
1423 s_prms.ps_cu_nbr_prms = &s_cu_nbr_prms;
1424 s_prms.ps_best_inter_cand = ps_best_inter_cand;
1425 s_prms.ps_chrm_cu_buf_prms = &s_chrm_cu_buf_prms;
1426 s_prms.packed_pred_mode = packed_pred_mode;
1427 s_prms.rd_opt_best_idx = rd_opt_best_idx;
1428 s_prms.pv_src = pu1_curr_src;
1429 s_prms.src_strd = ps_cu_prms->i4_luma_src_stride;
1430 s_prms.pv_pred = pv_final_pred;
1431 s_prms.pred_strd = final_pred_strd;
1432 s_prms.pv_pred_chrm = pv_final_pred_chrm;
1433 s_prms.pred_chrm_strd = final_pred_strd_chrm;
1434 s_prms.pu1_final_ecd_data = pu1_ecd_data;
1435 s_prms.pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
1436 s_prms.csbf_strd = ps_ctxt->i4_cu_csbf_strd;
1437 s_prms.pv_luma_recon = pv_cu_luma_recon;
1438 s_prms.recon_luma_strd = luma_stride;
1439 s_prms.pv_chrm_recon = pv_cu_chroma_recon;
1440 s_prms.recon_chrma_strd = chroma_stride;
1441 s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
1442 s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
1443 s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
1444 s_prms.i1_cu_qp = ps_cu_analyse->i1_cu_qp;
1445 s_prms.u1_will_cabac_state_change = 1;
1446 s_prms.u1_recompute_sbh_and_rdoq = 0;
1447 s_prms.u1_is_first_pass = 1;
1448 }
1449
1450 #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
1451 s_prms.u1_is_cu_noisy = !ps_enc_loop_bestprms->u1_intra_flag
1452 ? ps_cu_prms->u1_is_cu_noisy
1453 : ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
1454 #endif
1455
1456 ((pf_final_rdopt_mode_prcs)ps_ctxt->pv_final_rdopt_mode_prcs)(ps_ctxt, &s_prms);
1457
1458 #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1459 if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1460 {
1461 ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1462 ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1463 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1464 }
1465 #endif
1466 }
1467 }
1468
1469 /* --------------------------------------- */
1470 /* --------Populate CU out prms ---------- */
1471 /* --------------------------------------- */
1472 {
1473 enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
1474 UWORD8 *pu1_pu_map;
1475 ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1476
1477 /* Corner case : If Part is 2Nx2N and Merge has all TU with zero cbf */
1478 /* then it has to be coded as skip CU */
1479 if((SIZE_2Nx2N == ps_enc_loop_bestprms->u1_part_mode) &&
1480 (1 == ps_enc_loop_bestprms->as_pu_enc_loop[0].b1_merge_flag) &&
1481 (0 == ps_enc_loop_bestprms->u1_skip_flag) && (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
1482 {
1483 ps_enc_loop_bestprms->u1_skip_flag = 1;
1484 }
1485
1486 /* update number PUs in CU */
1487 ps_cu_prms->i4_num_pus_in_cu = ps_enc_loop_bestprms->u2_num_pus_in_cu;
1488
1489 /* ---- populate the colocated pu map index --- */
1490 for(ctr = 0; ctr < ps_enc_loop_bestprms->u2_num_pus_in_cu; ctr++)
1491 {
1492 WORD32 i;
1493 WORD32 vert_ht;
1494 WORD32 horz_wd;
1495
1496 if(ps_enc_loop_bestprms->u1_intra_flag)
1497 {
1498 ps_enc_loop_bestprms->as_col_pu_enc_loop[ctr].b1_intra_flag = 1;
1499 vert_ht = ps_cu_analyse->u1_cu_size >> 2;
1500 horz_wd = ps_cu_analyse->u1_cu_size >> 2;
1501 }
1502 else
1503 {
1504 vert_ht = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_ht + 1) << 2) >> 2);
1505 horz_wd = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_wd + 1) << 2) >> 2);
1506 }
1507
1508 pu1_pu_map = pu1_col_pu_map + ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_x;
1509 pu1_pu_map += (ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_y * num_4x4_in_ctb);
1510
1511 for(i = 0; i < vert_ht; i++)
1512 {
1513 memset(pu1_pu_map, col_start_pu_idx, horz_wd);
1514 pu1_pu_map += num_4x4_in_ctb;
1515 }
1516 /* increment the index */
1517 col_start_pu_idx++;
1518 }
1519 /* ---- copy the colocated PUs to frm pu ----- */
1520 memcpy(
1521 ps_col_pu,
1522 &ps_enc_loop_bestprms->as_col_pu_enc_loop[0],
1523 ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_col_mv_t));
1524
1525 /*---populate qp for 4x4 nbr array based on skip and cbf zero flag---*/
1526 {
1527 entropy_context_t *ps_entropy_ctxt;
1528
1529 WORD32 diff_cu_qp_delta_depth, log2_ctb_size;
1530
1531 WORD32 log2_min_cu_qp_delta_size;
1532 UWORD32 block_addr_align;
1533 ps_entropy_ctxt = ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt;
1534
1535 log2_ctb_size = ps_entropy_ctxt->i1_log2_ctb_size;
1536 diff_cu_qp_delta_depth = ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth;
1537
1538 log2_min_cu_qp_delta_size = log2_ctb_size - diff_cu_qp_delta_depth;
1539 block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3);
1540
1541 ps_entropy_ctxt->i4_qg_pos_x = ps_cu_analyse->b3_cu_pos_x & block_addr_align;
1542 ps_entropy_ctxt->i4_qg_pos_y = ps_cu_analyse->b3_cu_pos_y & block_addr_align;
1543 /*Update the Qp value used. It will not have a valid value iff
1544 current CU is (skipped/no_cbf). In that case the Qp needed for
1545 deblocking is calculated from top/left/previous coded CU*/
1546
1547 ps_ctxt->ps_enc_out_ctxt->i1_cu_qp = ps_cu_analyse->i1_cu_qp;
1548
1549 if(ps_entropy_ctxt->i4_qg_pos_x == ps_cu_analyse->b3_cu_pos_x &&
1550 ps_entropy_ctxt->i4_qg_pos_y == ps_cu_analyse->b3_cu_pos_y)
1551 {
1552 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 1;
1553 }
1554 else
1555 {
1556 ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 0;
1557 }
1558 }
1559
1560 /* -- at the end of CU set the neighbour map to 1 -- */
1561 ihevce_set_nbr_map(
1562 ps_ctxt->pu1_ctb_nbr_map,
1563 ps_ctxt->i4_nbr_map_strd,
1564 (ps_cu_analyse->b3_cu_pos_x << 1),
1565 (ps_cu_analyse->b3_cu_pos_y << 1),
1566 (ps_cu_analyse->u1_cu_size >> 2),
1567 1);
1568
1569 /* -- at the end of CU update best cabac rdopt states -- */
1570 /* -- and also set the top row skip flags ------------- */
1571 ihevce_entropy_update_best_cu_states(
1572 &ps_ctxt->s_rdopt_entropy_ctxt,
1573 ps_cu_analyse->b3_cu_pos_x,
1574 ps_cu_analyse->b3_cu_pos_y,
1575 ps_cu_analyse->u1_cu_size,
1576 0,
1577 rd_opt_best_idx);
1578 }
1579
1580 /* Store Output struct */
1581 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1582 {
1583 {
1584 memcpy(
1585 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
1586 &ps_ctxt->as_cu_prms[rd_opt_best_idx],
1587 sizeof(enc_loop_cu_final_prms_t));
1588 }
1589
1590 memcpy(
1591 &ps_ctxt->as_cu_recur_nbr[0],
1592 &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
1593 sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
1594 (ps_cu_analyse->u1_cu_size >> 2));
1595
1596 ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
1597
1598 ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
1599 }
1600 #else
1601 if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
1602 {
1603 ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1604
1605 ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
1606
1607 if(ps_ctxt->u1_disable_intra_eval && ps_ctxt->i4_deblk_pad_hpel_cur_pic)
1608 {
1609 /* Wait till top data is ready */
1610 /* Currently checking till top right CU */
1611 curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1612
1613 if(i4_ctb_y_off == 0)
1614 {
1615 /* No wait for 1st row */
1616 cu_top_right_offset = -(MAX_CTB_SIZE);
1617 {
1618 ihevce_tile_params_t *ps_col_tile_params =
1619 ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
1620 ps_ctxt->i4_tile_col_idx);
1621
1622 /* No wait for 1st row */
1623 cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
1624 }
1625 cu_top_right_dep_pos = 0;
1626 }
1627 else
1628 {
1629 cu_top_right_offset = (ps_cu_analyse->u1_cu_size);
1630 cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
1631 }
1632
1633 if(0 == ps_cu_analyse->b3_cu_pos_y)
1634 {
1635 ihevce_dmgr_chk_row_row_sync(
1636 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1637 curr_cu_pos_in_row,
1638 cu_top_right_offset,
1639 cu_top_right_dep_pos,
1640 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1641 ps_ctxt->thrd_id);
1642 }
1643 }
1644 }
1645 else
1646 {
1647 {
1648 memcpy(
1649 &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
1650 &ps_ctxt->as_cu_prms[rd_opt_best_idx],
1651 sizeof(enc_loop_cu_final_prms_t));
1652 }
1653
1654 memcpy(
1655 &ps_ctxt->as_cu_recur_nbr[0],
1656 &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
1657 sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
1658 (ps_cu_analyse->u1_cu_size >> 2));
1659
1660 ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
1661
1662 ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
1663 }
1664 #endif
1665
1666 ps_ctxt->s_pred_buf_data.u4_is_buf_in_use &=
1667 ~((1 << (ps_ctxt->i4_max_num_inter_rdopt_cands + 4)) - 1);
1668
1669 return rd_opt_least_cost;
1670 }
1671
1672 /*!
1673 ******************************************************************************
1674 * \if Function name : ihevce_enc_loop_process_row \endif
1675 *
1676 * \brief
1677 * Row level enc_loop pass function
1678 *
1679 * \param[in] ps_ctxt : pointer to enc_loop module context
1680 * \param[in] ps_curr_src_bufs : pointer to input yuv buffer (row buffer)
1681 * \param[out] ps_curr_recon_bufs : pointer to recon picture structure (row buffer)
1682 * \param[in] ps_ctb_in : pointer CTB structure (output of ME/IPE) (row buffer)
1683 * \param[out] ps_ctb_out : pointer CTB output structure (row buffer)
1684 * \param[out] ps_row_cu : pointer to CU output structure (row buffer)
1685 * \param[out] ps_row_tu : pointer to TU output structure (row buffer)
1686 * \param[out] pi2_frm_coeffs : pointer coeff output (row buffer)
1687 * \param[in] i4_poc : current poc. Needed to send recon in dist-client mode
1688 *
1689 * \return
1690 * None
1691 *
1692 * Note : Currently the frame level calculations assume that the
1693 * frame width of the input / recon is an exact multiple of the ctb size
1694 *
1695 * \author
1696 * Ittiam
1697 *
1698 *****************************************************************************
1699 */
ihevce_enc_loop_process_row(ihevce_enc_loop_ctxt_t * ps_ctxt,iv_enc_yuv_buf_t * ps_curr_src_bufs,iv_enc_yuv_buf_t * ps_curr_recon_bufs,iv_enc_yuv_buf_src_t * ps_curr_recon_bufs_src,UWORD8 ** ppu1_y_subpel_planes,ctb_analyse_t * ps_ctb_in,ctb_enc_loop_out_t * ps_ctb_out,ipe_l0_ctb_analyse_for_me_t * ps_row_ipe_analyse,cur_ctb_cu_tree_t * ps_row_cu_tree,cu_enc_loop_out_t * ps_row_cu,tu_enc_loop_out_t * ps_row_tu,pu_t * ps_row_pu,pu_col_mv_t * ps_row_col_pu,UWORD16 * pu2_num_pu_map,UWORD8 * pu1_row_pu_map,UWORD8 * pu1_row_ecd_data,UWORD32 * pu4_pu_offsets,frm_ctb_ctxt_t * ps_frm_ctb_prms,WORD32 vert_ctr,recon_pic_buf_t * ps_frm_recon,void * pv_dep_mngr_encloop_dep_me,pad_interp_recon_frm_t * ps_pad_interp_recon,WORD32 i4_pass,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,ihevce_tile_params_t * ps_tile_params)1700 void ihevce_enc_loop_process_row(
1701 ihevce_enc_loop_ctxt_t *ps_ctxt,
1702 iv_enc_yuv_buf_t *ps_curr_src_bufs,
1703 iv_enc_yuv_buf_t *ps_curr_recon_bufs,
1704 iv_enc_yuv_buf_src_t *ps_curr_recon_bufs_src,
1705 UWORD8 **ppu1_y_subpel_planes,
1706 ctb_analyse_t *ps_ctb_in,
1707 ctb_enc_loop_out_t *ps_ctb_out,
1708 ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse,
1709 cur_ctb_cu_tree_t *ps_row_cu_tree,
1710 cu_enc_loop_out_t *ps_row_cu,
1711 tu_enc_loop_out_t *ps_row_tu,
1712 pu_t *ps_row_pu,
1713 pu_col_mv_t *ps_row_col_pu,
1714 UWORD16 *pu2_num_pu_map,
1715 UWORD8 *pu1_row_pu_map,
1716 UWORD8 *pu1_row_ecd_data,
1717 UWORD32 *pu4_pu_offsets,
1718 frm_ctb_ctxt_t *ps_frm_ctb_prms,
1719 WORD32 vert_ctr,
1720 recon_pic_buf_t *ps_frm_recon,
1721 void *pv_dep_mngr_encloop_dep_me,
1722 pad_interp_recon_frm_t *ps_pad_interp_recon,
1723 WORD32 i4_pass,
1724 multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
1725 ihevce_tile_params_t *ps_tile_params)
1726 {
1727 enc_loop_cu_prms_t s_cu_prms;
1728 ctb_enc_loop_out_t *ps_ctb_out_dblk;
1729
1730 WORD32 ctb_ctr, ctb_start, ctb_end;
1731 WORD32 col_pu_map_idx;
1732 WORD32 num_ctbs_horz_pic;
1733 WORD32 ctb_size;
1734 WORD32 last_ctb_row_flag;
1735 WORD32 last_ctb_col_flag;
1736 WORD32 last_hz_ctb_wd;
1737 WORD32 last_vt_ctb_ht;
1738 void *pv_dep_mngr_enc_loop_dblk = ps_ctxt->pv_dep_mngr_enc_loop_dblk;
1739 void *pv_dep_mngr_enc_loop_sao = ps_ctxt->pv_dep_mngr_enc_loop_sao;
1740 void *pv_dep_mngr_enc_loop_cu_top_right = ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right;
1741 WORD32 dblk_offset, dblk_check_dep_pos;
1742 WORD32 sao_offset, sao_check_dep_pos;
1743 WORD32 aux_offset, aux_check_dep_pos;
1744 void *pv_dep_mngr_me_dep_encloop;
1745 ctb_enc_loop_out_t *ps_ctb_out_sao;
1746 /*Structure to store deblocking parameters at CTB-row level*/
1747 deblk_ctbrow_prms_t s_deblk_ctb_row_params;
1748 UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
1749
1750 pv_dep_mngr_me_dep_encloop = (void *)ps_frm_recon->pv_dep_mngr_recon;
1751 num_ctbs_horz_pic = ps_frm_ctb_prms->i4_num_ctbs_horz;
1752 ctb_size = ps_frm_ctb_prms->i4_ctb_size;
1753
1754 /* Store the num_ctb_horz in sao context*/
1755 ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_horz = ps_frm_ctb_prms->i4_num_ctbs_horz;
1756 ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_vert = ps_frm_ctb_prms->i4_num_ctbs_vert;
1757
1758 /* Set Variables for Dep. Checking and Setting */
1759 aux_check_dep_pos = vert_ctr;
1760 aux_offset = 2; /* Should be there for 0th row also */
1761 if(vert_ctr > 0)
1762 {
1763 dblk_check_dep_pos = vert_ctr - 1;
1764 dblk_offset = 2;
1765 }
1766 else
1767 {
1768 /* First row should run without waiting */
1769 dblk_check_dep_pos = 0;
1770 dblk_offset = -(ps_tile_params->i4_first_sample_x + 1);
1771 }
1772
1773 /* Set sao_offset and sao_check_dep_pos */
1774 if(vert_ctr > 1)
1775 {
1776 sao_check_dep_pos = vert_ctr - 2;
1777 sao_offset = 2;
1778 }
1779 else
1780 {
1781 /* First row should run without waiting */
1782 sao_check_dep_pos = 0;
1783 sao_offset = -(ps_tile_params->i4_first_sample_x + 1);
1784 }
1785
1786 /* check if the current row processed in last CTb row */
1787 last_ctb_row_flag = (vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1));
1788
1789 /* Valid Width (pixels) in the last CTB in every row (padding cases) */
1790 last_hz_ctb_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd - ((num_ctbs_horz_pic - 1) * ctb_size);
1791
1792 /* Valid Height (pixels) in the last CTB row (padding cases) */
1793 last_vt_ctb_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht -
1794 ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ctb_size);
1795 /* reset the states copied flag */
1796 ps_ctxt->u1_cabac_states_next_row_copied_flag = 0;
1797 ps_ctxt->u1_cabac_states_first_cu_copied_flag = 0;
1798
1799 /* populate the cu prms which are common for entire ctb row */
1800 s_cu_prms.i4_luma_src_stride = ps_curr_src_bufs->i4_y_strd;
1801 s_cu_prms.i4_chrm_src_stride = ps_curr_src_bufs->i4_uv_strd;
1802 s_cu_prms.i4_luma_recon_stride = ps_curr_recon_bufs->i4_y_strd;
1803 s_cu_prms.i4_chrm_recon_stride = ps_curr_recon_bufs->i4_uv_strd;
1804 s_cu_prms.i4_ctb_size = ctb_size;
1805
1806 ps_ctxt->i4_is_first_cu_qg_coded = 0;
1807
1808 /* Initialize the number of PUs for the first CTB to 0 */
1809 *pu2_num_pu_map = 0;
1810
1811 /*Getting the address of BS and Qp arrays and other info*/
1812 memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
1813 {
1814 WORD32 num_ctbs_horz_tile;
1815 /* Update the pointers which are accessed not by using ctb_ctr
1816 to the tile start here! */
1817 ps_ctb_in += ps_tile_params->i4_first_ctb_x;
1818 ps_ctb_out += ps_tile_params->i4_first_ctb_x;
1819
1820 ps_row_cu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_cus_in_ctb);
1821 ps_row_tu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_tus_in_ctb);
1822 ps_row_pu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
1823 pu1_row_pu_map += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
1824 pu1_row_ecd_data +=
1825 (ps_tile_params->i4_first_ctb_x *
1826 ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_num_tus_in_ctb << 1)
1827 : ((ps_frm_ctb_prms->i4_num_tus_in_ctb * 3) >> 1)) *
1828 MAX_SCAN_COEFFS_BYTES_4x4);
1829
1830 /* Update the pointers to the tile start */
1831 s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
1832 (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one vertical edge per 8x8 block
1833 s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
1834 (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one horizontal edge per 8x8 block
1835 s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
1836
1837 num_ctbs_horz_tile = ps_tile_params->i4_curr_tile_wd_in_ctb_unit;
1838
1839 ctb_start = ps_tile_params->i4_first_ctb_x;
1840 ctb_end = ps_tile_params->i4_first_ctb_x + num_ctbs_horz_tile;
1841 }
1842 ps_ctb_out_dblk = ps_ctb_out;
1843
1844 ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_frame_qp;
1845
1846 /* --------- Loop over all the CTBs in a row --------------- */
1847 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
1848 {
1849 cu_final_update_prms s_cu_update_prms;
1850
1851 cur_ctb_cu_tree_t *ps_cu_tree_analyse;
1852 me_ctb_data_t *ps_cu_me_data;
1853 ipe_l0_ctb_analyse_for_me_t *ps_ctb_ipe_analyse;
1854 cu_enc_loop_out_t *ps_cu_final;
1855 pu_col_mv_t *ps_ctb_col_pu;
1856
1857 WORD32 cur_ctb_ht, cur_ctb_wd;
1858 WORD32 last_cu_pos_in_ctb;
1859 WORD32 last_cu_size;
1860 WORD32 num_pus_in_ctb;
1861 UWORD8 u1_is_ctb_noisy;
1862 ps_ctb_col_pu = ps_row_col_pu + ctb_ctr * ps_frm_ctb_prms->i4_num_pus_in_ctb;
1863
1864 if(ctb_ctr)
1865 {
1866 ps_ctxt->i4_prev_QP = ps_ctxt->i4_last_cu_qp_from_prev_ctb;
1867 }
1868 /* If sub pic rc is enabled */
1869 if(ps_ctxt->i4_sub_pic_level_rc)
1870 {
1871 ihevce_sub_pic_rc_scale_query((void *)ps_multi_thrd_ctxt, (void *)ps_ctxt);
1872 }
1873 /* check if the current CTB is in the last CTB column of the picture */
1874 last_ctb_col_flag = (ctb_ctr == (num_ctbs_horz_pic - 1));
1875 if(1 == last_ctb_col_flag)
1876 {
1877 cur_ctb_wd = last_hz_ctb_wd;
1878 }
1879 else
1880 {
1881 cur_ctb_wd = ctb_size;
1882 }
1883
1884 /* If it's the last CTB row, get the actual ht of CTB */
1885 if(1 == last_ctb_row_flag)
1886 {
1887 cur_ctb_ht = last_vt_ctb_ht;
1888 }
1889 else
1890 {
1891 cur_ctb_ht = ctb_size;
1892 }
1893
1894 ps_ctxt->u4_cur_ctb_ht = cur_ctb_ht;
1895 ps_ctxt->u4_cur_ctb_wd = cur_ctb_wd;
1896
1897 /* Wait till reference frame recon is available */
1898
1899 /* ------------ Wait till current data is ready from ME -------------- */
1900
1901 /*only for ref instance and Non I pics */
1902 if((ps_ctxt->i4_bitrate_instance_num == 0) &&
1903 ((ISLICE != ps_ctxt->i1_slice_type) || L0ME_IN_OPENLOOP_MODE))
1904 {
1905 if(ctb_ctr < (num_ctbs_horz_pic))
1906 {
1907 ihevce_dmgr_chk_row_row_sync(
1908 pv_dep_mngr_encloop_dep_me,
1909 ctb_ctr,
1910 1,
1911 vert_ctr,
1912 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1913 ps_ctxt->thrd_id);
1914 }
1915 }
1916
1917 /* store the cu pointer for current ctb out */
1918 ps_ctb_out->ps_enc_cu = ps_row_cu;
1919 ps_cu_final = ps_row_cu;
1920
1921 /* Get the base point of CU recursion tree */
1922 if(ISLICE != ps_ctxt->i1_slice_type)
1923 {
1924 ps_cu_tree_analyse = ps_ctb_in->ps_cu_tree;
1925 ASSERT(ps_ctb_in->ps_cu_tree == (ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE)));
1926 }
1927 else
1928 {
1929 /* Initialize ptr to current CTB */
1930 ps_cu_tree_analyse = ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE);
1931 }
1932
1933 /* Get the ME data pointer for 16x16 block data in ctb */
1934 ps_cu_me_data = ps_ctb_in->ps_me_ctb_data;
1935 u1_is_ctb_noisy = ps_ctb_in->s_ctb_noise_params.i4_noise_present;
1936 s_cu_prms.u1_is_cu_noisy = u1_is_ctb_noisy;
1937 s_cu_prms.pu1_is_8x8Blk_noisy = ps_ctb_in->s_ctb_noise_params.au1_is_8x8Blk_noisy;
1938
1939 /* store the ctb level prms in cu prms */
1940 s_cu_prms.i4_ctb_pos = ctb_ctr;
1941
1942 s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
1943 s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
1944
1945 {
1946 s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
1947 s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
1948 }
1949
1950 s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
1951
1952 s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
1953
1954 s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
1955
1956 /* Initialize ptr to current CTB */
1957 ps_ctb_ipe_analyse = ps_row_ipe_analyse + ctb_ctr; // * ctb_size;
1958
1959 /* reset the map idx for current ctb */
1960 col_pu_map_idx = 0;
1961 num_pus_in_ctb = 0;
1962
1963 /* reset the map buffer to 0*/
1964
1965 memset(
1966 &ps_ctxt->au1_nbr_ctb_map[0][0],
1967 0,
1968 (MAX_PU_IN_CTB_ROW + 1 + 8) * (MAX_PU_IN_CTB_ROW + 1 + 8));
1969
1970 /* set the CTB neighbour availability flags */
1971 ihevce_set_ctb_nbr(
1972 &ps_ctb_out->s_ctb_nbr_avail_flags,
1973 ps_ctxt->pu1_ctb_nbr_map,
1974 ps_ctxt->i4_nbr_map_strd,
1975 ctb_ctr,
1976 vert_ctr,
1977 ps_frm_ctb_prms);
1978
1979 /* -------- update the cur CTB offsets for inter prediction-------- */
1980 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = ctb_ctr * ctb_size;
1981 ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = vert_ctr * ctb_size;
1982
1983 /* -------- update the cur CTB offsets for MV prediction-------- */
1984 ps_ctxt->s_mv_pred_ctxt.i4_ctb_x = ctb_ctr;
1985 ps_ctxt->s_mv_pred_ctxt.i4_ctb_y = vert_ctr;
1986
1987 /* -------------- Boundary Strength Initialization ----------- */
1988 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
1989 {
1990 ihevce_bs_init_ctb(&ps_ctxt->s_deblk_bs_prms, ps_frm_ctb_prms, ctb_ctr, vert_ctr);
1991 }
1992
1993 /* -------- update cur CTB offsets for entropy rdopt context------- */
1994 ihevce_entropy_rdo_ctb_init(&ps_ctxt->s_rdopt_entropy_ctxt, ctb_ctr, vert_ctr);
1995
1996 /* --------- CU Recursion --------------- */
1997
1998 {
1999 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2000 WORD32 i4_max_tree_depth = 4;
2001 #endif
2002 WORD32 i4_tree_depth = 0;
2003 /* Init no. of CU in CTB to 0*/
2004 ps_ctb_out->u1_num_cus_in_ctb = 0;
2005
2006 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2007 if(ps_ctxt->i4_bitrate_instance_num == 0)
2008 {
2009 WORD32 i4_max_tree_depth = 4;
2010 WORD32 i;
2011 for(i = 0; i < i4_max_tree_depth; i++)
2012 {
2013 COPY_CABAC_STATES(
2014 &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2015 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2016 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2017 }
2018 }
2019 #else
2020 if(ps_ctxt->i4_bitrate_instance_num == 0)
2021 {
2022 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2023 {
2024 WORD32 i4_max_tree_depth = 4;
2025 WORD32 i;
2026 for(i = 0; i < i4_max_tree_depth; i++)
2027 {
2028 COPY_CABAC_STATES(
2029 &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2030 &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2031 IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2032 }
2033 }
2034 }
2035
2036 #endif
2037 if(ps_ctxt->i4_bitrate_instance_num == 0)
2038 {
2039 /* FOR I- PIC populate the curr_ctb accordingly */
2040 if(ISLICE == ps_ctxt->i1_slice_type)
2041 {
2042 ps_ctb_ipe_analyse->ps_cu_tree_root = ps_cu_tree_analyse;
2043 ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
2044
2045 ihevce_populate_cu_tree(
2046 ps_ctb_ipe_analyse,
2047 ps_cu_tree_analyse,
2048 0,
2049 (IHEVCE_QUALITY_CONFIG_T)ps_ctxt->i4_quality_preset,
2050 POS_NA,
2051 POS_NA,
2052 POS_NA);
2053 }
2054 }
2055 ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
2056 ps_ctxt->ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2057 ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2058
2059 s_cu_update_prms.ppu1_row_ecd_data = &pu1_row_ecd_data;
2060 s_cu_update_prms.pi4_last_cu_pos_in_ctb = &last_cu_pos_in_ctb;
2061 s_cu_update_prms.pi4_last_cu_size = &last_cu_size;
2062 s_cu_update_prms.pi4_num_pus_in_ctb = &num_pus_in_ctb;
2063 s_cu_update_prms.pps_cu_final = &ps_cu_final;
2064 s_cu_update_prms.pps_row_pu = &ps_row_pu;
2065 s_cu_update_prms.pps_row_tu = &ps_row_tu;
2066 s_cu_update_prms.pu1_num_cus_in_ctb_out = &ps_ctb_out->u1_num_cus_in_ctb;
2067
2068 // source satd computation
2069 /* compute the source 8x8 SATD for the current CTB */
2070 /* populate pui4_source_satd in some structure and pass it inside */
2071 if(ps_ctxt->u1_enable_psyRDOPT)
2072 {
2073 /* declare local variables */
2074 WORD32 i;
2075 WORD32 ctb_size;
2076 WORD32 num_comp_had_blocks;
2077 UWORD8 *pu1_l0_block;
2078 WORD32 block_ht;
2079 WORD32 block_wd;
2080 WORD32 ht_offset;
2081 WORD32 wd_offset;
2082
2083 WORD32 num_horz_blocks;
2084 WORD32 had_block_size;
2085 WORD32 total_had_block_size;
2086 WORD16 pi2_residue_had_zscan[64];
2087 UWORD8 ai1_zeros_buffer[64];
2088
2089 WORD32 index_satd;
2090 WORD32 is_hbd;
2091 /* initialize the variables */
2092 block_ht = cur_ctb_ht;
2093 block_wd = cur_ctb_wd;
2094
2095 is_hbd = ps_ctxt->u1_is_input_data_hbd;
2096
2097 had_block_size = 8;
2098 total_had_block_size = had_block_size * had_block_size;
2099
2100 for(i = 0; i < total_had_block_size; i++)
2101 {
2102 ai1_zeros_buffer[i] = 0;
2103 }
2104
2105 ctb_size = block_ht * block_wd; //ctb_width * ctb_height;
2106 num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
2107
2108 num_horz_blocks = block_wd / had_block_size; //ctb_width / had_block_size;
2109 ht_offset = -had_block_size;
2110 wd_offset = -had_block_size;
2111
2112 index_satd = 0;
2113 /* Loop over all 8x8 blocks in the CTB */
2114 for(i = 0; i < num_comp_had_blocks; i++)
2115 {
2116 if(i % num_horz_blocks == 0)
2117 {
2118 wd_offset = -had_block_size;
2119 ht_offset += had_block_size;
2120 }
2121 wd_offset += had_block_size;
2122
2123 if(!is_hbd)
2124 {
2125 /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
2126 pu1_l0_block = s_cu_prms.pu1_luma_src +
2127 ps_curr_src_bufs->i4_y_strd * ht_offset + wd_offset;
2128
2129 ps_ctxt->ai4_source_satd_8x8[index_satd] =
2130
2131 ps_ctxt->s_cmn_opt_func.pf_AC_HAD_8x8_8bit(
2132 pu1_l0_block,
2133 ps_curr_src_bufs->i4_y_strd,
2134 ai1_zeros_buffer,
2135 had_block_size,
2136 pi2_residue_had_zscan,
2137 had_block_size);
2138 }
2139 index_satd++;
2140 }
2141 }
2142
2143 if(ps_ctxt->u1_enable_psyRDOPT)
2144 {
2145 /* declare local variables */
2146 WORD32 i;
2147 WORD32 ctb_size;
2148 WORD32 num_comp_had_blocks;
2149 UWORD8 *pu1_l0_block;
2150 UWORD8 *pu1_l0_block_prev = NULL;
2151 WORD32 block_ht;
2152 WORD32 block_wd;
2153 WORD32 ht_offset;
2154 WORD32 wd_offset;
2155
2156 WORD32 num_horz_blocks;
2157 WORD32 had_block_size;
2158 WORD16 pi2_residue_had[64];
2159 UWORD8 ai1_zeros_buffer[64];
2160 WORD32 index_satd = 0;
2161
2162 WORD32 is_hbd;
2163 is_hbd = ps_ctxt->u1_is_input_data_hbd; // 8 bit
2164
2165 /* initialize the variables */
2166 /* change this based on the bit depth */
2167 // ps_ctxt->u1_chroma_array_type
2168 if(ps_ctxt->u1_chroma_array_type == 1)
2169 {
2170 block_ht = cur_ctb_ht / 2;
2171 block_wd = cur_ctb_wd / 2;
2172 }
2173 else
2174 {
2175 block_ht = cur_ctb_ht;
2176 block_wd = cur_ctb_wd / 2;
2177 }
2178
2179 had_block_size = 4;
2180 memset(ai1_zeros_buffer, 0, 64 * sizeof(UWORD8));
2181
2182 ctb_size = block_ht * block_wd; //ctb_width * ctb_height;
2183 num_comp_had_blocks = 2 * ctb_size / (had_block_size * had_block_size);
2184
2185 num_horz_blocks = 2 * block_wd / had_block_size; //ctb_width / had_block_size;
2186 ht_offset = -had_block_size;
2187 wd_offset = -had_block_size;
2188
2189 if(!is_hbd)
2190 {
2191 /* loop over for every 4x4 blocks in the CU for Cb */
2192 for(i = 0; i < num_comp_had_blocks; i++)
2193 {
2194 if(i % num_horz_blocks == 0)
2195 {
2196 wd_offset = -had_block_size;
2197 ht_offset += had_block_size;
2198 }
2199 wd_offset += had_block_size;
2200
2201 /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
2202 if(i % 2 != 0)
2203 {
2204 if(!is_hbd)
2205 {
2206 pu1_l0_block = pu1_l0_block_prev + 1;
2207 }
2208 }
2209 else
2210 {
2211 if(!is_hbd)
2212 {
2213 pu1_l0_block = s_cu_prms.pu1_chrm_src +
2214 s_cu_prms.i4_chrm_src_stride * ht_offset + wd_offset;
2215 pu1_l0_block_prev = pu1_l0_block;
2216 }
2217 }
2218
2219 if(had_block_size == 4)
2220 {
2221 if(!is_hbd)
2222 {
2223 ps_ctxt->ai4_source_chroma_satd[index_satd] =
2224 ps_ctxt->s_cmn_opt_func.pf_chroma_AC_HAD_4x4_8bit(
2225 pu1_l0_block,
2226 s_cu_prms.i4_chrm_src_stride,
2227 ai1_zeros_buffer,
2228 had_block_size,
2229 pi2_residue_had,
2230 had_block_size);
2231 }
2232
2233 index_satd++;
2234
2235 } // block size of 4x4
2236
2237 } // for all blocks
2238
2239 } // is hbd check
2240 }
2241
2242 ihevce_cu_recurse_decide(
2243 ps_ctxt,
2244 &s_cu_prms,
2245 ps_cu_tree_analyse,
2246 ps_cu_tree_analyse,
2247 ps_ctb_ipe_analyse,
2248 ps_cu_me_data,
2249 &ps_ctb_col_pu,
2250 &s_cu_update_prms,
2251 pu1_row_pu_map,
2252 &col_pu_map_idx,
2253 i4_tree_depth,
2254 ctb_ctr << 6,
2255 vert_ctr << 6,
2256 cur_ctb_ht);
2257
2258 if(ps_ctxt->i1_slice_type != ISLICE)
2259 {
2260 ASSERT(
2261 (cur_ctb_wd * cur_ctb_ht) <=
2262 ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree_analyse));
2263 }
2264 /* If sub pic rc is enabled */
2265 if(1 == ps_ctxt->i4_sub_pic_level_rc)
2266 {
2267 /*In a row, after the required CTB is reached, send data and query scale from Bit Control thread */
2268 ihevce_sub_pic_rc_in_data(
2269 (void *)ps_multi_thrd_ctxt,
2270 (void *)ps_ctxt,
2271 (void *)ps_ctb_ipe_analyse,
2272 (void *)ps_frm_ctb_prms);
2273 }
2274
2275 ps_ctxt->ps_enc_out_ctxt->u1_cu_size = 128;
2276
2277 } /* End of CU recursion block */
2278
2279 #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2280 {
2281 ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2282 enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
2283 ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2284
2285 do
2286 {
2287 ihevce_update_final_cu_results(
2288 ps_ctxt,
2289 ps_enc_out_ctxt,
2290 ps_cu_prms,
2291 NULL, /* &ps_ctb_col_pu */
2292 NULL, /* &col_pu_map_idx */
2293 &s_cu_update_prms,
2294 ctb_ctr,
2295 vert_ctr);
2296
2297 ps_enc_out_ctxt++;
2298
2299 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
2300
2301 } while(ps_enc_out_ctxt->u1_cu_size != 128);
2302 }
2303 #else
2304 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2305 {
2306 ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2307 enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
2308 ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2309
2310 do
2311 {
2312 ihevce_update_final_cu_results(
2313 ps_ctxt,
2314 ps_enc_out_ctxt,
2315 ps_cu_prms,
2316 NULL, /* &ps_ctb_col_pu */
2317 NULL, /* &col_pu_map_idx */
2318 &s_cu_update_prms,
2319 ctb_ctr,
2320 vert_ctr);
2321
2322 ps_enc_out_ctxt++;
2323
2324 ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
2325
2326 } while(ps_enc_out_ctxt->u1_cu_size != 128);
2327 }
2328 #endif
2329
2330 /* --- ctb level copy of data to left buffers--*/
2331 ((pf_enc_loop_ctb_left_copy)ps_ctxt->pv_enc_loop_ctb_left_copy)(ps_ctxt, &s_cu_prms);
2332
2333 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2334 {
2335 /* For the Unaligned CTB, make the invalid edge boundary strength 0 */
2336 ihevce_bs_clear_invalid(
2337 &ps_ctxt->s_deblk_bs_prms,
2338 last_ctb_row_flag,
2339 (ctb_ctr == (num_ctbs_horz_pic - 1)),
2340 last_hz_ctb_wd,
2341 last_vt_ctb_ht);
2342
2343 /* -----------------Read boundary strengths for current CTB------------- */
2344
2345 if((0 == ps_ctxt->i4_deblock_type) && (ps_ctxt->i4_deblk_pad_hpel_cur_pic))
2346 {
2347 /*Storing boundary strengths of current CTB*/
2348 UWORD32 *pu4_bs_horz = &ps_ctxt->s_deblk_bs_prms.au4_horz_bs[0];
2349 UWORD32 *pu4_bs_vert = &ps_ctxt->s_deblk_bs_prms.au4_vert_bs[0];
2350
2351 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_vert, pu4_bs_vert, (ctb_size * 4) / 8);
2352 memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_horz, pu4_bs_horz, (ctb_size * 4) / 8);
2353 }
2354 //Increment for storing next CTB info
2355 s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2356 (ctb_size >> 3); //one vertical edge per 8x8 block
2357 s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2358 (ctb_size >> 3); //one horizontal edge per 8x8 block
2359 }
2360
2361 /* -------------- ctb level updates ----------------- */
2362 ps_row_cu += ps_ctb_out->u1_num_cus_in_ctb;
2363
2364 pu1_row_pu_map += (ctb_size >> 2) * (ctb_size >> 2);
2365
2366 /* first ctb offset will be populated by the caller */
2367 if(0 != ctb_ctr)
2368 {
2369 pu4_pu_offsets[ctb_ctr] = pu4_pu_offsets[ctb_ctr - 1] + num_pus_in_ctb;
2370 }
2371 pu2_num_pu_map[ctb_ctr] = num_pus_in_ctb;
2372 ASSERT(ps_ctb_out->u1_num_cus_in_ctb != 0);
2373
2374 ps_ctb_in++;
2375 ps_ctb_out++;
2376 }
2377
2378 /* ---------- Encloop end of row updates ----------------- */
2379
2380 /* at the end of row processing cu pixel counter is set to */
2381 /* (num ctb * ctb size) + ctb size */
2382 /* this is to set the dependency for right most cu of last */
2383 /* ctb's top right data dependency */
2384 /* this even takes care of entropy dependency for */
2385 /* incomplete ctb as well */
2386 ihevce_dmgr_set_row_row_sync(
2387 pv_dep_mngr_enc_loop_cu_top_right,
2388 (ctb_ctr * ctb_size + ctb_size),
2389 vert_ctr,
2390 ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2391
2392 ps_ctxt->s_sao_ctxt_t.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
2393
2394 /* Restore structure.
2395 Getting the address of stored-BS and Qp-map and other info */
2396 memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
2397 {
2398 /* Update the pointers to the tile start */
2399 s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2400 (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one vertical edge per 8x8 block
2401 s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2402 (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one horizontal edge per 8x8 block
2403 s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
2404 }
2405
2406 #if PROFILE_ENC_REG_DATA
2407 s_profile.u8_enc_reg_data[vert_ctr] = 0;
2408 #endif
2409
2410 /* -- Loop over all the CTBs in a row for Deblocking and Subpel gen --- */
2411 if(!ps_ctxt->u1_is_input_data_hbd)
2412 {
2413 WORD32 last_col_pic, last_col_tile;
2414
2415 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2416 {
2417 /* store the ctb level prms in cu prms */
2418 s_cu_prms.i4_ctb_pos = ctb_ctr;
2419 s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
2420 s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
2421
2422 s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
2423 s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
2424 s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
2425
2426 s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
2427
2428 s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
2429
2430 /* If last ctb in the horizontal row */
2431 if(ctb_ctr == (num_ctbs_horz_pic - 1))
2432 {
2433 last_col_pic = 1;
2434 }
2435 else
2436 {
2437 last_col_pic = 0;
2438 }
2439
2440 /* If last ctb in the tile row */
2441 if(ctb_ctr == (ctb_end - 1))
2442 {
2443 last_col_tile = 1;
2444 }
2445 else
2446 {
2447 last_col_tile = 0;
2448 }
2449
2450 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2451 {
2452 /* for last ctb of a row check top instead of top right */
2453 if(((ctb_ctr + 1) == ctb_end) && (vert_ctr > 0))
2454 {
2455 dblk_offset = 1;
2456 }
2457 /* Wait till top neighbour CTB has done it's deblocking*/
2458 ihevce_dmgr_chk_row_row_sync(
2459 pv_dep_mngr_enc_loop_dblk,
2460 ctb_ctr,
2461 dblk_offset,
2462 dblk_check_dep_pos,
2463 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2464 ps_ctxt->thrd_id);
2465
2466 if((0 == ps_ctxt->i4_deblock_type))
2467 {
2468 /* Populate Qp-map */
2469 if(ctb_start == ctb_ctr)
2470 {
2471 ihevce_deblk_populate_qp_map(
2472 ps_ctxt,
2473 &s_deblk_ctb_row_params,
2474 ps_ctb_out_dblk,
2475 vert_ctr,
2476 ps_frm_ctb_prms,
2477 ps_tile_params);
2478 }
2479 ps_ctxt->s_deblk_prms.i4_ctb_size = ctb_size;
2480
2481 /* recon pointers and stride */
2482 ps_ctxt->s_deblk_prms.pu1_ctb_y = s_cu_prms.pu1_luma_recon;
2483 ps_ctxt->s_deblk_prms.pu1_ctb_uv = s_cu_prms.pu1_chrm_recon;
2484 ps_ctxt->s_deblk_prms.i4_luma_pic_stride = s_cu_prms.i4_luma_recon_stride;
2485 ps_ctxt->s_deblk_prms.i4_chroma_pic_stride = s_cu_prms.i4_chrm_recon_stride;
2486
2487 ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge = (0 == vert_ctr) ? 0 : 1;
2488 {
2489 ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge =
2490 (ps_tile_params->i4_first_ctb_y == vert_ctr) ? 0 : 1;
2491 }
2492 ps_ctxt->s_deblk_prms.i4_deblock_left_ctb_edge = (ctb_start == ctb_ctr) ? 0 : 1;
2493 //or according to slice boundary. Support yet to be added !!!!
2494
2495 ihevce_deblk_ctb(
2496 &ps_ctxt->s_deblk_prms, last_col_tile, &s_deblk_ctb_row_params);
2497
2498 //Increment for storing next CTB info
2499 s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2500 (ctb_size >> 3); //one vertical edge per 8x8 block
2501 s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2502 (ctb_size >> 3); //one horizontal edge per 8x8 block
2503 s_deblk_ctb_row_params.pi1_ctb_row_qp +=
2504 (ctb_size >> 2); //one qp per 4x4 block.
2505 }
2506 } // end of if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2507
2508 /* update the number of ctbs deblocked for this row */
2509 ihevce_dmgr_set_row_row_sync(
2510 pv_dep_mngr_enc_loop_dblk,
2511 (ctb_ctr + 1),
2512 vert_ctr,
2513 ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2514
2515 } //end of loop over CTBs in current CTB-row
2516
2517 /* Apply SAO over the previous CTB-row */
2518 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2519 {
2520 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2521 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2522 {
2523 sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2524
2525 if(vert_ctr > ps_tile_params->i4_first_ctb_y)
2526 {
2527 /*For last ctb check top dep only*/
2528 if((vert_ctr > 1) && ((ctb_ctr + 1) == ctb_end))
2529 {
2530 sao_offset = 1;
2531 }
2532
2533 ihevce_dmgr_chk_row_row_sync(
2534 pv_dep_mngr_enc_loop_sao,
2535 ctb_ctr,
2536 sao_offset,
2537 sao_check_dep_pos,
2538 ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2539 ps_ctxt->thrd_id);
2540
2541 /* Call the sao function to do sao for the current ctb*/
2542
2543 /* Register the curr ctb's x pos in sao context*/
2544 ps_sao_ctxt->i4_ctb_x = ctb_ctr;
2545
2546 /* Register the curr ctb's y pos in sao context*/
2547 ps_sao_ctxt->i4_ctb_y = vert_ctr - 1;
2548
2549 ps_ctb_out_sao = ps_sao_ctxt->ps_ctb_out +
2550 (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz + ctb_ctr;
2551 ps_sao_ctxt->ps_sao = &ps_ctb_out_sao->s_sao;
2552 ps_sao_ctxt->i4_sao_blk_wd = ctb_size;
2553 ps_sao_ctxt->i4_sao_blk_ht = ctb_size;
2554
2555 ps_sao_ctxt->i4_is_last_ctb_row = 0;
2556 ps_sao_ctxt->i4_is_last_ctb_col = 0;
2557
2558 if((ctb_ctr + 1) == ctb_end)
2559 {
2560 ps_sao_ctxt->i4_is_last_ctb_col = 1;
2561 ps_sao_ctxt->i4_sao_blk_wd =
2562 ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
2563 ps_tile_params->i4_curr_tile_width);
2564 }
2565
2566 /* Calculate the recon buf pointer and stride for the current ctb */
2567 ps_sao_ctxt->pu1_cur_luma_recon_buf =
2568 ps_sao_ctxt->pu1_frm_luma_recon_buf +
2569 (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2570 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2571
2572 ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2573
2574 ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2575 ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2576 (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2577 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2578 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2579
2580 ps_sao_ctxt->i4_cur_chroma_recon_stride =
2581 ps_sao_ctxt->i4_frm_chroma_recon_stride;
2582
2583 ps_sao_ctxt->pu1_cur_luma_src_buf =
2584 ps_sao_ctxt->pu1_frm_luma_src_buf +
2585 (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2586 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2587
2588 ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2589
2590 ps_sao_ctxt->pu1_cur_chroma_src_buf =
2591 ps_sao_ctxt->pu1_frm_chroma_src_buf +
2592 (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2593 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2594 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2595
2596 ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2597
2598 /* Calculate the pointer to buff to store the (x,y)th sao
2599 * for the top merge of (x,y+1)th ctb
2600 */
2601 ps_sao_ctxt->ps_top_ctb_sao =
2602 &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2603 [ps_sao_ctxt->i4_ctb_x +
2604 (ps_sao_ctxt->i4_ctb_y) *
2605 ps_frm_ctb_prms->i4_num_ctbs_horz +
2606 (ps_ctxt->i4_bitrate_instance_num *
2607 ps_sao_ctxt->i4_num_ctb_units)];
2608
2609 /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2610 ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2611 ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2612 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2613 ps_sao_ctxt->i4_ctb_x * ctb_size +
2614 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2615 ps_sao_ctxt->i4_top_chroma_buf_size);
2616
2617 /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2618 ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2619 ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2620 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2621 ps_sao_ctxt->i4_ctb_x * ctb_size +
2622 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2623 ps_sao_ctxt->i4_top_chroma_buf_size);
2624
2625 {
2626 UWORD32 u4_ctb_sao_bits;
2627
2628 ihevce_sao_analyse(
2629 &ps_ctxt->s_sao_ctxt_t,
2630 ps_ctb_out_sao,
2631 &u4_ctb_sao_bits,
2632 ps_tile_params);
2633 ps_ctxt
2634 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2635 [ps_ctxt->i4_bitrate_instance_num]
2636 ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2637 ps_ctxt
2638 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2639 [ps_ctxt->i4_bitrate_instance_num]
2640 ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2641 }
2642 /** Subpel generation not done for non-ref picture **/
2643 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2644 {
2645 /* Recon Padding */
2646 ihevce_recon_padding(
2647 ps_pad_interp_recon,
2648 ctb_ctr,
2649 vert_ctr - 1,
2650 ps_frm_ctb_prms,
2651 ps_ctxt->ps_func_selector);
2652 }
2653 /* update the number of SAO ctbs for this row */
2654 ihevce_dmgr_set_row_row_sync(
2655 pv_dep_mngr_enc_loop_sao,
2656 ctb_ctr + 1,
2657 vert_ctr - 1,
2658 ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2659 }
2660 }
2661 else //SAO Disabled
2662 {
2663 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2664 {
2665 /* Recon Padding */
2666 ihevce_recon_padding(
2667 ps_pad_interp_recon,
2668 ctb_ctr,
2669 vert_ctr,
2670 ps_frm_ctb_prms,
2671 ps_ctxt->ps_func_selector);
2672 }
2673 }
2674 } // end of SAO for loop
2675
2676 /* Call the sao function again for the last ctb row of frame */
2677 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2678 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2679 {
2680 sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2681
2682 if(vert_ctr ==
2683 (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1))
2684 {
2685 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2686 {
2687 /* Register the curr ctb's x pos in sao context*/
2688 ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr;
2689
2690 /* Register the curr ctb's y pos in sao context*/
2691 ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr;
2692
2693 ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
2694 vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz + ctb_ctr;
2695
2696 ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
2697
2698 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd = ps_ctxt->s_sao_ctxt_t.i4_ctb_size;
2699 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 0;
2700
2701 if((ctb_ctr + 1) == ctb_end)
2702 {
2703 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1;
2704 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd =
2705 ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
2706 ps_tile_params->i4_curr_tile_width);
2707 }
2708
2709 ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht =
2710 ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) -
2711 ps_tile_params->i4_curr_tile_height);
2712
2713 ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1;
2714
2715 /* Calculate the recon buf pointer and stride for the current ctb */
2716 ps_sao_ctxt->pu1_cur_luma_recon_buf =
2717 ps_sao_ctxt->pu1_frm_luma_recon_buf +
2718 (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2719 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2720
2721 ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2722
2723 ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2724 ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2725 (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2726 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2727 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2728
2729 ps_sao_ctxt->i4_cur_chroma_recon_stride =
2730 ps_sao_ctxt->i4_frm_chroma_recon_stride;
2731
2732 ps_sao_ctxt->pu1_cur_luma_src_buf =
2733 ps_sao_ctxt->pu1_frm_luma_src_buf +
2734 (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2735 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2736
2737 ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2738
2739 ps_sao_ctxt->pu1_cur_chroma_src_buf =
2740 ps_sao_ctxt->pu1_frm_chroma_src_buf +
2741 (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2742 (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2743 (ps_sao_ctxt->i4_ctb_x * ctb_size);
2744
2745 ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2746
2747 /* Calculate the pointer to buff to store the (x,y)th sao
2748 * for the top merge of (x,y+1)th ctb
2749 */
2750 ps_sao_ctxt->ps_top_ctb_sao =
2751 &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2752 [ps_sao_ctxt->i4_ctb_x +
2753 (ps_sao_ctxt->i4_ctb_y) *
2754 ps_frm_ctb_prms->i4_num_ctbs_horz +
2755 (ps_ctxt->i4_bitrate_instance_num *
2756 ps_sao_ctxt->i4_num_ctb_units)];
2757
2758 /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2759 ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2760 ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2761 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2762 ps_sao_ctxt->i4_ctb_x * ctb_size +
2763 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2764 ps_sao_ctxt->i4_top_chroma_buf_size);
2765
2766 /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2767 ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2768 ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2769 (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2770 ps_sao_ctxt->i4_ctb_x * ctb_size +
2771 ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2772 ps_sao_ctxt->i4_top_chroma_buf_size);
2773
2774 {
2775 UWORD32 u4_ctb_sao_bits;
2776 ihevce_sao_analyse(
2777 &ps_ctxt->s_sao_ctxt_t,
2778 ps_ctb_out_sao,
2779 &u4_ctb_sao_bits,
2780 ps_tile_params);
2781 ps_ctxt
2782 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2783 [ps_ctxt->i4_bitrate_instance_num]
2784 ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2785 ps_ctxt
2786 ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2787 [ps_ctxt->i4_bitrate_instance_num]
2788 ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2789 }
2790 /** Subpel generation not done for non-ref picture **/
2791 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2792 {
2793 /* Recon Padding */
2794 ihevce_recon_padding(
2795 ps_pad_interp_recon,
2796 ctb_ctr,
2797 vert_ctr,
2798 ps_frm_ctb_prms,
2799 ps_ctxt->ps_func_selector);
2800 }
2801 }
2802 } //end of loop over CTBs in current CTB-row
2803 }
2804
2805 /* Subpel Plane Generation*/
2806 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2807 {
2808 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2809 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2810 {
2811 if(0 != vert_ctr)
2812 {
2813 /** Subpel generation not done for non-ref picture **/
2814 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2815 {
2816 /* Padding and Subpel Plane Generation */
2817 ihevce_pad_interp_recon_ctb(
2818 ps_pad_interp_recon,
2819 ctb_ctr,
2820 vert_ctr - 1,
2821 ps_ctxt->i4_quality_preset,
2822 ps_frm_ctb_prms,
2823 ps_ctxt->ai2_scratch,
2824 ps_ctxt->i4_bitrate_instance_num,
2825 ps_ctxt->ps_func_selector);
2826 }
2827 }
2828 }
2829 else
2830 { // SAO Disabled
2831 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2832 {
2833 /* Padding and Subpel Plane Generation */
2834 ihevce_pad_interp_recon_ctb(
2835 ps_pad_interp_recon,
2836 ctb_ctr,
2837 vert_ctr,
2838 ps_ctxt->i4_quality_preset,
2839 ps_frm_ctb_prms,
2840 ps_ctxt->ai2_scratch,
2841 ps_ctxt->i4_bitrate_instance_num,
2842 ps_ctxt->ps_func_selector);
2843 }
2844 }
2845 }
2846
2847 {
2848 if(!ps_ctxt->i4_bitrate_instance_num)
2849 {
2850 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2851 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2852 {
2853 /* If SAO is on, then signal completion of previous CTB row */
2854 if(0 != vert_ctr)
2855 {
2856 {
2857 WORD32 post_ctb_ctr;
2858
2859 for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2860 {
2861 ihevce_dmgr_map_set_sync(
2862 pv_dep_mngr_me_dep_encloop,
2863 post_ctb_ctr,
2864 (vert_ctr - 1),
2865 MAP_CTB_COMPLETE);
2866 }
2867 }
2868 }
2869 }
2870 else
2871 {
2872 {
2873 WORD32 post_ctb_ctr;
2874
2875 for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2876 {
2877 ihevce_dmgr_map_set_sync(
2878 pv_dep_mngr_me_dep_encloop,
2879 post_ctb_ctr,
2880 vert_ctr,
2881 MAP_CTB_COMPLETE);
2882 }
2883 }
2884 }
2885 }
2886 }
2887
2888 /*process last ctb row*/
2889 if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2890 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2891 {
2892 sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2893
2894 if(vert_ctr ==
2895 (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1))
2896 {
2897 for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2898 {
2899 if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2900 {
2901 /* Padding and Subpel Plane Generation */
2902 ihevce_pad_interp_recon_ctb(
2903 ps_pad_interp_recon,
2904 ctb_ctr,
2905 vert_ctr,
2906 ps_ctxt->i4_quality_preset,
2907 ps_frm_ctb_prms,
2908 ps_ctxt->ai2_scratch,
2909 ps_ctxt->i4_bitrate_instance_num,
2910 ps_ctxt->ps_func_selector);
2911 }
2912 }
2913 }
2914 /* If SAO is on, then signal completion of the last CTB row of frame */
2915 {
2916 if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
2917 {
2918 if(!ps_ctxt->i4_bitrate_instance_num)
2919 {
2920 {
2921 WORD32 post_ctb_ctr;
2922
2923 for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2924 {
2925 ihevce_dmgr_map_set_sync(
2926 pv_dep_mngr_me_dep_encloop,
2927 post_ctb_ctr,
2928 vert_ctr,
2929 MAP_CTB_COMPLETE);
2930 }
2931 }
2932 }
2933 }
2934 }
2935 }
2936 }
2937
2938 return;
2939 }
2940
2941 /*!
2942 ******************************************************************************
2943 * \if Function name : ihevce_enc_loop_pass \endif
2944 *
2945 * \brief
2946 * Frame level enc_loop pass function
2947 *
2948 * \param[in] pv_ctxt : pointer to enc_loop module
2949 * \param[in] ps_frm_lamda : Frame level Lambda params
2950 * \param[in] ps_inp : pointer to input yuv buffer (frame buffer)
2951 * \param[in] ps_ctb_in : pointer CTB structure (output of ME/IPE) (frame buffer)
2952 * \param[out] ps_frm_recon : pointer recon picture structure pointer (frame buffer)
2953 * \param[out] ps_ctb_out : pointer CTB output structure (frame buffer)
2954 * \param[out] ps_cu_out : pointer CU output structure (frame buffer)
2955 * \param[out] ps_tu_out : pointer TU output structure (frame buffer)
2956 * \param[out] pi2_frm_coeffs : pointer coeff output frame buffer)
2957 *
2958 * \return
2959 * None
2960 *
2961 * Note : Currently the frame level calcualtions done assumes that
2962 * framewidth of the input /recon are excat multiple of ctbsize
2963 *
2964 * \author
2965 * Ittiam
2966 *
2967 *****************************************************************************
2968 */
ihevce_enc_loop_process(void * pv_ctxt,ihevce_lap_enc_buf_t * ps_curr_inp,ctb_analyse_t * ps_ctb_in,ipe_l0_ctb_analyse_for_me_t * ps_ipe_analyse,recon_pic_buf_t * ps_frm_recon,cur_ctb_cu_tree_t * ps_cu_tree_out,ctb_enc_loop_out_t * ps_ctb_out,cu_enc_loop_out_t * ps_cu_out,tu_enc_loop_out_t * ps_tu_out,pu_t * ps_pu_out,UWORD8 * pu1_frm_ecd_data,frm_ctb_ctxt_t * ps_frm_ctb_prms,frm_lambda_ctxt_t * ps_frm_lamda,multi_thrd_ctxt_t * ps_multi_thrd_ctxt,WORD32 thrd_id,WORD32 i4_enc_frm_id,WORD32 i4_pass)2969 void ihevce_enc_loop_process(
2970 void *pv_ctxt,
2971 ihevce_lap_enc_buf_t *ps_curr_inp,
2972 ctb_analyse_t *ps_ctb_in,
2973 ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse,
2974 recon_pic_buf_t *ps_frm_recon,
2975 cur_ctb_cu_tree_t *ps_cu_tree_out,
2976 ctb_enc_loop_out_t *ps_ctb_out,
2977 cu_enc_loop_out_t *ps_cu_out,
2978 tu_enc_loop_out_t *ps_tu_out,
2979 pu_t *ps_pu_out,
2980 UWORD8 *pu1_frm_ecd_data,
2981 frm_ctb_ctxt_t *ps_frm_ctb_prms,
2982 frm_lambda_ctxt_t *ps_frm_lamda,
2983 multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
2984 WORD32 thrd_id,
2985 WORD32 i4_enc_frm_id,
2986 WORD32 i4_pass)
2987 {
2988 WORD32 vert_ctr;
2989 WORD32 tile_col_idx;
2990 iv_enc_yuv_buf_t s_curr_src_bufs;
2991 iv_enc_yuv_buf_t s_curr_recon_bufs;
2992 iv_enc_yuv_buf_src_t s_curr_recon_bufs_src;
2993 UWORD32 *pu4_pu_offsets;
2994 WORD32 end_of_frame;
2995 UWORD8 *apu1_y_sub_pel_planes[3];
2996 pad_interp_recon_frm_t s_pad_interp_recon;
2997 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_ctxt;
2998
2999 ihevce_enc_loop_ctxt_t *ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[thrd_id];
3000
3001 WORD32 i4_bitrate_instance_num = ps_ctxt->i4_bitrate_instance_num;
3002
3003 /* initialize the closed loop lambda for the current frame */
3004 ps_ctxt->i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
3005 ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
3006 ps_ctxt->u4_chroma_cost_weighing_factor = ps_frm_lamda->u4_chroma_cost_weighing_factor;
3007 ps_ctxt->i4_satd_lamda = ps_frm_lamda->i4_cl_satd_lambda_qf;
3008 ps_ctxt->i4_sad_lamda = ps_frm_lamda->i4_cl_sad_type2_lambda_qf;
3009 ps_ctxt->thrd_id = thrd_id;
3010 ps_ctxt->u1_is_refPic = ps_curr_inp->s_lap_out.i4_is_ref_pic;
3011
3012 #if DISABLE_SAO_WHEN_NOISY
3013 ps_ctxt->s_sao_ctxt_t.ps_ctb_data = ps_ctb_in;
3014 ps_ctxt->s_sao_ctxt_t.i4_ctb_data_stride = ps_frm_ctb_prms->i4_num_ctbs_horz;
3015 #endif
3016
3017 #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
3018 ps_ctxt->pv_err_func_selector = ps_func_selector;
3019 #endif
3020
3021 ps_ctxt->i4_deblk_pad_hpel_cur_pic =
3022 ps_frm_recon->i4_deblk_pad_hpel_cur_pic ||
3023 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
3024 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag;
3025
3026 /* Share all reference pictures with nbr clients. This flag will be used only
3027 in case of dist-enc mode */
3028 ps_ctxt->i4_share_flag = (ps_frm_recon->i4_is_reference != 0);
3029 ps_ctxt->pv_frm_recon = (void *)ps_frm_recon;
3030
3031 /* Register the frame level ssd lamda for both luma and chroma*/
3032 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
3033 ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
3034
3035 ihevce_populate_cl_cu_lambda_prms(
3036 ps_ctxt,
3037 ps_frm_lamda,
3038 (WORD32)ps_ctxt->i1_slice_type,
3039 ps_curr_inp->s_lap_out.i4_temporal_lyr_id,
3040 ENC_LOOP_LAMBDA_TYPE);
3041
3042 ps_ctxt->u1_disable_intra_eval = DISABLE_INTRA_IN_BPICS &&
3043 (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
3044 (ps_ctxt->i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE);
3045
3046 end_of_frame = 0;
3047
3048 /* ----------------------------------------------------- */
3049 /* store the stride and dimensions of source and recon */
3050 /* buffer pointers will be over written at every CTB row */
3051 /* ----------------------------------------------------- */
3052 memcpy(&s_curr_src_bufs, &ps_curr_inp->s_lap_out.s_input_buf, sizeof(iv_enc_yuv_buf_t));
3053
3054 memcpy(&s_curr_recon_bufs, &ps_frm_recon->s_yuv_buf_desc, sizeof(iv_enc_yuv_buf_t));
3055
3056 memcpy(&s_curr_recon_bufs_src, &ps_frm_recon->s_yuv_buf_desc_src, sizeof(iv_enc_yuv_buf_src_t));
3057
3058 /* get the frame level pu offset pointer*/
3059 pu4_pu_offsets = ps_frm_recon->pu4_pu_off;
3060
3061 s_pad_interp_recon.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
3062
3063 /* ------------ Loop over all the CTB rows --------------- */
3064 while(0 == end_of_frame)
3065 {
3066 UWORD8 *pu1_tmp;
3067 UWORD8 *pu1_row_pu_map;
3068 UWORD8 *pu1_row_ecd_data;
3069 ctb_analyse_t *ps_ctb_row_in;
3070 ctb_enc_loop_out_t *ps_ctb_row_out;
3071 cu_enc_loop_out_t *ps_row_cu;
3072 tu_enc_loop_out_t *ps_row_tu;
3073 pu_t *ps_row_pu;
3074 pu_col_mv_t *ps_row_col_pu;
3075 job_queue_t *ps_job;
3076 UWORD32 *pu4_pu_row_offsets;
3077 UWORD16 *pu2_num_pu_row;
3078
3079 ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse;
3080 cur_ctb_cu_tree_t *ps_row_cu_tree;
3081 UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
3082
3083 /* Get the current row from the job queue */
3084 ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
3085 ps_multi_thrd_ctxt, ENC_LOOP_JOB + i4_bitrate_instance_num, 1, i4_enc_frm_id);
3086
3087 /* Register the pointer to ctb out of the current frame*/
3088 ps_ctxt->s_sao_ctxt_t.ps_ctb_out = ps_ctb_out;
3089
3090 /* If all rows are done, set the end of process flag to 1, */
3091 /* and the current row to -1 */
3092 if(NULL == ps_job)
3093 {
3094 vert_ctr = -1;
3095 tile_col_idx = -1;
3096 end_of_frame = 1;
3097 }
3098 else
3099 {
3100 ihevce_tile_params_t *ps_col_tile_params_temp;
3101 ihevce_tile_params_t *ps_tile_params;
3102 WORD32 i4_tile_id;
3103
3104 ASSERT((ENC_LOOP_JOB + i4_bitrate_instance_num) == ps_job->i4_task_type);
3105 /* set the output dependency */
3106 ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_enc_frm_id);
3107
3108 /* Obtain the current row's details from the job */
3109 vert_ctr = ps_job->s_job_info.s_enc_loop_job_info.i4_ctb_row_no;
3110 {
3111 /* Obtain the current colum tile index from the job */
3112 tile_col_idx = ps_job->s_job_info.s_enc_loop_job_info.i4_tile_col_idx;
3113
3114 /* The tile parameter for the col. idx. Use only the properties
3115 which is same for all the bottom tiles like width, start_x, etc.
3116 Don't use height, start_y, etc. */
3117 ps_col_tile_params_temp =
3118 ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + tile_col_idx);
3119
3120 /* Derive actual tile_id based on vert_ctr */
3121 i4_tile_id =
3122 *(ps_frm_ctb_prms->pi4_tile_id_map +
3123 vert_ctr * ps_frm_ctb_prms->i4_tile_id_ctb_map_stride +
3124 ps_col_tile_params_temp->i4_first_ctb_x);
3125 /* Derive pointer to current tile prms */
3126 ps_tile_params =
3127 ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + i4_tile_id);
3128 }
3129
3130 ps_ctxt->i4_tile_col_idx = tile_col_idx;
3131 /* derive the current ctb row pointers */
3132
3133 /* luma src */
3134 pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
3135 (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
3136 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
3137 ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
3138
3139 pu1_tmp +=
3140 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size *
3141 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd);
3142
3143 s_curr_src_bufs.pv_y_buf = pu1_tmp;
3144
3145 if(!ps_ctxt->u1_is_input_data_hbd)
3146 {
3147 /* cb src */
3148 pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
3149 pu1_tmp +=
3150 (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
3151 ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd);
3152
3153 s_curr_src_bufs.pv_u_buf = pu1_tmp;
3154 }
3155
3156 /* luma recon */
3157 pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3158 pu1_tmp +=
3159 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3160
3161 s_curr_recon_bufs.pv_y_buf = pu1_tmp;
3162 s_pad_interp_recon.pu1_luma_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3163 s_pad_interp_recon.i4_luma_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
3164 if(!ps_ctxt->u1_is_input_data_hbd)
3165 {
3166 /* cb recon */
3167 pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3168 pu1_tmp +=
3169 (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
3170 ps_frm_recon->s_yuv_buf_desc.i4_uv_strd);
3171
3172 s_curr_recon_bufs.pv_u_buf = pu1_tmp;
3173 s_pad_interp_recon.pu1_chrm_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3174 s_pad_interp_recon.i4_chrm_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
3175
3176 s_pad_interp_recon.i4_ctb_size = ps_frm_ctb_prms->i4_ctb_size;
3177
3178 /* Register the source buffer pointers in sao context*/
3179 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_src_buf =
3180 (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
3181 (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
3182 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
3183 ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
3184
3185 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_src_stride =
3186 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd;
3187
3188 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_src_buf =
3189 (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
3190
3191 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_src_stride =
3192 ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd;
3193 }
3194
3195 /* Subpel planes hxfy, fxhy, hxhy*/
3196 pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[0];
3197 pu1_tmp +=
3198 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3199 apu1_y_sub_pel_planes[0] = pu1_tmp;
3200 s_pad_interp_recon.pu1_sbpel_hxfy = ps_frm_recon->apu1_y_sub_pel_planes[0];
3201
3202 pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[1];
3203 pu1_tmp +=
3204 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3205 apu1_y_sub_pel_planes[1] = pu1_tmp;
3206 s_pad_interp_recon.pu1_sbpel_fxhy = ps_frm_recon->apu1_y_sub_pel_planes[1];
3207
3208 pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[2];
3209 pu1_tmp +=
3210 (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3211 apu1_y_sub_pel_planes[2] = pu1_tmp;
3212 s_pad_interp_recon.pu1_sbpel_hxhy = ps_frm_recon->apu1_y_sub_pel_planes[2];
3213
3214 /* row level coeffs buffer */
3215 pu1_row_ecd_data =
3216 pu1_frm_ecd_data +
3217 (vert_ctr *
3218 ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_max_tus_in_row << 1)
3219 : ((ps_frm_ctb_prms->i4_max_tus_in_row * 3) >> 1)) *
3220 MAX_SCAN_COEFFS_BYTES_4x4);
3221
3222 /* Row level CU buffer */
3223 ps_row_cu = ps_cu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_cus_in_row);
3224
3225 /* Row level TU buffer */
3226 ps_row_tu = ps_tu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_tus_in_row);
3227
3228 /* Row level PU buffer */
3229 ps_row_pu = ps_pu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row);
3230
3231 /* Row level colocated PU buffer */
3232 /* ps_frm_col_mv has (i4_num_ctbs_horz + 1) CTBs for stride */
3233 ps_row_col_pu =
3234 ps_frm_recon->ps_frm_col_mv + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
3235 ps_frm_ctb_prms->i4_num_pus_in_ctb);
3236 /* Row level col PU map buffer */
3237 /* pu1_frm_pu_map has (i4_num_ctbs_horz + 1) CTBs for stride */
3238 pu1_row_pu_map =
3239 ps_frm_recon->pu1_frm_pu_map + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
3240 ps_frm_ctb_prms->i4_num_pus_in_ctb);
3241 /* row ctb in pointer */
3242 ps_ctb_row_in = ps_ctb_in + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3243
3244 /* row ctb out pointer */
3245 ps_ctb_row_out = ps_ctb_out + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3246
3247 /* row number of PUs map pointer */
3248 pu2_num_pu_row =
3249 ps_frm_recon->pu2_num_pu_map + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3250
3251 /* row pu offsets pointer */
3252 pu4_pu_row_offsets = pu4_pu_offsets + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3253 /* store the first CTB pu offset pointer */
3254 *pu4_pu_row_offsets = vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row;
3255 /* Initialize ptr to current IPE row */
3256 ps_row_ipe_analyse = ps_ipe_analyse + (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz);
3257
3258 /* Initialize ptr to current row */
3259 ps_row_cu_tree = ps_cu_tree_out +
3260 (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE);
3261
3262 /* Get the EncLoop Top-Right CU Dep Mngr */
3263 ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right =
3264 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[ps_ctxt->i4_enc_frm_id]
3265 [i4_bitrate_instance_num];
3266 /* Get the EncLoop Deblock Dep Mngr */
3267 ps_ctxt->pv_dep_mngr_enc_loop_dblk =
3268 ps_master_ctxt
3269 ->aapv_dep_mngr_enc_loop_dblk[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num];
3270 /* Get the EncLoop Sao Dep Mngr */
3271 ps_ctxt->pv_dep_mngr_enc_loop_sao =
3272 ps_master_ctxt
3273 ->aapv_dep_mngr_enc_loop_sao[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num];
3274
3275 ps_ctxt->pu1_curr_row_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr][0];
3276
3277 {
3278 /* derive the pointers of top row buffers */
3279 ps_ctxt->pv_top_row_luma =
3280 (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
3281 (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
3282 (vert_ctr - 1) * ps_ctxt->i4_top_row_luma_stride;
3283
3284 ps_ctxt->pv_top_row_chroma =
3285 (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
3286 (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
3287 (vert_ctr - 1) * ps_ctxt->i4_top_row_chroma_stride;
3288
3289 /* derive the pointers of bottom row buffers to update current row data */
3290 ps_ctxt->pv_bot_row_luma =
3291 (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
3292 (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
3293 (vert_ctr)*ps_ctxt->i4_top_row_luma_stride;
3294
3295 ps_ctxt->pv_bot_row_chroma =
3296 (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
3297 (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
3298 (vert_ctr)*ps_ctxt->i4_top_row_chroma_stride;
3299
3300 /* Register the buffer pointers in sao context*/
3301 ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_recon_buf =
3302 (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3303 ps_ctxt->s_sao_ctxt_t.i4_frm_luma_recon_stride =
3304 ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
3305
3306 ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_recon_buf =
3307 (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3308 ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_recon_stride =
3309 ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
3310
3311 ps_ctxt->s_sao_ctxt_t.ps_rdopt_entropy_ctxt = &ps_ctxt->s_rdopt_entropy_ctxt;
3312
3313 ps_ctxt->s_sao_ctxt_t.i4_frm_top_luma_buf_stride =
3314 ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 1;
3315
3316 ps_ctxt->s_sao_ctxt_t.i4_frm_top_chroma_buf_stride =
3317 ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 2;
3318 }
3319
3320 ps_ctxt->ps_top_row_nbr =
3321 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
3322 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
3323 (vert_ctr - 1) * ps_ctxt->i4_top_row_nbr_stride;
3324
3325 ps_ctxt->ps_bot_row_nbr =
3326 ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
3327 (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
3328 (vert_ctr)*ps_ctxt->i4_top_row_nbr_stride;
3329
3330 if(vert_ctr > 0)
3331 {
3332 ps_ctxt->pu1_top_rt_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr - 1][0];
3333 }
3334 else
3335 {
3336 ps_ctxt->pu1_top_rt_cabac_state = NULL;
3337 }
3338
3339 ASSERT(
3340 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0]
3341 .ps_pps->i1_sign_data_hiding_flag ==
3342 ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1]
3343 .ps_pps->i1_sign_data_hiding_flag);
3344
3345 /* call the row level processing function */
3346 ihevce_enc_loop_process_row(
3347 ps_ctxt,
3348 &s_curr_src_bufs,
3349 &s_curr_recon_bufs,
3350 &s_curr_recon_bufs_src,
3351 &apu1_y_sub_pel_planes[0],
3352 ps_ctb_row_in,
3353 ps_ctb_row_out,
3354 ps_row_ipe_analyse,
3355 ps_row_cu_tree,
3356 ps_row_cu,
3357 ps_row_tu,
3358 ps_row_pu,
3359 ps_row_col_pu,
3360 pu2_num_pu_row,
3361 pu1_row_pu_map,
3362 pu1_row_ecd_data,
3363 pu4_pu_row_offsets,
3364 ps_frm_ctb_prms,
3365 vert_ctr,
3366 ps_frm_recon,
3367 ps_ctxt->pv_dep_mngr_encloop_dep_me,
3368 &s_pad_interp_recon,
3369 i4_pass,
3370 ps_multi_thrd_ctxt,
3371 ps_tile_params);
3372 }
3373 }
3374 }
3375
3376 /*!
3377 ******************************************************************************
3378 * \if Function name : ihevce_enc_loop_dblk_get_prms_dep_mngr \endif
3379 *
3380 * \brief Returns to the caller key attributes relevant for dependency manager,
3381 * ie, the number of vertical units in l0 layer
3382 *
3383 * \par Description:
3384 *
3385 * \param[in] pai4_ht : ht
3386 * \param[out] pi4_num_vert_units_in_lyr : Pointer to store num vertical units
3387 * for deblocking
3388 *
3389 * \return
3390 * None
3391 *
3392 * \author
3393 * Ittiam
3394 *
3395 *****************************************************************************
3396 */
ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht,WORD32 * pi4_num_vert_units_in_lyr)3397 void ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht, WORD32 *pi4_num_vert_units_in_lyr)
3398 {
3399 /* Blk ht at a given layer*/
3400 WORD32 unit_ht_c;
3401 WORD32 ctb_size = 64;
3402
3403 /* compute blk ht and unit ht */
3404 unit_ht_c = ctb_size;
3405
3406 /* set the numebr of vertical units */
3407 *pi4_num_vert_units_in_lyr = (i4_ht + unit_ht_c - 1) / unit_ht_c;
3408 }
3409
3410 /*!
3411 ******************************************************************************
3412 * \if Function name : ihevce_enc_loop_get_num_mem_recs \endif
3413 *
3414 * \brief
3415 * Number of memory records are returned for enc_loop module
3416 * Note : Include TOT MEM. req. for ENC.LOOP + TOT MEM. req. for Dep Mngr for Dblk
3417 *
3418 * \return
3419 * None
3420 *
3421 * \author
3422 * Ittiam
3423 *
3424 *****************************************************************************
3425 */
3426 WORD32
ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst,WORD32 i4_num_enc_loop_frm_pllel)3427 ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst, WORD32 i4_num_enc_loop_frm_pllel)
3428 {
3429 WORD32 enc_loop_mem_recs = NUM_ENC_LOOP_MEM_RECS;
3430 WORD32 enc_loop_dblk_dep_mngr_mem_recs =
3431 i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3432 WORD32 enc_loop_sao_dep_mngr_mem_recs =
3433 i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3434 WORD32 enc_loop_cu_top_right_dep_mngr_mem_recs =
3435 i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3436 WORD32 enc_loop_aux_br_dep_mngr_mem_recs =
3437 i4_num_enc_loop_frm_pllel * (i4_num_bitrate_inst - 1) * ihevce_dmgr_get_num_mem_recs();
3438
3439 return (
3440 (enc_loop_mem_recs + enc_loop_dblk_dep_mngr_mem_recs + enc_loop_sao_dep_mngr_mem_recs +
3441 enc_loop_cu_top_right_dep_mngr_mem_recs + enc_loop_aux_br_dep_mngr_mem_recs));
3442 }
3443 /*!
3444 ******************************************************************************
3445 * \if Function name : ihevce_enc_loop_get_mem_recs \endif
3446 *
3447 * \brief
3448 * Memory requirements are returned for ENC_LOOP.
3449 *
3450 * \param[in,out] ps_mem_tab : pointer to memory descriptors table
3451 * \param[in] ps_init_prms : Create time static parameters
3452 * \param[in] i4_num_proc_thrds : Number of processing threads for this module
3453 * \param[in] i4_mem_space : memspace in which memory request should be done
3454 *
3455 * \return
3456 * None
3457 *
3458 * \author
3459 * Ittiam
3460 *
3461 *****************************************************************************
3462 */
ihevce_enc_loop_get_mem_recs(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,WORD32 i4_num_bitrate_inst,WORD32 i4_num_enc_loop_frm_pllel,WORD32 i4_mem_space,WORD32 i4_resolution_id)3463 WORD32 ihevce_enc_loop_get_mem_recs(
3464 iv_mem_rec_t *ps_mem_tab,
3465 ihevce_static_cfg_params_t *ps_init_prms,
3466 WORD32 i4_num_proc_thrds,
3467 WORD32 i4_num_bitrate_inst,
3468 WORD32 i4_num_enc_loop_frm_pllel,
3469 WORD32 i4_mem_space,
3470 WORD32 i4_resolution_id)
3471 {
3472 UWORD32 u4_width, u4_height, n_tabs;
3473 UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
3474 WORD32 ctr;
3475 WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
3476
3477 /* derive frame dimensions */
3478 /*width of the input YUV to be encoded */
3479 u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
3480 /*making the width a multiple of CTB size*/
3481 u4_width += SET_CTB_ALIGN(
3482 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
3483
3484 /*height of the input YUV to be encoded */
3485 u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
3486 /*making the height a multiple of CTB size*/
3487 u4_height += SET_CTB_ALIGN(
3488 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
3489 u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
3490 u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
3491 /* memories should be requested assuming worst case requirememnts */
3492
3493 /* Module context structure */
3494 ps_mem_tab[ENC_LOOP_CTXT].i4_mem_size = sizeof(ihevce_enc_loop_master_ctxt_t);
3495
3496 ps_mem_tab[ENC_LOOP_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3497
3498 ps_mem_tab[ENC_LOOP_CTXT].i4_mem_alignment = 8;
3499
3500 /* Thread context structure */
3501 ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_size =
3502 i4_num_proc_thrds * sizeof(ihevce_enc_loop_ctxt_t);
3503
3504 ps_mem_tab[ENC_LOOP_THRDS_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3505
3506 ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_alignment = 16;
3507
3508 /* Scale matrices */
3509 ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
3510
3511 ps_mem_tab[ENC_LOOP_SCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3512
3513 ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_alignment = 8;
3514
3515 /* Rescale matrices */
3516 ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
3517
3518 ps_mem_tab[ENC_LOOP_RESCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3519
3520 ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_alignment = 8;
3521
3522 /* top row luma one row of pixel data per CTB row */
3523 if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
3524 {
3525 ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
3526 (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD16) *
3527 i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3528 }
3529 else
3530 {
3531 ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
3532 (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD8) *
3533 i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3534 }
3535
3536 ps_mem_tab[ENC_LOOP_TOP_LUMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3537
3538 ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_alignment = 8;
3539
3540 /* top row chroma */
3541 if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
3542 {
3543 ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
3544 (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD16) *
3545 i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3546 }
3547 else
3548 {
3549 ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
3550 (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD8) *
3551 i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3552 }
3553
3554 ps_mem_tab[ENC_LOOP_TOP_CHROMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3555
3556 ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_alignment = 8;
3557
3558 /* top row neighbour 4x4 */
3559 ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_size =
3560 (u4_ctb_rows_in_a_frame + 1) * (((u4_width + MAX_CU_SIZE) >> 2) + 1) * sizeof(nbr_4x4_t) *
3561 i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3562
3563 ps_mem_tab[ENC_LOOP_TOP_NBR4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3564
3565 ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_alignment = 8;
3566
3567 /* memory to dump rate control parameters by each thread for each bit-rate instance */
3568 /* RC params collated by each thread for each bit-rate instance separately */
3569 ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_size = i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel *
3570 i4_num_proc_thrds * sizeof(enc_loop_rc_params_t);
3571
3572 ps_mem_tab[ENC_LOOP_RC_PARAMS].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3573
3574 ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_alignment = 8;
3575 /* Memory required for deblocking */
3576 {
3577 /* Memory to store Qp of top4x4 blocks for each CTB row.
3578 This memory is allocated at frame level and shared across
3579 all cores. The Qp values are needed to form Qp-map(described
3580 in the ENC_LOOP_DEBLOCKING section below)*/
3581
3582 UWORD32 u4_size_bs_memory, u4_size_qp_memory;
3583 UWORD32 u4_size_top_4x4_qp_memory;
3584
3585 /*Memory required to store Qp of top4x4 blocks for a CTB row for entire frame*/
3586 /*Space required per CTB*/
3587 u4_size_top_4x4_qp_memory = (MAX_CTB_SIZE / 4);
3588 /*Space required for entire CTB row*/
3589 u4_size_top_4x4_qp_memory *= u4_ctb_in_a_row;
3590 /*Space required for entire frame*/
3591 u4_size_top_4x4_qp_memory *= u4_ctb_rows_in_a_frame;
3592 /*Space required for multiple bitrate*/
3593 u4_size_top_4x4_qp_memory *= i4_num_bitrate_inst;
3594 /*Space required for multiple frames in parallel*/
3595 u4_size_top_4x4_qp_memory *= i4_num_enc_loop_frm_pllel;
3596
3597 ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_size = u4_size_top_4x4_qp_memory;
3598 ps_mem_tab[ENC_LOOP_QP_TOP_4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3599 ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_alignment = 8;
3600
3601 /* Memory allocation of BS and Qp-map for deblocking at CTB-row level:
3602 ## Boundary Strength(Vertical):
3603 BS stored per CTB at one stretch i.e. for a 64x CTB first 8 entries belongs to first CTB
3604 of the row followed by 8 entries of second CTB and so on.
3605 8 entries: Includes left edge of current CTB and excludes right edge.
3606 ## Boundary Strength(Horizontal):
3607 Same as Vertical.
3608 8 entries: Includes top edge of current CTB and excludes bottom edge.
3609
3610 ## Qp-map storage:
3611 T0 T1 T2 T3 T4 T5 ..........to the end of the CTB row
3612 00 01 02 03 04 05 ..........to the end of the CTB row
3613 10 11 12 13 14 15 ..........to the end of the CTB row
3614 20 21 22 23 24 25 ..........to the end of the CTB row
3615 30 31 32 33 34 35 ..........to the end of the CTB row
3616 40 41 42 43 44 45 ..........to the end of the CTB row
3617 ............................to the end of the CTB row
3618 upto height_of_CTB..........to the end of the CTB row
3619
3620 Qp is stored for each "4x4 block" in a proper 2-D array format (One entry for each 4x4).
3621 A 2-D array of height= (height_of_CTB +1), and width = (width_of_CTB).
3622 where,
3623 => height_of_CTB = number of 4x4 blocks in a CTB vertically,
3624 => +1 is done to store Qp of lowest 4x4-block layer of top-CTB
3625 in order to deblock top edge of current CTB.
3626 => width_of_CTB = number of 4x4 blocks in a CTB horizontally,
3627 */
3628
3629 /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
3630 /*1 vertical edge per 8 pixel*/
3631 u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
3632 /*Vertical edges for entire width of CTB row*/
3633 u4_size_bs_memory *= u4_ctb_in_a_row;
3634 /*Each vertical edge of CTB row is 4 bytes*/
3635 u4_size_bs_memory = u4_size_bs_memory << 2;
3636 /*Adding Memory required for storing horizontal BS by doubling*/
3637 u4_size_bs_memory = u4_size_bs_memory << 1;
3638
3639 /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
3640 /*Number of 4x4 blocks in the width of a CTB*/
3641 u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
3642 /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
3643 4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
3644 u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
3645 /*Storage for entire CTB row*/
3646 u4_size_qp_memory *= u4_ctb_in_a_row;
3647
3648 /*Multiplying by i4_num_proc_thrds to assign memory for each core*/
3649 ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_size =
3650 i4_num_proc_thrds * (u4_size_bs_memory + u4_size_qp_memory);
3651
3652 ps_mem_tab[ENC_LOOP_DEBLOCKING].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3653
3654 ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_alignment = 8;
3655 }
3656
3657 /* Memory required to store pred for 422 chroma */
3658 ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_size =
3659 i4_num_proc_thrds * MAX_CTB_SIZE * MAX_CTB_SIZE * 2 *
3660 (i4_chroma_format == IV_YUV_422SP_UV) *
3661 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3662
3663 ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3664
3665 ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_alignment = 8;
3666
3667 /* Memory for inter pred buffers */
3668 {
3669 WORD32 i4_num_bufs_per_thread = 0;
3670
3671 WORD32 i4_buf_size_per_cand =
3672 (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
3673 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3674 WORD32 i4_quality_preset =
3675 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
3676 switch(i4_quality_preset)
3677 {
3678 case IHEVCE_QUALITY_P0:
3679 {
3680 i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_PQ;
3681 break;
3682 }
3683 case IHEVCE_QUALITY_P2:
3684 {
3685 i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HQ;
3686 break;
3687 }
3688 case IHEVCE_QUALITY_P3:
3689 {
3690 i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_MS;
3691 break;
3692 }
3693 case IHEVCE_QUALITY_P4:
3694 {
3695 i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HS;
3696 break;
3697 }
3698 case IHEVCE_QUALITY_P5:
3699 case IHEVCE_QUALITY_P6:
3700 case IHEVCE_QUALITY_P7:
3701 {
3702 i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_ES;
3703 break;
3704 }
3705 default:
3706 {
3707 ASSERT(0);
3708 }
3709 }
3710
3711 i4_num_bufs_per_thread += 4;
3712
3713 ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size =
3714 i4_num_bufs_per_thread * i4_num_proc_thrds * i4_buf_size_per_cand;
3715
3716 ps_mem_tab[ENC_LOOP_INTER_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3717
3718 ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_alignment = 8;
3719 }
3720
3721 /* Memory required to store chroma intra pred */
3722 ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_size =
3723 i4_num_proc_thrds * (MAX_TU_SIZE) * (MAX_TU_SIZE)*2 * NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD *
3724 ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3725 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3726
3727 ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3728
3729 ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_alignment = 8;
3730
3731 /* Memory required to store pred for reference substitution output */
3732 /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
3733 allocate 16 bytes to the left and 7 bytes to the right to facilitate
3734 SIMD access */
3735 ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_size =
3736 i4_num_proc_thrds * (((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING)
3737 + INTRAPRED_SIMD_LEFT_PADDING)*
3738 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3739
3740 ps_mem_tab[ENC_LOOP_REF_SUB_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3741
3742 ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_alignment = 8;
3743
3744 /* Memory required to store pred for reference filtering output */
3745 /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
3746 allocate 16 bytes to the left and 7 bytes to the right to facilitate
3747 SIMD access */
3748 ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_size =
3749 i4_num_proc_thrds * (((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING)
3750 + INTRAPRED_SIMD_LEFT_PADDING)*
3751 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3752
3753 ps_mem_tab[ENC_LOOP_REF_FILT_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3754
3755 ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_alignment = 8;
3756
3757 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3758 if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
3759 #endif
3760 {
3761 /* Memory assignments for recon storage during CU Recursion */
3762 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size =
3763 i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
3764 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3765
3766 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3767
3768 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
3769
3770 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size =
3771 i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
3772 ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3773 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3774
3775 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3776
3777 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
3778 }
3779 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3780 else
3781 {
3782 /* Memory assignments for recon storage during CU Recursion */
3783 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size = 0;
3784
3785 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3786
3787 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
3788
3789 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size = 0;
3790
3791 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3792
3793 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
3794 }
3795 #endif
3796
3797 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3798 if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
3799 #endif
3800 {
3801 /* Memory assignments for pred storage during CU Recursion */
3802 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size =
3803 i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
3804 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3805
3806 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3807
3808 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
3809
3810 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size =
3811 i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
3812 ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3813 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3814
3815 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3816
3817 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
3818 }
3819 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3820 else
3821 {
3822 /* Memory assignments for pred storage during CU Recursion */
3823 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size = 0;
3824
3825 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3826
3827 ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
3828
3829 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size = 0;
3830
3831 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3832
3833 ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
3834 }
3835 #endif
3836
3837 /* Memory assignments for CTB left luma data storage */
3838 ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_size =
3839 i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
3840 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3841
3842 ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3843
3844 ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_alignment = 8;
3845
3846 /* Memory assignments for CTB left chroma data storage */
3847 ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size =
3848 i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
3849 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3850 ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size <<=
3851 ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0);
3852
3853 ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3854
3855 ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_alignment = 8;
3856
3857 /* Memory required for SAO */
3858 {
3859 WORD32 num_vert_units;
3860 WORD32 num_horz_units;
3861 WORD32 ctb_aligned_ht, ctb_aligned_wd;
3862 WORD32 luma_buf, chroma_buf;
3863
3864 num_vert_units = u4_height / MAX_CTB_SIZE;
3865 num_horz_units = u4_width / MAX_CTB_SIZE;
3866
3867 ctb_aligned_ht = u4_height;
3868 ctb_aligned_wd = u4_width;
3869
3870 /* Memory for top buffer. 1 extra width is required for top buf ptr for row 0
3871 * and 1 extra location is required for top left buf ptr for row 0
3872 * Also 1 extra byte is required for every row for top left pixel if
3873 * the top left ptr is to be passed to leaf level unconditionally
3874 */
3875 luma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 1) * (num_vert_units + 1)) *
3876 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3877 chroma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 2) * (num_vert_units + 1)) *
3878 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3879
3880 ps_mem_tab[ENC_LOOP_SAO].i4_mem_size =
3881 (luma_buf + chroma_buf) * (i4_num_bitrate_inst) * (i4_num_enc_loop_frm_pllel);
3882
3883 /* Add the memory required to store the sao information of top ctb for top merge
3884 * This is frame level buffer.
3885 */
3886 ps_mem_tab[ENC_LOOP_SAO].i4_mem_size +=
3887 ((num_horz_units * sizeof(sao_enc_t)) * num_vert_units) * (i4_num_bitrate_inst) *
3888 (i4_num_enc_loop_frm_pllel);
3889
3890 ps_mem_tab[ENC_LOOP_SAO].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3891
3892 ps_mem_tab[ENC_LOOP_SAO].i4_mem_alignment = 8;
3893 }
3894
3895 /* Memory for CU level Coeff data buffer */
3896 {
3897 /* 16 additional bytes are required to ensure alignment */
3898 {
3899 ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_size =
3900 i4_num_proc_thrds *
3901 (((MAX_LUMA_COEFFS_CTB +
3902 (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
3903 16) *
3904 (2) * sizeof(UWORD8));
3905 }
3906
3907 ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3908
3909 ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_alignment = 16;
3910
3911 ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_size =
3912 i4_num_proc_thrds *
3913 (MAX_LUMA_COEFFS_CTB +
3914 (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) *
3915 sizeof(UWORD8);
3916
3917 ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3918
3919 ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_alignment = 16;
3920 }
3921
3922 /* Memory for CU dequant data buffer */
3923 {
3924 /* 16 additional bytes are required to ensure alignment */
3925 {
3926 ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_size =
3927 i4_num_proc_thrds *
3928 (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
3929 : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
3930 8) *
3931 (2) * sizeof(WORD16);
3932 }
3933
3934 ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3935
3936 ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_alignment = 16;
3937 }
3938
3939 /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
3940 {
3941 WORD32 i4_memSize_perThread;
3942
3943 WORD32 i4_chroma_memSize_perThread = 0;
3944 /* 2 bufs each allocated to the two 'enc_loop_cu_final_prms_t' structs */
3945 /* used in RDOPT to store cur and best modes' data */
3946 WORD32 i4_luma_memSize_perThread =
3947 4 * MAX_CU_SIZE * MAX_CU_SIZE *
3948 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3949
3950 /* 'Glossary' for comments in the following codeBlock */
3951 /* 1 - 2 Bufs for storing recons of the best modes determined in the */
3952 /* function 'ihevce_intra_chroma_pred_mode_selector' */
3953 /* 2 - 1 buf each allocated to the two 'enc_loop_cu_final_prms_t' structs */
3954 /* used in RDOPT to store cur and best modes' data */
3955 if(i4_chroma_format == IV_YUV_422SP_UV)
3956 {
3957 WORD32 i4_quality_preset =
3958 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
3959 switch(i4_quality_preset)
3960 {
3961 case IHEVCE_QUALITY_P0:
3962 {
3963 /* 1 */
3964 i4_chroma_memSize_perThread +=
3965 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
3966 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3967
3968 /* 2 */
3969 i4_chroma_memSize_perThread +=
3970 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
3971 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3972
3973 break;
3974 }
3975 case IHEVCE_QUALITY_P2:
3976 {
3977 /* 1 */
3978 i4_chroma_memSize_perThread +=
3979 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
3980 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3981
3982 /* 2 */
3983 i4_chroma_memSize_perThread +=
3984 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
3985 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3986
3987 break;
3988 }
3989 case IHEVCE_QUALITY_P3:
3990 {
3991 /* 1 */
3992 i4_chroma_memSize_perThread +=
3993 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
3994 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3995
3996 /* 2 */
3997 i4_chroma_memSize_perThread +=
3998 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
3999 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4000
4001 break;
4002 }
4003 case IHEVCE_QUALITY_P4:
4004 {
4005 /* 1 */
4006 i4_chroma_memSize_perThread +=
4007 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
4008 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4009
4010 /* 2 */
4011 i4_chroma_memSize_perThread +=
4012 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
4013 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4014
4015 break;
4016 }
4017 case IHEVCE_QUALITY_P5:
4018 {
4019 /* 1 */
4020 i4_chroma_memSize_perThread +=
4021 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
4022 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4023
4024 /* 2 */
4025 i4_chroma_memSize_perThread +=
4026 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
4027 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4028
4029 break;
4030 }
4031 case IHEVCE_QUALITY_P6:
4032 case IHEVCE_QUALITY_P7:
4033 {
4034 /* 1 */
4035 i4_chroma_memSize_perThread +=
4036 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
4037 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4038
4039 /* 2 */
4040 i4_chroma_memSize_perThread +=
4041 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
4042 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4043
4044 break;
4045 }
4046 }
4047 }
4048 else
4049 {
4050 WORD32 i4_quality_preset =
4051 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
4052 switch(i4_quality_preset)
4053 {
4054 case IHEVCE_QUALITY_P0:
4055 {
4056 /* 1 */
4057 i4_chroma_memSize_perThread +=
4058 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
4059 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4060
4061 /* 2 */
4062 i4_chroma_memSize_perThread +=
4063 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4064 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
4065 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4066
4067 break;
4068 }
4069 case IHEVCE_QUALITY_P2:
4070 {
4071 /* 1 */
4072 i4_chroma_memSize_perThread +=
4073 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
4074 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4075
4076 /* 2 */
4077 i4_chroma_memSize_perThread +=
4078 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4079 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
4080 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4081
4082 break;
4083 }
4084 case IHEVCE_QUALITY_P3:
4085 {
4086 /* 1 */
4087 i4_chroma_memSize_perThread +=
4088 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
4089 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4090
4091 /* 2 */
4092 i4_chroma_memSize_perThread +=
4093 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4094 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
4095 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4096
4097 break;
4098 }
4099 case IHEVCE_QUALITY_P4:
4100 {
4101 /* 1 */
4102 i4_chroma_memSize_perThread +=
4103 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
4104 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4105
4106 /* 2 */
4107 i4_chroma_memSize_perThread +=
4108 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4109 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
4110 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4111
4112 break;
4113 }
4114 case IHEVCE_QUALITY_P5:
4115 {
4116 /* 1 */
4117 i4_chroma_memSize_perThread +=
4118 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
4119 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4120
4121 /* 2 */
4122 i4_chroma_memSize_perThread +=
4123 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4124 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
4125 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4126
4127 break;
4128 }
4129 case IHEVCE_QUALITY_P6:
4130 case IHEVCE_QUALITY_P7:
4131 {
4132 /* 1 */
4133 i4_chroma_memSize_perThread +=
4134 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
4135 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4136
4137 /* 2 */
4138 i4_chroma_memSize_perThread +=
4139 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4140 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
4141 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4142
4143 break;
4144 }
4145 }
4146 }
4147
4148 i4_memSize_perThread = i4_luma_memSize_perThread + i4_chroma_memSize_perThread;
4149
4150 ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size =
4151 i4_num_proc_thrds * i4_memSize_perThread * sizeof(UWORD8);
4152
4153 ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
4154
4155 ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_alignment = 16;
4156 }
4157
4158 n_tabs = NUM_ENC_LOOP_MEM_RECS;
4159
4160 /*************************************************************************/
4161 /* --- EncLoop Deblock and SAO sync Dep Mngr Mem requests -- */
4162 /*************************************************************************/
4163
4164 /* Fill the memtabs for EncLoop Deblock Dep Mngr */
4165 {
4166 WORD32 count;
4167 WORD32 num_vert_units;
4168 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4169
4170 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4171 ASSERT(num_vert_units > 0);
4172 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4173 {
4174 for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4175 {
4176 n_tabs += ihevce_dmgr_get_mem_recs(
4177 &ps_mem_tab[n_tabs],
4178 DEP_MNGR_ROW_ROW_SYNC,
4179 num_vert_units,
4180 ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4181 i4_num_proc_thrds,
4182 i4_mem_space);
4183 }
4184 }
4185
4186 /* Fill the memtabs for EncLoop SAO Dep Mngr */
4187 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4188 {
4189 for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4190 {
4191 n_tabs += ihevce_dmgr_get_mem_recs(
4192 &ps_mem_tab[n_tabs],
4193 DEP_MNGR_ROW_ROW_SYNC,
4194 num_vert_units,
4195 ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4196 i4_num_proc_thrds,
4197 i4_mem_space);
4198 }
4199 }
4200 }
4201
4202 /*************************************************************************/
4203 /* --- EncLoop Top-Right CU sync Dep Mngr Mem requests -- */
4204 /*************************************************************************/
4205
4206 /* Fill the memtabs for Top-Right CU sync Dep Mngr */
4207 {
4208 WORD32 count;
4209 WORD32 num_vert_units;
4210 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4211 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4212 ASSERT(num_vert_units > 0);
4213
4214 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4215 {
4216 for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4217 {
4218 n_tabs += ihevce_dmgr_get_mem_recs(
4219 &ps_mem_tab[n_tabs],
4220 DEP_MNGR_ROW_ROW_SYNC,
4221 num_vert_units,
4222 ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4223 i4_num_proc_thrds,
4224 i4_mem_space);
4225 }
4226 }
4227 }
4228
4229 /*************************************************************************/
4230 /* --- EncLoop Aux. on Ref. bitrate sync Dep Mngr Mem requests -- */
4231 /*************************************************************************/
4232
4233 /* Fill the memtabs for EncLoop Aux. on Ref. bitrate Dep Mngr */
4234 {
4235 WORD32 count;
4236 WORD32 num_vert_units;
4237 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4238
4239 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4240 ASSERT(num_vert_units > 0);
4241
4242 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4243 {
4244 for(ctr = 1; ctr < i4_num_bitrate_inst; ctr++)
4245 {
4246 n_tabs += ihevce_dmgr_get_mem_recs(
4247 &ps_mem_tab[n_tabs],
4248 DEP_MNGR_ROW_ROW_SYNC,
4249 num_vert_units,
4250 ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4251 i4_num_proc_thrds,
4252 i4_mem_space);
4253 }
4254 }
4255 }
4256
4257 return (n_tabs);
4258 }
4259
4260 /*!
4261 ******************************************************************************
4262 * \if Function name : ihevce_enc_loop_init \endif
4263 *
4264 * \brief
4265 * Initialization for ENC_LOOP context state structure.
4266 *
4267 * \param[in] ps_mem_tab : pointer to memory descriptors table
4268 * \param[in] ps_init_prms : Create time static parameters
4269 * \param[in] pv_osal_handle : Osal handle
4270 *
4271 * \return
4272 * Opaque handle (void *); presumably the ENC_LOOP master context - TODO confirm
4273 *
4274 * \author
4275 * Ittiam
4276 *
4277 *****************************************************************************
4278 */
ihevce_enc_loop_init(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_num_proc_thrds,void * pv_osal_handle,func_selector_t * ps_func_selector,rc_quant_t * ps_rc_quant_ctxt,ihevce_tile_params_t * ps_tile_params_base,WORD32 i4_resolution_id,WORD32 i4_num_enc_loop_frm_pllel,UWORD8 u1_is_popcnt_available)4279 void *ihevce_enc_loop_init(
4280 iv_mem_rec_t *ps_mem_tab,
4281 ihevce_static_cfg_params_t *ps_init_prms,
4282 WORD32 i4_num_proc_thrds,
4283 void *pv_osal_handle,
4284 func_selector_t *ps_func_selector,
4285 rc_quant_t *ps_rc_quant_ctxt,
4286 ihevce_tile_params_t *ps_tile_params_base,
4287 WORD32 i4_resolution_id,
4288 WORD32 i4_num_enc_loop_frm_pllel,
4289 UWORD8 u1_is_popcnt_available)
4290 {
4291 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
4292 ihevce_enc_loop_ctxt_t *ps_ctxt;
4293 WORD32 ctr, n_tabs;
4294 UWORD32 u4_width, u4_height;
4295 UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
4296 UWORD32 u4_size_bs_memory, u4_size_qp_memory;
4297 UWORD8 *pu1_deblk_base; /*Store the base address of deblcoking memory*/
4298 WORD32 i;
4299 WORD32 i4_num_bitrate_inst =
4300 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_num_bitrate_instances;
4301 enc_loop_rc_params_t *ps_enc_loop_rc_params;
4302 UWORD8 *pu1_sao_base; /* store the base address of sao*/
4303 UWORD32 u4_ctb_aligned_wd, ctb_size, u4_ctb_aligned_ht, num_vert_units;
4304 WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
4305 WORD32 is_hbd_mode = (ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8);
4306 WORD32 i4_enc_frm_id;
4307 WORD32 num_cu_in_ctb;
4308 WORD32 i4_num_tile_cols = 1; //Default value is 1
4309
4310 /* ENC_LOOP state structure */
4311 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)ps_mem_tab[ENC_LOOP_CTXT].pv_base;
4312
4313 ps_master_ctxt->i4_num_proc_thrds = i4_num_proc_thrds;
4314
4315 ps_ctxt = (ihevce_enc_loop_ctxt_t *)ps_mem_tab[ENC_LOOP_THRDS_CTXT].pv_base;
4316 ps_enc_loop_rc_params = (enc_loop_rc_params_t *)ps_mem_tab[ENC_LOOP_RC_PARAMS].pv_base;
4317 ps_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
4318 /*Calculation of memory sizes for deblocking*/
4319 {
4320 /*width of the input YUV to be encoded. */
4321 u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
4322 /*making the width a multiple of CTB size*/
4323 u4_width += SET_CTB_ALIGN(
4324 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
4325
4326 u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
4327
4328 /*height of the input YUV to be encoded */
4329 u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4330 /*making the height a multiple of CTB size*/
4331 u4_height += SET_CTB_ALIGN(
4332 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
4333
4334 u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
4335
4336 /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
4337 /*1 vertical edge per 8 pixel*/
4338 u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
4339 /*Vertical edges for entire width of CTB row*/
4340 u4_size_bs_memory *= u4_ctb_in_a_row;
4341 /*Each vertical edge of CTB row is 4 bytes*/
4342 u4_size_bs_memory = u4_size_bs_memory << 2;
4343 /*Adding Memory required for storing horizontal BS by doubling*/
4344 u4_size_bs_memory = u4_size_bs_memory << 1;
4345
4346 /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
4347 /*Number of 4x4 blocks in the width of a CTB*/
4348 u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
4349 /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
4350 4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
4351 u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
4352 /*Storage for entire CTB row*/
4353 u4_size_qp_memory *= u4_ctb_in_a_row;
4354
4355 pu1_deblk_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_DEBLOCKING].pv_base;
4356 }
4357
4358 /*Derive the base pointer of sao*/
4359 pu1_sao_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_SAO].pv_base;
4360 ctb_size = (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
4361 u4_ctb_aligned_wd = u4_width;
4362 u4_ctb_aligned_ht = u4_height;
4363 num_vert_units = (u4_height) / ctb_size;
4364
4365 for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
4366 {
4367 ps_master_ctxt->aps_enc_loop_thrd_ctxt[ctr] = ps_ctxt;
4368 /* Store Tile params base into EncLoop context */
4369 ps_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
4370 ihevce_cmn_utils_instr_set_router(
4371 &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
4372 ihevce_sifter_sad_fxn_assigner(
4373 (FT_SAD_EVALUATOR **)(&ps_ctxt->pv_evalsad_pt_npu_mxn_8bit), ps_init_prms->e_arch_type);
4374 ps_ctxt->i4_max_search_range_horizontal =
4375 ps_init_prms->s_config_prms.i4_max_search_range_horz;
4376 ps_ctxt->i4_max_search_range_vertical =
4377 ps_init_prms->s_config_prms.i4_max_search_range_vert;
4378
4379 ps_ctxt->i4_quality_preset =
4380 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
4381
4382 if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7)
4383 {
4384 ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6;
4385 }
4386
4387 ps_ctxt->i4_num_proc_thrds = ps_master_ctxt->i4_num_proc_thrds;
4388
4389 ps_ctxt->i4_rc_pass = ps_init_prms->s_pass_prms.i4_pass;
4390
4391 ps_ctxt->u1_chroma_array_type = (i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1;
4392
4393 ps_ctxt->s_deblk_prms.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
4394
4395 ps_ctxt->pi2_scal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_SCALE_MAT].pv_base;
4396
4397 ps_ctxt->pi2_rescal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_RESCALE_MAT].pv_base;
4398
4399 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
4400 {
4401 ps_ctxt->i4_use_ctb_level_lamda = 0;
4402 }
4403 else
4404 {
4405 ps_ctxt->i4_use_ctb_level_lamda = 0;
4406 }
4407
4408 /** Register the function selector pointer*/
4409 ps_ctxt->ps_func_selector = ps_func_selector;
4410
4411 ps_ctxt->s_mc_ctxt.ps_func_selector = ps_func_selector;
4412
4413 /* Initiallization for non-distributed mode */
4414 ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[0] = 0;
4415 ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[1] = 0;
4416 ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[2] = 0;
4417 ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[3] = 0;
4418
4419 ps_ctxt->s_deblk_prms.ps_func_selector = ps_func_selector;
4420 ps_ctxt->i4_top_row_luma_stride = (u4_width + MAX_CU_SIZE + 1);
4421
4422 ps_ctxt->i4_frm_top_row_luma_size =
4423 ps_ctxt->i4_top_row_luma_stride * (u4_ctb_rows_in_a_frame + 1);
4424
4425 ps_ctxt->i4_top_row_chroma_stride = (u4_width + MAX_CU_SIZE + 2);
4426
4427 ps_ctxt->i4_frm_top_row_chroma_size =
4428 ps_ctxt->i4_top_row_chroma_stride * (u4_ctb_rows_in_a_frame + 1);
4429
4430 {
4431 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4432 {
4433 /* +1 is to provision top left pel */
4434 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
4435 (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_LUMA].pv_base + 1 +
4436 (ps_ctxt->i4_frm_top_row_luma_size * i4_enc_frm_id * i4_num_bitrate_inst);
4437
4438 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4439 ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
4440 (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] +
4441 ps_ctxt->i4_top_row_luma_stride;
4442
4443 /* +2 is to provision top left pel */
4444 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
4445 (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_CHROMA].pv_base + 2 +
4446 (ps_ctxt->i4_frm_top_row_chroma_size * i4_enc_frm_id * i4_num_bitrate_inst);
4447
4448 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4449 ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
4450 (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] +
4451 ps_ctxt->i4_top_row_chroma_stride;
4452 }
4453 }
4454
4455 /* +1 is to provision top left nbr */
4456 ps_ctxt->i4_top_row_nbr_stride = (((u4_width + MAX_CU_SIZE) >> 2) + 1);
4457 ps_ctxt->i4_frm_top_row_nbr_size =
4458 ps_ctxt->i4_top_row_nbr_stride * (u4_ctb_rows_in_a_frame + 1);
4459 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4460 {
4461 ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] =
4462 (nbr_4x4_t *)ps_mem_tab[ENC_LOOP_TOP_NBR4X4].pv_base + 1 +
4463 (ps_ctxt->i4_frm_top_row_nbr_size * i4_enc_frm_id * i4_num_bitrate_inst);
4464 ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] += ps_ctxt->i4_top_row_nbr_stride;
4465 }
4466
4467 num_cu_in_ctb = ctb_size / MIN_CU_SIZE;
4468 num_cu_in_ctb *= num_cu_in_ctb;
4469
4470 /* pointer incremented by 1 row to avoid OOB access in 0th row */
4471
4472 /* Memory for CU level Coeff data buffer */
4473 {
4474 WORD32 i4_16byte_boundary_overshoot;
4475 WORD32 buf_size_per_cu;
4476 WORD32 buf_size_per_thread_wo_alignment_req;
4477 WORD32 buf_size_per_thread;
4478
4479 buf_size_per_cu =
4480 ((MAX_LUMA_COEFFS_CTB +
4481 (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
4482 16) *
4483 sizeof(UWORD8);
4484 buf_size_per_thread_wo_alignment_req = buf_size_per_cu - 16 * sizeof(UWORD8);
4485
4486 {
4487 buf_size_per_thread = buf_size_per_cu * (2);
4488
4489 for(i = 0; i < 2; i++)
4490 {
4491 ps_ctxt->as_cu_prms[i].pu1_cu_coeffs =
4492 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].pv_base +
4493 (ctr * buf_size_per_thread) + (i * buf_size_per_cu);
4494
4495 i4_16byte_boundary_overshoot =
4496 ((LWORD64)ps_ctxt->as_cu_prms[i].pu1_cu_coeffs & 0xf);
4497
4498 ps_ctxt->as_cu_prms[i].pu1_cu_coeffs += (16 - i4_16byte_boundary_overshoot);
4499 }
4500 }
4501
4502 ps_ctxt->pu1_cu_recur_coeffs =
4503 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].pv_base +
4504 (ctr * buf_size_per_thread_wo_alignment_req);
4505 }
4506
4507 /* Memory for CU dequant data buffer */
4508 {
4509 WORD32 buf_size_per_thread;
4510 WORD32 i4_16byte_boundary_overshoot;
4511
4512 WORD32 buf_size_per_cu =
4513 (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
4514 : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
4515 8) *
4516 sizeof(WORD16);
4517
4518 {
4519 buf_size_per_thread = buf_size_per_cu * 2;
4520
4521 for(i = 0; i < 2; i++)
4522 {
4523 ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
4524 (WORD16
4525 *)((UWORD8 *)ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].pv_base + (ctr * buf_size_per_thread) + (i * buf_size_per_cu));
4526
4527 i4_16byte_boundary_overshoot =
4528 ((LWORD64)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs & 0xf);
4529
4530 ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
4531 (WORD16
4532 *)((UWORD8 *)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs + (16 - i4_16byte_boundary_overshoot));
4533 }
4534 }
4535 }
4536
4537 /*------ Deblocking memory pointer assignments start ------*/
4538
4539 /*Assign stride = 4x4 blocks in horizontal edge*/
4540 ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
4541
4542 ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size =
4543 ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd * u4_ctb_rows_in_a_frame;
4544
4545 /*Assign frame level memory to store the Qp of
4546 top 4x4 neighbours of each CTB row*/
4547 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4548 {
4549 ps_ctxt->s_deblk_ctbrow_prms.api1_qp_top_4x4_ctb_row[i4_enc_frm_id] =
4550 (WORD8 *)ps_mem_tab[ENC_LOOP_QP_TOP_4X4].pv_base +
4551 (ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size * i4_num_bitrate_inst *
4552 i4_enc_frm_id);
4553 }
4554
4555 ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_vert = (UWORD32 *)pu1_deblk_base;
4556
4557 ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_horz =
4558 (UWORD32 *)(pu1_deblk_base + (u4_size_bs_memory >> 1));
4559
4560 ps_ctxt->s_deblk_ctbrow_prms.pi1_ctb_row_qp = (WORD8 *)pu1_deblk_base + u4_size_bs_memory;
4561
4562 /*Assign stride = 4x4 blocks in horizontal edge*/
4563 ps_ctxt->s_deblk_ctbrow_prms.u4_qp_buffer_stride = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
4564
4565 pu1_deblk_base += (u4_size_bs_memory + u4_size_qp_memory);
4566
4567 /*------ Deblocking memory pointer assignments end ------*/
4568
4569 /*------SAO memory's pointer assignment starts------------*/
4570 if(!is_hbd_mode)
4571 {
4572 /* +1 (luma) / +2 (chroma) is added to the row width to provision the top left pixel */
4573 ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size =
4574 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1);
4575 ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size =
4576 u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 2) * (num_vert_units + 1);
4577 ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units =
4578 num_vert_units * (u4_ctb_aligned_wd / MAX_CTB_SIZE);
4579
4580 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4581 {
4582 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_luma[i4_enc_frm_id] =
4583 pu1_sao_base +
4584 ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
4585 ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
4586 i4_num_bitrate_inst * i4_enc_frm_id) + // move to the next frame_id
4587 u4_ctb_aligned_wd +
4588 2;
4589
4590 ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_chroma[i4_enc_frm_id] =
4591 pu1_sao_base +
4592 ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
4593 ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
4594 i4_num_bitrate_inst * i4_enc_frm_id) +
4595 +u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1) +
4596 u4_ctb_aligned_wd + 4;
4597
4598 ps_ctxt->s_sao_ctxt_t.aps_frm_top_ctb_sao[i4_enc_frm_id] = (sao_enc_t *) (pu1_sao_base +
4599 ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size + ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size)
4600 *i4_num_bitrate_inst*i4_num_enc_loop_frm_pllel) +
4601 (ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units * sizeof(sao_enc_t) *i4_num_bitrate_inst * i4_enc_frm_id));
4602 }
4603 ps_ctxt->s_sao_ctxt_t.i4_ctb_size =
4604 (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
4605 ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd = u4_ctb_aligned_wd;
4606 }
4607
4608 /*------SAO memory's pointer assignment ends------------*/
4609
4610 /* perform all one time initialisation here */
4611 ps_ctxt->i4_nbr_map_strd = MAX_PU_IN_CTB_ROW + 1 + 8;
4612
4613 ps_ctxt->pu1_ctb_nbr_map = ps_ctxt->au1_nbr_ctb_map[0];
4614
4615 ps_ctxt->i4_deblock_type = ps_init_prms->s_coding_tools_prms.i4_deblocking_type;
4616
4617 /* move the pointer to 1,2 location */
4618 ps_ctxt->pu1_ctb_nbr_map += ps_ctxt->i4_nbr_map_strd;
4619 ps_ctxt->pu1_ctb_nbr_map++;
4620
4621 ps_ctxt->i4_cu_csbf_strd = MAX_TU_IN_CTB_ROW;
4622
4623 CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map4x4TU, 1, 4, ps_ctxt->i4_cu_csbf_strd);
4624
4625 CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map8x8TU, 4, 8, ps_ctxt->i4_cu_csbf_strd);
4626
4627 CREATE_SUBBLOCK2CSBFID_MAP(
4628 gai4_subBlock2csbfId_map16x16TU, 16, 16, ps_ctxt->i4_cu_csbf_strd);
4629
4630 CREATE_SUBBLOCK2CSBFID_MAP(
4631 gai4_subBlock2csbfId_map32x32TU, 64, 32, ps_ctxt->i4_cu_csbf_strd);
4632
4633 /* For both instances, initialise the chroma dequant start idx */
4634 ps_ctxt->as_cu_prms[0].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
4635 ps_ctxt->as_cu_prms[1].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
4636
4637 /* initialise all the function pointer tables */
4638 {
4639 ps_ctxt->pv_inter_rdopt_cu_mc_mvp =
4640 (pf_inter_rdopt_cu_mc_mvp)ihevce_inter_rdopt_cu_mc_mvp;
4641
4642 ps_ctxt->pv_inter_rdopt_cu_ntu = (pf_inter_rdopt_cu_ntu)ihevce_inter_rdopt_cu_ntu;
4643
4644 #if ENABLE_RDO_BASED_TU_RECURSION
4645 if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4646 {
4647 ps_ctxt->pv_inter_rdopt_cu_ntu =
4648 (pf_inter_rdopt_cu_ntu)ihevce_inter_tu_tree_selector_and_rdopt_cost_computer;
4649 }
4650 #endif
4651 ps_ctxt->pv_intra_chroma_pred_mode_selector =
4652 (pf_intra_chroma_pred_mode_selector)ihevce_intra_chroma_pred_mode_selector;
4653 ps_ctxt->pv_intra_rdopt_cu_ntu = (pf_intra_rdopt_cu_ntu)ihevce_intra_rdopt_cu_ntu;
4654 ps_ctxt->pv_final_rdopt_mode_prcs =
4655 (pf_final_rdopt_mode_prcs)ihevce_final_rdopt_mode_prcs;
4656 ps_ctxt->pv_store_cu_results = (pf_store_cu_results)ihevce_store_cu_results;
4657 ps_ctxt->pv_enc_loop_cu_bot_copy = (pf_enc_loop_cu_bot_copy)ihevce_enc_loop_cu_bot_copy;
4658 ps_ctxt->pv_enc_loop_ctb_left_copy =
4659 (pf_enc_loop_ctb_left_copy)ihevce_enc_loop_ctb_left_copy;
4660
4661 /* Memory assignments for chroma intra pred buffer */
4662 {
4663 WORD32 pred_buf_size =
4664 MAX_TU_SIZE * MAX_TU_SIZE * 2 * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4665 WORD32 pred_buf_size_per_thread =
4666 NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD * pred_buf_size;
4667 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].pv_base +
4668 (ctr * pred_buf_size_per_thread);
4669
4670 for(i = 0; i < NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD; i++)
4671 {
4672 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[i].pv_pred_data = pu1_base;
4673 pu1_base += pred_buf_size;
4674 }
4675 }
4676
4677 /* Memory assignments for reference substitution output */
4678 {
4679 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING
4680 + INTRAPRED_SIMD_LEFT_PADDING);
4681 WORD32 pred_buf_size_per_thread = pred_buf_size;
4682 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_SUB_OUT].pv_base +
4683 (ctr * pred_buf_size_per_thread);
4684
4685 ps_ctxt->pv_ref_sub_out = pu1_base + INTRAPRED_SIMD_LEFT_PADDING;
4686 }
4687
4688 /* Memory assignments for reference filtering output */
4689 {
4690 WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING
4691 + INTRAPRED_SIMD_LEFT_PADDING);
4692 WORD32 pred_buf_size_per_thread = pred_buf_size;
4693 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_FILT_OUT].pv_base +
4694 (ctr * pred_buf_size_per_thread);
4695
4696 ps_ctxt->pv_ref_filt_out = pu1_base + INTRAPRED_SIMD_LEFT_PADDING;
4697 }
4698
4699 /* Memory assignments for recon storage during CU Recursion */
4700 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4701 if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4702 #endif
4703 {
4704 {
4705 WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
4706 WORD32 pred_buf_size_per_thread = pred_buf_size;
4707 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].pv_base +
4708 (ctr * pred_buf_size_per_thread);
4709
4710 ps_ctxt->pv_cu_luma_recon = pu1_base;
4711 }
4712
4713 {
4714 WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
4715 ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4716 WORD32 pred_buf_size_per_thread = pred_buf_size;
4717 UWORD8 *pu1_base =
4718 (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].pv_base +
4719 (ctr * pred_buf_size_per_thread);
4720
4721 ps_ctxt->pv_cu_chrma_recon = pu1_base;
4722 }
4723 }
4724
4725 /* Memory assignments for pred storage during CU Recursion */
4726 #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4727 if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4728 #endif
4729 {
4730 {
4731 WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
4732 WORD32 pred_buf_size_per_thread = pred_buf_size;
4733 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].pv_base +
4734 (ctr * pred_buf_size_per_thread);
4735
4736 ps_ctxt->pv_CTB_pred_luma = pu1_base;
4737 }
4738
4739 {
4740 WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
4741 ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4742 WORD32 pred_buf_size_per_thread = pred_buf_size;
4743 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].pv_base +
4744 (ctr * pred_buf_size_per_thread);
4745
4746 ps_ctxt->pv_CTB_pred_chroma = pu1_base;
4747 }
4748 }
4749
4750 /* Memory assignments for CTB left luma data storage */
4751 {
4752 WORD32 pred_buf_size = (MAX_CTB_SIZE + MAX_TU_SIZE);
4753 WORD32 pred_buf_size_per_thread = pred_buf_size;
4754 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].pv_base +
4755 (ctr * pred_buf_size_per_thread);
4756
4757 ps_ctxt->pv_left_luma_data = pu1_base;
4758 }
4759
4760 /* Memory assignments for CTB left chroma data storage */
4761 {
4762 WORD32 pred_buf_size =
4763 (MAX_CTB_SIZE + MAX_TU_SIZE) * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4764 WORD32 pred_buf_size_per_thread = pred_buf_size;
4765 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].pv_base +
4766 (ctr * pred_buf_size_per_thread);
4767
4768 ps_ctxt->pv_left_chrm_data = pu1_base;
4769 }
4770 }
4771
4772 /* Memory for inter pred buffers */
4773 {
4774 WORD32 i4_num_bufs_per_thread;
4775
4776 WORD32 i4_buf_size_per_cand =
4777 (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
4778 ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
4779
4780 i4_num_bufs_per_thread =
4781 (ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size / i4_num_proc_thrds) /
4782 i4_buf_size_per_cand;
4783
4784 ps_ctxt->i4_max_num_inter_rdopt_cands = i4_num_bufs_per_thread - 4;
4785
4786 ps_ctxt->s_pred_buf_data.u4_is_buf_in_use = UINT_MAX;
4787
4788 {
4789 UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_INTER_PRED].pv_base +
4790 +(ctr * i4_buf_size_per_cand * i4_num_bufs_per_thread);
4791
4792 for(i = 0; i < i4_num_bufs_per_thread; i++)
4793 {
4794 ps_ctxt->s_pred_buf_data.apv_inter_pred_data[i] =
4795 pu1_base + i * i4_buf_size_per_cand;
4796 ps_ctxt->s_pred_buf_data.u4_is_buf_in_use ^= (1 << i);
4797 }
4798 }
4799 }
4800
4801 /* Memory required to store pred for 422 chroma */
4802 if(i4_chroma_format == IV_YUV_422SP_UV)
4803 {
4804 WORD32 pred_buf_size = MAX_CTB_SIZE * MAX_CTB_SIZE * 2;
4805 WORD32 pred_buf_size_per_thread =
4806 pred_buf_size * ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) *
4807 sizeof(UWORD8);
4808 void *pv_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].pv_base +
4809 (ctr * pred_buf_size_per_thread);
4810
4811 ps_ctxt->pv_422_chroma_intra_pred_buf = pv_base;
4812 }
4813 else
4814 {
4815 ps_ctxt->pv_422_chroma_intra_pred_buf = NULL;
4816 }
4817
4818 /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
4819 {
4820 WORD32 i4_lumaBufSize = MAX_CU_SIZE * MAX_CU_SIZE;
4821 WORD32 i4_chromaBufSize =
4822 MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ((i4_chroma_format == IV_YUV_422SP_UV) + 1);
4823 WORD32 i4_memSize_perThread = ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size /
4824 (i4_num_proc_thrds * sizeof(UWORD8) * (is_hbd_mode + 1));
4825 WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset;
4826 {
4827 UWORD8 *pu1_mem_base =
4828 (((UWORD8 *)ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].pv_base) +
4829 ctr * i4_memSize_perThread);
4830
4831 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[0] =
4832 pu1_mem_base + i4_lumaBufSize * 0;
4833 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[1] =
4834 pu1_mem_base + i4_lumaBufSize * 1;
4835 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[0] =
4836 pu1_mem_base + i4_lumaBufSize * 2;
4837 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[1] =
4838 pu1_mem_base + i4_lumaBufSize * 3;
4839
4840 pu1_mem_base += i4_lumaBufSize * 4;
4841
4842 switch(i4_quality_preset)
4843 {
4844 case IHEVCE_QUALITY_P0:
4845 {
4846 #if ENABLE_CHROMA_RDOPT_EVAL_IN_PQ
4847 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4848 pu1_mem_base + i4_chromaBufSize * 0;
4849 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4850 pu1_mem_base + i4_chromaBufSize * 1;
4851 #else
4852 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4853 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4854 #endif
4855
4856 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ
4857 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4858 pu1_mem_base + i4_chromaBufSize * 2;
4859 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4860 pu1_mem_base + i4_chromaBufSize * 3;
4861 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4862 pu1_mem_base + i4_chromaBufSize * 2;
4863 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4864 pu1_mem_base + i4_chromaBufSize * 3;
4865 #else
4866 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4867 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4868 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4869 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4870 #endif
4871
4872 break;
4873 }
4874 case IHEVCE_QUALITY_P2:
4875 {
4876 #if ENABLE_CHROMA_RDOPT_EVAL_IN_HQ
4877 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4878 pu1_mem_base + i4_chromaBufSize * 0;
4879 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4880 pu1_mem_base + i4_chromaBufSize * 1;
4881 #else
4882 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4883 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4884 #endif
4885
4886 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ
4887 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4888 pu1_mem_base + i4_chromaBufSize * 2;
4889 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4890 pu1_mem_base + i4_chromaBufSize * 3;
4891 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4892 pu1_mem_base + i4_chromaBufSize * 2;
4893 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4894 pu1_mem_base + i4_chromaBufSize * 3;
4895 #else
4896 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4897 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4898 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4899 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4900 #endif
4901
4902 break;
4903 }
4904 case IHEVCE_QUALITY_P3:
4905 {
4906 #if ENABLE_CHROMA_RDOPT_EVAL_IN_MS
4907 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4908 pu1_mem_base + i4_chromaBufSize * 0;
4909 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4910 pu1_mem_base + i4_chromaBufSize * 1;
4911 #else
4912 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4913 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4914 #endif
4915
4916 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS
4917 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4918 pu1_mem_base + i4_chromaBufSize * 2;
4919 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4920 pu1_mem_base + i4_chromaBufSize * 3;
4921 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4922 pu1_mem_base + i4_chromaBufSize * 2;
4923 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4924 pu1_mem_base + i4_chromaBufSize * 3;
4925 #else
4926 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4927 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4928 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4929 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4930 #endif
4931
4932 break;
4933 }
4934 case IHEVCE_QUALITY_P4:
4935 {
4936 #if ENABLE_CHROMA_RDOPT_EVAL_IN_HS
4937 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4938 pu1_mem_base + i4_chromaBufSize * 0;
4939 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4940 pu1_mem_base + i4_chromaBufSize * 1;
4941 #else
4942 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4943 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4944 #endif
4945
4946 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS
4947 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4948 pu1_mem_base + i4_chromaBufSize * 2;
4949 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4950 pu1_mem_base + i4_chromaBufSize * 3;
4951 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4952 pu1_mem_base + i4_chromaBufSize * 2;
4953 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4954 pu1_mem_base + i4_chromaBufSize * 3;
4955 #else
4956 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4957 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4958 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4959 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4960 #endif
4961
4962 break;
4963 }
4964 case IHEVCE_QUALITY_P5:
4965 {
4966 #if ENABLE_CHROMA_RDOPT_EVAL_IN_XS
4967 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4968 pu1_mem_base + i4_chromaBufSize * 0;
4969 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4970 pu1_mem_base + i4_chromaBufSize * 1;
4971 #else
4972 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4973 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4974 #endif
4975
4976 #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS
4977 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4978 pu1_mem_base + i4_chromaBufSize * 2;
4979 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4980 pu1_mem_base + i4_chromaBufSize * 3;
4981 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4982 pu1_mem_base + i4_chromaBufSize * 2;
4983 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4984 pu1_mem_base + i4_chromaBufSize * 3;
4985 #else
4986 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4987 ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4988 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4989 ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4990 #endif
4991
4992 break;
4993 }
4994 }
4995 }
4996
4997 ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
4998 ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
4999 ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
5000 ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
5001
5002 } /* Recon Datastore */
5003
5004 /****************************************************/
5005 /****************************************************/
5006 /* ps_pps->i1_sign_data_hiding_flag == UNHIDDEN */
5007 /* when NO_SBH. else HIDDEN */
5008 /****************************************************/
5009 /****************************************************/
5010 /* Zero cbf tool is enabled by default for all presets */
5011 ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
5012
5013 if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3)
5014 {
5015 ps_ctxt->i4_quant_rounding_level = CU_LEVEL_QUANT_ROUNDING;
5016 ps_ctxt->i4_chroma_quant_rounding_level = CHROMA_QUANT_ROUNDING;
5017 ps_ctxt->i4_rdoq_level = ALL_CAND_RDOQ;
5018 ps_ctxt->i4_sbh_level = ALL_CAND_SBH;
5019 }
5020 else if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P3)
5021 {
5022 ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5023 ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5024 ps_ctxt->i4_rdoq_level = NO_RDOQ;
5025 ps_ctxt->i4_sbh_level = NO_SBH;
5026 }
5027 else
5028 {
5029 ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5030 ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5031 ps_ctxt->i4_rdoq_level = NO_RDOQ;
5032 ps_ctxt->i4_sbh_level = NO_SBH;
5033 }
5034
5035 #if DISABLE_QUANT_ROUNDING
5036 ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5037 ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5038 #endif
5039 /* Disable RDOQ when spatial modulation is enabled (bit 0 of i4_cu_level_rc),
5040 as RDOQ degrades visual quality */
5041 if(ps_init_prms->s_config_prms.i4_cu_level_rc & 1)
5042 {
5043 ps_ctxt->i4_rdoq_level = NO_RDOQ;
5044 }
5045
5046 #if DISABLE_RDOQ
5047 ps_ctxt->i4_rdoq_level = NO_RDOQ;
5048 #endif
5049
5050 #if DISABLE_SBH
5051 ps_ctxt->i4_sbh_level = NO_SBH;
5052 #endif
5053
5054 /*Rounding factor calc based on previous cabac states */
5055
5056 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_4x4[0][0];
5057 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_8x8[0][0];
5058 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_16x16[0][0];
5059 ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[4] = &ps_ctxt->i4_quant_round_32x32[0][0];
5060
5061 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_4x4[1][0];
5062 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_8x8[1][0];
5063 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_16x16[1][0];
5064 ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[4] = &ps_ctxt->i4_quant_round_32x32[1][0];
5065
5066 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_cr_4x4[0][0];
5067 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_cr_8x8[0][0];
5068 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_cr_16x16[0][0];
5069
5070 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_cr_4x4[1][0];
5071 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_cr_8x8[1][0];
5072 ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_cr_16x16[1][0];
5073
5074 /****************************************************************************************/
5075 /* Setting the perform rdoq and sbh flags appropriately */
5076 /****************************************************************************************/
5077 {
5078 /******************************************/
5079 /* For best cand rdoq and/or sbh */
5080 /******************************************/
5081 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
5082 (ps_ctxt->i4_rdoq_level == BEST_CAND_RDOQ);
5083 /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
5084 we would have to do RDOQ again.*/
5085 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
5086 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq ||
5087 ((BEST_CAND_SBH == ps_ctxt->i4_sbh_level) &&
5088 (ALL_CAND_RDOQ == ps_ctxt->i4_rdoq_level));
5089
5090 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
5091 (ps_ctxt->i4_sbh_level == BEST_CAND_SBH);
5092
5093 /* SBH should be performed if
5094 a) i4_sbh_level is BEST_CAND_SBH.
5095 b) For all quality presets above medium speed(i.e. high speed and extreme speed) and
5096 if SBH has to be done because for these presets the quant, iquant and scan coeff
5097 data are calculated in this function and not during the RDOPT stage*/
5098
5099 /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
5100 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
5101 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh ||
5102 ((BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level) &&
5103 (ALL_CAND_SBH == ps_ctxt->i4_sbh_level));
5104
5105 /******************************************/
5106 /* For all cand rdoq and/or sbh */
5107 /******************************************/
5108 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq =
5109 (ps_ctxt->i4_rdoq_level == ALL_CAND_RDOQ);
5110 ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh =
5111 (ps_ctxt->i4_sbh_level == ALL_CAND_SBH);
5112 ps_ctxt->s_rdoq_sbh_ctxt.i4_bit_depth =
5113 ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
5114 }
5115
5116 if(!is_hbd_mode)
5117 {
5118 if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
5119 {
5120 if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
5121 {
5122 ps_ctxt->apf_quant_iquant_ssd[0] =
5123 ps_func_selector->ihevc_quant_iquant_ssd_fptr;
5124 ps_ctxt->apf_quant_iquant_ssd[2] = ps_func_selector->ihevc_quant_iquant_fptr;
5125 }
5126 else
5127 {
5128 ps_ctxt->apf_quant_iquant_ssd[0] =
5129 ps_func_selector->ihevc_quant_iquant_ssd_rdoq_fptr;
5130 ps_ctxt->apf_quant_iquant_ssd[2] =
5131 ps_func_selector->ihevc_quant_iquant_rdoq_fptr;
5132 }
5133
5134 /* If coef level quant rounding is enabled (rounding level is not FIXED_QUANT_ROUNDING), quantization based on corr. error is to be done */
5135 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
5136 {
5137 ps_ctxt->apf_quant_iquant_ssd[1] =
5138 ps_func_selector->ihevc_q_iq_ssd_var_rnd_fact_fptr;
5139 ps_ctxt->apf_quant_iquant_ssd[3] =
5140 ps_func_selector->ihevc_q_iq_var_rnd_fact_fptr;
5141 }
5142 else
5143 {
5144 ps_ctxt->apf_quant_iquant_ssd[1] =
5145 ps_func_selector->ihevc_quant_iquant_ssd_fptr;
5146 ps_ctxt->apf_quant_iquant_ssd[3] = ps_func_selector->ihevc_quant_iquant_fptr;
5147 }
5148 }
5149 else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
5150 {
5151 if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
5152 {
5153 ps_ctxt->apf_quant_iquant_ssd[0] =
5154 ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
5155 ps_ctxt->apf_quant_iquant_ssd[2] =
5156 ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
5157 }
5158 else
5159 {
5160 ps_ctxt->apf_quant_iquant_ssd[0] =
5161 ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_fptr;
5162 ps_ctxt->apf_quant_iquant_ssd[2] =
5163 ps_func_selector->ihevc_quant_iquant_flat_scale_mat_rdoq_fptr;
5164 }
5165
5166 /* If coef level quant rounding is enabled (rounding level is not FIXED_QUANT_ROUNDING), quantization based on corr. error is to be done */
5167 if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
5168 {
5169 ps_ctxt->apf_quant_iquant_ssd[1] =
5170 ps_func_selector->ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_fptr;
5171 ps_ctxt->apf_quant_iquant_ssd[3] =
5172 ps_func_selector->ihevc_q_iq_flat_scale_mat_var_rnd_fact_fptr;
5173 }
5174 else
5175 {
5176 ps_ctxt->apf_quant_iquant_ssd[1] =
5177 ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
5178 ps_ctxt->apf_quant_iquant_ssd[3] =
5179 ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
5180 }
5181 }
5182
5183 ps_ctxt->s_sao_ctxt_t.apf_sao_luma[0] =
5184 ps_func_selector->ihevc_sao_edge_offset_class0_fptr;
5185 ps_ctxt->s_sao_ctxt_t.apf_sao_luma[1] =
5186 ps_func_selector->ihevc_sao_edge_offset_class1_fptr;
5187 ps_ctxt->s_sao_ctxt_t.apf_sao_luma[2] =
5188 ps_func_selector->ihevc_sao_edge_offset_class2_fptr;
5189 ps_ctxt->s_sao_ctxt_t.apf_sao_luma[3] =
5190 ps_func_selector->ihevc_sao_edge_offset_class3_fptr;
5191
5192 ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[0] =
5193 ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr;
5194 ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[1] =
5195 ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr;
5196 ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[2] =
5197 ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr;
5198 ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[3] =
5199 ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr;
5200
5201 ps_ctxt->apf_it_recon[0] = ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr;
5202 ps_ctxt->apf_it_recon[1] = ps_func_selector->ihevc_itrans_recon_4x4_fptr;
5203 ps_ctxt->apf_it_recon[2] = ps_func_selector->ihevc_itrans_recon_8x8_fptr;
5204 ps_ctxt->apf_it_recon[3] = ps_func_selector->ihevc_itrans_recon_16x16_fptr;
5205 ps_ctxt->apf_it_recon[4] = ps_func_selector->ihevc_itrans_recon_32x32_fptr;
5206
5207 ps_ctxt->apf_chrm_it_recon[0] = ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr;
5208 ps_ctxt->apf_chrm_it_recon[1] = ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr;
5209 ps_ctxt->apf_chrm_it_recon[2] = ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr;
5210
5211 ps_ctxt->apf_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr;
5212 ps_ctxt->apf_resd_trns[1] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
5213 ps_ctxt->apf_resd_trns[2] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
5214 ps_ctxt->apf_resd_trns[3] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
5215 ps_ctxt->apf_resd_trns[4] = ps_func_selector->ihevc_resi_trans_32x32_fptr;
5216
5217 ps_ctxt->apf_chrm_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
5218 ps_ctxt->apf_chrm_resd_trns[1] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
5219 ps_ctxt->apf_chrm_resd_trns[2] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
5220
5221 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_0] =
5222 ps_func_selector->ihevc_intra_pred_luma_planar_fptr;
5223 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_1] = ps_func_selector->ihevc_intra_pred_luma_dc_fptr;
5224 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_2] =
5225 ps_func_selector->ihevc_intra_pred_luma_mode2_fptr;
5226 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_3TO9] =
5227 ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr;
5228 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_10] =
5229 ps_func_selector->ihevc_intra_pred_luma_horz_fptr;
5230 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_11TO17] =
5231 ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr;
5232 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_18_34] =
5233 ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr;
5234 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_19TO25] =
5235 ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr;
5236 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_26] = ps_func_selector->ihevc_intra_pred_luma_ver_fptr;
5237 ps_ctxt->apf_lum_ip[IP_FUNC_MODE_27TO33] =
5238 ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr;
5239
5240 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_0] =
5241 ps_func_selector->ihevc_intra_pred_chroma_planar_fptr;
5242 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_1] =
5243 ps_func_selector->ihevc_intra_pred_chroma_dc_fptr;
5244 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_2] =
5245 ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr;
5246 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_3TO9] =
5247 ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr;
5248 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_10] =
5249 ps_func_selector->ihevc_intra_pred_chroma_horz_fptr;
5250 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_11TO17] =
5251 ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr;
5252 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_18_34] =
5253 ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr;
5254 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_19TO25] =
5255 ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr;
5256 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_26] =
5257 ps_func_selector->ihevc_intra_pred_chroma_ver_fptr;
5258 ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_27TO33] =
5259 ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr;
5260
5261 ps_ctxt->apf_chrm_resd_trns_had[0] =
5262 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_4x4_8bit;
5263 ps_ctxt->apf_chrm_resd_trns_had[1] =
5264 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_8x8_8bit;
5265 ps_ctxt->apf_chrm_resd_trns_had[2] =
5266 (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_16x16_8bit;
5267 }
5268
5269 if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
5270 {
5271 /* initialise the scale & rescale matricies */
5272 ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5273 ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5274 ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
5275 ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
5276 ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
5277 /*init for inter matrix*/
5278 ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5279 ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5280 ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
5281 ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
5282 ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
5283
5284 /*init for rescale matrix*/
5285 ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5286 ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5287 ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
5288 ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
5289 ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
5290 /*init for rescale inter matrix*/
5291 ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5292 ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5293 ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
5294 ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
5295 ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
5296 }
5297 else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
5298 {
5299 /* initialise the scale & rescale matricies */
5300 ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5301 ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5302 ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_intra_default_scale_mat_8x8[0];
5303 ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_intra_default_scale_mat_16x16[0];
5304 ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_intra_default_scale_mat_32x32[0];
5305 /*init for inter matrix*/
5306 ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5307 ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5308 ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_inter_default_scale_mat_8x8[0];
5309 ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_inter_default_scale_mat_16x16[0];
5310 ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_inter_default_scale_mat_32x32[0];
5311
5312 /*init for rescale matrix*/
5313 ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5314 ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5315 ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_intra_default_rescale_mat_8x8[0];
5316 ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_intra_default_rescale_mat_16x16[0];
5317 ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_intra_default_rescale_mat_32x32[0];
5318 /*init for rescale inter matrix*/
5319 ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5320 ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5321 ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_inter_default_rescale_mat_8x8[0];
5322 ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_inter_default_rescale_mat_16x16[0];
5323 ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_inter_default_rescale_mat_32x32[0];
5324 }
5325 else
5326 {
5327 ASSERT(0);
5328 }
5329
5330 /* Not recomputing Luma pred-data and header data for any preset now */
5331 ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
5332 ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
5333 ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
5334
5335 switch(ps_ctxt->i4_quality_preset)
5336 {
5337 case IHEVCE_QUALITY_P0:
5338 {
5339 ps_ctxt->i4_max_merge_candidates = 5;
5340 ps_ctxt->i4_use_satd_for_merge_eval = 1;
5341 ps_ctxt->u1_use_top_at_ctb_boundary = 1;
5342 ps_ctxt->u1_use_early_cbf_data = 0;
5343 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_PQ;
5344 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5345 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ;
5346
5347 break;
5348 }
5349 case IHEVCE_QUALITY_P2:
5350 {
5351 ps_ctxt->i4_max_merge_candidates = 5;
5352 ps_ctxt->i4_use_satd_for_merge_eval = 1;
5353 ps_ctxt->u1_use_top_at_ctb_boundary = 1;
5354 ps_ctxt->u1_use_early_cbf_data = 0;
5355
5356 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HQ;
5357 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5358 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ;
5359
5360 break;
5361 }
5362 case IHEVCE_QUALITY_P3:
5363 {
5364 ps_ctxt->i4_max_merge_candidates = 3;
5365 ps_ctxt->i4_use_satd_for_merge_eval = 1;
5366 ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5367
5368 ps_ctxt->u1_use_early_cbf_data = 0;
5369 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_MS;
5370 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5371 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS;
5372
5373 break;
5374 }
5375 case IHEVCE_QUALITY_P4:
5376 {
5377 ps_ctxt->i4_max_merge_candidates = 2;
5378 ps_ctxt->i4_use_satd_for_merge_eval = 1;
5379 ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5380 ps_ctxt->u1_use_early_cbf_data = 0;
5381 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HS;
5382 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5383 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS;
5384
5385 break;
5386 }
5387 case IHEVCE_QUALITY_P5:
5388 {
5389 ps_ctxt->i4_max_merge_candidates = 2;
5390 ps_ctxt->i4_use_satd_for_merge_eval = 0;
5391 ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5392 ps_ctxt->u1_use_early_cbf_data = 0;
5393 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_XS;
5394 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5395 ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS;
5396
5397 break;
5398 }
5399 case IHEVCE_QUALITY_P6:
5400 {
5401 ps_ctxt->i4_max_merge_candidates = 2;
5402 ps_ctxt->i4_use_satd_for_merge_eval = 0;
5403 ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5404 ps_ctxt->u1_use_early_cbf_data = EARLY_CBF_ON;
5405 break;
5406 }
5407 default:
5408 {
5409 ASSERT(0);
5410 }
5411 }
5412
5413 #if DISABLE_SKIP_AND_MERGE_EVAL
5414 ps_ctxt->i4_max_merge_candidates = 0;
5415 #endif
5416
5417 ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
5418 !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
5419
5420 /*initialize memory for RC related parameters required/populated by enc_loop */
5421 /* the allocated memory is distributed as follows assuming encoder is running for 3 bit-rate instnaces
5422 |-------|-> Thread 0, instance 0
5423 | |
5424 | |
5425 | |
5426 |-------|-> thread 0, instance 1
5427 | |
5428 | |
5429 | |
5430 |-------|-> thread 0, intance 2
5431 | |
5432 | |
5433 | |
5434 |-------|-> thread 1, instance 0
5435 | |
5436 | |
5437 | |
5438 |-------|-> thread 1, instance 1
5439 | |
5440 | |
5441 | |
5442 |-------|-> thread 1, instance 2
5443 ... ...
5444
5445 Each theard will collate the data corresponding to the bit-rate instnace it's running at the appropriate place.
5446 Finally, one thread will become master and collate the data from all the threads */
5447 for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
5448 {
5449 for(i = 0; i < i4_num_bitrate_inst; i++)
5450 {
5451 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i] = ps_enc_loop_rc_params;
5452 ps_enc_loop_rc_params++;
5453 }
5454 }
5455 /* Non-Luma modes for Chroma are evaluated only in HIGH QUALITY preset */
5456
5457 #if !ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE
5458 ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 0;
5459 #endif
5460
5461 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_iq_buff_stride =
5462 MAX_TU_SIZE;
5463 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_iq_buff_stride =
5464 MAX_TU_SIZE;
5465 /*Multiplying by two to account for interleaving of cb and cr*/
5466 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_pred_stride = MAX_TU_SIZE
5467 << 1;
5468 ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_pred_stride =
5469 MAX_TU_SIZE << 1;
5470
5471 /* Memory for a frame level memory to store tile-id */
5472 /* corresponding to each CTB of frame */
5473 ps_ctxt->pi4_offset_for_last_cu_qp = &ps_master_ctxt->ai4_offset_for_last_cu_qp[0];
5474
5475 ps_ctxt->i4_qp_mod = ps_init_prms->s_config_prms.i4_cu_level_rc & 1;
5476 /* psy rd strength is a run time parametr control by bit field 5-7 in the VQET field.*/
5477 /* we disable psyrd if the the psy strength is zero or the BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER field is not set */
5478 if(ps_init_prms->s_coding_tools_prms.i4_vqet &
5479 (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER))
5480 {
5481 UWORD32 psy_strength;
5482 UWORD32 psy_strength_mask =
5483 224; // only bits 5,6,7 are ones. These three bits represent the psy strength
5484 psy_strength = ps_init_prms->s_coding_tools_prms.i4_vqet & psy_strength_mask;
5485 ps_ctxt->u1_enable_psyRDOPT = 1;
5486 ps_ctxt->u4_psy_strength = psy_strength >> BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1;
5487 if(psy_strength == 0)
5488 {
5489 ps_ctxt->u1_enable_psyRDOPT = 0;
5490 ps_ctxt->u4_psy_strength = 0;
5491 }
5492 }
5493
5494 ps_ctxt->u1_is_stasino_enabled =
5495 ((ps_init_prms->s_coding_tools_prms.i4_vqet &
5496 (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
5497 (ps_init_prms->s_coding_tools_prms.i4_vqet &
5498 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)));
5499
5500 ps_ctxt->u1_max_inter_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_nI;
5501 ps_ctxt->u1_max_intra_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_I;
5502 ps_ctxt++;
5503 }
5504 /* Store Tile params base into EncLoop Master context */
5505 ps_master_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
5506
5507 if(1 == ps_tile_params_base->i4_tiles_enabled_flag)
5508 {
5509 i4_num_tile_cols = ps_tile_params_base->i4_num_tile_cols;
5510 }
5511
5512 /* Updating ai4_offset_for_last_cu_qp[] array for all tile-colums of frame */
5513 /* Loop over all tile-cols in frame */
5514 for(ctr = 0; ctr < i4_num_tile_cols; ctr++)
5515 {
5516 WORD32 i4_tile_col_wd_in_ctb_unit =
5517 (ps_tile_params_base + ctr)->i4_curr_tile_wd_in_ctb_unit;
5518 WORD32 offset_x;
5519
5520 if(ctr == (i4_num_tile_cols - 1))
5521 { /* Last tile-row of frame */
5522 WORD32 min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
5523
5524 WORD32 cu_aligned_pic_wd =
5525 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
5526 SET_CTB_ALIGN(
5527 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width,
5528 min_cu_size);
5529
5530 WORD32 last_hz_ctb_wd = MAX_CTB_SIZE - (u4_width - cu_aligned_pic_wd);
5531
5532 offset_x = (i4_tile_col_wd_in_ctb_unit - 1) * MAX_CTB_SIZE;
5533 offset_x += last_hz_ctb_wd;
5534 }
5535 else
5536 { /* Not the last tile-row of frame */
5537 offset_x = (i4_tile_col_wd_in_ctb_unit)*MAX_CTB_SIZE;
5538 }
5539
5540 offset_x /= 4;
5541 offset_x -= 1;
5542
5543 ps_master_ctxt->ai4_offset_for_last_cu_qp[ctr] = offset_x;
5544 }
5545
5546 n_tabs = NUM_ENC_LOOP_MEM_RECS;
5547
5548 /*store num bit-rate instances in the master context */
5549 ps_master_ctxt->i4_num_bitrates = i4_num_bitrate_inst;
5550 ps_master_ctxt->i4_num_enc_loop_frm_pllel = i4_num_enc_loop_frm_pllel;
5551 /*************************************************************************/
5552 /* --- EncLoop Deblock and SAO sync Dep Mngr Mem init -- */
5553 /*************************************************************************/
5554 {
5555 WORD32 count;
5556 WORD32 num_vert_units, num_blks_in_row;
5557 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
5558 WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
5559
5560 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
5561 ihevce_enc_loop_dblk_get_prms_dep_mngr(wd, &num_blks_in_row);
5562 ASSERT(num_vert_units > 0);
5563 ASSERT(num_blks_in_row > 0);
5564
5565 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5566 {
5567 for(i = 0; i < i4_num_bitrate_inst; i++)
5568 {
5569 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[count][i] = ihevce_dmgr_init(
5570 &ps_mem_tab[n_tabs],
5571 pv_osal_handle,
5572 DEP_MNGR_ROW_ROW_SYNC,
5573 num_vert_units,
5574 num_blks_in_row,
5575 i4_num_tile_cols, /* Number of Col Tiles */
5576 i4_num_proc_thrds,
5577 0 /*Sem Disabled*/
5578 );
5579
5580 n_tabs += ihevce_dmgr_get_num_mem_recs();
5581 }
5582 }
5583
5584 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5585 {
5586 for(i = 0; i < i4_num_bitrate_inst; i++)
5587 {
5588 ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[count][i] = ihevce_dmgr_init(
5589 &ps_mem_tab[n_tabs],
5590 pv_osal_handle,
5591 DEP_MNGR_ROW_ROW_SYNC,
5592 num_vert_units,
5593 num_blks_in_row,
5594 i4_num_tile_cols, /* Number of Col Tiles */
5595 i4_num_proc_thrds,
5596 0 /*Sem Disabled*/
5597 );
5598
5599 n_tabs += ihevce_dmgr_get_num_mem_recs();
5600 }
5601 }
5602 }
5603 /*************************************************************************/
5604 /* --- EncLoop Top-Right CU synnc Dep Mngr Mem init -- */
5605 /*************************************************************************/
5606 {
5607 WORD32 count;
5608 WORD32 num_vert_units, num_blks_in_row;
5609 WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
5610 WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
5611
5612 WORD32 i4_sem = 0;
5613
5614 if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset >=
5615 IHEVCE_QUALITY_P4)
5616 i4_sem = 0;
5617 else
5618 i4_sem = 1;
5619 ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
5620 /* For Top-Right CU sync, adding one more CTB since value updation */
5621 /* happens in that way for the last CTB in the row */
5622 num_blks_in_row = wd + SET_CTB_ALIGN(wd, MAX_CU_SIZE);
5623 num_blks_in_row += MAX_CTB_SIZE;
5624
5625 ASSERT(num_vert_units > 0);
5626 ASSERT(num_blks_in_row > 0);
5627
5628 for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5629 {
5630 for(i = 0; i < i4_num_bitrate_inst; i++)
5631 {
5632 /* For ES/HS, CU level updates uses spin-locks than semaphore */
5633 {
5634 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[count][i] =
5635 ihevce_dmgr_init(
5636 &ps_mem_tab[n_tabs],
5637 pv_osal_handle,
5638 DEP_MNGR_ROW_ROW_SYNC,
5639 num_vert_units,
5640 num_blks_in_row,
5641 i4_num_tile_cols, /* Number of Col Tiles */
5642 i4_num_proc_thrds,
5643 i4_sem /*Sem Disabled*/
5644 );
5645 }
5646 n_tabs += ihevce_dmgr_get_num_mem_recs();
5647 }
5648 }
5649 }
5650
5651 for(i = 1; i < 5; i++)
5652 {
5653 WORD32 i4_log2_trans_size = i + 1;
5654 WORD32 i4_bit_depth = ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
5655
5656 ga_trans_shift[i] = (MAX_TR_DYNAMIC_RANGE - i4_bit_depth - i4_log2_trans_size) << 1;
5657 }
5658
5659 ga_trans_shift[0] = ga_trans_shift[1];
5660
5661 /* return the handle to caller */
5662 return ((void *)ps_master_ctxt);
5663 }
5664
5665 /*!
5666 ******************************************************************************
5667 * \if Function name : ihevce_enc_loop_reg_sem_hdls \endif
5668 *
5669 * \brief
5670 * Intialization for ENC_LOOP context state structure .
5671 *
5672 * \param[in] ps_mem_tab : pointer to memory descriptors table
5673 * \param[in] ppv_sem_hdls : Array of semaphore handles
5674 * \param[in] i4_num_proc_thrds : Number of processing threads
5675 *
5676 * \return
5677 * None
5678 *
5679 * \author
5680 * Ittiam
5681 *
5682 *****************************************************************************
5683 */
ihevce_enc_loop_reg_sem_hdls(void * pv_enc_loop_ctxt,void ** ppv_sem_hdls,WORD32 i4_num_proc_thrds)5684 void ihevce_enc_loop_reg_sem_hdls(
5685 void *pv_enc_loop_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
5686 {
5687 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5688 WORD32 i, enc_frm_id;
5689
5690 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5691
5692 /*************************************************************************/
5693 /* --- EncLoop Deblock and SAO sync Dep Mngr reg Semaphores -- */
5694 /*************************************************************************/
5695 for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5696 {
5697 for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5698 {
5699 ihevce_dmgr_reg_sem_hdls(
5700 ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][i],
5701 ppv_sem_hdls,
5702 i4_num_proc_thrds);
5703 }
5704 }
5705
5706 for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5707 {
5708 for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5709 {
5710 ihevce_dmgr_reg_sem_hdls(
5711 ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[enc_frm_id][i],
5712 ppv_sem_hdls,
5713 i4_num_proc_thrds);
5714 }
5715 }
5716
5717 /*************************************************************************/
5718 /* --- EncLoop Top-Right CU synnc Dep Mngr reg Semaphores -- */
5719 /*************************************************************************/
5720 for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5721 {
5722 for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5723 {
5724 ihevce_dmgr_reg_sem_hdls(
5725 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][i],
5726 ppv_sem_hdls,
5727 i4_num_proc_thrds);
5728 }
5729 }
5730
5731 return;
5732 }
5733
5734 /*!
5735 ******************************************************************************
5736 * \if Function name : ihevce_enc_loop_delete \endif
5737 *
5738 * \brief
5739 * Destroy EncLoop module
5740 * Note : Only Destroys the resources allocated in the module like
5741 * semaphore,etc. Memory free is done Separately using memtabs
5742 *
5743 * \param[in] pv_me_ctxt : pointer to EncLoop ctxt
5744 *
5745 * \return
5746 * None
5747 *
5748 * \author
5749 * Ittiam
5750 *
5751 *****************************************************************************
5752 */
ihevce_enc_loop_delete(void * pv_enc_loop_ctxt)5753 void ihevce_enc_loop_delete(void *pv_enc_loop_ctxt)
5754 {
5755 ihevce_enc_loop_master_ctxt_t *ps_enc_loop_ctxt;
5756 WORD32 ctr, enc_frm_id;
5757
5758 ps_enc_loop_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5759
5760 for(enc_frm_id = 0; enc_frm_id < ps_enc_loop_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5761 {
5762 for(ctr = 0; ctr < ps_enc_loop_ctxt->i4_num_bitrates; ctr++)
5763 {
5764 /* --- EncLoop Deblock sync Dep Mngr Delete --*/
5765 ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][ctr]);
5766 /* --- EncLoop Sao sync Dep Mngr Delete --*/
5767 ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_sao[enc_frm_id][ctr]);
5768 /* --- EncLoop Top-Right CU sync Dep Mngr Delete --*/
5769 ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][ctr]);
5770 }
5771 }
5772 }
5773
5774 /*!
5775 ******************************************************************************
5776 * \if Function name : ihevce_enc_loop_dep_mngr_frame_reset \endif
5777 *
5778 * \brief
5779 * Frame level Reset for the Dependency Mngrs local to EncLoop.,
5780 * ie CU_TopRight and Dblk
5781 *
5782 * \param[in] pv_enc_loop_ctxt : Enc_loop context pointer
5783 *
5784 * \return
5785 * None
5786 *
5787 * \author
5788 * Ittiam
5789 *
5790 *****************************************************************************
5791 */
ihevce_enc_loop_dep_mngr_frame_reset(void * pv_enc_loop_ctxt,WORD32 enc_frm_id)5792 void ihevce_enc_loop_dep_mngr_frame_reset(void *pv_enc_loop_ctxt, WORD32 enc_frm_id)
5793 {
5794 WORD32 ctr, frame_id;
5795 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5796
5797 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5798
5799 if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
5800 {
5801 frame_id = 0;
5802 }
5803 else
5804 {
5805 frame_id = enc_frm_id;
5806 }
5807
5808 for(ctr = 0; ctr < ps_master_ctxt->i4_num_bitrates; ctr++)
5809 {
5810 /* Dep. Mngr : Reset the num ctb Deblocked in every row for ENC sync */
5811 ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[frame_id][ctr]);
5812
5813 /* Dep. Mngr : Reset the num SAO ctb in every row for ENC sync */
5814 ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[frame_id][ctr]);
5815
5816 /* Dep. Mngr : Reset the TopRight CU Processed in every row for ENC sync */
5817 ihevce_dmgr_rst_row_row_sync(
5818 ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[frame_id][ctr]);
5819 }
5820 }
5821
/*!
******************************************************************************
* \if Function name : ihevce_enc_loop_frame_init \endif
*
* \brief
*    Frame level init of encode loop function.
*
* \param[in] pv_enc_loop_ctxt : Enc_loop context pointer
* \param[in] pi4_cu_processed : ptr to cur frame cu process in pix.
* \param[in] aps_ref_list : ref pic list for the current frame
* \param[in] ps_slice_hdr : ptr to current slice header params
* \param[in] ps_pps : ptr to active pps params
* \param[in] ps_sps : ptr to active sps params
* \param[in] ps_vps : ptr to active vps params


* \param[in] i1_weighted_pred_flag : weighted pred enable flag (unidir)
* \param[in] i1_weighted_bipred_flag : weighted pred enable flag (bidir)
* \param[in] log2_luma_wght_denom : down shift factor for weighted pred of luma
* \param[in] log2_chroma_wght_denom : down shift factor for weighted pred of chroma
* \param[in] cur_poc : current frame poc
* \param[in] i4_bitrate_instance_num : number indicating the instance of bit-rate for multi-rate encoder
*
* \return
*    None
*
* \author
*  Ittiam
*
*****************************************************************************
*/
ihevce_enc_loop_frame_init(void * pv_enc_loop_ctxt,WORD32 i4_frm_qp,recon_pic_buf_t * (* aps_ref_list)[HEVCE_MAX_REF_PICS * 2],recon_pic_buf_t * ps_frm_recon,slice_header_t * ps_slice_hdr,pps_t * ps_pps,sps_t * ps_sps,vps_t * ps_vps,WORD8 i1_weighted_pred_flag,WORD8 i1_weighted_bipred_flag,WORD32 log2_luma_wght_denom,WORD32 log2_chroma_wght_denom,WORD32 cur_poc,WORD32 i4_display_num,enc_ctxt_t * ps_enc_ctxt,me_enc_rdopt_ctxt_t * ps_curr_inp_prms,WORD32 i4_bitrate_instance_num,WORD32 i4_thrd_id,WORD32 i4_enc_frm_id,WORD32 i4_num_bitrates,WORD32 i4_quality_preset,void * pv_dep_mngr_encloop_dep_me)5853 void ihevce_enc_loop_frame_init(
5854 void *pv_enc_loop_ctxt,
5855 WORD32 i4_frm_qp,
5856 recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2],
5857 recon_pic_buf_t *ps_frm_recon,
5858 slice_header_t *ps_slice_hdr,
5859 pps_t *ps_pps,
5860 sps_t *ps_sps,
5861 vps_t *ps_vps,
5862 WORD8 i1_weighted_pred_flag,
5863 WORD8 i1_weighted_bipred_flag,
5864 WORD32 log2_luma_wght_denom,
5865 WORD32 log2_chroma_wght_denom,
5866 WORD32 cur_poc,
5867 WORD32 i4_display_num,
5868 enc_ctxt_t *ps_enc_ctxt,
5869 me_enc_rdopt_ctxt_t *ps_curr_inp_prms,
5870 WORD32 i4_bitrate_instance_num,
5871 WORD32 i4_thrd_id,
5872 WORD32 i4_enc_frm_id,
5873 WORD32 i4_num_bitrates,
5874 WORD32 i4_quality_preset,
5875 void *pv_dep_mngr_encloop_dep_me)
5876 {
5877 /* local variables */
5878 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5879 ihevce_enc_loop_ctxt_t *ps_ctxt;
5880 WORD32 chroma_qp_offset, i4_div_factor;
5881 WORD8 i1_slice_type = ps_slice_hdr->i1_slice_type;
5882 WORD8 i1_strong_intra_smoothing_enable_flag = ps_sps->i1_strong_intra_smoothing_enable_flag;
5883
5884 /* ENC_LOOP master state structure */
5885 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5886
5887 /* Nithya: Store the current POC in the slice header */
5888 ps_slice_hdr->i4_abs_pic_order_cnt = cur_poc;
5889
5890 /* Update the POC list of the current frame to the recon buffer */
5891 if(ps_slice_hdr->i1_num_ref_idx_l0_active != 0)
5892 {
5893 int i4_i;
5894 for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l0_active; i4_i++)
5895 {
5896 ps_frm_recon->ai4_col_l0_poc[i4_i] = aps_ref_list[0][i4_i]->i4_poc;
5897 }
5898 }
5899 if(ps_slice_hdr->i1_num_ref_idx_l1_active != 0)
5900 {
5901 int i4_i;
5902 for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l1_active; i4_i++)
5903 {
5904 ps_frm_recon->ai4_col_l1_poc[i4_i] = aps_ref_list[1][i4_i]->i4_poc;
5905 }
5906 }
5907
5908 /* loop over all the threads */
5909 // for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
5910 {
5911 /* ENC_LOOP state structure */
5912 ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i4_thrd_id];
5913
5914 /* SAO ctxt structure initialization*/
5915 ps_ctxt->s_sao_ctxt_t.ps_pps = ps_pps;
5916 ps_ctxt->s_sao_ctxt_t.ps_sps = ps_sps;
5917 ps_ctxt->s_sao_ctxt_t.ps_slice_hdr = ps_slice_hdr;
5918
5919 /*bit-rate instance number for Multi-bitrate (MBR) encode */
5920 ps_ctxt->i4_bitrate_instance_num = i4_bitrate_instance_num;
5921 ps_ctxt->i4_num_bitrates = i4_num_bitrates;
5922 ps_ctxt->i4_chroma_format = ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format;
5923 ps_ctxt->i4_is_first_query = 1;
5924 ps_ctxt->i4_is_ctb_qp_modified = 0;
5925
5926 /* enc_frm_id for multiframe encode */
5927
5928 if(1 == ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel)
5929 {
5930 ps_ctxt->i4_enc_frm_id = 0;
5931 i4_enc_frm_id = 0;
5932 }
5933 else
5934 {
5935 ps_ctxt->i4_enc_frm_id = i4_enc_frm_id;
5936 }
5937
5938 /*Initialize the sub pic rc buf appropriately */
5939
5940 /*Set the thrd id flag */
5941 ps_enc_ctxt->s_multi_thrd
5942 .ai4_thrd_id_valid_flag[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 1;
5943
5944 ps_enc_ctxt->s_multi_thrd
5945 .ai8_nctb_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5946 ps_enc_ctxt->s_multi_thrd
5947 .ai8_nctb_me_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5948
5949 ps_enc_ctxt->s_multi_thrd
5950 .ai8_nctb_l0_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5951 ps_enc_ctxt->s_multi_thrd
5952 .ai8_nctb_act_factor[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5953
5954 ps_enc_ctxt->s_multi_thrd
5955 .ai8_nctb_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5956 ps_enc_ctxt->s_multi_thrd
5957 .ai8_acc_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5958 ps_enc_ctxt->s_multi_thrd
5959 .ai8_acc_bits_mul_qs_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5960 ps_enc_ctxt->s_multi_thrd
5961 .ai8_nctb_hdr_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5962 ps_enc_ctxt->s_multi_thrd
5963 .ai8_nctb_mpm_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5964 ps_enc_ctxt->s_multi_thrd.ai4_prev_chunk_qp[i4_enc_frm_id][i4_bitrate_instance_num] =
5965 i4_frm_qp;
5966
5967 /*Frame level data for Sub Pic rc is initalized here */
5968 /*Can be sent once per frame*/
5969 {
5970 WORD32 i4_tot_frame_ctb = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert *
5971 ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz;
5972
5973 /*Accumalated bits of all cu for required CTBS estimated during RDO evaluation*/
5974 ps_ctxt->u4_total_cu_bits = 0;
5975 ps_ctxt->u4_total_cu_hdr_bits = 0;
5976
5977 ps_ctxt->u4_cu_tot_bits_into_qscale = 0;
5978 ps_ctxt->u4_cu_tot_bits = 0;
5979 ps_ctxt->u4_total_cu_bits_mul_qs = 0;
5980 ps_ctxt->i4_display_num = i4_display_num;
5981 ps_ctxt->i4_sub_pic_level_rc = ps_enc_ctxt->s_multi_thrd.i4_in_frame_rc_enabled;
5982 /*The Qscale is to be generated every 10th of total frame ctb is completed */
5983 //ps_ctxt->i4_num_ctb_for_out_scale = (10 * i4_tot_frame_ctb)/100 ;
5984 ps_ctxt->i4_num_ctb_for_out_scale = (UPDATE_QP_AT_CTB * i4_tot_frame_ctb) / 100;
5985
5986 ps_ctxt->i4_cu_qp_sub_pic_rc = (1 << QP_LEVEL_MOD_ACT_FACTOR);
5987 /*Sub Pic RC frame level params */
5988 ps_ctxt->i8_frame_l1_ipe_sad =
5989 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_pre_intra_sad;
5990 ps_ctxt->i8_frame_l0_ipe_satd =
5991 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_l0_acc_satd;
5992 ps_ctxt->i8_frame_l1_me_sad =
5993 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_l1_coarse_me_sad;
5994 ps_ctxt->i8_frame_l1_activity_fact =
5995 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_level_activity_fact;
5996 if(ps_ctxt->i4_sub_pic_level_rc)
5997 {
5998 ASSERT(
5999 ps_curr_inp_prms->ps_curr_inp->s_lap_out
6000 .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num] != 0);
6001
6002 ps_ctxt->ai4_frame_bits_estimated[ps_ctxt->i4_enc_frm_id]
6003 [ps_ctxt->i4_bitrate_instance_num] =
6004 ps_curr_inp_prms->ps_curr_inp->s_lap_out
6005 .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num];
6006 }
6007 //ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type = 1;
6008
6009 ps_ctxt->i4_is_I_scenecut =
6010 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
6011 (ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_IDR_FRAME ||
6012 ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_I_FRAME));
6013
6014 ps_ctxt->i4_is_non_I_scenecut =
6015 ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
6016 (ps_ctxt->i4_is_I_scenecut == 0));
6017
6018 /*ps_ctxt->i4_is_I_only_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_I_only_scd;
6019 ps_ctxt->i4_is_non_I_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_non_I_scd;*/
6020 ps_ctxt->i4_is_model_valid =
6021 ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i4_is_model_valid;
6022 }
6023 /* cb and cr offsets are assumed to be same */
6024 chroma_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset + ps_pps->i1_pic_cb_qp_offset;
6025
6026 /* assumption of cb = cr qp */
6027 ASSERT(ps_slice_hdr->i1_slice_cb_qp_offset == ps_slice_hdr->i1_slice_cr_qp_offset);
6028 ASSERT(ps_pps->i1_pic_cb_qp_offset == ps_pps->i1_pic_cr_qp_offset);
6029
6030 ps_ctxt->u1_is_input_data_hbd = (ps_sps->i1_bit_depth_luma_minus8 > 0);
6031
6032 ps_ctxt->u1_bit_depth = ps_sps->i1_bit_depth_luma_minus8 + 8;
6033
6034 ps_ctxt->s_mc_ctxt.i4_bit_depth = ps_ctxt->u1_bit_depth;
6035 ps_ctxt->s_mc_ctxt.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
6036
6037 /*remember chroma qp offset as qp related parameters are calculated at CU level*/
6038 ps_ctxt->i4_chroma_qp_offset = chroma_qp_offset;
6039 ps_ctxt->i1_cu_qp_delta_enable = ps_pps->i1_cu_qp_delta_enabled_flag;
6040 ps_ctxt->i1_entropy_coding_sync_enabled_flag = ps_pps->i1_entropy_coding_sync_enabled_flag;
6041
6042 ps_ctxt->i4_is_ref_pic = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_ref_pic;
6043 ps_ctxt->i4_temporal_layer = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_temporal_lyr_id;
6044 ps_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER;
6045 ps_ctxt->i4_use_const_lamda_modifier =
6046 ps_ctxt->i4_use_const_lamda_modifier ||
6047 ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6048 (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
6049 ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6050 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) ||
6051 (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6052 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) ||
6053 (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6054 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) ||
6055 (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6056 (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3))));
6057
6058 {
6059 ps_ctxt->f_i_pic_lamda_modifier =
6060 ps_curr_inp_prms->ps_curr_inp->s_lap_out.f_i_pic_lamda_modifier;
6061 }
6062
6063 ps_ctxt->i4_frame_qp = i4_frm_qp;
6064 ps_ctxt->i4_frame_mod_qp = i4_frm_qp;
6065 ps_ctxt->i4_cu_qp = i4_frm_qp;
6066 ps_ctxt->i4_prev_cu_qp = i4_frm_qp;
6067 ps_ctxt->i4_chrm_cu_qp =
6068 (ps_ctxt->u1_chroma_array_type == 2)
6069 ? MIN(i4_frm_qp + chroma_qp_offset, 51)
6070 : gai1_ihevc_chroma_qp_scale[i4_frm_qp + chroma_qp_offset + MAX_QP_BD_OFFSET];
6071
6072 ps_ctxt->i4_cu_qp_div6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
6073 i4_div_factor = (i4_frm_qp + 3) / 6;
6074 i4_div_factor = CLIP3(i4_div_factor, 3, 6);
6075 ps_ctxt->i4_cu_qp_mod6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
6076
6077 ps_ctxt->i4_chrm_cu_qp_div6 =
6078 (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
6079 ps_ctxt->i4_chrm_cu_qp_mod6 =
6080 (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
6081
6082 #define INTER_RND_QP_BY_6
6083 #ifdef INTER_RND_QP_BY_6
6084
6085 { /*1/6 rounding for 8 bit b frames*/
6086 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = 85
6087 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
6088 }
6089 #else
6090 /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
6091 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
6092 #endif
6093
6094 if(ISLICE == i1_slice_type)
6095 {
6096 /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
6097 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = 171
6098 /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
6099 }
6100 else
6101 {
6102 /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
6103 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
6104 ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
6105 /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
6106 }
6107
6108 ps_ctxt->i1_strong_intra_smoothing_enable_flag = i1_strong_intra_smoothing_enable_flag;
6109
6110 ps_ctxt->i1_slice_type = i1_slice_type;
6111
6112 /* intialize the inter pred (MC) context at frame level */
6113 ps_ctxt->s_mc_ctxt.ps_ref_list = aps_ref_list;
6114 ps_ctxt->s_mc_ctxt.i1_weighted_pred_flag = i1_weighted_pred_flag;
6115 ps_ctxt->s_mc_ctxt.i1_weighted_bipred_flag = i1_weighted_bipred_flag;
6116 ps_ctxt->s_mc_ctxt.i4_log2_luma_wght_denom = log2_luma_wght_denom;
6117 ps_ctxt->s_mc_ctxt.i4_log2_chroma_wght_denom = log2_chroma_wght_denom;
6118
6119 /* intialize the MV pred context at frame level */
6120 ps_ctxt->s_mv_pred_ctxt.ps_ref_list = aps_ref_list;
6121 ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr = ps_slice_hdr;
6122 ps_ctxt->s_mv_pred_ctxt.ps_sps = ps_sps;
6123 ps_ctxt->s_mv_pred_ctxt.i4_log2_parallel_merge_level_minus2 =
6124 ps_pps->i1_log2_parallel_merge_level - 2;
6125
6126 #if ADAPT_COLOCATED_FROM_L0_FLAG
6127 if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_temporal_mvp_enable_flag)
6128 {
6129 if((ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_num_ref_idx_l1_active > 0) &&
6130 (ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][0]->i4_frame_qp <
6131 ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][0]->i4_frame_qp))
6132 {
6133 ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_collocated_from_l0_flag = 1;
6134 }
6135 }
6136 #endif
6137 /* Initialization of deblocking params */
6138 ps_ctxt->s_deblk_prms.i4_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
6139 ps_ctxt->s_deblk_prms.i4_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
6140
6141 ps_ctxt->s_deblk_prms.i4_cb_qp_indx_offset = ps_pps->i1_pic_cb_qp_offset;
6142
6143 ps_ctxt->s_deblk_prms.i4_cr_qp_indx_offset = ps_pps->i1_pic_cr_qp_offset;
6144 /*init frame level stat accumualtion parameters */
6145 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6146 ->u4_frame_sad_acc = 0;
6147 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6148 ->u4_frame_intra_sad_acc = 0;
6149 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6150 ->u4_frame_open_loop_intra_sad = 0;
6151 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6152 ->i8_frame_open_loop_ssd = 0;
6153 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6154 ->u4_frame_inter_sad_acc = 0;
6155
6156 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6157 ->i8_frame_cost_acc = 0;
6158 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6159 ->i8_frame_intra_cost_acc = 0;
6160 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6161 ->i8_frame_inter_cost_acc = 0;
6162
6163 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6164 ->u4_frame_intra_sad = 0;
6165 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6166 ->u4_frame_rdopt_bits = 0;
6167 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6168 ->u4_frame_rdopt_header_bits = 0;
6169 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6170 ->i4_qp_normalized_8x8_cu_sum[0] = 0;
6171 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6172 ->i4_qp_normalized_8x8_cu_sum[1] = 0;
6173 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6174 ->i4_8x8_cu_sum[0] = 0;
6175 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6176 ->i4_8x8_cu_sum[1] = 0;
6177 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6178 ->i8_sad_by_qscale[0] = 0;
6179 ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6180 ->i8_sad_by_qscale[1] = 0;
6181 /* Compute the frame_qstep */
6182 GET_FRAME_QSTEP_FROM_QP(ps_ctxt->i4_frame_qp, ps_ctxt->i4_frame_qstep);
6183
6184 ps_ctxt->u1_max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_inter;
6185
6186 ps_ctxt->ps_rc_quant_ctxt = &ps_enc_ctxt->s_rc_quant;
6187 /* intialize the cabac rdopt context at frame level */
6188 ihevce_entropy_rdo_frame_init(
6189 &ps_ctxt->s_rdopt_entropy_ctxt,
6190 ps_slice_hdr,
6191 ps_pps,
6192 ps_sps,
6193 ps_vps,
6194 ps_master_ctxt->au1_cu_skip_top_row,
6195 &ps_enc_ctxt->s_rc_quant);
6196
6197 /* register the dep mngr instance for forward ME sync */
6198 ps_ctxt->pv_dep_mngr_encloop_dep_me = pv_dep_mngr_encloop_dep_me;
6199 }
6200 }
6201 /*
6202 ******************************************************************************
6203 * \if Function name : ihevce_enc_loop_get_frame_rc_prms \endif
6204 *
6205 * \brief
6206 * returns Nil
6207 *
6208 * \param[in] pv_enc_loop_ctxt : pointer to encode loop context
6209 * \param[out]ps_rc_prms : ptr to frame level info structure
6210 *
6211 * \return
6212 * None
6213 *
6214 * \author
6215 * Ittiam
6216 *
6217 *****************************************************************************
6218 */
ihevce_enc_loop_get_frame_rc_prms(void * pv_enc_loop_ctxt,rc_bits_sad_t * ps_rc_prms,WORD32 i4_br_id,WORD32 i4_enc_frm_id)6219 void ihevce_enc_loop_get_frame_rc_prms(
6220 void *pv_enc_loop_ctxt,
6221 rc_bits_sad_t *ps_rc_prms,
6222 WORD32 i4_br_id, //bitrate instance id
6223 WORD32 i4_enc_frm_id) // frame id
6224 {
6225 /*Get the master thread pointer*/
6226 ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
6227 ihevce_enc_loop_ctxt_t *ps_ctxt;
6228 UWORD32 total_frame_intra_sad = 0, total_frame_open_loop_intra_sad = 0;
6229 LWORD64 i8_total_ssd_frame = 0;
6230 UWORD32 total_frame_sad = 0;
6231 UWORD32 total_frame_rdopt_bits = 0;
6232 UWORD32 total_frame_rdopt_header_bits = 0;
6233 WORD32 i4_qp_normalized_8x8_cu_sum[2] = { 0, 0 };
6234 WORD32 i4_8x8_cu_sum[2] = { 0, 0 };
6235 LWORD64 i8_sad_by_qscale[2] = { 0, 0 };
6236 WORD32 i4_curr_qp_acc = 0;
6237 WORD32 i;
6238
6239 /* ENC_LOOP master state structure */
6240 ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
6241
6242 if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
6243 {
6244 i4_enc_frm_id = 0;
6245 }
6246 /*loop through all threads and accumulate intra sad across all threads*/
6247 for(i = 0; i < ps_master_ctxt->i4_num_proc_thrds; i++)
6248 {
6249 /* ENC_LOOP state structure */
6250 ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i];
6251 total_frame_open_loop_intra_sad +=
6252 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad;
6253 i8_total_ssd_frame +=
6254 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd;
6255 total_frame_intra_sad +=
6256 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad;
6257 total_frame_sad +=
6258 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc;
6259 total_frame_rdopt_bits +=
6260 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits;
6261 total_frame_rdopt_header_bits +=
6262 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits;
6263 i4_qp_normalized_8x8_cu_sum[0] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
6264 ->i4_qp_normalized_8x8_cu_sum[0];
6265 i4_qp_normalized_8x8_cu_sum[1] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
6266 ->i4_qp_normalized_8x8_cu_sum[1];
6267 i4_8x8_cu_sum[0] +=
6268 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[0];
6269 i4_8x8_cu_sum[1] +=
6270 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[1];
6271 i8_sad_by_qscale[0] +=
6272 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[0];
6273 i8_sad_by_qscale[1] +=
6274 ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[1];
6275 }
6276
6277 ps_rc_prms->u4_open_loop_intra_sad = total_frame_open_loop_intra_sad;
6278 ps_rc_prms->i8_total_ssd_frame = i8_total_ssd_frame;
6279 ps_rc_prms->u4_total_sad = total_frame_sad;
6280 ps_rc_prms->u4_total_texture_bits = total_frame_rdopt_bits - total_frame_rdopt_header_bits;
6281 ps_rc_prms->u4_total_header_bits = total_frame_rdopt_header_bits;
6282 /*This accumulation of intra frame sad is not intact. This can only be a temp change*/
6283 ps_rc_prms->u4_total_intra_sad = total_frame_intra_sad;
6284 ps_rc_prms->i4_qp_normalized_8x8_cu_sum[0] = i4_qp_normalized_8x8_cu_sum[0];
6285 ps_rc_prms->i4_qp_normalized_8x8_cu_sum[1] = i4_qp_normalized_8x8_cu_sum[1];
6286 ps_rc_prms->i4_8x8_cu_sum[0] = i4_8x8_cu_sum[0];
6287 ps_rc_prms->i4_8x8_cu_sum[1] = i4_8x8_cu_sum[1];
6288 ps_rc_prms->i8_sad_by_qscale[0] = i8_sad_by_qscale[0];
6289 ps_rc_prms->i8_sad_by_qscale[1] = i8_sad_by_qscale[1];
6290 }
6291