xref: /aosp_15_r20/external/libhevc/encoder/ihevce_enc_structs.h (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /*!
21 ******************************************************************************
22 * \file ihevce_enc_structs.h
23 *
24 * \brief
*    This file contains structure definitions of the encoder
26 *
27 * \date
28 *    18/09/2012
29 *
30 * \author
31 *    Ittiam
32 *
33 ******************************************************************************
34 */
35 
36 #ifndef _IHEVCE_ENC_STRUCTS_H_
37 #define _IHEVCE_ENC_STRUCTS_H_
38 
39 /*****************************************************************************/
40 /* Constant Macros                                                           */
41 /*****************************************************************************/
/* Maximum and minimum picture dimensions handled by these structures */
#define HEVCE_MAX_WIDTH 1920
#define HEVCE_MAX_HEIGHT 1088

#define HEVCE_MIN_WIDTH 64
#define HEVCE_MIN_HEIGHT 64

/*
 * Frame-level limits derived from the maximum dimensions and MIN_CTB_SIZE.
 * Every expansion is enclosed in an outer pair of parentheses so that it
 * behaves as a single operand wherever it is used. Without them an
 * expression such as `x / MAX_NUM_CTB_ROWS_FRM` would expand to
 * `x / (HEVCE_MAX_HEIGHT) / (MIN_CTB_SIZE)` and silently compute the wrong
 * value.
 */
#define MAX_CTBS_IN_FRAME ((HEVCE_MAX_WIDTH * HEVCE_MAX_HEIGHT) / (MIN_CTB_SIZE * MIN_CTB_SIZE))
#define MAX_NUM_CTB_ROWS_FRM ((HEVCE_MAX_HEIGHT) / (MIN_CTB_SIZE))

/* Minimum vertical processing unit, and how many of them span HEVCE_MAX_HEIGHT */
#define MIN_VERT_PROC_UNIT (8)
#define MAX_NUM_VERT_UNITS_FRM ((HEVCE_MAX_HEIGHT) / (MIN_VERT_PROC_UNIT))
53 
/* Reference picture limit; the DPB limit is one picture more than that */
#define HEVCE_MAX_REF_PICS 8
#define HEVCE_MAX_DPB_PICS (1 + HEVCE_MAX_REF_PICS)

/* Horizontal and vertical padding amounts */
#define PAD_HORZ 80
#define PAD_VERT 80

/* Default upper limit on the number of reference pictures */
#define DEFAULT_MAX_REFERENCE_PICS 4

/* NOTE(review): the meaning of this value is not documented in this header */
#define BLU_RAY_SUPPORT 231457
63 
/** @brief Upper bound on the number of parts in a min CU (4, reached for NxN) */
#define NUM_PU_PARTS 4

/** @brief Upper bound on the number of parts in an inter CU */
#define NUM_INTER_PU_PARTS (MAX_NUM_INTER_PARTS)

/* SEND_BI_RDOPT is enabled here; it selects the inter candidate count below */
#define SEND_BI_RDOPT

/** @brief Inter CU candidate count: 4 when SEND_BI_RDOPT is defined, 3 otherwise */
#if defined(SEND_BI_RDOPT)
#define MAX_INTER_CU_CANDIDATES 4
#else
#define MAX_INTER_CU_CANDIDATES 3
#endif

/** @brief Intra CU candidate count */
#define MAX_INTRA_CU_CANDIDATES 3

/* Intra candidate count */
#define MAX_INTRA_CANDIDATES 35
80 
/** One entropy thread is created per resolution and bit-rate instance */
#define NUM_ENTROPY_THREADS (IHEVCE_MAX_NUM_RESOLUTIONS * IHEVCE_MAX_NUM_BITRATES)

/* Buffer count between the Decomp and HME layers: 1 = sequential mode, >1 = parallel mode */
#define NUM_BUFS_DECOMP_HME 1

/**
 * Stagger between pre-ME and L0 IPE in pre-enc.
 * The largest stagger this implies is MAX_PRE_ENC_STAGGER - 1.
 */
#define MAX_PRE_ENC_STAGGER (NUM_LAP2_LOOK_AHEAD + 1 + MIN_L1_L0_STAGGER_NON_SEQ)

/* ME/enc buffer count follows MAX_NUM_ENC_LOOP_PARALLEL */
#define NUM_ME_ENC_BUFS (MAX_NUM_ENC_LOOP_PARALLEL)

/* Bounds on the stagger between L0 IPE and enc */
#define MIN_L0_IPE_ENC_STAGGER 1
#define MAX_L0_IPE_ENC_STAGGER (NUM_ME_ENC_BUFS + (MIN_L0_IPE_ENC_STAGGER))

/* Pre-enc RC delay bounds: matching L0 IPE/enc stagger + 1 + NUM_BUFS_DECOMP_HME */
#define MAX_PRE_ENC_RC_DELAY (MAX_L0_IPE_ENC_STAGGER + 1 + NUM_BUFS_DECOMP_HME)
#define MIN_PRE_ENC_RC_DELAY (MIN_L0_IPE_ENC_STAGGER + 1 + NUM_BUFS_DECOMP_HME)

/** @brief Number of frame-level CTB contexts kept between encode and entropy */
#define NUM_FRMPROC_ENTCOD_BUFS 1

/** @brief Number of extra recon buffers required by the stagger design */
#define NUM_EXTRA_RECON_BUFS 0

/** Additional recon picture buffers needed to support EncLoop parallelism */
#define NUM_EXTRA_RECON_BUFS_FOR_ELP 0
110 
/** @brief Maximum number of bytes for a 4x4 block after scanning */
#define MAX_SCAN_COEFFS_BYTES_4x4 (48)

/** @brief Maximum number of luma coefficient bytes after scan, per CTB */
#define MAX_LUMA_COEFFS_CTB ((MAX_SCAN_COEFFS_BYTES_4x4) * (MAX_TU_IN_CTB) * 4)

/** @brief Maximum number of chroma coefficient bytes after scan, per CTB */
#define MAX_CHRM_COEFFS_CTB ((MAX_SCAN_COEFFS_BYTES_4x4) * (MAX_TU_IN_CTB >> 1) * 4)

/** @brief Maximum number of coefficient bytes after scan, per CTB (luma + chroma) */
#define MAX_SCAN_COEFFS_CTB ((MAX_LUMA_COEFFS_CTB) + (MAX_CHRM_COEFFS_CTB))

/** @brief Bytes in the per-CTB PU map buffer used for neighbour availability */
#define MUN_PU_MAP_BYTES_PER_CTB (MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW)
125 
/** @brief Total number of system memory records */
#define TOTAL_SYSTEM_MEM_RECS 120

/** @brief Number of input asynchronous command buffers */
#define NUM_AYSNC_CMD_BUFS 4

/** @brief Size of one command buffer, in bytes */
#define ENC_COMMAND_BUFF_SIZE 512

/** @brief Number of output buffers */
#define NUM_OUTPUT_BUFS 4

/** @brief Lambda used for SATD cost estimation */
#define LAMDA_SATD 1

/** @brief Maximum number of 1s in u2_sig_coeff_abs_gt1_flags */
#define MAX_GT_ONE 8

/** Maximum number of intra prediction modes */
#define MAX_NUM_IP_MODES 35

/** Number of best intra modes used for intra mode refinement */
#define NUM_BEST_MODES 3

/** Maximum number of parallel frame processing threads in the pre-encode group */
#define MAX_NUM_FRM_PROC_THRDS_PRE_ENC MAX_NUM_CORES

/** Maximum number of parallel frame processing threads in the encode group */
#define MAX_NUM_FRM_PROC_THRDS_ENC MAX_NUM_CORES

/** Number of ping-pong buffers used for stagger */
#define PING_PONG_BUF 2
158 
/**
 * Maximum number of layers in motion estimation.
 * Must be greater than or equal to MAX_NUM_LAYERS defined in hme_interface.h.
 */
#define MAX_NUM_HME_LAYERS 5

/**
 * @brief Maximum number of layers allowed
 */
#define MAX_NUM_LAYERS 4

/* NOTE(review): presumably the number of rate-control picture types - confirm */
#define NUM_RC_PIC_TYPE 9

/* NOTE(review): presumably the node count of a CU tree - confirm */
#define MAX_NUM_NODES_CU_TREE (85)

/* Controls for dynamic load balancing */
#define DYN_LOAD_BAL_UPPER_LIMIT 0.80
#define DYN_LOAD_BAL_LOWER_LIMIT 0.20
#define NUM_SUB_GOP_DYN_BAL 1
#define MIN_NUM_FRMS_DYN_BAL 4

#define CORES_SRES_OR_MRES 2

#define HME_HIGH_SAD_BLK_THRESH 35

/* Set to 1 to compare the CABAC states of the final entropy thread with the enc loop states */
#define VERIFY_ENCLOOP_CABAC_STATES 0

/* The maximum CU size is 64x64 */
#define MAX_NUM_BLKS_IN_MAX_CU 64
192 
193 /*****************************************************************************/
194 /* Function Macros                                                           */
195 /*****************************************************************************/
196 
197 /*****************************************************************************/
198 /* Typedefs                                                                  */
199 /*****************************************************************************/
200 typedef void (*pf_iq_it_rec)(
201     WORD16 *pi2_src,
202     WORD16 *pi2_tmp,
203     UWORD8 *pu1_pred,
204     WORD16 *pi2_dequant_coeff,
205     UWORD8 *pu1_dst,
206     WORD32 qp_div, /* qpscaled / 6 */
207     WORD32 qp_rem, /* qpscaled % 6 */
208     WORD32 src_strd,
209     WORD32 pred_strd,
210     WORD32 dst_strd,
211     WORD32 zero_cols,
212     WORD32 zero_rows);
213 
214 typedef void (*pf_intra_pred)(
215     UWORD8 *pu1_ref, WORD32 src_strd, UWORD8 *pu1_dst, WORD32 dst_strd, WORD32 nt, WORD32 mode);
216 
217 typedef UWORD32 (*pf_res_trans_luma)(
218     UWORD8 *pu1_src,
219     UWORD8 *pu1_pred,
220     WORD32 *pi4_tmp,
221     WORD16 *pi2_dst,
222     WORD32 src_strd,
223     WORD32 pred_strd,
224     WORD32 dst_strd,
225     CHROMA_PLANE_ID_T e_chroma_plane);
226 
227 typedef WORD32 (*pf_quant)(
228     WORD16 *pi2_coeffs,
229     WORD16 *pi2_quant_coeff,
230     WORD16 *pi2_dst,
231     WORD32 qp_div, /* qpscaled / 6 */
232     WORD32 qp_rem, /* qpscaled % 6 */
233     WORD32 q_add,
234     WORD32 src_strd,
235     WORD32 dst_strd,
236     UWORD8 *pu1_csbf_buf,
237     WORD32 csbf_strd,
238     WORD32 *zero_cols,
239     WORD32 *zero_row);
240 
241 /*****************************************************************************/
242 /* Enums                                                                     */
243 /*****************************************************************************/
/** Supported partition shapes */
typedef enum
{
    /* symmetric motion partitions */
    SIZE_2Nx2N = 0, /**< 2Nx2N */
    SIZE_2NxN = 1, /**< 2Nx N */
    SIZE_Nx2N = 2, /**<  Nx2N */
    SIZE_NxN = 3, /**<  Nx N */

    /* asymmetric motion partitions */
    SIZE_2NxnU = 4, /**< 2Nx( N/2) + 2Nx(3N/2) */
    SIZE_2NxnD = 5, /**< 2Nx(3N/2) + 2Nx( N/2) */
    SIZE_nLx2N = 6, /**< ( N/2)x2N + (3N/2)x2N */
    SIZE_nRx2N = 7 /**< (3N/2)x2N + ( N/2)x2N */
} PART_SIZE_E;
256 
/** @brief Interface-level queues of the encoder */
typedef enum
{
    IHEVCE_INPUT_DATA_CTRL_Q = 0,
    IHEVCE_ENC_INPUT_Q,
    IHEVCE_INPUT_ASYNCH_CTRL_Q,
    IHEVCE_OUTPUT_DATA_Q,
    IHEVCE_OUTPUT_STATUS_Q,

    /* queue holding the recon buffer */
    IHEVCE_RECON_DATA_Q,

    /* queue holding the output buffer of enc_loop / input buffer of entropy */
    IHEVCE_FRM_PRS_ENT_COD_Q,

    /* queue holding the output of pre-enc / input buffer to ME */
    IHEVCE_PRE_ENC_ME_Q,

    /* queue holding the output buffer of ME / input buffer of Enc-RDopt */
    IHEVCE_ME_ENC_RDOPT_Q,

    /* queue holding L0 IPE data for the enc loop */
    IHEVCE_L0_IPE_ENC_Q,

    /* number of queues; must remain the last entry */
    IHEVCE_MAX_NUM_QUEUES

} IHEVCE_Q_DESC_T;
280 
281 /*****************************************************************************/
282 /* Structure                                                                 */
283 /*****************************************************************************/
284 
285 /**
286 RC_QP_QSCALE conversion structures
287 **/
288 typedef struct
289 {
290     WORD16 i2_min_qp;
291 
292     WORD16 i2_max_qp;
293 
294     WORD16 i2_min_qscale;
295 
296     WORD16 i2_max_qscale;
297 
298     WORD32 *pi4_qscale_to_qp;
299 
300     WORD32 *pi4_qp_to_qscale_q_factor;
301 
302     WORD32 *pi4_qp_to_qscale;
303 
304     WORD8 i1_qp_offset;
305 
306 } rc_quant_t;
307 
308 /**
309 ******************************************************************************
310  *  @brief     4x4 level structure which contains all the parameters
311  *             for neighbour prediction puopose
312 ******************************************************************************
313  */
314 typedef struct
315 {
316     /** PU motion vectors */
317     pu_mv_t mv;
318     /** Intra or Inter flag for each partition - 0 or 1  */
319     UWORD16 b1_intra_flag : 1;
320     /** CU skip flag - 0 or 1  */
321     UWORD16 b1_skip_flag : 1;
322     /** CU depth in CTB tree (0-3)  */
323     UWORD16 b2_cu_depth : 2;
324 
325     /** Y Qp  for loop filter */
326     WORD16 b8_qp : 8;
327 
328     /** Luma Intra Mode 0 - 34   */
329     UWORD16 b6_luma_intra_mode : 6;
330 
331     /** Y CBF  for BS compute */
332     UWORD16 b1_y_cbf : 1;
333     /** Pred L0 flag of current 4x4 */
334     UWORD16 b1_pred_l0_flag : 1;
335 
336     /** Pred L0 flag of current 4x4 */
337     UWORD16 b1_pred_l1_flag : 1;
338 } nbr_4x4_t;
339 
340 typedef struct
341 {
342     /** Bottom Left availability flag */
343     UWORD8 u1_bot_lt_avail;
344 
345     /** Left availability flag */
346     UWORD8 u1_left_avail;
347 
348     /** Top availability flag */
349     UWORD8 u1_top_avail;
350 
351     /** Top Right availability flag */
352     UWORD8 u1_top_rt_avail;
353 
354     /** Top Left availability flag */
355     UWORD8 u1_top_lt_avail;
356 
357 } nbr_avail_flags_t;
358 
359 typedef struct
360 {
361     /** prev intra flag*/
362     UWORD8 b1_prev_intra_luma_pred_flag : 1;
363 
364     /** mpm_idx */
365     UWORD8 b2_mpm_idx : 2;
366 
367     /** reminder pred mode */
368     UWORD8 b5_rem_intra_pred_mode : 5;
369 
370 } intra_prev_rem_flags_t;
371 
372 /**
373 ******************************************************************************
374  *  @brief     calc (T+Q+RDOQ) output TU structure; entropy input TU structure
375 ******************************************************************************
376  */
377 typedef struct
378 {
379     /** base tu structure */
380     tu_t s_tu;
381 
382     /** offset of luma data in ecd buffer */
383     WORD32 i4_luma_coeff_offset;
384 
385     /** offset of cb data in ecd buffer */
386     WORD32 ai4_cb_coeff_offset[2];
387 
388     /** offset of cr data in ecd buffer */
389     WORD32 ai4_cr_coeff_offset[2];
390 
391 } tu_enc_loop_out_t;
392 
393 typedef struct
394 {
395     /* L0 Motion Vector */
396     mv_t s_l0_mv;
397 
398     /* L1 Motion Vector */
399     mv_t s_l1_mv;
400 
401     /* L0 Ref index */
402     WORD8 i1_l0_ref_idx;
403 
404     /*  L1 Ref index */
405     WORD8 i1_l1_ref_idx;
406 
407     /* L0 Ref Pic Buf ID */
408     WORD8 i1_l0_pic_buf_id;
409 
410     /* L1 Ref Pic Buf ID */
411     WORD8 i1_l1_pic_buf_id;
412 
413     /** intra flag */
414     UWORD8 b1_intra_flag : 1;
415 
416     /* Pred mode */
417     UWORD8 b2_pred_mode : 2;
418 
419     /* reserved flag can be used for something later */
420     UWORD8 u1_reserved;
421 
422 } pu_col_mv_t;
423 
424 /*****************************************************************************/
425 /* Encoder uses same structure as pu_t for prediction unit                   */
426 /*****************************************************************************/
427 
428 /**
429 ******************************************************************************
430  *  @brief     Encode loop (T+Q+RDOQ) output CU structure; entropy input CU structure
431 ******************************************************************************
432  */
433 typedef struct
434 {
435     /* CU X position in terms of min CU (8x8) units */
436     UWORD32 b3_cu_pos_x : 3;
437 
438     /* CU Y position in terms of min CU (8x8) units */
439     UWORD32 b3_cu_pos_y : 3;
440 
441     /** CU size in terms of min CU (8x8) units */
442     UWORD32 b4_cu_size : 4;
443 
444     /** transquant bypass flag ; 0 for this encoder */
445     UWORD32 b1_tq_bypass_flag : 1;
446 
447     /** cu skip flag */
448     UWORD32 b1_skip_flag : 1;
449 
450     /** intra / inter CU flag */
451     UWORD32 b1_pred_mode_flag : 1;
452 
453     /** indicates partition information for CU
454      *  For intra 0 : for 2Nx2N / 1 for NxN iff CU=minCBsize
455      *  For inter 0 : @sa PART_SIZE_E
456      */
457     UWORD32 b3_part_mode : 3;
458 
459     /** 0 for this encoder */
460     UWORD32 b1_pcm_flag : 1;
461 
462     /** only applicable for intra cu */
463     UWORD32 b3_chroma_intra_pred_mode : 3;
464 
465     /** no residue flag for cu */
466     UWORD32 b1_no_residual_syntax_flag : 1;
467 
468     /* flag to indicate if current CU is the first
469     CU of the Quantisation group*/
470     UWORD32 b1_first_cu_in_qg : 1;
471 
472     /** Intra prev and reminder flags
473      * if part is NxN the tntries 1,2,3 will be valid
474      * other wise only enry 0 will be set.
475      */
476     intra_prev_rem_flags_t as_prev_rem[NUM_PU_PARTS];
477 
478     /**
479      *  Access valid  number of pus in this array based on u1_part_mode
480      *  Moiton vector differentials and reference idx should be
481      *  populated in this structure
482      *  @remarks shall be accessed only for inter pus
483      */
484     pu_t *ps_pu;
485 
486     /**
487      *  pointer to first tu of this cu. Each TU need to be populated
488      *  in TU order by calc. Total TUs in CU is given by u2_num_tus_in_cu
489      */
490     tu_enc_loop_out_t *ps_enc_tu;
491 
492     /** total TUs in this CU; shall be 0 if b1_no_residual_syntax_flag = 1 */
493     UWORD16 u2_num_tus_in_cu;
494 
495     /** Coeff bufer pointer */
496     /* Pointer to transform coeff data */
497     /*************************************************************************/
498     /* Following format is repeated for every coded TU                       */
499     /* Luma Block                                                            */
500     /* num_coeffs      : 16 bits                                             */
501     /* zero_cols       : 8 bits ( 1 bit per 4 columns)                       */
502     /* sig_coeff_map   : ((TU Size * TU Size) + 31) >> 5 number of WORD32s   */
503     /* coeff_data      : Non zero coefficients                               */
504     /* Cb Block (only for last TU in 4x4 case else for every luma TU)        */
505     /* num_coeffs      : 16 bits                                             */
506     /* zero_cols       : 8 bits ( 1 bit per 4 columns)                       */
507     /* sig_coeff_map   : ((TU Size * TU Size) + 31) >> 5 number of WORD32s   */
508     /* coeff_data      : Non zero coefficients                               */
509     /* Cr Block (only for last TU in 4x4 case else for every luma TU)        */
510     /* num_coeffs      : 16 bits                                             */
511     /* zero_cols       : 8 bits ( 1 bit per 4 columns)                       */
512     /* sig_coeff_map   : ((TU Size * TU Size) + 31) >> 5 number of WORD32s   */
513     /* coeff_data      : Non zero coefficients                               */
514     /*************************************************************************/
515     void *pv_coeff;
516 
517     /** qp used during for CU
518       * @remarks :
519       */
520     WORD8 i1_cu_qp;
521 
522 } cu_enc_loop_out_t;
523 
524 /**
525  * SAO
526  */
527 typedef struct
528 {
529     /**
530      * sao_type_idx_luma
531      */
532     UWORD32 b3_y_type_idx : 3;
533 
534     /**
535      * luma sao_band_position
536      */
537     UWORD32 b5_y_band_pos : 5;
538 
539     /**
540      * sao_type_idx_chroma
541      */
542     UWORD32 b3_cb_type_idx : 3;
543 
544     /**
545      * cb sao_band_position
546      */
547     UWORD32 b5_cb_band_pos : 5;
548 
549     /**
550      * sao_type_idx_chroma
551      */
552     UWORD32 b3_cr_type_idx : 3;
553 
554     /**
555      * cb sao_band_position
556      */
557     UWORD32 b5_cr_band_pos : 5;
558 
559     /*SAO Offsets
560      * In all these offsets, 0th element is not used
561      */
562     /**
563      * luma SaoOffsetVal[i]
564      */
565     WORD8 u1_y_offset[5];
566 
567     /**
568      * chroma cb SaoOffsetVal[i]
569      */
570     WORD8 u1_cb_offset[5];
571 
572     /**
573      * chroma cr SaoOffsetVal[i]
574      */
575     WORD8 u1_cr_offset[5];
576 
577     /**
578      * sao_merge_left_flag common for y,cb,cr
579      */
580     UWORD32 b1_sao_merge_left_flag : 1;
581 
582     /**
583      * sao_merge_up_flag common for y,cb,cr
584      */
585     UWORD32 b1_sao_merge_up_flag : 1;
586 
587 } sao_enc_t;
588 
589 /**
590 ******************************************************************************
591  *  @brief       ctb output structure; output of Encode loop, input to entropy
592 ******************************************************************************
593  */
594 typedef struct
595 {
596     /**
597      * bit0     :  depth0 split flag, (64x64 splits)
598      * bits 1-3 :  not used
599      * bits 4-7 :  depth1 split flags; valid iff depth0 split=1 (32x32 splits)
600      * bits 8-23:  depth2 split flags; (if 0 16x16 is cu else 8x8 min cu)
601 
602      * if a split flag of n is set for depth 1, check the following split flags
603      * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2:
604      *
605      */
606     UWORD32 u4_cu_split_flags;
607 
608     /***************************************************************
609      * For any given CU position CU_posx, CU_posy access
610      *  au4_packed_tu_split_flags[(CU_posx >> 5)[(CU_posy >> 5)]
611      * Note : For CTB size smaller than 64x64 only use u4_packed_tu_split_flags[0]
612      ****************************************************************/
613 
614     /**
615      * access bits corresponding to actual CU size till leaf nodes
616      * bit0     :  (32x32 TU split flag)
617      * bits 1-3 :  not used
618      * bits 4-7 :  (16x16 TUsplit flags)
619      * bits 8-23:  (8x8  TU split flags)
620 
621      * if a split flag of n is set for depth 1, check the following split flags
622      * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2:
623      *
624      * @remarks     As tu sizes are relative to CU sizes the producer has to
625      * make sure the correctness of u4_packed_tu_split_flags.
626      *
627      * @remarks     au4_packed_tu_split_flags_cu[1]/[2]/[3] to be used only
628      *              for 64x64 ctb.
629      */
630     UWORD32 au4_packed_tu_split_flags_cu[4];
631 
632     /**
633      *  pointer to first CU of CTB. Each CU need to be populated
634      *  in CU order by calc. Total CUs in CTB is given by u1_num_cus_in_ctb
635      */
636     cu_enc_loop_out_t *ps_enc_cu;
637 
638     /** total TUs in this CU; shall be 0 if b1_no_residual_syntax_flag = 1 */
639     UWORD8 u1_num_cus_in_ctb;
640 
641     /** CTB neighbour availability flags */
642     nbr_avail_flags_t s_ctb_nbr_avail_flags;
643 
644     /* SAO parameters of the CTB */
645     sao_enc_t s_sao;
646 
647 } ctb_enc_loop_out_t;
648 
649 /**
650 ******************************************************************************
651  *  @brief      cu inter candidate for encoder
652 ******************************************************************************
653  */
654 typedef struct
655 {
656     /** base pu structure
657      *  access valid  number of entries in this array based on u1_part_size
658      */
659     pu_t as_inter_pu[NUM_INTER_PU_PARTS];
660 
661     /* TU split flag : tu_split_flag[0] represents the transform splits
662      *  for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds
663      *  to respective 32x32  */
664     /* For a 8x8 TU - 1 bit used to indicate split */
665     /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */
666     /* For a 32x32 TU - See above */
667     WORD32 ai4_tu_split_flag[4];
668 
669     /* TU split flag : tu_split_flag[0] represents the transform splits
670      *  for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds
671      *  to respective 32x32  */
672     /* For a 8x8 TU - 1 bit used to indicate split */
673     /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */
674     /* For a 32x32 TU - See above */
675     WORD32 ai4_tu_early_cbf[4];
676 
677     /**Pointer to the buffer having predicted data after mc in SATD stage
678      * Since we have 2 buffers for each candidate pred data for best merge candidate
679      * can be in one of the 2 buffers.
680      */
681     UWORD8 *pu1_pred_data;
682 
683     UWORD16 *pu2_pred_data;
684 
685     UWORD8 *pu1_pred_data_scr;
686 
687     UWORD16 *pu2_pred_data_src;
688 
689     /* Total cost: SATD cost + MV cost */
690     WORD32 i4_total_cost;
691 
692     /** Stride for predicted data*/
693     WORD32 i4_pred_data_stride;
694 
695     /** @remarks u1_part_size can be non square only for  Inter   */
696     UWORD8 b3_part_size : 3; /* @sa: PART_SIZE_E */
697 
698     /** evaluate transform for cusize iff this flag is 1 */
699     /** this flag should be set 0 if CU is 64x64         */
700     UWORD8 b1_eval_tx_cusize : 1;
701 
702     /** evaluate transform for cusize/2 iff this flag is 1 */
703     UWORD8 b1_eval_tx_cusize_by2 : 1;
704 
705     /** Skip Flag : ME should always set this 0 for the candidates */
706     UWORD8 b1_skip_flag : 1;
707 
708     UWORD8 b1_intra_has_won : 1;
709 
710     /* used to mark if this mode needs to be evaluated in auxiliary mode */
711     /* if 1, this mode will be evaluated otherwise not.*/
712     UWORD8 b1_eval_mark : 1;
713 
714 } cu_inter_cand_t;
715 
716 /**
717 ******************************************************************************
718  *  @brief      cu intra candidate for encoder
719 ******************************************************************************
720  */
721 typedef struct
722 {
723     UWORD8 au1_intra_luma_mode_nxn_hash[NUM_PU_PARTS][MAX_INTRA_CANDIDATES];
724 
725     /**
726      *  List of NxN PU candidates in CU  for each partition
727      *  valid only of if current cusize = mincusize
728      * +1 to signal the last flag invalid value of 255 needs to be stored
729      */
730     UWORD8 au1_intra_luma_modes_nxn[NUM_PU_PARTS][(MAX_INTRA_CU_CANDIDATES * (4)) + 2 + 1];
731 
732     /* used to mark if this mode needs to be evaluated in auxiliary mode */
733     /* if 1, this mode will be evaluated otherwise not.*/
734     UWORD8 au1_nxn_eval_mark[NUM_PU_PARTS][MAX_INTRA_CU_CANDIDATES + 1];
735 
736     /**
737      *  List of 2Nx2N PU candidates in CU
738      * +1 to signal the last flag invalid value of 255 needs to be stored
739      */
740     UWORD8 au1_intra_luma_modes_2nx2n_tu_eq_cu[MAX_INTRA_CU_CANDIDATES + 1];
741 
742     /**
743      *  List of 2Nx2N PU candidates in CU
744      * +1 to signal the last flag invalid value of 255 needs to be stored
745      */
746     UWORD8 au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[MAX_INTRA_CU_CANDIDATES + 1];
747 
748     /* used to mark if this mode needs to be evaluated in auxiliary mode */
749     /* if 1, this mode will be evaluated otherwise not.*/
750     UWORD8 au1_2nx2n_tu_eq_cu_eval_mark[MAX_INTRA_CU_CANDIDATES + 1];
751 
752     /* used to mark if this mode needs to be evaluated in auxiliary mode */
753     /* if 1, this mode will be evaluated otherwise not.*/
754     UWORD8 au1_2nx2n_tu_eq_cu_by_2_eval_mark[MAX_INTRA_CU_CANDIDATES + 1];
755 
756     UWORD8 au1_num_modes_added[NUM_PU_PARTS];
757 
758     /** evaluate transform for cusize iff this flag is 1 */
759     /** this flag should be set 0 if CU is 64x64         */
760     UWORD8 b1_eval_tx_cusize : 1;
761 
762     /** evaluate transform for cusize/2 iff this flag is 1 */
763     UWORD8 b1_eval_tx_cusize_by2 : 1;
764 
765     /** number of intra candidates for SATD evaluation in */
766     UWORD8 b6_num_intra_cands : 6;
767 
768 } cu_intra_cand_t;
769 
770 /**
771 ******************************************************************************
772  *  @brief      cu structure for mode analysis/evaluation
773 ******************************************************************************
774  */
775 typedef struct
776 {
777     /** CU X position in terms of min CU (8x8) units */
778     UWORD8 b3_cu_pos_x : 3;
779 
780     /** CU Y position in terms of min CU (8x8) units */
781     UWORD8 b3_cu_pos_y : 3;
782 
783     /** reserved bytes */
784     UWORD8 b2_reserved : 2;
785 
786     /** CU size 2N (width or height) in pixels */
787     UWORD8 u1_cu_size;
788 
789     /** Intra CU candidates after FAST CU decision (output of IPE)
790      *  8421 algo along with transform size evalution will
791      *  be done for these modes in Encode loop pass.
792      */
793     cu_intra_cand_t s_cu_intra_cand;
794 
795     /** indicates the angular mode (0 - 34) for chroma,
796      *  Note : No provision currently to take chroma through RDOPT or SATD
797      */
798     UWORD8 u1_chroma_intra_pred_mode;
799 
800     /** number of inter candidates in as_cu_inter_cand[]
801       * shall be 0 for intra frames.
802       * These inters are evaluated for RDOPT apart from merge/skip candidates
803       */
804     UWORD8 u1_num_inter_cands;
805 
806     /** List of candidates to be evalauted (SATD/RDOPT) for this CU
807       * @remarks : all  merge/skip candidates not a part of this list
808       */
809     cu_inter_cand_t as_cu_inter_cand[MAX_INTER_CU_CANDIDATES];
810 
811     WORD32 ai4_mv_cost[MAX_INTER_CU_CANDIDATES][NUM_INTER_PU_PARTS];
812 
813 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
814     WORD32 ai4_err_metric[MAX_INTER_CU_CANDIDATES][NUM_INTER_PU_PARTS];
815 #endif
816 
817     /* Flag to convey if Inta or Inter is the best candidate among the
818     candidates populated
819      0: If inter is the winner and 1: if Intra is winner*/
820     UWORD8 u1_best_is_intra;
821 
822     /** number of intra rdopt candidates
823       * @remarks : shall be <= u1_num_intra_cands
824       */
825     UWORD8 u1_num_intra_rdopt_cands;
826     /** qp used during for CU
827       * @remarks :
828       */
829     WORD8 i1_cu_qp;
830     /** Activity factor used in pre enc thread for deriving the Qp
831       * @remarks : This is in Q format
832       */
833     WORD32 i4_act_factor[4][2];
834 
835 } cu_analyse_t;
836 
837 /**
838 ******************************************************************************
839  *  @brief      Structure for CU recursion
840 ******************************************************************************
841  */
842 typedef struct cur_ctb_cu_tree_t
843 {
844     /** CU X position in terms of min CU (8x8) units */
845     UWORD8 b3_cu_pos_x : 3;
846 
847     /** CU X position in terms of min CU (8x8) units */
848     UWORD8 b3_cu_pos_y : 3;
849 
850     /** reserved bytes */
851     UWORD8 b2_reserved : 2;
852 
853     UWORD8 u1_cu_size;
854 
855     UWORD8 u1_intra_eval_enable;
856 
857     UWORD8 u1_inter_eval_enable;
858 
859     /* Flag that indicates whether to evaluate this node */
860     /* during RDOPT evaluation. This does not mean that */
861     /* evaluation of the children need to be abandoned */
862     UWORD8 is_node_valid;
863 
864     LWORD64 i8_best_rdopt_cost;
865 
866     struct cur_ctb_cu_tree_t *ps_child_node_tl;
867 
868     struct cur_ctb_cu_tree_t *ps_child_node_tr;
869 
870     struct cur_ctb_cu_tree_t *ps_child_node_bl;
871 
872     struct cur_ctb_cu_tree_t *ps_child_node_br;
873 
874 } cur_ctb_cu_tree_t;
875 
876 typedef struct
877 {
878     WORD32 num_best_results;
879 
880     part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS];
881 
882 } block_data_32x32_t;
883 
884 /**
885 ******************************************************************************
886  *  @brief      Structure for storing data about all the 64x64
887  *              block in a 64x64 CTB
888 ******************************************************************************
889  */
890 typedef block_data_32x32_t block_data_64x64_t;
891 
892 /**
893 ******************************************************************************
894  *  @brief      Structure for storing data about all 16 16x16
895  *              blocks in a 64x64 CTB and each of their partitions
896 ******************************************************************************
897  */
898 typedef struct
899 {
900     WORD32 num_best_results;
901 
902     /**
903      * mask of active partitions, Totally 17 bits. For a given partition
904      * id, as per PART_ID_T enum the corresponding bit position is 1/0
905      * indicating that partition is active or inactive
906      */
907     /*WORD32 i4_part_mask;*/
908 
909     part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS];
910 
911 } block_data_16x16_t;
912 
913 typedef struct /* Result set for one 8x8 block; me_ctb_data_t holds an array of 64 of these */
914 {
915     WORD32 num_best_results; /* presumably the number of valid entries in as_best_results - TODO confirm */
916     /* Candidate results, at most NUM_BEST_ME_OUTPUTS of them */
917     part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS];
918 } block_data_8x8_t;
919 
920 /**
921 ******************************************************************************
922  *  @brief      Structure for data export from ME to Enc_Loop.
923  *              Holds one result set per block at each of four block sizes.
924 ******************************************************************************
925  */
926 typedef struct
927 {
928     block_data_8x8_t as_8x8_block_data[64]; /* 64 entries of 8x8 block data */
929 
930     block_data_16x16_t as_block_data[16]; /* 16 entries of 16x16 block data (name carries no size tag) */
931 
932     block_data_32x32_t as_32x32_block_data[4]; /* 4 entries of 32x32 block data */
933 
934     block_data_64x64_t s_64x64_block_data; /* single entry of 64x64 block data */
935 
936 } me_ctb_data_t;
936 
937 /**
938 ******************************************************************************
939  *  @brief   Noise detection related structure; embedded by value in
940  *           ctb_analyse_t as s_ctb_noise_params
941 ******************************************************************************
942  */
943 
944 typedef struct
945 {
946     WORD32 i4_noise_present; /* presumably non-zero when noise was detected - TODO confirm */
947     /* Per-8x8-block noisy flag (going by the name); MAX_CU_IN_CTB entries */
948     UWORD8 au1_is_8x8Blk_noisy[MAX_CU_IN_CTB];
949     /* NOTE(review): name says 16x16 but the array has MAX_CU_IN_CTB entries, same as the 8x8 array - confirm indexing */
950     UWORD32 au4_variance_src_16x16[MAX_CU_IN_CTB];
951 } ihevce_ctb_noise_params;
952 
953 /**
954 ******************************************************************************
955  *  @brief      ctb structure for mode analysis/evaluation
956 ******************************************************************************
957  */
958 typedef struct
959 {
960     /**
961      * CU decision in a ctb is frozen by ME/IPE and populated in
962      * u4_cu_split_flags (packed split flags).
963      * @remarks
964      * TODO:review comment
965      * bit0     :  64x64 split flag,  (depth0 flag for 64x64 ctb unused for smaller ctb)
966      * bits 1-3 :  not used
967      * bits 4-7 :  32x32 split flags; (depth1 flags for 64x64ctb / only bit4 used for 32x32ctb)
968      * bits 8-23:  16x16 split flags; (depth2 flags for 64x64 / depth1[bits8-11] for 32x32 [bit8 for ctb 16x16] )
969      *
970      * if the split flag at bit n (n in 4..7) is set for depth 1, check the following
971      * split flags of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2.
972      *
973      */
974     UWORD32 u4_cu_split_flags;
975     /* Number of CUs in this CTB (going by the name) */
976     UWORD8 u1_num_cus_in_ctb;
977     /* Pointer to a CU tree node; presumably the root for this CTB - TODO confirm */
978     cur_ctb_cu_tree_t *ps_cu_tree;
979     /* Pointer to the ME-to-Enc_Loop export data (see me_ctb_data_t) */
980     me_ctb_data_t *ps_me_ctb_data;
981     /* Noise detection parameters, stored by value */
982     ihevce_ctb_noise_params s_ctb_noise_params;
983 
984 } ctb_analyse_t;
985 /**
986 ******************************************************************************
987  *  @brief Pairs a 64-bit cost with a 32-bit index
988 ******************************************************************************
989  */
990 /* NOTE(review): the earlier description ("tapping ssd and bit-estimate information for all CUs") does not obviously match these two fields - confirm */
991 typedef struct
992 {
993     LWORD64 i8_cost; /* cost value */
994     WORD32 i4_idx; /* index associated with the cost; what it indexes is not visible here */
995 } cost_idx_t;
996 
997 /**
998 ******************************************************************************
999  *  @brief      reference/non reference pic context for encoder
1000 ******************************************************************************
1001  */
1002 typedef struct
1003 
1004 {
1005     /**
1006      * YUV buffer descriptor for the recon
1007      * Allocation per frame for Y = ((ALIGN(frame width, MAX_CTB_SIZE)) +  2 * PAD_HORZ)*
1008      *                              ((ALIGN(frame height, MAX_CTB_SIZE)) + 2 * PAD_VERT)
1009      */
1010     iv_enc_yuv_buf_t s_yuv_buf_desc;
1011     /* Second YUV buffer descriptor; presumably describes the source picture (going by the name) - TODO confirm */
1012     iv_enc_yuv_buf_src_t s_yuv_buf_desc_src;
1013 
1014     /* Pointers to Luma (Y) sub-pel plane buffers: Horz / Vert / HV grid      */
1015     /* When (L0ME_IN_OPENLOOP_MODE == 1), one additional buffer is required   */
1016     /* to store the fullpel plane for use as reference                        */
1017     UWORD8 *apu1_y_sub_pel_planes[3 + L0ME_IN_OPENLOOP_MODE];
1018 
1019     /**
1020      * frm level pointer to pu bank for colocated  mv access
1021      * Allocation per frame = (ALIGN(frame width, MAX_CTB_SIZE) / MIN_PU_SIZE) *
1022      *                         (ALIGN(frame height, MAX_CTB_SIZE) / MIN_PU_SIZE)
1023      */
1024     pu_col_mv_t *ps_frm_col_mv;
1025     /**
1026      ************************************************************************
1027      * Pointer to a PU map stored at frame level,
1028      * It contains a 7 bit pu index in encoder order w.r.t to a ctb at a min
1029      * granularity of MIN_PU_SIZE size.
1030      ************************************************************************
1031      */
1032     UWORD8 *pu1_frm_pu_map;
1033 
1034     /** CTB level frame buffer to store the accumulated sum of
1035      * number of PUs for every row */
1036     UWORD16 *pu2_num_pu_map;
1037 
1038     /** Offsets in the PU buffer at every CTB level */
1039     UWORD32 *pu4_pu_off;
1040 
1041     /**  Collocated POC for reference list 0
1042      * ToDo: Change the array size when multiple slices are to be supported */
1043     WORD32 ai4_col_l0_poc[HEVCE_MAX_REF_PICS];
1044 
1045     /** Collocated POC for reference list 1 */
1046     WORD32 ai4_col_l1_poc[HEVCE_MAX_REF_PICS];
1047 
1048     /** 0 = top field,  1 = bottom field  */
1049     WORD32 i4_bottom_field;
1050 
1051     /** top field first input in case of interlaced case */
1052     WORD32 i4_topfield_first;
1053 
1054     /** POC of this picture (going by the name); the earlier comment here was a copy of the i4_topfield_first one */
1055     WORD32 i4_poc;
1056 
1057     /** unique buffer id */
1058     WORD32 i4_buf_id;
1059 
1060     /** is this reference frame or not */
1061     WORD32 i4_is_reference;
1062 
1063     /** Picture type of current picture */
1064     WORD32 i4_pic_type;
1065 
1066     /** Flag to indicate whether current picture is free or in use */
1067     WORD32 i4_is_free;
1068 
1069     /** Bit0 - of this flag indicates whether current picture needs to be deblocked,
1070         padded and hpel planes need to be generated.
1071         These are turned off typically in non reference pictures when psnr
1072         and recon dump is disabled.
1073 
1074         Bit1 - of this flag set to 1 if sao is enabled. This is to enable deblocking when sao is enabled
1075      */
1076     WORD32 i4_deblk_pad_hpel_cur_pic;
1077 
1078     /**
1079      * weight and offset for this ref pic. To be initialized for every pic
1080      * based on the lap output
1081      */
1082     ihevce_wght_offst_t s_weight_offset;
1083 
1084     /**
1085      * Reciprocal of the luma weight in q15 format
1086      */
1087     WORD32 i4_inv_luma_wt;
1088 
1089     /**
1090      * Log to base 2 of the common denominator used for luma weights across all ref pics
1091      */
1092     WORD32 i4_log2_wt_denom;
1093 
1094     /**
1095      * Flag: this picture is used as a reference for encoding the current picture
1096      */
1097     WORD32 i4_used_by_cur_pic_flag;
1098 
1099 #if ADAPT_COLOCATED_FROM_L0_FLAG
1100     WORD32 i4_frame_qp;
1101 #endif
1102     /*
1103     * IDR GOP number
1104     */
1105 
1106     WORD32 i4_idr_gop_num;
1107 
1108     /*
1109     * non-ref-free_flag (semantics are not visible in this header)
1110     */
1111     WORD32 i4_non_ref_free_flag;
1112     /**
1113       * Dependency manager instance for ME - Prev recon dep
1114       */
1115     void *pv_dep_mngr_recon;
1116 
1117     /* display num */
1118     WORD32 i4_display_num;
1119 } recon_pic_buf_t;
1120 
1121 /**
1122 ******************************************************************************
1123  *  @brief  Lambda values used for various cost computations
1124 ******************************************************************************
1125  */
1126 typedef struct
1127 {
1128     /************************************************************************/
1129     /* The fields with the string 'type2' in their names are required */
1130     /* when both 8bit and hbd lambdas are needed. The lambdas corresponding */
1131     /* to the bit_depth != internal_bit_depth are stored in these fields */
1132     /************************************************************************/
1133 
1134     /**
1135      * Closed loop SSD Lambda
1136      * This is multiplied with bits for RD cost computations in SSD mode
1137      * This is represented in q format with shift of LAMBDA_Q_SHIFT
1138      */
1139     LWORD64 i8_cl_ssd_lambda_qf;
1140     /* 'type2' counterpart of i8_cl_ssd_lambda_qf (see the note at the top of the struct) */
1141     LWORD64 i8_cl_ssd_type2_lambda_qf;
1142 
1143     /**
1144      * Closed loop SSD Lambda for chroma residue (chroma qp is different from luma qp)
1145      * This is multiplied with bits for RD cost computations in SSD mode
1146      * This is represented in q format with shift of LAMBDA_Q_SHIFT
1147      */
1148     LWORD64 i8_cl_ssd_lambda_chroma_qf;
1149     /* 'type2' counterpart of i8_cl_ssd_lambda_chroma_qf */
1150     LWORD64 i8_cl_ssd_type2_lambda_chroma_qf;
1151 
1152     /**
1153      * Closed loop SAD Lambda
1154      * This is multiplied with bits for RD cost computations in SAD mode
1155      * This is represented in q format with shift of LAMBDA_Q_SHIFT
1156      */
1157     WORD32 i4_cl_sad_lambda_qf;
1158     /* 'type2' counterpart of i4_cl_sad_lambda_qf */
1159     WORD32 i4_cl_sad_type2_lambda_qf;
1160 
1161     /**
1162      * Open loop SAD Lambda
1163      * This is multiplied with bits for RD cost computations in SAD mode
1164      * This is represented in q format with shift of LAMBDA_Q_SHIFT
1165      */
1166     WORD32 i4_ol_sad_lambda_qf;
1167     /* 'type2' counterpart of i4_ol_sad_lambda_qf */
1168     WORD32 i4_ol_sad_type2_lambda_qf;
1169 
1170     /**
1171      * Closed loop SATD Lambda
1172      * This is multiplied with bits for RD cost computations in SATD mode
1173      * This is represented in q format with shift of LAMBDA_Q_SHIFT
1174      */
1175     WORD32 i4_cl_satd_lambda_qf;
1176     /* 'type2' counterpart of i4_cl_satd_lambda_qf */
1177     WORD32 i4_cl_satd_type2_lambda_qf;
1178 
1179     /**
1180      * Open loop SATD Lambda
1181      * This is multiplied with bits for RD cost computations in SATD mode
1182      * This is represented in q format with shift of LAMBDA_Q_SHIFT
1183      */
1184     WORD32 i4_ol_satd_lambda_qf;
1185     /* 'type2' counterpart of i4_ol_satd_lambda_qf */
1186     WORD32 i4_ol_satd_type2_lambda_qf;
1187     /* Floating-point lambda modifier; how it is applied is not visible in this header */
1188     double lambda_modifier;
1189     /* Floating-point lambda modifier for chroma (uv), going by the name */
1190     double lambda_uv_modifier;
1191     /* Weighting factor for chroma cost (going by the name); its scale/q-format is not visible here */
1192     UWORD32 u4_chroma_cost_weighing_factor;
1193 
1194 } frm_lambda_ctxt_t;
1195 /**
1196 ******************************************************************************
1197 *  @brief  Mode attributes for 4x4 block populated by early decision
1198 ******************************************************************************
1199  */
1200 typedef struct
1201 {
1202     /** Whether a best mode is present or not */
1203     UWORD8 mode_present;
1204 
1205     /** Best mode for the current 4x4 prediction block */
1206     UWORD8 best_mode;
1207 
1208     /** SAD for the best mode for the current 4x4 prediction block */
1209     UWORD16 sad;
1210 
1211     /** Cost for the best mode for the current 4x4 prediction block */
1212     UWORD16 sad_cost;
1213 
1214 } ihevce_ed_mode_attr_t;  //early decision
1215 
1216 /**
1217 ******************************************************************************
1218  *  @brief  Structure at 4x4 block level which has parameters about early
1219  *          intra or inter decision
1220 ******************************************************************************
1221  */
1222 typedef struct
1223 {
1224     /**
1225      * Final parameter of Intra-Inter early decision for the current 4x4.
1226      * 0 - invalid decision
1227      * 1 - eval intra only
1228      * 2 - eval inter only
1229      * 3 - eval both intra and inter
1230      */
1231     UWORD8 intra_or_inter;
1232     /* presumably non-zero when a merge succeeded for this block - TODO confirm */
1233     UWORD8 merge_success;
1234 
1235     /** Best mode for the current 4x4 prediction block */
1236     UWORD8 best_mode;
1237 
1238     /** Best merge mode for the current 4x4 prediction block (going by the name) */
1239     UWORD8 best_merge_mode;
1240 
1241     /** Store SATD at 4*4 level for current layer (L1) */
1242     WORD32 i4_4x4_satd;
1243 
1244 } ihevce_ed_blk_t;  //early decision
1245 
1246 /* L1 IPE CTB analyze structure */
1247 /* Contains cu level qp mod related information for all possible cu
1248 sizes (16,32,64 in L0) in a CTB*/
1249 typedef struct
1250 {
1251     WORD32 i4_sum_4x4_satd[16]; /* 16 entries; presumably sum of L1_4x4 SATD per L1_8x8 - TODO confirm */
1252     WORD32 i4_min_4x4_satd[16]; /* 16 entries; presumably min of L1_4x4 SATD per L1_8x8 - TODO confirm */
1253 
1254     /* satd for L1_8x8 blocks in L1_32x32
1255      * [16] : num L1_8x8 in L1_32x32
1256      * [2]  : 0 - sum of L1_4x4 @ L1_8x8
1257      *          - equivalent to transform size of 16x16 @ L0
1258      *        1 - min/median of L1_4x4 @ L1_8x8
1259      *          - equivalent to transform size of 8x8 @ L0
1260      */
1261     WORD32 i4_8x8_satd[16][2];
1262 
1263     /* satd for L1_16x16 blocks in L1_32x32
1264      * [4] : num L1_16x16 in L1_32x32
1265      * [3] : 0 - sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16
1266      *         - equivalent to transform size of 32x32 @ L0
1267      *       1 - min/median of (sum of L1_4x4 @ L1_8x8) @ L1_16x16
1268      *         - equivalent to transform size of 16x16 @ L0
1269      *       2 - min/median of (min/median of L1_4x4 @ L1_8x8) @ L1_16x16
1270      *         - equivalent to transform size of 8x8 @ L0
1271      */
1272     WORD32 i4_16x16_satd[4][3];
1273 
1274     /* Please note that i4_32x32_satd[0][3] contains sum of all 32x32 */
1275     /* satd for L1_32x32 blocks in L1_32x32
1276      * [1] : num L1_32x32 in L1_32x32
1277      * [4] : 0 - min/median of (sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16) @ L1_32x32
1278      *         - equivalent to transform size of 32x32 @ L0
1279      *       1 - min/median of (sum of L1_4x4 @ L1_8x8) @ L1_32x32
1280      *         - equivalent to transform size of 16x16 @ L0
1281      *       2 - min/median of (min/median of L1_4x4 @ L1_8x8) @ L1_32x32
1282      *         - equivalent to transform size of 8x8 @ L0
1283      *       3 - sum of (sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16) @ L1_32x32
1284      */
1285     WORD32 i4_32x32_satd[1][4];
1286 
1287     /* Store SATD at 8x8 level for current layer (L1) */
1288     WORD32 i4_best_satd_8x8[16];
1289 
1290     /* EIID: The fields below will be used for early inter intra decisions */
1291     /* (the _ipe / _me suffixes presumably name the producing stage) */
1292     /* Cost based on SAD at 8x8 level for current layer (L1) */
1293     WORD32 i4_best_sad_cost_8x8_l1_ipe[16];
1294     /* SAD at 8x8 level for current layer (L1) */
1295     WORD32 i4_best_sad_8x8_l1_ipe[16];
1296     /* SAD cost at 8x8 level for ME. All other cost are IPE cost */
1297     WORD32 i4_best_sad_cost_8x8_l1_me[16];
1298 
1299     /* SAD cost at 8x8 level for ME, for a given reference */
1300     WORD32 i4_sad_cost_me_for_ref[16];
1301 
1302     /* SAD at 8x8 level for ME, for a given reference */
1303     WORD32 i4_sad_me_for_ref[16];
1304 
1305     /* SAD at 8x8 level for ME. All other cost are IPE cost */
1306     WORD32 i4_best_sad_8x8_l1_me[16];
1307     /* NOTE(review): purpose relative to i4_best_sad_8x8_l1_me is not visible in this header */
1308     WORD32 i4_best_sad_8x8_l1_me_for_decide[16];
1309 
1310     /* Mean @ L0 16x16 */
1311     WORD32 ai4_16x16_mean[16];
1312 
1313     /* Mean @ L0 32x32 */
1314     WORD32 ai4_32x32_mean[4];
1315 
1316     /* Mean @ L0 64x64 */
1317     WORD32 i4_64x64_mean;
1318 
1319 } ihevce_ed_ctb_l1_t;  //early decision
1320 
1321 /**
1322 ******************************************************************************
1323  *  @brief   8x8 Intra analyze structure
1324 ******************************************************************************
1325  */
1326 typedef struct
1327 {
1328     /** Best intra modes for 8x8 transform.
1329      *  A value of 255 is inserted at the end to terminate the list of modes
1330      */
1331     UWORD8 au1_best_modes_8x8_tu[MAX_INTRA_CU_CANDIDATES + 1];
1332 
1333     /** Best 8x8 intra modes for 4x4 transform.
1334      *  A value of 255 is inserted at the end to terminate the list of modes
1335      */
1336     UWORD8 au1_best_modes_4x4_tu[MAX_INTRA_CU_CANDIDATES + 1];
1337 
1338     /** Best 4x4 intra modes, one list per 4x4 block (4 lists).
1339      *  A value of 255 is inserted at the end to terminate each list of modes
1340      */
1341     UWORD8 au1_4x4_best_modes[4][MAX_INTRA_CU_CANDIDATES + 1];
1342 
1343     /** flag to indicate if nxn pu mode (different pu at 4x4 level) is enabled */
1344     UWORD8 b1_enable_nxn : 1;
1345 
1346     /** valid cu flag : required for incomplete ctbs at frame boundaries */
1347     UWORD8 b1_valid_cu : 1;
1348 
1349     /** dummy bits (6 bits; together with the two flags above this makes 8 bits) */
1350     UWORD8 b6_reserved : 6;
1351 
1352 } intra8_analyse_t;
1353 
1354 /**
1355 ******************************************************************************
1356  *  @brief   16x16 Intra analyze structure
1357 ******************************************************************************
1358  */
1359 typedef struct
1360 {
1361     /** Best intra modes for 16x16 transform.
1362      *  A value of 255 is inserted at the end to terminate the list of modes
1363      */
1364     UWORD8 au1_best_modes_16x16_tu[MAX_INTRA_CU_CANDIDATES + 1];
1365 
1366     /** Best 16x16 intra modes for 8x8 transform.
1367      *  A value of 255 is inserted at the end to terminate the list of modes
1368      */
1369     UWORD8 au1_best_modes_8x8_tu[MAX_INTRA_CU_CANDIDATES + 1];
1370 
1371     /** 8x8 children intra analyze for this 16x16 */
1372     intra8_analyse_t as_intra8_analyse[4];
1373 
1374     /* indicates if 16x16 is best cu or 8x8 cu */
1375     UWORD8 b1_split_flag : 1;
1376 
1377     /* indicates if 8x8 vs 16x16 rdo evaluation needed */
1378     /* or only 8x8's rdo evaluation needed */
1379     UWORD8 b1_merge_flag : 1;
1380 
1381     /**
1382      * valid cu flag : required for incomplete ctbs at frame boundaries
1383      * or if CTB size is lower than 32
1384      */
1385     UWORD8 b1_valid_cu : 1;
1386 
1387     /** dummy bits. NOTE: the field is 5 bits wide although its name says b6 */
1388     UWORD8 b6_reserved : 5;
1389 
1390 } intra16_analyse_t;
1391 
1392 /**
1393 ******************************************************************************
1394  *  @brief   32x32 Intra analyze structure
1395 ******************************************************************************
1396  */
1397 typedef struct
1398 {
1399     /** Best intra modes for 32x32 transform.
1400      *  A value of 255 is inserted at the end to terminate the list of modes
1401      */
1402     UWORD8 au1_best_modes_32x32_tu[MAX_INTRA_CU_CANDIDATES + 1];
1403 
1404     /** Best 32x32 intra modes for 16x16 transform.
1405      *  A value of 255 is inserted at the end to terminate the list of modes
1406      */
1407     UWORD8 au1_best_modes_16x16_tu[MAX_INTRA_CU_CANDIDATES + 1];
1408 
1409     /** 16x16 children intra analyze for this 32x32 */
1410     intra16_analyse_t as_intra16_analyse[4];
1411 
1412     /* indicates if 32x32 is best cu or 16x16 cu    */
1413     UWORD8 b1_split_flag : 1;
1414 
1415     /* indicates if 32x32 vs 16x16 rdo evaluation needed */
1416     /* or 16x16 vs 8x8 evaluation is needed */
1417     UWORD8 b1_merge_flag : 1;
1418 
1419     /**
1420      * valid cu flag : required for incomplete ctbs at frame boundaries
1421      * or if CTB size is lower than 64
1422      */
1423     UWORD8 b1_valid_cu : 1;
1424 
1425     /** dummy bits. NOTE: the field is 5 bits wide although its name says b6 */
1426     UWORD8 b6_reserved : 5;
1427 
1428 } intra32_analyse_t;
1429 
1430 /**
1431 ******************************************************************************
1432  *  @brief  IPE L0 analyze structure for L0 ME to do intra/inter CU decisions
1433  *          This is a CTB level structure encapsulating IPE modes, cost at all
1434  *          level. IPE also recommends max intra CU sizes which is required
1435  *          by ME for CU size determination in intra dominant CTB
1436 ******************************************************************************
1437  */
1438 typedef struct
1439 {
1440     /** Best 64x64 intra modes for 32x32 transform.
1441      *  A value of 255 is inserted at the end to terminate the list of modes
1442      */
1443     UWORD8 au1_best_modes_32x32_tu[MAX_INTRA_CU_CANDIDATES + 1];
1444 
1445     /** 32x32 children intra analyze for this 64x64    */
1446     intra32_analyse_t as_intra32_analyse[4];
1447 
1448     /* indicates if 64x64 is best CUs or 32x32 CUs      */
1449     UWORD8 u1_split_flag;
1450 
1451     /* CTB level best 8x8 intra costs  */
1452     WORD32 ai4_best8x8_intra_cost[MAX_CU_IN_CTB];
1453 
1454     /* CTB level best 16x16 intra costs */
1455     WORD32 ai4_best16x16_intra_cost[MAX_CU_IN_CTB >> 2];
1456 
1457     /* CTB level best 32x32 intra costs */
1458     WORD32 ai4_best32x32_intra_cost[MAX_CU_IN_CTB >> 4];
1459 
1460     /* best 64x64 intra cost */
1461     WORD32 i4_best64x64_intra_cost;
1462 
1463     /*
1464     @L0 level
1465     4 => 0 - 32x32 TU in 64x64 CU
1466          1 - 16x16 TU in 64x64 CU
1467          2 - 8x8  TU in 64x64 CU
1468          3 - 64x64 CU
1469     2 => Intra/Inter */
1470     WORD32 i4_64x64_act_factor[4][2];
1471 
1472     /*
1473     @L0 level
1474     4 => num 32x32 in CTB
1475     3 => 0 - 32x32 TU in 64x64 CU
1476          1 - 16x16 TU in 64x64 CU
1477          2 - 8x8  TU in 64x64 CU
1478     2 => Intra/Inter */
1479     WORD32 i4_32x32_act_factor[4][3][2];
1480 
1481     /*
1482     @L0 level
1483     16 => num 16x16 in CTB
1484     2 => 0 - 16x16 TU in 64x64 CU
1485          1 - 8x8  TU in 64x64 CU
1486     2 => Intra/Inter */
1487     WORD32 i4_16x16_act_factor[16][2][2];
1488     /* Count of nodes created in the CU tree (going by the name) */
1489     WORD32 nodes_created_in_cu_tree;
1490     /* Pointer to the root node of the CU tree (going by the name) */
1491     cur_ctb_cu_tree_t *ps_cu_tree_root;
1492     /* 8x8 activity factors (16 entries), followed by per-8x8 L1 SAD and SAD-cost arrays for ME and IPE */
1493     WORD32 ai4_8x8_act_factor[16];
1494     WORD32 ai4_best_sad_8x8_l1_me[MAX_CU_IN_CTB];
1495     WORD32 ai4_best_sad_8x8_l1_ipe[MAX_CU_IN_CTB];
1496     WORD32 ai4_best_sad_cost_8x8_l1_me[MAX_CU_IN_CTB];
1497     WORD32 ai4_best_sad_cost_8x8_l1_ipe[MAX_CU_IN_CTB];
1498 
1499     /* Ctb level accumulated satd */
1500     WORD32 i4_ctb_acc_satd;
1501 
1502     /* Ctb level accumulated mpm bits */
1503     WORD32 i4_ctb_acc_mpm_bits;
1504 
1505 } ipe_l0_ctb_analyse_for_me_t;
1506 
1507 typedef struct /* One motion vector as a pair of 16-bit components; pre_enc_me_ctxt_t keeps one per reference (s_global_mv[MAX_NUM_REF]) */
1508 {
1509     WORD16 i2_mv_x; /* x component; units/precision are not visible in this header */
1510     WORD16 i2_mv_y; /* y component; units/precision are not visible in this header */
1511 } global_mv_t;
1512 
1513 /**
1514 ******************************************************************************
1515  *  @brief  Pre Encode pass and ME pass shared variables and buffers
1516 ******************************************************************************
1517  */
1518 typedef struct
1519 {
1520     /**
1521      * Buffer id
1522      */
1523     WORD32 i4_buf_id;
1524 
1525     /**
1526     * Flag will be set to 1 by frame processing thread after receiving flush
1527     * command from application
1528     */
1529     WORD32 i4_end_flag;
1530 
1531     /** frame level ctb analyse  buffer pointer */
1532     ctb_analyse_t *ps_ctb_analyse;
1533 
1534     /** frame level cu analyse  buffer pointer for IPE (member is commented out) */
1535     //cu_analyse_t       *ps_cu_analyse;
1536 
1537     /** current input pointer */
1538     ihevce_lap_enc_buf_t *ps_curr_inp;
1539 
1540     /** current inp buffer id */
1541     WORD32 curr_inp_buf_id;
1542 
1543     /** Slice header parameters   */
1544     slice_header_t s_slice_hdr;
1545 
1546     /** sps parameters activated by current slice  */
1547     sps_t *ps_sps;
1548 
1549     /** pps parameters activated by current slice  */
1550     pps_t *ps_pps;
1551 
1552     /** vps parameters activated by current slice  */
1553     vps_t *ps_vps;
1554     /**  Pointer to Penultimate Layer context memory internally has MV bank buff and related params */
1555     void *pv_me_lyr_ctxt;
1556 
1557     /**  Pointer to Penultimate Layer MV bank context memory (earlier text said "NV bank", presumably a typo) */
1558     void *pv_me_lyr_bnk_ctxt;
1559 
1560     /**  Pointer to Penultimate Layer MV bank buff */
1561     void *pv_me_mv_bank;
1562 
1563     /**  Pointer to Penultimate Layer reference idx buffer */
1564     void *pv_me_ref_idx;
1565     /**
1566      * Array to store 8x8 cost (partial 8x8 sad + level adjusted cost)
1567      * The order of storing is raster scan order within CTB and
1568      * CTB order is raster scan within frame.
1569      */
1570     double *plf_intra_8x8_cost;
1571 
1572     /**
1573      * L0 layer ctb analyse frame level buffer.
1574      * IPE will populate the cost and best modes at all levels in this buffer
1575      *  for every CTB in a frame
1576      */
1577     // moved to shorter buffer queue
1578     //ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb;
1579 
1580     /** Layer L1 buffer pointer */
1581     ihevce_ed_blk_t *ps_layer1_buf;
1582 
1583     /** Layer L2 buffer pointer */
1584     ihevce_ed_blk_t *ps_layer2_buf;
1585 
1586     /* ME reverse map info */
1587     UWORD8 *pu1_me_reverse_map_info;
1588 
1589     /** Buffer pointer for CTB level information in pre intra pass */
1590     ihevce_ed_ctb_l1_t *ps_ed_ctb_l1;
1591 
1592 #ifndef DISABLE_SEI
1593     /** SEI parameters (going by the type name); the earlier comment here was a copy of the vps one */
1594     sei_params_t s_sei;
1595 #endif
1596 
1597     /** nal_type for the slice to be encoded  */
1598     WORD32 i4_slice_nal_type;
1599 
1600     /** input time stamp in terms of ticks: lower 32  */
1601     WORD32 i4_inp_timestamp_low;
1602 
1603     /** input time stamp in terms of ticks: higher 32 */
1604     WORD32 i4_inp_timestamp_high;
1605 
1606     /** input frame ctxt of app to be returned in output buffer */
1607     void *pv_app_frm_ctxt;
1608 
1609     /** current frm valid flag :
1610      * will be 1 if valid input was processed by frame proc thrd
1611      */
1612     WORD32 i4_frm_proc_valid_flag;
1613 
1614     /**
1615      * Qp to be used for current frame
1616      */
1617     WORD32 i4_curr_frm_qp;
1618 
1619     /**
1620      * Frame level Lambda parameters, one entry per bitrate instance
1621      */
1622     frm_lambda_ctxt_t as_lambda_prms[IHEVCE_MAX_NUM_BITRATES];
1623 
1624     /** Frame-level SATD cost accumulator */
1625     LWORD64 i8_frame_acc_satd_cost;
1626 
1627     /** Frame - L1 coarse me cost accumulated */
1628     LWORD64 i8_acc_frame_coarse_me_cost;
1629     /** Frame - L1 coarse me cost accumulated, per reference (member is commented out) */
1630     //LWORD64 i8_acc_frame_coarse_me_cost_for_ref;
1631 
1632     /** Frame - L1 coarse me sad accumulated */
1633     LWORD64 i8_acc_frame_coarse_me_sad;
1634 
1635     /* Average activity of 4x4 blocks from previous frame
1636     *  If L1, maps to 8*8 in L0
1637     */
1638     WORD32 i4_curr_frame_4x4_avg_act;
1639     /* Two modulation factors derived from variance (going by the name); what each index means is not visible here */
1640     WORD32 ai4_mod_factor_derived_by_variance[2];
1641     /* Floating-point strength value; its use is not visible in this header */
1642     float f_strength;
1643 
1644     /* Average activity of 8x8 blocks from previous frame
1645     *  If L1, maps to 16*16 in L0
1646     */
1647 
1648     long double ld_curr_frame_8x8_log_avg[2];
1649 
1650     LWORD64 i8_curr_frame_8x8_avg_act[2];
1651 
1652     LWORD64 i8_curr_frame_8x8_sum_act[2];
1653 
1654     WORD32 i4_curr_frame_8x8_sum_act_for_strength[2];
1655 
1656     ULWORD64 u8_curr_frame_8x8_sum_act_sqr;
1657 
1658     WORD32 i4_curr_frame_8x8_num_blks[2];
1659     /* Accumulated ("acc") counterparts of the 8x8 current-frame statistics above */
1660     LWORD64 i8_acc_frame_8x8_sum_act[2];
1661     LWORD64 i8_acc_frame_8x8_sum_act_sqr;
1662     WORD32 i4_acc_frame_8x8_num_blks[2];
1663     LWORD64 i8_acc_frame_8x8_sum_act_for_strength;
1664     LWORD64 i8_curr_frame_8x8_sum_act_for_strength; /* NOTE: distinct from the WORD32 array i4_curr_frame_8x8_sum_act_for_strength above */
1665 
1666     /* Average activity of 16x16 blocks from previous frame
1667     *  If L1, maps to 32*32 in L0
1668     */
1669 
1670     long double ld_curr_frame_16x16_log_avg[3];
1671 
1672     LWORD64 i8_curr_frame_16x16_avg_act[3];
1673 
1674     LWORD64 i8_curr_frame_16x16_sum_act[3];
1675 
1676     WORD32 i4_curr_frame_16x16_num_blks[3];
1677     /* Accumulated ("acc") counterparts of the 16x16 current-frame statistics above */
1678     LWORD64 i8_acc_frame_16x16_sum_act[3];
1679     WORD32 i4_acc_frame_16x16_num_blks[3];
1680 
1681     /* Average activity of 32x32 blocks from previous frame
1682     *  If L1, maps to 64*64 in L0
1683     */
1684 
1685     long double ld_curr_frame_32x32_log_avg[3];
1686 
1687     LWORD64 i8_curr_frame_32x32_avg_act[3];
1688     /* Global motion vector per reference; sits between the 32x32 activity members but is unrelated to them by name */
1689     global_mv_t s_global_mv[MAX_NUM_REF];
1690     LWORD64 i8_curr_frame_32x32_sum_act[3];
1691 
1692     WORD32 i4_curr_frame_32x32_num_blks[3];
1693     /* Accumulated ("acc") counterparts of the 32x32 current-frame statistics above */
1694     LWORD64 i8_acc_frame_32x32_sum_act[3];
1695     WORD32 i4_acc_frame_32x32_num_blks[3];
1696     /* Accumulated number of blocks with high SAD (going by the name); the threshold is not visible here */
1697     LWORD64 i8_acc_num_blks_high_sad;
1698     /* Total number of blocks (going by the name) */
1699     LWORD64 i8_total_blks;
1700     /* Complexity percentage; presumably derived from the two counters above - TODO confirm */
1701     WORD32 i4_complexity_percentage;
1702     /* Flag-like value marking a high complexity region (going by the name) */
1703     WORD32 i4_is_high_complex_region;
1704     /* Average noise threshold at 4x4 level (going by the name) */
1705     WORD32 i4_avg_noise_thrshld_4x4;
1706     /* Frame-level mean statistics: sum, number of blocks, and average */
1707     LWORD64 i8_curr_frame_mean_sum;
1708     WORD32 i4_curr_frame_mean_num_blks;
1709     LWORD64 i8_curr_frame_avg_mean_act;
1710 
1711 } pre_enc_me_ctxt_t;
1712 
1713 /**
1714 ******************************************************************************
1715  *  @brief  Buffers passed from L0 IPE to ME and enc loop
1716 ******************************************************************************
1717  */
1718 typedef struct
1719 {
1720     WORD32 i4_size; /* size value; what it measures (bytes vs. entries) is not visible here - TODO confirm */
1721     /* Pointer to the IPE L0 CTB analyse data (see ipe_l0_ctb_analyse_for_me_t) */
1722     ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb;
1723 } pre_enc_L0_ipe_encloop_ctxt_t;
1724 /**
1725 ******************************************************************************
1726  *  @brief  Picture level accumulated statistics (CU/PU/TU counts, QP, bits, SAD, cost)
1727 ******************************************************************************
1728  */
1729 /* NOTE(review): the earlier brief ("Frame process and Entropy coding pass shared variables and buffers") did not describe these members */
1730 typedef struct
1731 {
1732     /* PIC level Info. NOTE: many members carry an i8_ prefix but are declared unsigned (ULWORD64) */
1733     ULWORD64 i8_total_cu;
1734     ULWORD64 i8_total_cu_min_8x8;
1735     ULWORD64 i8_total_pu;
1736     ULWORD64 i8_total_intra_cu;
1737     ULWORD64 i8_total_inter_cu;
1738     ULWORD64 i8_total_skip_cu;
1739     ULWORD64 i8_total_cu_based_on_size[4];
1740     /* PU counts by prediction kind */
1741     ULWORD64 i8_total_intra_pu;
1742     ULWORD64 i8_total_merge_pu;
1743     ULWORD64 i8_total_non_skipped_inter_pu;
1744     /* PU counts by partition type; what each array index means is not visible in this header */
1745     ULWORD64 i8_total_2nx2n_intra_pu[4];
1746     ULWORD64 i8_total_nxn_intra_pu;
1747     ULWORD64 i8_total_2nx2n_inter_pu[4];
1748     ULWORD64 i8_total_smp_inter_pu[4];
1749     ULWORD64 i8_total_amp_inter_pu[3];
1750     ULWORD64 i8_total_nxn_inter_pu[3];
1751     /* Counts by prediction direction (L0 / L1 / BI) */
1752     ULWORD64 i8_total_L0_mode;
1753     ULWORD64 i8_total_L1_mode;
1754     ULWORD64 i8_total_BI_mode;
1755     /* Counts per reference index for each list */
1756     ULWORD64 i8_total_L0_ref_idx[MAX_DPB_SIZE];
1757     ULWORD64 i8_total_L1_ref_idx[MAX_DPB_SIZE];
1758     /* TU counts */
1759     ULWORD64 i8_total_tu;
1760     ULWORD64 i8_total_non_coded_tu;
1761     ULWORD64 i8_total_inter_coded_tu;
1762     ULWORD64 i8_total_intra_coded_tu;
1763     /* TU counts by TU size and by enclosing CU size; index meaning is not visible in this header */
1764     ULWORD64 i8_total_tu_based_on_size[4];
1765     ULWORD64 i8_total_tu_cu64[4];
1766     ULWORD64 i8_total_tu_cu32[4];
1767     ULWORD64 i8_total_tu_cu16[3];
1768     ULWORD64 i8_total_tu_cu8[2];
1769     /* QP statistics, followed by buffer underflow/overflow statistics */
1770     LWORD64 i8_total_qp;
1771     LWORD64 i8_total_qp_min_cu;
1772     WORD32 i4_min_qp;
1773     WORD32 i4_max_qp;
1774     LWORD64 i8_sum_squared_frame_qp;
1775     LWORD64 i8_total_frame_qp;
1776     WORD32 i4_max_frame_qp;
1777     float f_total_buffer_underflow;
1778     float f_total_buffer_overflow;
1779     float f_max_buffer_underflow;
1780     float f_max_buffer_overflow;
1781     /* Active reference counts per list. NOTE: i1_ prefix but declared UWORD8 */
1782     UWORD8 i1_num_ref_idx_l0_active;
1783     UWORD8 i1_num_ref_idx_l1_active;
1784     /* Reference POCs per list */
1785     WORD32 i4_ref_poc_l0[MAX_DPB_SIZE];
1786     WORD32 i4_ref_poc_l1[MAX_DPB_SIZE];
1787     /* Per-list entries, luma weights and luma offsets. NOTE: the weights carry an i2_ prefix but are declared DOUBLE */
1788     WORD8 i1_list_entry_l0[MAX_DPB_SIZE];
1789     DOUBLE i2_luma_weight_l0[MAX_DPB_SIZE];
1790     WORD16 i2_luma_offset_l0[MAX_DPB_SIZE];
1791     WORD8 i1_list_entry_l1[MAX_DPB_SIZE];
1792     DOUBLE i2_luma_weight_l1[MAX_DPB_SIZE];
1793     WORD16 i2_luma_offset_l1[MAX_DPB_SIZE];
1794     /* Estimated bits, broken down by syntax category */
1795     ULWORD64 u8_bits_estimated_intra;
1796     ULWORD64 u8_bits_estimated_inter;
1797     ULWORD64 u8_bits_estimated_slice_header;
1798     ULWORD64 u8_bits_estimated_sao;
1799     ULWORD64 u8_bits_estimated_split_cu_flag;
1800     ULWORD64 u8_bits_estimated_cu_hdr_bits;
1801     ULWORD64 u8_bits_estimated_split_tu_flag;
1802     ULWORD64 u8_bits_estimated_qp_delta_bits;
1803     ULWORD64 u8_bits_estimated_cbf_luma_bits;
1804     ULWORD64 u8_bits_estimated_cbf_chroma_bits;
1805 
1806     ULWORD64 u8_bits_estimated_res_luma_bits;
1807     ULWORD64 u8_bits_estimated_res_chroma_bits;
1808 
1809     ULWORD64 u8_bits_estimated_ref_id;
1810     ULWORD64 u8_bits_estimated_mvd;
1811     ULWORD64 u8_bits_estimated_merge_flag;
1812     ULWORD64 u8_bits_estimated_mpm_luma;
1813     ULWORD64 u8_bits_estimated_mpm_chroma;
1814     /* Generated bit totals */
1815     ULWORD64 u8_total_bits_generated;
1816     ULWORD64 u8_total_bits_vbv;
1817     /* Generated bit totals per picture type (I / P / B) */
1818     ULWORD64 u8_total_I_bits_generated;
1819     ULWORD64 u8_total_P_bits_generated;
1820     ULWORD64 u8_total_B_bits_generated;
1821     /* Frame SAD: overall, intra and inter (32-bit) */
1822     UWORD32 u4_frame_sad;
1823     UWORD32 u4_frame_intra_sad;
1824     UWORD32 u4_frame_inter_sad;
1825     /* Frame cost: overall, intra and inter. NOTE: i8_ prefix but declared ULWORD64 */
1826     ULWORD64 i8_frame_cost;
1827     ULWORD64 i8_frame_intra_cost;
1828     ULWORD64 i8_frame_inter_cost;
1829 } s_pic_level_acc_info_t;
1830 
1831 #ifndef DISABLE_SEI
1832 typedef struct /* Picture level SEI related values; compiled only when DISABLE_SEI is not defined */
1833 {
1834     UWORD32 u4_target_bit_rate_sei_entropy; /* target bit rate (going by the name); units are not visible here */
1835     UWORD32 u4_buffer_size_sei_entropy; /* buffer size (going by the name); units are not visible here */
1836     UWORD32 u4_dbf_entropy; /* NOTE(review): meaning of "dbf" is not visible in this header - confirm */
1837 
1838 } s_pic_level_sei_info_t;
1839 #endif
1840 /**
1841 ******************************************************************************
1842 *  @brief  ME pass and Main encode pass shared variables and buffers
1843 ******************************************************************************
1844 */
1845 typedef struct
1846 {
1847     /**
1848     * Buffer id
1849     */
1850     WORD32 i4_buf_id;
1851 
1852     /**
1853     * Flag will be set to 1 by frame processing thread after receiving flush
1854     * command from application
1855     */
1856     WORD32 i4_end_flag;
1857 
1858     /** current input pointer */
1859     ihevce_lap_enc_buf_t *ps_curr_inp;
1860 
1861     /** current input buffer id */
1862     WORD32 curr_inp_buf_id;
1863 
1864     /** current input buffers from ME */
1865     pre_enc_me_ctxt_t *ps_curr_inp_from_me_prms;
1866 
1867     /** current input buffer id from ME */
1868     WORD32 curr_inp_from_me_buf_id;
1869 
1870     /** current input buffers from L0 IPE */
1871     pre_enc_L0_ipe_encloop_ctxt_t *ps_curr_inp_from_l0_ipe_prms;
1872 
1873     /** current input buffer id from L0 IPE */
1874     WORD32 curr_inp_from_l0_ipe_buf_id;
1875 
1876     /** Slice header parameters   */
1877     slice_header_t s_slice_hdr;
1878 
1879     /** current frame valid flag :
1880      * will be 1 if valid input was processed by the frame processing thread
1881      */
1882     WORD32 i4_frm_proc_valid_flag;
1883 
1884     /**
1885      * Array of reference picture list for ping instance
1886      * Indexed as [bitrate instance][2 => ref_pic_list0 and ref_pic_list1][entry]
1887      */
1888     recon_pic_buf_t as_ref_list[IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2];
1889 
1890     /**
1891      * Array of pointers to reference picture list entries
1892      * Indexed as [bitrate instance][2 => ref_pic_list0 and ref_pic_list1][entry]
1893      */
1894     recon_pic_buf_t *aps_ref_list[IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2];
1895 
1896     /**  Job Queue Memory encode */
1897     job_queue_t *ps_job_q_enc;
1898 
1899     /** Array of Job Queue handles of enc group for ping and pong instance */
1900     job_queue_handle_t as_job_que_enc_hdls[NUM_ENC_JOBS_QUES];
1901 
1902     /** Array of Job Queue handles of enc group for re-encode */
1903     job_queue_handle_t as_job_que_enc_hdls_reenc[NUM_ENC_JOBS_QUES];
1904     /** frame level me_ctb_data_t buffer pointer
1905       */
1906     me_ctb_data_t *ps_cur_ctb_me_data;
1907 
1908     /** frame level cur_ctb_cu_tree_t buffer pointer for ME
1909       */
1910     cur_ctb_cu_tree_t *ps_cur_ctb_cu_tree;
1911 
1912     /** Pointer to Dep. Mngr for CTBs processed in every row of a frame.
1913      * ME is the producer, EncLoop is the consumer
1914      */
1915     void *pv_dep_mngr_encloop_dep_me;
1916 
1917 } me_enc_rdopt_ctxt_t;
1918 
1919 #ifndef DISABLE_SEI
/**
******************************************************************************
*  @brief  One SEI payload: its type, its length and a pointer to its data;
*          this type exists only when DISABLE_SEI is not defined
*  NOTE(review): the length is presumably in bytes and the payload memory is
*  presumably owned by whoever fills this structure - confirm with the users
******************************************************************************
*/
1920 typedef struct
1921 {
1922     UWORD32 u4_payload_type;
1923     UWORD32 u4_payload_length;
1924     UWORD8 *pu1_sei_payload;
1925 } sei_payload_t;
1926 #endif
1927 
/**
******************************************************************************
*  @brief  Frame level data and parameters shared between frame processing
*          and entropy coding
*  NOTE(review): summary inferred from the type name and from the comment on
*  the MSE / SSIM members below - confirm
******************************************************************************
*/
1928 typedef struct
1929 {
1930     /**
1931     * Flag will be set to 1 by frame processing thread after receiving flush
1932     * command from application
1933     */
1934     WORD32 i4_end_flag;
1935 
1936     /** frame level ctb allocation for ctb after aligning to max cu size */
1937     ctb_enc_loop_out_t *ps_frm_ctb_data;
1938 
1939     /** frame level cu allocation for ctb after aligning to max cu size  */
1940     cu_enc_loop_out_t *ps_frm_cu_data;
1941 
1942     /** frame level tu allocation for ctb after aligning to max cu size  */
1943     tu_enc_loop_out_t *ps_frm_tu_data;
1944 
1945     /** frame level pu allocation for ctb after aligning to max cu size  */
1946     pu_t *ps_frm_pu_data;
1947 
1948     /**  frame level coeff allocation for ctb after aligning to max cu size */
1949     void *pv_coeff_data;
1950 
1951     /** Slice header parameters   */
1952     slice_header_t s_slice_hdr;
1953 
1954     /** sps parameters activated by current slice  */
1955     sps_t *ps_sps;
1956 
1957     /** pps parameters activated by current slice  */
1958     pps_t *ps_pps;
1959 
1960     /** vps parameters activated by current slice  */
1961     vps_t *ps_vps;
1962 
1963 #ifndef DISABLE_SEI
1964     /** SEI parameters (held by value, present only when SEI is enabled)  */
1965     sei_params_t s_sei;
1966 #endif
1967 
1968     /* Flag to indicate if AUD NAL is present */
1969     WORD8 i1_aud_present_flag;
1970 
1971     /* Flag to indicate if EOS NAL is present */
1972     WORD8 i1_eos_present_flag;
1973 
1974     /** nal_type for the slice to be encoded  */
1975     WORD32 i4_slice_nal_type;
1976 
1977     /** input time stamp in terms of ticks: lower 32  */
1978     WORD32 i4_inp_timestamp_low;
1979 
1980     /** input time stamp in terms of ticks: higher 32 */
1981     WORD32 i4_inp_timestamp_high;
1982 
1983     /** input frame ctxt of app to be returned in output buffer */
1984     void *pv_app_frm_ctxt;
1985 
1986     /** current frame valid flag :
1987      * will be 1 if valid input was processed by the frame processing thread
1988      */
1989     WORD32 i4_frm_proc_valid_flag;
1990 
1991     /** To support entropy sync the bitstream offset of each CTB row
1992      * is populated in this array and put in slice header in the end
1993      */
1994     WORD32 ai4_entry_point_offset[MAX_NUM_CTB_ROWS_FRM];
1995 
1996     /** RDopt estimation of bytes generated based on which rc update happens
1997      *
1998      */
1999     WORD32 i4_rdopt_bits_generated_estimate;
2000 
2001     /* These params are passed from enc-threads to entropy thread for
2002         passing params needed for PSNR calculation and encoding
2003         summary prints */
2004     DOUBLE lf_luma_mse;
2005     DOUBLE lf_cb_mse;
2006     DOUBLE lf_cr_mse;
2007 
2008     DOUBLE lf_luma_ssim;
2009     DOUBLE lf_cb_ssim;
2010     DOUBLE lf_cr_ssim;
2011 
2012     WORD32 i4_qp;
2013     WORD32 i4_poc;
2014     WORD32 i4_display_num;
2015     WORD32 i4_pic_type;
2016 
2017     /** I-only SCD */
2018     WORD32 i4_is_I_scenecut;
2019 
2020     WORD32 i4_is_non_I_scenecut;
2021     WORD32 i4_sub_pic_level_rc;
2022 
    /* NOTE: this member is a single WORD32 even though its name carries the
       array style "ai4_" prefix; it cannot be indexed */
2023     WORD32 ai4_frame_bits_estimated;
2024     s_pic_level_acc_info_t s_pic_level_info;
2025 
2026     LWORD64 i8_buf_level_bitrate_change;
2027 
2028     WORD32 i4_is_end_of_idr_gop;
2029 
2030 #ifndef DISABLE_SEI
    /* SEI payload entries and their count (present only when SEI is enabled).
       NOTE(review): presumably only the first u4_num_sei_payload entries of
       the array are valid - confirm */
2031     sei_payload_t as_sei_payload[MAX_NUMBER_OF_SEI_PAYLOAD];
2032 
2033     UWORD32 u4_num_sei_payload;
2034 #endif
2035     /* Flag used only in mres single output case to flush out one res and start with next */
2036     WORD32 i4_out_flush_flag;
2037 
2038 } frm_proc_ent_cod_ctxt_t;
2039 
2040 /**
2041 ******************************************************************************
2042 *  @brief  Sub picture level rate control (RC) context
2043 ******************************************************************************
2044 */
2045 typedef struct
2046 {
2047     /*BitRate ID*/
2048     WORD32 i4_br_id;
2049 
2050     /*Frame ID*/
2051     WORD32 i4_frm_id;
2052 
2053     /*Number of CTB, after which data is populated*/
2054     WORD32 i4_ctb_count_in_data;
2055 
2056     /*Number of CTB, after which scale is computed*/
2057     WORD32 i4_ctb_count_out_scale;
2058 
2059     /*Bits estimated for the frame */
2060     /* For NON-I SCD max buf bits*/
2061     LWORD64 i8_frame_bits_estimated;
2062 
2063     /* Bits consumed till the nctb*/
2064     LWORD64 i8_nctb_bits_consumed;
2065 
2066     /* Accumulated bits consumed (presumably over all CTBs so far - TODO confirm)*/
2067     LWORD64 i8_acc_bits_consumed;
2068 
2069     /*Frame level Best of Ipe and ME sad*/
2070     LWORD64 i8_frame_l1_me_sad;
2071 
2072     /*SAD accumulated till NCTB*/
2073     LWORD64 i8_nctb_l1_me_sad;
2074 
2075     /*Frame level IPE sad*/
2076     LWORD64 i8_frame_l1_ipe_sad;
2077 
2078     /*SAD accumulated till NCTB*/
2079     LWORD64 i8_nctb_l1_ipe_sad;
2080 
2081     /*Frame level L0 IPE satd*/
2082     LWORD64 i8_frame_l0_ipe_satd;
2083 
2084     /*L0 SATD accumulated till NCTB*/
2085     LWORD64 i8_nctb_l0_ipe_satd;
2086 
2087     /*Frame level Activity factor acc at 8x8 level */
2088     LWORD64 i8_frame_l1_activity_fact;
2089 
2090     /*NCTB Activity factor acc at 8x8 level */
2091     LWORD64 i8_nctb_l1_activity_fact;
2092 
2093     /*L0 MPM bits accumulated till NCTB*/
2094     LWORD64 i8_nctb_l0_mpm_bits;
2095 
2096     /*Encoder hdr bits accumulated till NCTB*/
2097     LWORD64 i8_nctb_hdr_bits_consumed;
2098 
2099 } ihevce_sub_pic_rc_ctxt_t;
2100 
2101 /**
2102 ******************************************************************************
2103  *  @brief  Memory manager context (stores the memory tables allocated)
2104 ******************************************************************************
2105  */
2106 typedef struct
2107 {
2108     /**
2109     * Total number of memtabs (Modules and system)
2110     * during create time
2111     */
2112     WORD32 i4_num_create_memtabs;
2113 
2114     /**
2115     * Pointer to the mem tabs
2116     * of create time
2117     */
2118     iv_mem_rec_t *ps_create_memtab;
2119 
2120     /**
2121     * Total number of memtabs for Data and control Ques
2122     * during Ques create time
2123     */
2124     WORD32 i4_num_q_memtabs;
2125 
2126     /**
2127     * Pointer to the Ques mem tabs
2128     * of create time
2129     */
2130     iv_mem_rec_t *ps_q_memtab;
2131 
2132 } enc_mem_mngr_ctxt;
2133 
2134 /**
2135 ******************************************************************************
2136  *  @brief  Encoder Interface Queues Context
2137 ******************************************************************************
2138  */
2139 typedef struct
2140 {
2141     /** Number of Queues at interface context level */
2142     WORD32 i4_num_queues;
2143 
2144     /** Array of Queue handles (capacity IHEVCE_MAX_NUM_QUEUES) */
2145     void *apv_q_hdl[IHEVCE_MAX_NUM_QUEUES];
2146 
2147     /** Mutex for ensuring thread safety of the access of the queues */
2148     void *pv_q_mutex_hdl;
2149 
2150 } enc_q_ctxt_t;
2151 
2152 /**
2153 ******************************************************************************
2154  *  @brief  Module context of different modules in encoder
2155  *          All members are opaque (void *) context pointers
2156  */
2157 
2158 typedef struct
2159 {
2160     /** Motion estimation context pointer */
2161     void *pv_me_ctxt;
2162     /** Coarse Motion estimation context pointer */
2163     void *pv_coarse_me_ctxt;
2164 
2165     /** Intra Prediction context pointer */
2166     void *pv_ipe_ctxt;
2167 
2168     /** Encode Loop context pointer */
2169     void *pv_enc_loop_ctxt;
2170 
2171     /** Entropy Coding context pointers, array sized IHEVCE_MAX_NUM_BITRATES */
2172     void *apv_ent_cod_ctxt[IHEVCE_MAX_NUM_BITRATES];
2173 
2174     /** Look Ahead Processing context pointer */
2175     void *pv_lap_ctxt;
2176     /** Rate control context pointers, array sized IHEVCE_MAX_NUM_BITRATES */
2177     void *apv_rc_ctxt[IHEVCE_MAX_NUM_BITRATES];
2178     /** Decomposition pre intra context pointer */
2179     void *pv_decomp_pre_intra_ctxt;
2180 
2181 } module_ctxt_t;
2182 
2183 /**
2184 ******************************************************************************
2185  *  @brief  Threads semaphore handles
2186 ******************************************************************************
2187  */
2188 typedef struct
2189 {
2190     /** LAP semaphore handle */
2191     void *pv_lap_sem_handle;
2192 
2193     /** Encode frame Process semaphore handle */
2194     void *pv_enc_frm_proc_sem_handle;
2195 
2196     /** Pre Encode frame Process semaphore handle */
2197     void *pv_pre_enc_frm_proc_sem_handle;
2198 
2199     /** Entropy coding semaphore handle
2200         One semaphore for each entropy thread, i.e. for each bit-rate instance*/
2201     void *apv_ent_cod_sem_handle[IHEVCE_MAX_NUM_BITRATES];
2202 
2203     /**
2204      *  Semaphore handle corresponding to get free inp frame buff
2205      *  function call from app if called in blocking mode
2206      */
2207     void *pv_inp_data_sem_handle;
2208 
2209     /**
2210      *  Semaphore handle corresponding to get free inp control command buff
2211      *  function call from app if called in blocking mode
2212      */
2213     void *pv_inp_ctrl_sem_handle;
2214 
2215     /**
2216      *  Semaphore handles (array sized IHEVCE_MAX_NUM_BITRATES) corresponding to
2217      *  get filled out bitstream buff function call from app if called in blocking mode
2218      */
2219     void *apv_out_strm_sem_handle[IHEVCE_MAX_NUM_BITRATES];
2220 
2221     /**
2222      *  Semaphore handles (array sized IHEVCE_MAX_NUM_BITRATES) corresponding to
2223      *  get filled out recon buff function call from app if called in blocking mode
2224      */
2225     void *apv_out_recon_sem_handle[IHEVCE_MAX_NUM_BITRATES];
2226 
2227     /**
2228      *  Semaphore handle corresponding to get filled out control status buff
2229      *  function call from app if called in blocking mode
2230      */
2231     void *pv_out_ctrl_sem_handle;
2232 
2233     /**
2234      *  Semaphore handle for LAP input data
2235      *  NOTE(review): purpose inferred from the member name - confirm usage
2236      */
2237     void *pv_lap_inp_data_sem_hdl;
2238 
2239     /**
2240      *  Semaphore handle for pre-encode input data
2241      *  NOTE(review): purpose inferred from the member name - confirm usage
2242      */
2243     void *pv_preenc_inp_data_sem_hdl;
2244 
2245     /**
2246      *  Semaphore handles corresponding to Multi Res Single output case
2247      */
2248     void *pv_ent_common_mres_sem_hdl;
2249     void *pv_out_common_mres_sem_hdl;
2250 
2251 } thrd_que_sem_hdl_t;
2252 
2253 /**
2254 ******************************************************************************
2255  *  @brief  Frame level structure which has parameters about CTBs
2256 ******************************************************************************
2257  */
2258 typedef struct
2259 {
2260     /** CTB size of all CTBs in a frame in pixels
2261      *  this will be a create time value,
2262      *  run time change in this value is not supported
2263      */
2264     WORD32 i4_ctb_size;
2265 
2266     /** Minimum CU size of CTB in a frame in pixels
2267      *  this will be a create time value,
2268      *  run time change in this value is not supported
2269      */
2270     WORD32 i4_min_cu_size;
2271 
2272     /** Worst case num CUs in CTB based on i4_ctb_size */
2273     WORD32 i4_num_cus_in_ctb;
2274 
2275     /** Worst case num PUs in CTB based on i4_ctb_size */
2276     WORD32 i4_num_pus_in_ctb;
2277 
2278     /** Worst case num TUs in CTB based on i4_ctb_size */
2279     WORD32 i4_num_tus_in_ctb;
2280 
2281     /** Number of CTBs in horizontal direction
2282       * this is based on run time source width and i4_ctb_size
2283       */
2284     WORD32 i4_num_ctbs_horz;
2285 
2286     /** Number of CTBs in vertical direction
2287      *  this is based on run time source height and i4_ctb_size
2288      */
2289     WORD32 i4_num_ctbs_vert;
2290 
2291     /** MAX CUs in horizontal direction
2292      * this is based on run time source width, i4_ctb_size and  i4_num_cus_in_ctb
2293      */
2294     WORD32 i4_max_cus_in_row;
2295 
2296     /** MAX PUs in horizontal direction
2297      * this is based on run time source width, i4_ctb_size and  i4_num_pus_in_ctb
2298      */
2299     WORD32 i4_max_pus_in_row;
2300 
2301     /** MAX TUs in horizontal direction
2302      * this is based on run time source width, i4_ctb_size and  i4_num_tus_in_ctb
2303      */
2304     WORD32 i4_max_tus_in_row;
2305 
2306     /**
2307      * CU aligned picture width (currently aligned to MAX CU size)
2308      * should be modified to be aligned to MIN CU size
2309      */
2310 
2311     WORD32 i4_cu_aligned_pic_wd;
2312 
2313     /**
2314      * CU aligned picture height (currently aligned to MAX CU size)
2315      * should be modified to be aligned to MIN CU size
2316      */
2317 
2318     WORD32 i4_cu_aligned_pic_ht;
2319 
2320     /* Pointer to a frame level memory,
2321     Stride is = 1 + (num ctbs in a ctb-row) + 1
2322     Height is = 1 + (num ctbs in a ctb-col)
2323     Contains tile-id of each ctb */
2324     WORD32 *pi4_tile_id_map;
2325 
2326     /* stride of the tile-id map in units of ctb */
2327     WORD32 i4_tile_id_ctb_map_stride;
2328 
2329 } frm_ctb_ctxt_t;
2330 
2331 /**
2332 ******************************************************************************
2333  *  @brief  ME Job Queue descriptor
2334 ******************************************************************************
2335  */
2336 typedef struct
2337 {
2338     /** Number of output dependencies which need to be set after
2339      *  current job is complete,
2340      *  should be less than or equal to MAX_OUT_DEP defined in
2341      *  ihevce_multi_thrd_structs.h
2342      */
2343     WORD32 i4_num_output_dep;
2344 
2345     /** Array of offsets from the start of output dependent layer's Job Ques
2346      *  which depend on the current Job being complete
2347      *  (presumably only the first i4_num_output_dep entries are valid - TODO confirm)
2348      */
2349     WORD32 ai4_out_dep_unit_off[MAX_OUT_DEP];
2350 
2351     /** Number of input dependencies to be resolved for current job to start
2352      *  these many jobs in the lower layer should be complete to
2353      *  start the current JOB
2354      */
2355     WORD32 i4_num_inp_dep;
2356 
2357 } multi_thrd_me_job_q_prms_t;
2357 
2358 /**
2359  *  @brief  structure in which recon data
2360  *          and related parameters are sent from Encoder
2361  */
2362 typedef struct
2363 {
2364     /** Kept for maintaining backwards compatibility in future */
2365     WORD32 i4_size;
2366 
2367     /** Buffer id for the current buffer */
2368     WORD32 i4_buf_id;
2369 
2370     /** POC of the current buffer */
2371     WORD32 i4_poc;
2372 
2373     /** End flag to communicate this is the last frame output from encoder */
2374     WORD32 i4_end_flag;
2375 
2376     /** End flag to communicate to the encoder that this is the last buffer from application
2377         1 - Last buf, 0 - Not last buffer. No other values are supported.
2378         Application has to set the appropriate value before queuing in encoder queue */
2379 
2380     WORD32 i4_is_last_buf;
2381 
2382     /** Recon luma buffer pointer */
2383     void *pv_y_buf;
2384 
2385     /** Recon cb buffer pointer */
2386     void *pv_cb_buf;
2387 
2388     /** Recon cr buffer pointer */
2389     void *pv_cr_buf;
2390 
2391     /** Luma size (in pixels, going by the member name) **/
2392     WORD32 i4_y_pixels;
2393 
2394     /** Chroma size (in pixels, going by the member name) **/
2395     WORD32 i4_uv_pixels;
2396 
2397 } iv_enc_recon_data_buffs_t;
2398 
2399 /**
2400 ******************************************************************************
2401  *  @brief  Multi Thread context structure
2402 ******************************************************************************
2403  */
2404 typedef struct
2405 {
2406     /* Flag to indicate to enc and pre-enc thrds that app has sent force end cmd*/
2407     WORD32 i4_force_end_flag;
2408 
2409     /** Force all active threads flag
2410       * This flag will be set to 1 if all Number of cores givento the encoder
2411       * is less than or Equal to MAX_NUM_CORES_SEQ_EXEC. In this mode
2412       * All pre enc threads and enc threads will run of the same cores with
2413       * time sharing ar frame level
2414       */
2415     WORD32 i4_all_thrds_active_flag;
2416 
2417     /** Flag to indicate that core manager has been configured to enable
2418      * sequential execution
2419      */
2420     WORD32 i4_seq_mode_enabled_flag;
2421     /*-----------------------------------------------------------------------*/
2422     /*--------- Params related to encode group  -----------------------------*/
2423     /*-----------------------------------------------------------------------*/
2424 
2425     /** Number of processing threads created runtime in encode group */
2426     WORD32 i4_num_enc_proc_thrds;
2427 
2428     /** Number of processing threads active for a given frame
2429      * This value will be monitored at frame level, so as to
2430      * have provsion for increasing / decreasing threads
2431      * based on Load balance b/w stage in encoder
2432      */
2433     WORD32 i4_num_active_enc_thrds;
2434 
2435     /** Mutex for ensuring thread safety of the access of Job queues in encode group */
2436     void *pv_job_q_mutex_hdl_enc_grp_me;
2437 
2438     /** Mutex for ensuring thread safety of the access of Job queues in encode group */
2439     void *pv_job_q_mutex_hdl_enc_grp_enc_loop;
2440 
2441     /** Array of Semaphore handles (for each frame processing threads ) */
2442     void *apv_enc_thrd_sem_handle[MAX_NUM_FRM_PROC_THRDS_ENC];
2443 
2444     /** Array for ME to export the Job que dependency for all layers */
2445     multi_thrd_me_job_q_prms_t as_me_job_q_prms[MAX_NUM_HME_LAYERS][MAX_NUM_VERT_UNITS_FRM];
2446 
2447     /* pointer to the mutex handle*/
2448     void *apv_mutex_handle[MAX_NUM_ME_PARALLEL];
2449 
2450     /* pointer to the mutex handle for frame init*/
2451     void *apv_mutex_handle_me_end[MAX_NUM_ME_PARALLEL];
2452 
2453     /* pointer to the mutex handle for frame init*/
2454     void *apv_mutex_handle_frame_init[MAX_NUM_ENC_LOOP_PARALLEL];
2455 
2456     /*pointer to the mutex handle*/
2457     void *apv_post_enc_mutex_handle[MAX_NUM_ENC_LOOP_PARALLEL];
2458 
2459     /* Flag to indicate that master has done ME init*/
2460     WORD32 ai4_me_master_done_flag[MAX_NUM_ME_PARALLEL];
2461 
2462     /* Counter to keep track of me num of thrds exiting critical section*/
2463     WORD32 me_num_thrds_exited[MAX_NUM_ME_PARALLEL];
2464 
2465     /* Flag to indicate that master has done the frame init*/
2466     WORD32 enc_master_done_frame_init[MAX_NUM_ENC_LOOP_PARALLEL];
2467 
2468     /* Counter to keep track of num of thrds exiting critical section*/
2469     WORD32 num_thrds_exited[MAX_NUM_ENC_LOOP_PARALLEL];
2470 
2471     /* Counter to keep track of num of thrds exiting critical section for re-encode*/
2472     WORD32 num_thrds_exited_for_reenc;
2473 
2474     /* Array to store the curr qp for ping and pong instance*/
2475     WORD32 cur_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2476 
2477     /* Pointers to store output buffers for ping and pong instance*/
2478     frm_proc_ent_cod_ctxt_t *ps_curr_out_enc_grp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2479 
2480     /* Pointer to store input buffers for me*/
2481     pre_enc_me_ctxt_t *aps_cur_inp_me_prms[MAX_NUM_ME_PARALLEL];
2482 
2483     /*pointers to store output buffers from me */
2484     me_enc_rdopt_ctxt_t *aps_cur_out_me_prms[NUM_ME_ENC_BUFS];
2485 
2486     /*pointers to store input buffers to enc-rdopt */
2487     me_enc_rdopt_ctxt_t *aps_cur_inp_enc_prms[NUM_ME_ENC_BUFS];
2488 
2489     /*Shared memory for Sub Pic rc */
2490     /*Qscale calulated by sub pic rc bit control for Intra Pic*/
2491     WORD32 ai4_curr_qp_estimated[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2492 
2493     /*Header bits error by sub pic rc bit control*/
2494     float af_acc_hdr_bits_scale_err[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2495 
2496     /*Accumalated ME SAD for NCTB*/
2497     LWORD64 ai8_nctb_me_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2498                            [MAX_NUM_FRM_PROC_THRDS_ENC];
2499 
2500     /*Accumalated IPE SAD for NCTB*/
2501     LWORD64 ai8_nctb_ipe_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2502                             [MAX_NUM_FRM_PROC_THRDS_ENC];
2503 
2504     /*Accumalated L0 IPE SAD for NCTB*/
2505     LWORD64 ai8_nctb_l0_ipe_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2506                                [MAX_NUM_FRM_PROC_THRDS_ENC];
2507 
2508     /*Accumalated Activity Factor for NCTB*/
2509     LWORD64 ai8_nctb_act_factor[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2510                                [MAX_NUM_FRM_PROC_THRDS_ENC];
2511 
2512     /*Accumalated Ctb counter across all threads*/
2513     WORD32 ai4_ctb_ctr[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2514 
2515     /*Bits threshold reached for across all threads*/
2516     WORD32 ai4_threshold_reached[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2517 
2518     /*To hold the Previous In-frame RC chunk QP*/
2519     WORD32 ai4_prev_chunk_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2520 
2521     /*Accumalated Ctb counter across all threads*/
2522     WORD32 ai4_acc_ctb_ctr[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2523 
2524     /*Flag to check if thread is initialized */
2525     WORD32 ai4_thrd_id_valid_flag[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2526                                  [MAX_NUM_FRM_PROC_THRDS_ENC];
2527 
2528     /*Accumalated Ctb counter across all threads*/
2529     //WORD32 ai4_acc_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES][MAX_NUM_FRM_PROC_THRDS_ENC];
2530 
2531     /*Accumalated bits consumed for nctbs across all threads*/
2532     LWORD64 ai8_nctb_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2533                                   [MAX_NUM_FRM_PROC_THRDS_ENC];
2534 
2535     /*Accumalated hdr bits consumed for nctbs across all threads*/
2536     LWORD64 ai8_nctb_hdr_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2537                                       [MAX_NUM_FRM_PROC_THRDS_ENC];
2538 
2539     /*Accumalated l0 mpm bits consumed for nctbs across all threads*/
2540     LWORD64 ai8_nctb_mpm_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2541                                       [MAX_NUM_FRM_PROC_THRDS_ENC];
2542 
2543     /*Accumalated bits consumed for total ctbs across all threads*/
2544     LWORD64 ai8_acc_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2545                                  [MAX_NUM_FRM_PROC_THRDS_ENC];
2546 
2547     /*Accumalated bits consumed for total ctbs across all threads*/
2548     LWORD64 ai8_acc_bits_mul_qs_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
2549                                         [MAX_NUM_FRM_PROC_THRDS_ENC];
2550 
2551     /*Qscale calulated by sub pic rc bit control */
2552     WORD32 ai4_curr_qp_acc[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2553     /* End of Sub pic rc variables */
2554 
2555     /* Pointers to store input (only L0 IPE)*/
2556     pre_enc_L0_ipe_encloop_ctxt_t *aps_cur_L0_ipe_inp_prms[MAX_NUM_ME_PARALLEL];
2557 
2558     /* Array tp store L0 IPE input buf ids*/
2559     WORD32 ai4_in_frm_l0_ipe_id[MAX_NUM_ME_PARALLEL];
2560 
2561     /* Array to store output buffer ids for ping and pong instances*/
2562     WORD32 out_buf_id[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2563 
2564     /* Array of pointers to store the recon buf pointers*/
2565     iv_enc_recon_data_buffs_t *ps_recon_out[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2566 
2567     /* Array of pointers to frame recon for ping and pong instances*/
2568     recon_pic_buf_t *ps_frm_recon[NUM_ME_ENC_BUFS][IHEVCE_MAX_NUM_BITRATES];
2569 
2570     /* Array of recon buffer ids for ping and pong instance*/
2571     WORD32 recon_buf_id[NUM_ME_ENC_BUFS][IHEVCE_MAX_NUM_BITRATES];
2572 
2573     /* Counter to keep track of num thrds done*/
2574     WORD32 num_thrds_done;
2575 
2576     /* Flags to keep track of dumped ping pong recon buffer*/
2577     WORD32 is_recon_dumped[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2578 
2579     /* Flags to keep track of dumped ping pong output buffer*/
2580     WORD32 is_out_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2581 
2582     /* flag to produce output buffer by the thread who ever is finishing
2583     enc-loop processing first, so that the entropy thread can start processing */
2584     WORD32 ai4_produce_outbuf[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
2585 
2586     /* Flags to keep track of dumped ping pong input buffer*/
2587     WORD32 is_in_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL];
2588 
2589     /* Flags to keep track of dumped ping pong L0 IPE to enc buffer*/
2590     WORD32 is_L0_ipe_in_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL];
2591 
2592     /** Dependency manager for checking whether prev. EncLoop done before
2593         current frame EncLoop starts */
2594     void *apv_dep_mngr_prev_frame_done[MAX_NUM_ENC_LOOP_PARALLEL];
2595 
2596     /** Dependency manager for checking whether prev. EncLoop done before
2597         re-encode of the current frame */
2598     void *pv_dep_mngr_prev_frame_enc_done_for_reenc;
2599 
2600     /** Dependency manager for checking whether prev. me done before
2601         current frame me starts */
2602     void *apv_dep_mngr_prev_frame_me_done[MAX_NUM_ME_PARALLEL];
2603 
2604     /** ME coarsest layer JOB queue type */
2605     WORD32 i4_me_coarsest_lyr_type;
2606 
2607     /** number of encloop frames running in parallel */
2608     WORD32 i4_num_enc_loop_frm_pllel;
2609 
2610     /** number of me frames running in parallel */
2611     WORD32 i4_num_me_frm_pllel;
2612 
2613     /*-----------------------------------------------------------------------*/
2614     /*--------- Params related to pre-enc stage -----------------------------*/
2615     /*-----------------------------------------------------------------------*/
2616 
2617     /** Number of processing threads created runtime in pre encode group */
2618     WORD32 i4_num_pre_enc_proc_thrds;
2619 
2620     /** Number of processing threads active for a given frame
2621      * This value will be monitored at frame level, so as to
2622      * have provsion for increasing / decreasing threads
2623      * based on Load balance b/w stage in encoder
2624      */
2625     WORD32 i4_num_active_pre_enc_thrds;
2626     /** number of threads that have done processing the current frame
2627         Use to find out the last thread that is coming out of pre-enc processing
2628         so that the last thread can do de-init of pre-enc stage */
2629     WORD32 ai4_num_thrds_processed_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2630 
2631     /** number of threads that have done processing the current frame
2632         Use to find out the first thread and last inoder to get qp query. As the query
2633         is not read only , the quer should be done only once by thread that comes first
2634         and other threads should get same value*/
2635     WORD32 ai4_num_thrds_processed_L0_ipe_qp_init[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2636 
2637     /** number of threads that have done proessing decomp_intra
2638         Used to find out the last thread that is coming out so that
2639         the last thread can set flag for decomp_pre_intra_finish */
2640     WORD32 ai4_num_thrds_processed_decomp[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2641 
2642     /** number of threads that have done proessing coarse_me
2643         Used to find out the last thread that is coming out so that
2644         the last thread can set flag for coarse_me_finish */
2645     WORD32 ai4_num_thrds_processed_coarse_me[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2646 
2647     /*Flag to indicate if current instance (frame)'s Decomp_pre_intra and Coarse_ME is done.
2648       Used to check if previous frame is done proecessing decom_pre_intra and coarse_me */
2649     WORD32 ai4_decomp_coarse_me_complete_flag[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2650 
2651     /** Dependency manager for checking whether prev. frame decomp_intra
2652         done before current frame  decomp_intra starts */
2653     void *pv_dep_mngr_prev_frame_pre_enc_l1;
2654 
2655     /** Dependency manager for checking whether prev. frame L0 IPE done before
2656         current frame L0 IPE starts */
2657     void *pv_dep_mngr_prev_frame_pre_enc_l0;
2658 
2659     /** Dependency manager for checking whether prev. frame coarse_me done before
2660         current frame coarse_me starts */
2661     void *pv_dep_mngr_prev_frame_pre_enc_coarse_me;
2662 
2663     /** flag to indicate if pre_enc_init is done for current frame */
2664     WORD32 ai4_pre_enc_init_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2665 
2666     /** flag to indicate if pre_enc_hme_init is done for current frame */
2667     WORD32 ai4_pre_enc_hme_init_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2668 
2669     /** flag to indicate if pre_enc_deinit is done for current frame */
2670     WORD32 ai4_pre_enc_deinit_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2671 
2672     /** Flag to indicate the end of processing when all the frames are done processing */
2673     WORD32 ai4_end_flag_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2674 
2675     /** Flag to indicate the control blocking mode indicating input command to pre-enc
2676     group should be blocking or unblocking */
2677     WORD32 i4_ctrl_blocking_mode;
2678 
2679     /** Current input pointer */
2680     ihevce_lap_enc_buf_t *aps_curr_inp_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2681 
2682     WORD32 i4_last_inp_buf;
2683 
2684     /* buffer id for input buffer */
2685     WORD32 ai4_in_buf_id_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2686 
2687     /** Current output pointer */
2688     pre_enc_me_ctxt_t *aps_curr_out_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2689 
2690     /*Current L0 IPE to enc output pointer */
2691     pre_enc_L0_ipe_encloop_ctxt_t *ps_L0_IPE_curr_out_pre_enc;
2692 
2693     /** buffer id for output buffer */
2694     WORD32 ai4_out_buf_id_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2695 
2696     /** buffer id for L0 IPE enc buffer*/
2697     WORD32 i4_L0_IPE_out_buf_id;
2698 
2699     /** Current picture Qp */
2700     WORD32 ai4_cur_frame_qp_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2701 
2702     /** Decomp layer buffers indicies */
2703     WORD32 ai4_decomp_lyr_buf_idx[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2704 
2705     /*since it is guranteed that cur frame ipe will not start unless prev frame ipe is completly done,
2706       an array of MAX_PRE_ENC_STAGGER might not be required*/
2707     WORD32 i4_qp_update_l0_ipe;
2708 
2709     /** Current picture encoded is the last picture to be encoded flag */
2710     WORD32 i4_last_pic_flag;
2711 
2712     /** Mutex for ensuring thread safety of the access of Job queues in decomp stage */
2713     void *pv_job_q_mutex_hdl_pre_enc_decomp;
2714 
2715     /** Mutex for ensuring thread safety of the access of Job queues in HME group */
2716     void *pv_job_q_mutex_hdl_pre_enc_hme;
2717 
2718     /** Mutex for ensuring thread safety of the access of Job queues in l0 ipe stage */
2719     void *pv_job_q_mutex_hdl_pre_enc_l0ipe;
2720 
2721     /** mutex handle for pre-enc init */
2722     void *pv_mutex_hdl_pre_enc_init;
2723 
2724     /** mutex handle for pre-enc decomp deinit */
2725     void *pv_mutex_hdl_pre_enc_decomp_deinit;
2726 
2727     /** mutex handle for pre enc hme init */
2728     void *pv_mutex_hdl_pre_enc_hme_init;
2729 
2730     /** mutex handle for pre-enc hme deinit */
2731     void *pv_mutex_hdl_pre_enc_hme_deinit;
2732 
2733     /*qp qurey before l0 ipe is done by multiple frame*/
2734     /** mutex handle for L0 ipe(pre-enc init)*/
2735     void *pv_mutex_hdl_l0_ipe_init;
2736 
2737     /** mutex handle for pre-enc deinit */
2738     void *pv_mutex_hdl_pre_enc_deinit;
2739 
2740     /** Array of Semaphore handles (for each frame processing threads ) */
2741     void *apv_pre_enc_thrd_sem_handle[MAX_NUM_FRM_PROC_THRDS_ENC];
2742     /** array which will tell the number of CTB processed in each row,
2743     *   used for Row level sync in IPE pass
2744     */
2745     WORD32 ai4_ctbs_in_row_proc_ipe_pass[MAX_NUM_CTB_ROWS_FRM];
2746 
2747     /**  Job Queue Memory pre encode */
2748     job_queue_t *aps_job_q_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
2749 
2750     /** Array of Job Queue handles enc group */
2751     job_queue_handle_t as_job_que_preenc_hdls[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]
2752                                              [NUM_PRE_ENC_JOBS_QUES];
2753 
2754     /* accumulate intra sad across all thread to get qp before L0 IPE*/
2755     WORD32 ai4_intra_satd_acc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]
2756                              [MAX_NUM_FRM_PROC_THRDS_PRE_ENC];
2757 
2758     WORD32 i4_delay_pre_me_btw_l0_ipe;
2759 
2760     /*** This variable has the maximum delay between hme and l0ipe ***/
2761     /*** This is used for wrapping around L0IPE index ***/
2762     WORD32 i4_max_delay_pre_me_btw_l0_ipe;
2763 
2764     /* This is to register the handles of Dep Mngr b/w EncLoop and ME */
2765     /* This is used to delete the Mngr at the end                          */
2766     void *apv_dep_mngr_encloop_dep_me[NUM_ME_ENC_BUFS];
2767     /*flag to track buffer in me/enc que is produced or not*/
2768     WORD32 ai4_me_enc_buff_prod_flag[NUM_ME_ENC_BUFS];
2769 
2770     /*out buf que id for me */
2771     WORD32 ai4_me_out_buf_id[NUM_ME_ENC_BUFS];
2772 
2773     /*in buf que id for enc from me*/
2774     WORD32 i4_enc_in_buf_id[NUM_ME_ENC_BUFS];
2775 
2776     /* This is used to tell whether the free of recon buffers are done or not */
2777     WORD32 i4_is_recon_free_done;
2778 
2779     /* index for DVSR population */
2780     WORD32 i4_idx_dvsr_p;
2781     WORD32 aai4_l1_pre_intra_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]
2782                                  [(HEVCE_MAX_HEIGHT >> 1) / 8];
2783 
2784     WORD32 i4_rc_l0_qp;
2785 
2786     /* Used for mres single out cases. Checks whether a particular resolution is active or passive */
2787     /* Only one resolution should be active for mres_single_out case */
2788     WORD32 *pi4_active_res_id;
2789 
2790     /**
2791      * Sub Pic bit control mutex lock handle
2792      */
2793     void *pv_sub_pic_rc_mutex_lock_hdl;
2794 
2795     void *pv_sub_pic_rc_for_qp_update_mutex_lock_hdl;
2796 
2797     WORD32 i4_encode;
2798     WORD32 i4_in_frame_rc_enabled;
2799     WORD32 i4_num_re_enc;
2800 
2801 } multi_thrd_ctxt_t;
2802 
2803 /**
2804  *  @brief    Parameters describing the tile configuration of a frame and the current tile
2805  */
2806 typedef struct
2807 {
2808     /* flag indicating whether tile encoding is enabled or disabled */
2809     WORD32 i4_tiles_enabled_flag;
2810 
2811     /* flag indicating uniform spacing of tiles */
2812     WORD32 i4_uniform_spacing_flag;
2813 
2814     /* number of tile columns (tiles in a tile-row) and tile rows (tiles in a tile-column) */
2815     WORD32 i4_num_tile_cols;
2816     WORD32 i4_num_tile_rows;
2817 
2818     /* current tile width and height (units not stated; presumably samples - TODO confirm) */
2819     WORD32 i4_curr_tile_width;
2820     WORD32 i4_curr_tile_height;
2821 
2822     /* current tile width and height in CTB units */
2823     WORD32 i4_curr_tile_wd_in_ctb_unit;
2824     WORD32 i4_curr_tile_ht_in_ctb_unit;
2825 
2826     /* frame resolution (both members below are commented out, i.e. not part of the struct) */
2827     //WORD32  i4_frame_width;  /* encode-width  */
2828     //WORD32  i4_frame_height; /* encode-height */
2829 
2830     /* total number of tiles in the frame */
2831     WORD32 i4_num_tiles;
2832 
2833     /* current tile id, assigned in raster scan order within a frame */
2834     WORD32 i4_curr_tile_id;
2835 
2836     /* x-pos and y-pos of the first CTB of the slice, in CTB units */
2837     /* NOTE(review): "slice" is the original wording; possibly means the current tile - confirm */
2838     WORD32 i4_first_ctb_x;
2839     WORD32 i4_first_ctb_y;
2840 
2841     /* x-pos and y-pos of the first CTB of the slice, in samples */
2842     /* NOTE(review): "slice" is the original wording; possibly means the current tile - confirm */
2843     WORD32 i4_first_sample_x;
2844     WORD32 i4_first_sample_y;
2845 
2846 } ihevce_tile_params_t;
2847 
2848 /**
2849 ******************************************************************************
2850  *  @brief  Encoder context structure: parameter sets, queues, reference
2851  *          lists, run-time parameters and rate-control related state
2852  */
2853 
2854 typedef struct
2855 {
2856     /**
2857      *  VPS parameters, one entry per bitrate instance
2858      */
2859     vps_t as_vps[IHEVCE_MAX_NUM_BITRATES];
2860 
2861     /**
2862      *  SPS parameters, one entry per bitrate instance
2863      */
2864     sps_t as_sps[IHEVCE_MAX_NUM_BITRATES];
2865 
2866     /**
2867      *  PPS parameters
2868      *  Required for each bitrate separately, mainly because parameters
2869      *  such as init qp need to be different for each instance
2870      */
2871     pps_t as_pps[IHEVCE_MAX_NUM_BITRATES];
2872 
2873     /**
2874      * Rate control mutex lock handle (opaque)
2875      */
2876     void *pv_rc_mutex_lock_hdl;
2877 
2878     /** Frame level CU analyse buffer pointer for ME (member is commented out below).
2879      * ME will get ps_ctb_analyse structure populated with ps_cu pointers
2880      * pointing to ps_cu_analyse buffer from IPE.
2881       */
2882     //cu_analyse_t       *ps_cu_analyse_inter[PING_PONG_BUF];
2883 
2884     /**
2885       *  CTB frame context between encoder (producer) and entropy (consumer)
2886       */
2887     enc_q_ctxt_t s_enc_ques;
2888 
2889     /**
2890      *  Encoder memory manager context
2891      */
2892     enc_mem_mngr_ctxt s_mem_mngr;
2893 
2894     /**
2895      * Semaphores of all the threads created in HLE
2896      * and queue handle for buffers between frame process and entropy
2897      */
2898     thrd_que_sem_hdl_t s_thrd_sem_ctxt;
2899 
2900     /**
2901      *  Reference / recon buffer queue pointer, one per bitrate instance
2902      */
2903     recon_pic_buf_t **pps_recon_buf_q[IHEVCE_MAX_NUM_BITRATES];
2904 
2905     /**
2906      * Number of buffers in the recon buffer queue, one count per bitrate instance
2907      */
2908     WORD32 ai4_num_buf_recon_q[IHEVCE_MAX_NUM_BITRATES];
2909 
2910     /**
2911      * Reference / recon buffer queue pointer for the pre-encode group.
2912      * This is just a container; no buffers are allocated for it.
2913      */
2914     recon_pic_buf_t **pps_pre_enc_recon_buf_q;
2915 
2916     /**
2917      * Number of buffers in the pre-encode recon buffer queue
2918      */
2919     WORD32 i4_pre_enc_num_buf_recon_q;
2920 
2921     /**
2922       * Frame level CTB parameters and worst-case PU, CU and TU in a CTB row
2923       */
2924     frm_ctb_ctxt_t s_frm_ctb_prms;
2925 
2926     /*
2927      * Module context pointers of all modules
2928      */
2929     module_ctxt_t s_module_ctxt;
2930 
2931     /*
2932      * LAP static parameters
2933      */
2934     ihevce_lap_static_params_t s_lap_stat_prms;
2935 
2936     /*
2937      * Run time dynamic source params
2938      */
2939 
2940     ihevce_src_params_t s_runtime_src_prms;
2941 
2942     /*
2943      * Run time target params
2944      */
2945     ihevce_tgt_params_t s_runtime_tgt_params;
2946 
2947     /*
2948      *  Run time dynamic coding params
2949      */
2950     ihevce_coding_params_t s_runtime_coding_prms;
2951 
2952     /**
2953      * Pointer to static config params
2954      */
2955     ihevce_static_cfg_params_t *ps_stat_prms;
2956 
2957     /**
2958      * The following structure members are used for copying recon buf info
2959      * in case of duplicate pics
2960      */
2961 
2962     /**
2963      * Array of reference picture list entries (stored by value) for the pre-enc group.
2964      * Separate list for each pre-enc buffer instance (first dimension).
2965      * 2 => ref_pic_list0 and ref_pic_list1
2966      */
2967     recon_pic_buf_t as_pre_enc_ref_lists[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME][2]
2968                                         [HEVCE_MAX_REF_PICS * 2];
2969 
2970     /**
2971      * Array of pointers to reference picture list entries for the pre-enc group.
2972      * Separate list for each pre-enc buffer instance (first dimension).
2973      * 2 => ref_pic_list0 and ref_pic_list1
2974      */
2975     recon_pic_buf_t *aps_pre_enc_ref_lists[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME][2]
2976                                           [HEVCE_MAX_REF_PICS * 2];
2977 
2978     /**
2979      *  Number of input frames per input queue
2980      */
2981     WORD32 i4_num_input_buf_per_queue;
2982 
2983     /**
2984      *  POC of the Clean Random Access (CRA) I picture
2985      */
2986     WORD32 i4_cra_poc;
2987 
2988     /** Number of ref pics in list 0 for any given frame */
2989     WORD32 i4_num_ref_l0;
2990 
2991     /** Number of ref pics in list 1 for any given frame */
2992     WORD32 i4_num_ref_l1;
2993 
2994     /** Number of active ref pics in list 0 for the current frame */
2995     WORD32 i4_num_ref_l0_active;
2996 
2997     /** Number of active ref pics in list 1 for the current frame */
2998     WORD32 i4_num_ref_l1_active;
2999 
3000     /** Number of ref pics in list 0 for any given frame in the pre-encode stage */
3001     WORD32 i4_pre_enc_num_ref_l0;
3002 
3003     /** Number of ref pics in list 1 for any given frame in the pre-encode stage */
3004     WORD32 i4_pre_enc_num_ref_l1;
3005 
3006     /** Number of active ref pics in list 0 for the current frame in the pre-encode stage */
3007     WORD32 i4_pre_enc_num_ref_l0_active;
3008 
3009     /** Number of active ref pics in list 1 for the current frame in the pre-encode stage */
3010     WORD32 i4_pre_enc_num_ref_l1_active;
3011 
3012     /**
3013      * Working memory to be used for frame level activities.
3014      * One example is interpolation at frame level. This requires memory
3015      * of (max width + 16) * (max_height + 7 + 16 ) * 2 bytes.
3016      * This is so since we generate interp output for max_width + 16 x
3017      * max_height + 16, and then the intermediate output is 16 bit and
3018      * is max_height + 16 + 7 rows
3019      */
3020     UWORD8 *pu1_frm_lvl_wkg_mem;
3021 
3022     /**
3023      * Multi thread processing context.
3024      * This memory contains the variables and pointers shared across threads
3025      * in enc-group and pre-enc-group
3026      */
3027     multi_thrd_ctxt_t s_multi_thrd;
3028 
3029     /** I/O queues created status */
3030     WORD32 i4_io_queues_created;
3031 
3032     WORD32 i4_end_flag; /* undocumented here; presumably an end-of-processing flag - TODO confirm */
3033 
3034     /** Number of bit-rate instances running */
3035     WORD32 i4_num_bitrates;
3036 
3037     /** Number of enc frames running in parallel */
3038     WORD32 i4_num_enc_loop_frm_pllel;
3039 
3040     /* Reference bitrate id */
3041     WORD32 i4_ref_mbr_id;
3042 
3043     /* Flag to indicate to the app that end of processing has been reached */
3044     WORD32 i4_frame_limit_reached;
3045 
3046     /* Structure to store the function selector
3047      * pointers for common and encoder */
3048     func_selector_t s_func_selector;
3049 
3050     /* Resolution id (the original comment calls it the "ref resolution id") */
3051     WORD32 i4_resolution_id;
3052 
3053     /* HLE context (opaque pointer) */
3054     void *pv_hle_ctxt;
3055 
3056     rc_quant_t s_rc_quant;
3057     /* ME cost of P pic stored for the next ref B pic (member is commented out below) */
3058     //LWORD64 i8_acc_me_cost_of_p_pic_for_b_pic[2];
3059 
3060     UWORD32 u4_cur_pic_encode_cnt;
3061     UWORD32 u4_cur_pic_encode_cnt_dbp;
3062     /* High complexity status of the past 2 P pics */
3063     WORD32 ai4_is_past_pic_complex[2];
3064 
3065     WORD32 i4_is_I_reset_done;
3066     WORD32 i4_past_RC_reset_count;
3067 
3068     WORD32 i4_future_RC_reset;
3069 
3070     WORD32 i4_past_RC_scd_reset_count;
3071 
3072     WORD32 i4_future_RC_scd_reset;
3073     WORD32 i4_poc_reset_values;
3074 
3075     /* Place holder to store the length of LAP in first pass */
3076     /** Number of frames to look ahead for RC;
3077      * counts 2 fields as one frame for interlaced
3078      */
3079     WORD32 i4_look_ahead_frames_in_first_pass;
3080 
3081     WORD32 ai4_mod_factor_derived_by_variance[2];
3082     float f_strength;
3083 
3084     /* For B frames use the avg activity
3085     from the layer 0 (I or P), which is the average over the
3086     Lap2 window (note: the 8x8 array has 2 entries, the 16x16 and 32x32 arrays have 3) */
3087     LWORD64 ai8_lap2_8x8_avg_act_from_T0[2];
3088 
3089     LWORD64 ai8_lap2_16x16_avg_act_from_T0[3];
3090 
3091     LWORD64 ai8_lap2_32x32_avg_act_from_T0[3];
3092 
3093     /* For B frames use the log of avg activity
3094     from the layer 0 (I or P), which is the average over the
3095     Lap2 window (array sizes mirror the non-log arrays above) */
3096     long double ald_lap2_8x8_log_avg_act_from_T0[2];
3097 
3098     long double ald_lap2_16x16_log_avg_act_from_T0[3];
3099 
3100     long double ald_lap2_32x32_log_avg_act_from_T0[3];
3101 
3102     ihevce_tile_params_t *ps_tile_params_base; /* presumably base of an array of per-tile params - TODO confirm */
3103 
3104     WORD32 ai4_column_width_array[MAX_TILE_COLUMNS]; /* one entry per tile column; units not stated here */
3105 
3106     WORD32 ai4_row_height_array[MAX_TILE_ROWS]; /* one entry per tile row; units not stated here */
3107 
3108     /* Architecture */
3109     IV_ARCH_T e_arch_type;
3110 
3111     UWORD8 u1_is_popcnt_available;
3112 
3113     WORD32 i4_active_scene_num;
3114 
3115     WORD32 i4_max_fr_enc_loop_parallel_rc;
3116     WORD32 ai4_rc_query[IHEVCE_MAX_NUM_BITRATES]; /* one entry per bitrate instance; meaning not documented here */
3117     WORD32 i4_active_enc_frame_id;
3118 
3119     /**
3120     * LAP interface context pointer (opaque)
3121     */
3122     void *pv_lap_interface_ctxt;
3123 
3124     /* If enabled, enables Blu-ray compatibility of the output */
3125     WORD32 i4_blu_ray_spec;
3126 
3127 } enc_ctxt_t;
3128 
3129 /**
3130 ******************************************************************************
3131 *  @brief  Inter CTB parameters needed for deciding the best inter CU
3132 *   results
3133 ******************************************************************************
3134 */
3135 typedef struct
3136 {
3137     hme_pred_buf_mngr_t s_pred_buf_mngr;
3138 
3139     /** X and Y offset of the CTB w.r.t. the start of the picture */
3140     WORD32 i4_ctb_x_off;
3141     WORD32 i4_ctb_y_off;
3142 
3143     /**
3144      * Pred buffer pointer, updated inside the subpel refinement process. This
3145      * location is passed to the leaf function for copying the winner pred buf
3146      */
3147     UWORD8 **ppu1_pred;
3148 
3149     /** Working memory passed to leaf functions */
3150     UWORD8 *pu1_wkg_mem;
3151 
3152     /** Prediction buffer stride for leaf functions to copy the pred winner buf */
3153     WORD32 i4_pred_stride;
3154 
3155     /** Stride of the input buf, updated inside the subpel function */
3156     WORD32 i4_inp_stride;
3157 
3158     /** Stride of the recon buffer */
3159     WORD32 i4_rec_stride;
3160 
3161     /** Indicates whether bi-directional prediction is enabled or not */
3162     WORD32 i4_bidir_enabled;
3163 
3164     /**
3165      * Total number of references of the current picture being encoded
3166      */
3167     UWORD8 u1_num_ref;
3168 
3169     /** Recon pic buffer pointers for the L0 list */
3170     recon_pic_buf_t **pps_rec_list_l0;
3171 
3172     /** Recon pic buffer pointers for the L1 list */
3173     recon_pic_buf_t **pps_rec_list_l1;
3174 
3175     /**
3176      * These pointers point to the modified input, one per ref idx.
3177      * Instead of weighting the reference, we weight the input with the inverse
3178      * weight and offset for list 0 and list 1.
3179      */
3180     UWORD8 *apu1_wt_inp[2][MAX_NUM_REF];
3181 
3182     /* Since ME uses weighted inputs, we use the reciprocal of the actual weights */
3183     /* that are signaled in the bitstream */
3184     WORD32 *pi4_inv_wt;
3185     WORD32 *pi4_inv_wt_shift_val;
3186 
3187     /* Map between L0 reference indices and LC indices */
3188     WORD8 *pi1_past_list;
3189 
3190     /* Map between L1 reference indices and LC indices */
3191     WORD8 *pi1_future_list;
3192 
3193     /**
3194      * Points to the non-weighted input data for the current CTB
3195      */
3196     UWORD8 *pu1_non_wt_inp;
3197 
3198     /**
3199      * Pred lambda and lambda qshift. NOTE(review): the original comment said "for all the reference indices", but both members are single scalars - confirm
3200      */
3201     WORD32 i4_lamda;
3202 
3203     UWORD8 u1_lamda_qshift;
3204 
3205     WORD32 wpred_log_wdc; /* undocumented; presumably log2 of the weighted-prediction weight denominator - TODO confirm */
3206 
3207     /**
3208      * Number of active references in L0
3209      */
3210     UWORD8 u1_num_active_ref_l0;
3211 
3212     /**
3213      * Number of active references in L1
3214      */
3215     UWORD8 u1_num_active_ref_l1;
3216 
3217     /** The max depth for the inter TU tree */
3218     UWORD8 u1_max_tr_depth;
3219 
3220     /** Quality preset */
3221     WORD8 i1_quality_preset;
3222 
3223     /** Selects SATD or SAD (which value selects which is not stated here) */
3224     UWORD8 u1_use_satd;
3225 
3226     /* Frame level QP. NOTE(review): member name suggests a qstep scaled by 2^8 rather than a raw QP - confirm */
3227     WORD32 i4_qstep_ls8;
3228 
3229     /* Pointer to an array of PU level source variances */
3230     UWORD32 *pu4_src_variance;
3231 
3232     WORD32 i4_alpha_stim_multiplier;
3233 
3234     UWORD8 u1_is_cu_noisy;
3235 
3236     ULWORD64 *pu8_part_src_sigmaX;
3237 
3238     ULWORD64 *pu8_part_src_sigmaXSquared;
3239 
3240     UWORD8 u1_max_2nx2n_tu_recur_cands;
3241 
3242 } inter_ctb_prms_t;
3243 
3244 /*****************************************************************************/
3245 /* Extern Variable Declarations                                              */
3246 /*****************************************************************************/
3247 extern const double lamda_modifier_for_I_pic[8]; /* read-only table of 8 doubles, defined outside this header; "lamda" spelling is part of the symbol name */
3248 
3249 /*****************************************************************************/
3250 /* Extern Function Declarations                                              */
3251 /*****************************************************************************/
3252 
3253 #endif /* _IHEVCE_ENC_STRUCTS_H_ */
3254