1 /****************************************************************************** 2 * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 /*! 21 ****************************************************************************** 22 * \file ihevce_enc_structs.h 23 * 24 * \brief 25 * This file contains structure definations of Encoder 26 * 27 * \date 28 * 18/09/2012 29 * 30 * \author 31 * Ittiam 32 * 33 ****************************************************************************** 34 */ 35 36 #ifndef _IHEVCE_ENC_STRUCTS_H_ 37 #define _IHEVCE_ENC_STRUCTS_H_ 38 39 /*****************************************************************************/ 40 /* Constant Macros */ 41 /*****************************************************************************/ 42 #define HEVCE_MAX_WIDTH 1920 43 #define HEVCE_MAX_HEIGHT 1088 44 45 #define HEVCE_MIN_WIDTH 64 46 #define HEVCE_MIN_HEIGHT 64 47 48 #define MAX_CTBS_IN_FRAME (HEVCE_MAX_WIDTH * HEVCE_MAX_HEIGHT) / (MIN_CTB_SIZE * MIN_CTB_SIZE) 49 #define MAX_NUM_CTB_ROWS_FRM (HEVCE_MAX_HEIGHT) / (MIN_CTB_SIZE) 50 51 #define MIN_VERT_PROC_UNIT (8) 52 #define MAX_NUM_VERT_UNITS_FRM (HEVCE_MAX_HEIGHT) / (MIN_VERT_PROC_UNIT) 53 54 #define HEVCE_MAX_REF_PICS 8 55 #define HEVCE_MAX_DPB_PICS (HEVCE_MAX_REF_PICS + 1) 56 57 #define PAD_HORZ 80 58 #define PAD_VERT 80 59 60 #define DEFAULT_MAX_REFERENCE_PICS 4 61 62 #define BLU_RAY_SUPPORT 231457 63 64 /** @brief max number of parts in minCU : max 4 for NxN */ 65 #define NUM_PU_PARTS 4 66 /** @brief max number of parts in Inter CU */ 67 #define NUM_INTER_PU_PARTS (MAX_NUM_INTER_PARTS) 68 #define SEND_BI_RDOPT 69 #ifdef SEND_BI_RDOPT 70 /** @brief */ 71 #define MAX_INTER_CU_CANDIDATES 4 72 #else 73 /** @brief */ 74 #define MAX_INTER_CU_CANDIDATES 3 75 #endif 76 /** @brief */ 77 #define MAX_INTRA_CU_CANDIDATES 3 78 79 #define MAX_INTRA_CANDIDATES 35 80 81 /** For each resolution & bit-rate instance, one entropy thread is created */ 82 #define NUM_ENTROPY_THREADS (IHEVCE_MAX_NUM_RESOLUTIONS * IHEVCE_MAX_NUM_BITRATES) 83 84 /* Number of buffers between Decomp and HME layers 1 : Seq mode >1 parallel mode */ 85 #define NUM_BUFS_DECOMP_HME 1 86 87 /** Macro to indicate pre me and L0 ipe stagger in pre enc*/ 88 /** Implies MAX_PRE_ENC_STAGGER - 1 max stagger*/ 89 #define MAX_PRE_ENC_STAGGER (NUM_LAP2_LOOK_AHEAD + 1 + MIN_L1_L0_STAGGER_NON_SEQ) 90 91 #define NUM_ME_ENC_BUFS (MAX_NUM_ENC_LOOP_PARALLEL) 92 93 #define MIN_L0_IPE_ENC_STAGGER 1 94 95 /*stagger between L0 IPE and enc*/ 96 #define MAX_L0_IPE_ENC_STAGGER (NUM_ME_ENC_BUFS + (MIN_L0_IPE_ENC_STAGGER)) 97 98 #define MAX_PRE_ENC_RC_DELAY (MAX_L0_IPE_ENC_STAGGER + 1 + NUM_BUFS_DECOMP_HME) 99 100 #define MIN_PRE_ENC_RC_DELAY (MIN_L0_IPE_ENC_STAGGER + 1 + NUM_BUFS_DECOMP_HME) 101 102 /** @brief number of ctb contexts maintained at frame level b/w encode : entropy */ 103 #define NUM_FRMPROC_ENTCOD_BUFS 1 104 105 /** @brief number of extra recon buffs required for stagger design*/ 106 #define NUM_EXTRA_RECON_BUFS 0 107 108 /** recon picture buffer size need to be increased to support EncLoop Parallelism **/ 109 #define NUM_EXTRA_RECON_BUFS_FOR_ELP 0 110 111 /** @brief maximum number of bytes in 4x4 afetr scanning */ 112 #define MAX_SCAN_COEFFS_BYTES_4x4 (48) 113 114 /** @brief maximum number of luma coeffs bytes after scan at CTB level */ 115 #define MAX_LUMA_COEFFS_CTB ((MAX_SCAN_COEFFS_BYTES_4x4) * (MAX_TU_IN_CTB)*4) 116 117 /** @brief maximum number of chroma coeffs bytes after scan at CTB level */ 118 #define MAX_CHRM_COEFFS_CTB ((MAX_SCAN_COEFFS_BYTES_4x4) * ((MAX_TU_IN_CTB >> 1)) * 4) 119 120 /** @brief maximum number of coeffs bytes after scan at CTB level */ 121 #define MAX_SCAN_COEFFS_CTB ((MAX_LUMA_COEFFS_CTB) + (MAX_CHRM_COEFFS_CTB)) 122 123 /** @breif PU map CTB buffer buyes for neighbour availibility */ 124 #define MUN_PU_MAP_BYTES_PER_CTB (MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW) 125 126 /** @brief tottal system memory records */ 127 #define TOTAL_SYSTEM_MEM_RECS 120 128 129 /** @brief number of input async command buffers */ 130 #define NUM_AYSNC_CMD_BUFS 4 131 132 /** @brief Comand buffers size */ 133 #define ENC_COMMAND_BUFF_SIZE 512 /* 512 bytes */ 134 135 /** @brief Number of output buffers */ 136 #define NUM_OUTPUT_BUFS 4 137 138 /** @brief Lamda for SATD cost estimation */ 139 #define LAMDA_SATD 1 140 141 /** @brief Maximum number of 1s in u2_sig_coeff_abs_gt1_flags */ 142 #define MAX_GT_ONE 8 143 144 /** MAX num ipntra pred modes */ 145 #define MAX_NUM_IP_MODES 35 146 147 /** Number of best intra modes used for intra mode refinement */ 148 #define NUM_BEST_MODES 3 149 150 /** Maximim number of parallel frame processing threads in pre enocde group */ 151 #define MAX_NUM_FRM_PROC_THRDS_PRE_ENC MAX_NUM_CORES 152 153 /** Maximim number of parallel frame processing threads in encode group */ 154 #define MAX_NUM_FRM_PROC_THRDS_ENC MAX_NUM_CORES 155 156 /** Macro to indicate teh PING_PONG buffers for stagger*/ 157 #define PING_PONG_BUF 2 158 159 /** Max number of layers in Motion estimation 160 * should be greater than or equal to MAX_NUM_LAYERS defined in hme_interface.h 161 */ 162 163 #define MAX_NUM_HME_LAYERS 5 164 /** 165 ****************************************************************************** 166 * @brief Maximum number of layers allowed 167 ****************************************************************************** 168 */ 169 #define MAX_NUM_LAYERS 4 170 171 #define NUM_RC_PIC_TYPE 9 172 173 #define MAX_NUM_NODES_CU_TREE (85) 174 175 /* macros to control Dynamic load balance */ 176 #define DYN_LOAD_BAL_UPPER_LIMIT 0.80 177 178 #define DYN_LOAD_BAL_LOWER_LIMIT 0.20 179 180 #define NUM_SUB_GOP_DYN_BAL 1 181 182 #define MIN_NUM_FRMS_DYN_BAL 4 183 184 #define CORES_SRES_OR_MRES 2 185 186 #define HME_HIGH_SAD_BLK_THRESH 35 187 188 /* Enable to compare cabac states of final entropy thread with enc loop states */ 189 #define VERIFY_ENCLOOP_CABAC_STATES 0 190 191 #define MAX_NUM_BLKS_IN_MAX_CU 64 /* max cu size is 64x64 */ 192 193 /*****************************************************************************/ 194 /* Function Macros */ 195 /*****************************************************************************/ 196 197 /*****************************************************************************/ 198 /* Typedefs */ 199 /*****************************************************************************/ 200 typedef void (*pf_iq_it_rec)( 201 WORD16 *pi2_src, 202 WORD16 *pi2_tmp, 203 UWORD8 *pu1_pred, 204 WORD16 *pi2_dequant_coeff, 205 UWORD8 *pu1_dst, 206 WORD32 qp_div, /* qpscaled / 6 */ 207 WORD32 qp_rem, /* qpscaled % 6 */ 208 WORD32 src_strd, 209 WORD32 pred_strd, 210 WORD32 dst_strd, 211 WORD32 zero_cols, 212 WORD32 zero_rows); 213 214 typedef void (*pf_intra_pred)( 215 UWORD8 *pu1_ref, WORD32 src_strd, UWORD8 *pu1_dst, WORD32 dst_strd, WORD32 nt, WORD32 mode); 216 217 typedef UWORD32 (*pf_res_trans_luma)( 218 UWORD8 *pu1_src, 219 UWORD8 *pu1_pred, 220 WORD32 *pi4_tmp, 221 WORD16 *pi2_dst, 222 WORD32 src_strd, 223 WORD32 pred_strd, 224 WORD32 dst_strd, 225 CHROMA_PLANE_ID_T e_chroma_plane); 226 227 typedef WORD32 (*pf_quant)( 228 WORD16 *pi2_coeffs, 229 WORD16 *pi2_quant_coeff, 230 WORD16 *pi2_dst, 231 WORD32 qp_div, /* qpscaled / 6 */ 232 WORD32 qp_rem, /* qpscaled % 6 */ 233 WORD32 q_add, 234 WORD32 src_strd, 235 WORD32 dst_strd, 236 UWORD8 *pu1_csbf_buf, 237 WORD32 csbf_strd, 238 WORD32 *zero_cols, 239 WORD32 *zero_row); 240 241 /*****************************************************************************/ 242 /* Enums */ 243 /*****************************************************************************/ 244 /// supported partition shape 245 typedef enum 246 { 247 SIZE_2Nx2N = 0, ///< symmetric motion partition, 2Nx2N 248 SIZE_2NxN = 1, ///< symmetric motion partition, 2Nx N 249 SIZE_Nx2N = 2, ///< symmetric motion partition, Nx2N 250 SIZE_NxN = 3, ///< symmetric motion partition, Nx N 251 SIZE_2NxnU = 4, ///< asymmetric motion partition, 2Nx( N/2) + 2Nx(3N/2) 252 SIZE_2NxnD = 5, ///< asymmetric motion partition, 2Nx(3N/2) + 2Nx( N/2) 253 SIZE_nLx2N = 6, ///< asymmetric motion partition, ( N/2)x2N + (3N/2)x2N 254 SIZE_nRx2N = 7 ///< asymmetric motion partition, (3N/2)x2N + ( N/2)x2N 255 } PART_SIZE_E; 256 257 /** @brief Interface level Queues of Encoder */ 258 259 typedef enum 260 { 261 IHEVCE_INPUT_DATA_CTRL_Q = 0, 262 IHEVCE_ENC_INPUT_Q, 263 IHEVCE_INPUT_ASYNCH_CTRL_Q, 264 IHEVCE_OUTPUT_DATA_Q, 265 IHEVCE_OUTPUT_STATUS_Q, 266 IHEVCE_RECON_DATA_Q, // /*que for holding recon buffer */ 267 268 IHEVCE_FRM_PRS_ENT_COD_Q, /*que for holding output buffer of enc_loop |input buffer of entropy */ 269 270 IHEVCE_PRE_ENC_ME_Q, /*que for holding input buffer to ME | output of pre-enc */ 271 272 IHEVCE_ME_ENC_RDOPT_Q, /* que for holding output buffer of ME or input buffer of Enc-RDopt */ 273 274 IHEVCE_L0_IPE_ENC_Q, /* Queue for holding L0 ipe data to enc loop*/ 275 276 /* should be last entry */ 277 IHEVCE_MAX_NUM_QUEUES 278 279 } IHEVCE_Q_DESC_T; 280 281 /*****************************************************************************/ 282 /* Structure */ 283 /*****************************************************************************/ 284 285 /** 286 RC_QP_QSCALE conversion structures 287 **/ 288 typedef struct 289 { 290 WORD16 i2_min_qp; 291 292 WORD16 i2_max_qp; 293 294 WORD16 i2_min_qscale; 295 296 WORD16 i2_max_qscale; 297 298 WORD32 *pi4_qscale_to_qp; 299 300 WORD32 *pi4_qp_to_qscale_q_factor; 301 302 WORD32 *pi4_qp_to_qscale; 303 304 WORD8 i1_qp_offset; 305 306 } rc_quant_t; 307 308 /** 309 ****************************************************************************** 310 * @brief 4x4 level structure which contains all the parameters 311 * for neighbour prediction puopose 312 ****************************************************************************** 313 */ 314 typedef struct 315 { 316 /** PU motion vectors */ 317 pu_mv_t mv; 318 /** Intra or Inter flag for each partition - 0 or 1 */ 319 UWORD16 b1_intra_flag : 1; 320 /** CU skip flag - 0 or 1 */ 321 UWORD16 b1_skip_flag : 1; 322 /** CU depth in CTB tree (0-3) */ 323 UWORD16 b2_cu_depth : 2; 324 325 /** Y Qp for loop filter */ 326 WORD16 b8_qp : 8; 327 328 /** Luma Intra Mode 0 - 34 */ 329 UWORD16 b6_luma_intra_mode : 6; 330 331 /** Y CBF for BS compute */ 332 UWORD16 b1_y_cbf : 1; 333 /** Pred L0 flag of current 4x4 */ 334 UWORD16 b1_pred_l0_flag : 1; 335 336 /** Pred L0 flag of current 4x4 */ 337 UWORD16 b1_pred_l1_flag : 1; 338 } nbr_4x4_t; 339 340 typedef struct 341 { 342 /** Bottom Left availability flag */ 343 UWORD8 u1_bot_lt_avail; 344 345 /** Left availability flag */ 346 UWORD8 u1_left_avail; 347 348 /** Top availability flag */ 349 UWORD8 u1_top_avail; 350 351 /** Top Right availability flag */ 352 UWORD8 u1_top_rt_avail; 353 354 /** Top Left availability flag */ 355 UWORD8 u1_top_lt_avail; 356 357 } nbr_avail_flags_t; 358 359 typedef struct 360 { 361 /** prev intra flag*/ 362 UWORD8 b1_prev_intra_luma_pred_flag : 1; 363 364 /** mpm_idx */ 365 UWORD8 b2_mpm_idx : 2; 366 367 /** reminder pred mode */ 368 UWORD8 b5_rem_intra_pred_mode : 5; 369 370 } intra_prev_rem_flags_t; 371 372 /** 373 ****************************************************************************** 374 * @brief calc (T+Q+RDOQ) output TU structure; entropy input TU structure 375 ****************************************************************************** 376 */ 377 typedef struct 378 { 379 /** base tu structure */ 380 tu_t s_tu; 381 382 /** offset of luma data in ecd buffer */ 383 WORD32 i4_luma_coeff_offset; 384 385 /** offset of cb data in ecd buffer */ 386 WORD32 ai4_cb_coeff_offset[2]; 387 388 /** offset of cr data in ecd buffer */ 389 WORD32 ai4_cr_coeff_offset[2]; 390 391 } tu_enc_loop_out_t; 392 393 typedef struct 394 { 395 /* L0 Motion Vector */ 396 mv_t s_l0_mv; 397 398 /* L1 Motion Vector */ 399 mv_t s_l1_mv; 400 401 /* L0 Ref index */ 402 WORD8 i1_l0_ref_idx; 403 404 /* L1 Ref index */ 405 WORD8 i1_l1_ref_idx; 406 407 /* L0 Ref Pic Buf ID */ 408 WORD8 i1_l0_pic_buf_id; 409 410 /* L1 Ref Pic Buf ID */ 411 WORD8 i1_l1_pic_buf_id; 412 413 /** intra flag */ 414 UWORD8 b1_intra_flag : 1; 415 416 /* Pred mode */ 417 UWORD8 b2_pred_mode : 2; 418 419 /* reserved flag can be used for something later */ 420 UWORD8 u1_reserved; 421 422 } pu_col_mv_t; 423 424 /*****************************************************************************/ 425 /* Encoder uses same structure as pu_t for prediction unit */ 426 /*****************************************************************************/ 427 428 /** 429 ****************************************************************************** 430 * @brief Encode loop (T+Q+RDOQ) output CU structure; entropy input CU structure 431 ****************************************************************************** 432 */ 433 typedef struct 434 { 435 /* CU X position in terms of min CU (8x8) units */ 436 UWORD32 b3_cu_pos_x : 3; 437 438 /* CU Y position in terms of min CU (8x8) units */ 439 UWORD32 b3_cu_pos_y : 3; 440 441 /** CU size in terms of min CU (8x8) units */ 442 UWORD32 b4_cu_size : 4; 443 444 /** transquant bypass flag ; 0 for this encoder */ 445 UWORD32 b1_tq_bypass_flag : 1; 446 447 /** cu skip flag */ 448 UWORD32 b1_skip_flag : 1; 449 450 /** intra / inter CU flag */ 451 UWORD32 b1_pred_mode_flag : 1; 452 453 /** indicates partition information for CU 454 * For intra 0 : for 2Nx2N / 1 for NxN iff CU=minCBsize 455 * For inter 0 : @sa PART_SIZE_E 456 */ 457 UWORD32 b3_part_mode : 3; 458 459 /** 0 for this encoder */ 460 UWORD32 b1_pcm_flag : 1; 461 462 /** only applicable for intra cu */ 463 UWORD32 b3_chroma_intra_pred_mode : 3; 464 465 /** no residue flag for cu */ 466 UWORD32 b1_no_residual_syntax_flag : 1; 467 468 /* flag to indicate if current CU is the first 469 CU of the Quantisation group*/ 470 UWORD32 b1_first_cu_in_qg : 1; 471 472 /** Intra prev and reminder flags 473 * if part is NxN the tntries 1,2,3 will be valid 474 * other wise only enry 0 will be set. 475 */ 476 intra_prev_rem_flags_t as_prev_rem[NUM_PU_PARTS]; 477 478 /** 479 * Access valid number of pus in this array based on u1_part_mode 480 * Moiton vector differentials and reference idx should be 481 * populated in this structure 482 * @remarks shall be accessed only for inter pus 483 */ 484 pu_t *ps_pu; 485 486 /** 487 * pointer to first tu of this cu. Each TU need to be populated 488 * in TU order by calc. Total TUs in CU is given by u2_num_tus_in_cu 489 */ 490 tu_enc_loop_out_t *ps_enc_tu; 491 492 /** total TUs in this CU; shall be 0 if b1_no_residual_syntax_flag = 1 */ 493 UWORD16 u2_num_tus_in_cu; 494 495 /** Coeff bufer pointer */ 496 /* Pointer to transform coeff data */ 497 /*************************************************************************/ 498 /* Following format is repeated for every coded TU */ 499 /* Luma Block */ 500 /* num_coeffs : 16 bits */ 501 /* zero_cols : 8 bits ( 1 bit per 4 columns) */ 502 /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */ 503 /* coeff_data : Non zero coefficients */ 504 /* Cb Block (only for last TU in 4x4 case else for every luma TU) */ 505 /* num_coeffs : 16 bits */ 506 /* zero_cols : 8 bits ( 1 bit per 4 columns) */ 507 /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */ 508 /* coeff_data : Non zero coefficients */ 509 /* Cr Block (only for last TU in 4x4 case else for every luma TU) */ 510 /* num_coeffs : 16 bits */ 511 /* zero_cols : 8 bits ( 1 bit per 4 columns) */ 512 /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */ 513 /* coeff_data : Non zero coefficients */ 514 /*************************************************************************/ 515 void *pv_coeff; 516 517 /** qp used during for CU 518 * @remarks : 519 */ 520 WORD8 i1_cu_qp; 521 522 } cu_enc_loop_out_t; 523 524 /** 525 * SAO 526 */ 527 typedef struct 528 { 529 /** 530 * sao_type_idx_luma 531 */ 532 UWORD32 b3_y_type_idx : 3; 533 534 /** 535 * luma sao_band_position 536 */ 537 UWORD32 b5_y_band_pos : 5; 538 539 /** 540 * sao_type_idx_chroma 541 */ 542 UWORD32 b3_cb_type_idx : 3; 543 544 /** 545 * cb sao_band_position 546 */ 547 UWORD32 b5_cb_band_pos : 5; 548 549 /** 550 * sao_type_idx_chroma 551 */ 552 UWORD32 b3_cr_type_idx : 3; 553 554 /** 555 * cb sao_band_position 556 */ 557 UWORD32 b5_cr_band_pos : 5; 558 559 /*SAO Offsets 560 * In all these offsets, 0th element is not used 561 */ 562 /** 563 * luma SaoOffsetVal[i] 564 */ 565 WORD8 u1_y_offset[5]; 566 567 /** 568 * chroma cb SaoOffsetVal[i] 569 */ 570 WORD8 u1_cb_offset[5]; 571 572 /** 573 * chroma cr SaoOffsetVal[i] 574 */ 575 WORD8 u1_cr_offset[5]; 576 577 /** 578 * sao_merge_left_flag common for y,cb,cr 579 */ 580 UWORD32 b1_sao_merge_left_flag : 1; 581 582 /** 583 * sao_merge_up_flag common for y,cb,cr 584 */ 585 UWORD32 b1_sao_merge_up_flag : 1; 586 587 } sao_enc_t; 588 589 /** 590 ****************************************************************************** 591 * @brief ctb output structure; output of Encode loop, input to entropy 592 ****************************************************************************** 593 */ 594 typedef struct 595 { 596 /** 597 * bit0 : depth0 split flag, (64x64 splits) 598 * bits 1-3 : not used 599 * bits 4-7 : depth1 split flags; valid iff depth0 split=1 (32x32 splits) 600 * bits 8-23: depth2 split flags; (if 0 16x16 is cu else 8x8 min cu) 601 602 * if a split flag of n is set for depth 1, check the following split flags 603 * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2: 604 * 605 */ 606 UWORD32 u4_cu_split_flags; 607 608 /*************************************************************** 609 * For any given CU position CU_posx, CU_posy access 610 * au4_packed_tu_split_flags[(CU_posx >> 5)[(CU_posy >> 5)] 611 * Note : For CTB size smaller than 64x64 only use u4_packed_tu_split_flags[0] 612 ****************************************************************/ 613 614 /** 615 * access bits corresponding to actual CU size till leaf nodes 616 * bit0 : (32x32 TU split flag) 617 * bits 1-3 : not used 618 * bits 4-7 : (16x16 TUsplit flags) 619 * bits 8-23: (8x8 TU split flags) 620 621 * if a split flag of n is set for depth 1, check the following split flags 622 * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2: 623 * 624 * @remarks As tu sizes are relative to CU sizes the producer has to 625 * make sure the correctness of u4_packed_tu_split_flags. 626 * 627 * @remarks au4_packed_tu_split_flags_cu[1]/[2]/[3] to be used only 628 * for 64x64 ctb. 629 */ 630 UWORD32 au4_packed_tu_split_flags_cu[4]; 631 632 /** 633 * pointer to first CU of CTB. Each CU need to be populated 634 * in CU order by calc. Total CUs in CTB is given by u1_num_cus_in_ctb 635 */ 636 cu_enc_loop_out_t *ps_enc_cu; 637 638 /** total TUs in this CU; shall be 0 if b1_no_residual_syntax_flag = 1 */ 639 UWORD8 u1_num_cus_in_ctb; 640 641 /** CTB neighbour availability flags */ 642 nbr_avail_flags_t s_ctb_nbr_avail_flags; 643 644 /* SAO parameters of the CTB */ 645 sao_enc_t s_sao; 646 647 } ctb_enc_loop_out_t; 648 649 /** 650 ****************************************************************************** 651 * @brief cu inter candidate for encoder 652 ****************************************************************************** 653 */ 654 typedef struct 655 { 656 /** base pu structure 657 * access valid number of entries in this array based on u1_part_size 658 */ 659 pu_t as_inter_pu[NUM_INTER_PU_PARTS]; 660 661 /* TU split flag : tu_split_flag[0] represents the transform splits 662 * for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds 663 * to respective 32x32 */ 664 /* For a 8x8 TU - 1 bit used to indicate split */ 665 /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */ 666 /* For a 32x32 TU - See above */ 667 WORD32 ai4_tu_split_flag[4]; 668 669 /* TU split flag : tu_split_flag[0] represents the transform splits 670 * for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds 671 * to respective 32x32 */ 672 /* For a 8x8 TU - 1 bit used to indicate split */ 673 /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */ 674 /* For a 32x32 TU - See above */ 675 WORD32 ai4_tu_early_cbf[4]; 676 677 /**Pointer to the buffer having predicted data after mc in SATD stage 678 * Since we have 2 buffers for each candidate pred data for best merge candidate 679 * can be in one of the 2 buffers. 680 */ 681 UWORD8 *pu1_pred_data; 682 683 UWORD16 *pu2_pred_data; 684 685 UWORD8 *pu1_pred_data_scr; 686 687 UWORD16 *pu2_pred_data_src; 688 689 /* Total cost: SATD cost + MV cost */ 690 WORD32 i4_total_cost; 691 692 /** Stride for predicted data*/ 693 WORD32 i4_pred_data_stride; 694 695 /** @remarks u1_part_size can be non square only for Inter */ 696 UWORD8 b3_part_size : 3; /* @sa: PART_SIZE_E */ 697 698 /** evaluate transform for cusize iff this flag is 1 */ 699 /** this flag should be set 0 if CU is 64x64 */ 700 UWORD8 b1_eval_tx_cusize : 1; 701 702 /** evaluate transform for cusize/2 iff this flag is 1 */ 703 UWORD8 b1_eval_tx_cusize_by2 : 1; 704 705 /** Skip Flag : ME should always set this 0 for the candidates */ 706 UWORD8 b1_skip_flag : 1; 707 708 UWORD8 b1_intra_has_won : 1; 709 710 /* used to mark if this mode needs to be evaluated in auxiliary mode */ 711 /* if 1, this mode will be evaluated otherwise not.*/ 712 UWORD8 b1_eval_mark : 1; 713 714 } cu_inter_cand_t; 715 716 /** 717 ****************************************************************************** 718 * @brief cu intra candidate for encoder 719 ****************************************************************************** 720 */ 721 typedef struct 722 { 723 UWORD8 au1_intra_luma_mode_nxn_hash[NUM_PU_PARTS][MAX_INTRA_CANDIDATES]; 724 725 /** 726 * List of NxN PU candidates in CU for each partition 727 * valid only of if current cusize = mincusize 728 * +1 to signal the last flag invalid value of 255 needs to be stored 729 */ 730 UWORD8 au1_intra_luma_modes_nxn[NUM_PU_PARTS][(MAX_INTRA_CU_CANDIDATES * (4)) + 2 + 1]; 731 732 /* used to mark if this mode needs to be evaluated in auxiliary mode */ 733 /* if 1, this mode will be evaluated otherwise not.*/ 734 UWORD8 au1_nxn_eval_mark[NUM_PU_PARTS][MAX_INTRA_CU_CANDIDATES + 1]; 735 736 /** 737 * List of 2Nx2N PU candidates in CU 738 * +1 to signal the last flag invalid value of 255 needs to be stored 739 */ 740 UWORD8 au1_intra_luma_modes_2nx2n_tu_eq_cu[MAX_INTRA_CU_CANDIDATES + 1]; 741 742 /** 743 * List of 2Nx2N PU candidates in CU 744 * +1 to signal the last flag invalid value of 255 needs to be stored 745 */ 746 UWORD8 au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[MAX_INTRA_CU_CANDIDATES + 1]; 747 748 /* used to mark if this mode needs to be evaluated in auxiliary mode */ 749 /* if 1, this mode will be evaluated otherwise not.*/ 750 UWORD8 au1_2nx2n_tu_eq_cu_eval_mark[MAX_INTRA_CU_CANDIDATES + 1]; 751 752 /* used to mark if this mode needs to be evaluated in auxiliary mode */ 753 /* if 1, this mode will be evaluated otherwise not.*/ 754 UWORD8 au1_2nx2n_tu_eq_cu_by_2_eval_mark[MAX_INTRA_CU_CANDIDATES + 1]; 755 756 UWORD8 au1_num_modes_added[NUM_PU_PARTS]; 757 758 /** evaluate transform for cusize iff this flag is 1 */ 759 /** this flag should be set 0 if CU is 64x64 */ 760 UWORD8 b1_eval_tx_cusize : 1; 761 762 /** evaluate transform for cusize/2 iff this flag is 1 */ 763 UWORD8 b1_eval_tx_cusize_by2 : 1; 764 765 /** number of intra candidates for SATD evaluation in */ 766 UWORD8 b6_num_intra_cands : 6; 767 768 } cu_intra_cand_t; 769 770 /** 771 ****************************************************************************** 772 * @brief cu structure for mode analysis/evaluation 773 ****************************************************************************** 774 */ 775 typedef struct 776 { 777 /** CU X position in terms of min CU (8x8) units */ 778 UWORD8 b3_cu_pos_x : 3; 779 780 /** CU Y position in terms of min CU (8x8) units */ 781 UWORD8 b3_cu_pos_y : 3; 782 783 /** reserved bytes */ 784 UWORD8 b2_reserved : 2; 785 786 /** CU size 2N (width or height) in pixels */ 787 UWORD8 u1_cu_size; 788 789 /** Intra CU candidates after FAST CU decision (output of IPE) 790 * 8421 algo along with transform size evalution will 791 * be done for these modes in Encode loop pass. 792 */ 793 cu_intra_cand_t s_cu_intra_cand; 794 795 /** indicates the angular mode (0 - 34) for chroma, 796 * Note : No provision currently to take chroma through RDOPT or SATD 797 */ 798 UWORD8 u1_chroma_intra_pred_mode; 799 800 /** number of inter candidates in as_cu_inter_cand[] 801 * shall be 0 for intra frames. 802 * These inters are evaluated for RDOPT apart from merge/skip candidates 803 */ 804 UWORD8 u1_num_inter_cands; 805 806 /** List of candidates to be evalauted (SATD/RDOPT) for this CU 807 * @remarks : all merge/skip candidates not a part of this list 808 */ 809 cu_inter_cand_t as_cu_inter_cand[MAX_INTER_CU_CANDIDATES]; 810 811 WORD32 ai4_mv_cost[MAX_INTER_CU_CANDIDATES][NUM_INTER_PU_PARTS]; 812 813 #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING 814 WORD32 ai4_err_metric[MAX_INTER_CU_CANDIDATES][NUM_INTER_PU_PARTS]; 815 #endif 816 817 /* Flag to convey if Inta or Inter is the best candidate among the 818 candidates populated 819 0: If inter is the winner and 1: if Intra is winner*/ 820 UWORD8 u1_best_is_intra; 821 822 /** number of intra rdopt candidates 823 * @remarks : shall be <= u1_num_intra_cands 824 */ 825 UWORD8 u1_num_intra_rdopt_cands; 826 /** qp used during for CU 827 * @remarks : 828 */ 829 WORD8 i1_cu_qp; 830 /** Activity factor used in pre enc thread for deriving the Qp 831 * @remarks : This is in Q format 832 */ 833 WORD32 i4_act_factor[4][2]; 834 835 } cu_analyse_t; 836 837 /** 838 ****************************************************************************** 839 * @brief Structure for CU recursion 840 ****************************************************************************** 841 */ 842 typedef struct cur_ctb_cu_tree_t 843 { 844 /** CU X position in terms of min CU (8x8) units */ 845 UWORD8 b3_cu_pos_x : 3; 846 847 /** CU X position in terms of min CU (8x8) units */ 848 UWORD8 b3_cu_pos_y : 3; 849 850 /** reserved bytes */ 851 UWORD8 b2_reserved : 2; 852 853 UWORD8 u1_cu_size; 854 855 UWORD8 u1_intra_eval_enable; 856 857 UWORD8 u1_inter_eval_enable; 858 859 /* Flag that indicates whether to evaluate this node */ 860 /* during RDOPT evaluation. This does not mean that */ 861 /* evaluation of the children need to be abandoned */ 862 UWORD8 is_node_valid; 863 864 LWORD64 i8_best_rdopt_cost; 865 866 struct cur_ctb_cu_tree_t *ps_child_node_tl; 867 868 struct cur_ctb_cu_tree_t *ps_child_node_tr; 869 870 struct cur_ctb_cu_tree_t *ps_child_node_bl; 871 872 struct cur_ctb_cu_tree_t *ps_child_node_br; 873 874 } cur_ctb_cu_tree_t; 875 876 typedef struct 877 { 878 WORD32 num_best_results; 879 880 part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS]; 881 882 } block_data_32x32_t; 883 884 /** 885 ****************************************************************************** 886 * @brief Structure for storing data about all the 64x64 887 * block in a 64x64 CTB 888 ****************************************************************************** 889 */ 890 typedef block_data_32x32_t block_data_64x64_t; 891 892 /** 893 ****************************************************************************** 894 * @brief Structure for storing data about all 16 16x16 895 * blocks in a 64x64 CTB and each of their partitions 896 ****************************************************************************** 897 */ 898 typedef struct 899 { 900 WORD32 num_best_results; 901 902 /** 903 * mask of active partitions, Totally 17 bits. For a given partition 904 * id, as per PART_ID_T enum the corresponding bit position is 1/0 905 * indicating that partition is active or inactive 906 */ 907 /*WORD32 i4_part_mask;*/ 908 909 part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS]; 910 911 } block_data_16x16_t; 912 913 typedef struct 914 { 915 WORD32 num_best_results; 916 917 part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS]; 918 } block_data_8x8_t; 919 920 /** 921 ****************************************************************************** 922 * @brief Structure for data export from ME to Enc_Loop 923 ****************************************************************************** 924 */ 925 typedef struct 926 { 927 block_data_8x8_t as_8x8_block_data[64]; 928 929 block_data_16x16_t as_block_data[16]; 930 931 block_data_32x32_t as_32x32_block_data[4]; 932 933 block_data_64x64_t s_64x64_block_data; 934 935 } me_ctb_data_t; 936 937 /** 938 ****************************************************************************** 939 * @brief noise detection related structure 940 * 941 ****************************************************************************** 942 */ 943 944 typedef struct 945 { 946 WORD32 i4_noise_present; 947 948 UWORD8 au1_is_8x8Blk_noisy[MAX_CU_IN_CTB]; 949 950 UWORD32 au4_variance_src_16x16[MAX_CU_IN_CTB]; 951 } ihevce_ctb_noise_params; 952 953 /** 954 ****************************************************************************** 955 * @brief ctb structure for mode analysis/evaluation 956 ****************************************************************************** 957 */ 958 typedef struct 959 { 960 /** 961 * CU decision in a ctb is frozen by ME/IPE and populated in 962 * u4_packed_cu_split_flags. 963 * @remarks 964 * TODO:review comment 965 * bit0 : 64x64 split flag, (depth0 flag for 64x64 ctb unused for smaller ctb) 966 * bits 1-3 : not used 967 * bits 4-7 : 32x32 split flags; (depth1 flags for 64x64ctb / only bit4 used for 32x32ctb) 968 * bits 8-23: 16x16 split flags; (depth2 flags for 64x64 / depth1[bits8-11] for 32x32 [bit8 for ctb 16x16] ) 969 970 * if a split flag of n is set for depth 1, check the following split flags 971 * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2: 972 * 973 */ 974 UWORD32 u4_cu_split_flags; 975 976 UWORD8 u1_num_cus_in_ctb; 977 978 cur_ctb_cu_tree_t *ps_cu_tree; 979 980 me_ctb_data_t *ps_me_ctb_data; 981 982 ihevce_ctb_noise_params s_ctb_noise_params; 983 984 } ctb_analyse_t; 985 /** 986 ****************************************************************************** 987 * @brief Structures for tapping ssd and bit-estimate information for all CUs 988 ****************************************************************************** 989 */ 990 991 typedef struct 992 { 993 LWORD64 i8_cost; 994 WORD32 i4_idx; 995 } cost_idx_t; 996 997 /** 998 ****************************************************************************** 999 * @brief reference/non reference pic context for encoder 1000 ****************************************************************************** 1001 */ 1002 typedef struct 1003 1004 { 1005 /** 1006 * YUV buffer discriptor for the recon 1007 * Allocation per frame for Y = ((ALIGN(frame width, MAX_CTB_SIZE)) + 2 * PAD_HORZ)* 1008 * ((ALIGN(frame height, MAX_CTB_SIZE)) + 2 * PAD_VERT) 1009 */ 1010 iv_enc_yuv_buf_t s_yuv_buf_desc; 1011 1012 iv_enc_yuv_buf_src_t s_yuv_buf_desc_src; 1013 1014 /* Pointer to Luma (Y) sub plane buffers Horz/ Vert / HV grid */ 1015 /* When (L0ME_IN_OPENLOOP_MODE == 1), additional buffer required to store */ 1016 /* the fullpel plane for use as reference */ 1017 UWORD8 *apu1_y_sub_pel_planes[3 + L0ME_IN_OPENLOOP_MODE]; 1018 1019 /** 1020 * frm level pointer to pu bank for colocated mv access 1021 * Allocation per frame = (ALIGN(frame width, MAX_CTB_SIZE) / MIN_PU_SIZE) * 1022 * (ALIGN(frame height, MAX_CTB_SIZE) / MIN_PU_SIZE) 1023 */ 1024 pu_col_mv_t *ps_frm_col_mv; 1025 /** 1026 ************************************************************************ 1027 * Pointer to a PU map stored at frame level, 1028 * It contains a 7 bit pu index in encoder order w.r.t to a ctb at a min 1029 * granularirty of MIN_PU_SIZE size. 1030 ************************************************************************ 1031 */ 1032 UWORD8 *pu1_frm_pu_map; 1033 1034 /** CTB level frame buffer to store the accumulated sum of 1035 * number of PUs for every row */ 1036 UWORD16 *pu2_num_pu_map; 1037 1038 /** Offsets in the PU buffer at every CTB level */ 1039 UWORD32 *pu4_pu_off; 1040 1041 /** Collocated POC for reference list 0 1042 * ToDo: Change the array size when multiple slices are to be supported */ 1043 WORD32 ai4_col_l0_poc[HEVCE_MAX_REF_PICS]; 1044 1045 /** Collocated POC for reference list 1 */ 1046 WORD32 ai4_col_l1_poc[HEVCE_MAX_REF_PICS]; 1047 1048 /** 0 = top field, 1 = bottom field */ 1049 WORD32 i4_bottom_field; 1050 1051 /** top field first input in case of interlaced case */ 1052 WORD32 i4_topfield_first; 1053 1054 /** top field first input in case of interlaced case */ 1055 WORD32 i4_poc; 1056 1057 /** unique buffer id */ 1058 WORD32 i4_buf_id; 1059 1060 /** is this reference frame or not */ 1061 WORD32 i4_is_reference; 1062 1063 /** Picture type of current picture */ 1064 WORD32 i4_pic_type; 1065 1066 /** Flag to indicate whether current pictute is free or in use */ 1067 WORD32 i4_is_free; 1068 1069 /** Bit0 - of this Flag to indicate whether current pictute needs to be deblocked, 1070 padded and hpel planes need to be generated. 1071 These are turned off typically in non referecne pictures when psnr 1072 and recon dump is disabled. 1073 1074 Bit1 - of this flag set to 1 if sao is enabled. This is to enable deblocking when sao is enabled 1075 */ 1076 WORD32 i4_deblk_pad_hpel_cur_pic; 1077 1078 /** 1079 * weight and offset for this ref pic. To be initialized for every pic 1080 * based on the lap output 1081 */ 1082 ihevce_wght_offst_t s_weight_offset; 1083 1084 /** 1085 * Reciprocal of the lumaweight in q15 format 1086 */ 1087 WORD32 i4_inv_luma_wt; 1088 1089 /** 1090 * Log to base 2 of the common denominator used for luma weights across all ref pics 1091 */ 1092 WORD32 i4_log2_wt_denom; 1093 1094 /** 1095 * Used as Reference for encoding current picture flag 1096 */ 1097 WORD32 i4_used_by_cur_pic_flag; 1098 1099 #if ADAPT_COLOCATED_FROM_L0_FLAG 1100 WORD32 i4_frame_qp; 1101 #endif 1102 /* 1103 * IDR GOP number 1104 */ 1105 1106 WORD32 i4_idr_gop_num; 1107 1108 /* 1109 * non-ref-free_flag 1110 */ 1111 WORD32 i4_non_ref_free_flag; 1112 /** 1113 * Dependency manager instance for ME - Prev recon dep 1114 */ 1115 void *pv_dep_mngr_recon; 1116 1117 /*display num*/ 1118 WORD32 i4_display_num; 1119 } recon_pic_buf_t; 1120 1121 /** 1122 ****************************************************************************** 1123 * @brief Lambda values used for various cost computations 1124 ****************************************************************************** 1125 */ 1126 typedef struct 1127 { 1128 /************************************************************************/ 1129 /* The fields with the string 'type2' in their names are required */ 1130 /* when both 8bit and hbd lambdas are needed. The lambdas corresponding */ 1131 /* to the bit_depth != internal_bit_depth are stored in these fields */ 1132 /************************************************************************/ 1133 1134 /** 1135 * Closed loop SSD Lambda 1136 * This is multiplied with bits for RD cost computations in SSD mode 1137 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1138 */ 1139 LWORD64 i8_cl_ssd_lambda_qf; 1140 1141 LWORD64 i8_cl_ssd_type2_lambda_qf; 1142 1143 /** 1144 * Closed loop SSD Lambda for chroma residue (chroma qp is different from luma qp) 1145 * This is multiplied with bits for RD cost computations in SSD mode 1146 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1147 */ 1148 LWORD64 i8_cl_ssd_lambda_chroma_qf; 1149 1150 LWORD64 i8_cl_ssd_type2_lambda_chroma_qf; 1151 1152 /** 1153 * Closed loop SAD Lambda 1154 * This is multiplied with bits for RD cost computations in SAD mode 1155 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1156 */ 1157 WORD32 i4_cl_sad_lambda_qf; 1158 1159 WORD32 i4_cl_sad_type2_lambda_qf; 1160 1161 /** 1162 * Open loop SAD Lambda 1163 * This is multiplied with bits for RD cost computations in SAD mode 1164 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1165 */ 1166 WORD32 i4_ol_sad_lambda_qf; 1167 1168 WORD32 i4_ol_sad_type2_lambda_qf; 1169 1170 /** 1171 * Closed loop SATD Lambda 1172 * This is multiplied with bits for RD cost computations in SATD mode 1173 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1174 */ 1175 WORD32 i4_cl_satd_lambda_qf; 1176 1177 WORD32 i4_cl_satd_type2_lambda_qf; 1178 1179 /** 1180 * Open loop SATD Lambda 1181 * This is multiplied with bits for RD cost computations in SATD mode 1182 * This is represented in q format with shift of LAMBDA_Q_SHIFT 1183 */ 1184 WORD32 i4_ol_satd_lambda_qf; 1185 1186 WORD32 i4_ol_satd_type2_lambda_qf; 1187 1188 double lambda_modifier; 1189 1190 double lambda_uv_modifier; 1191 1192 UWORD32 u4_chroma_cost_weighing_factor; 1193 1194 } frm_lambda_ctxt_t; 1195 /** 1196 ****************************************************************************** 1197 * @brief Mode attributes for 4x4 block populated by early decision 1198 ****************************************************************************** 1199 */ 1200 typedef struct 1201 { 1202 /* If best mode is present or not */ 1203 UWORD8 mode_present; 1204 1205 /** Best mode for the current 4x4 prediction block */ 1206 UWORD8 best_mode; 1207 1208 /** sad for the best mode for the current 4x4 prediction block */ 1209 UWORD16 sad; 1210 1211 /** cost for the best mode for the current 4x4 prediction block */ 1212 UWORD16 sad_cost; 1213 1214 } ihevce_ed_mode_attr_t; //early decision 1215 1216 /** 1217 ****************************************************************************** 1218 * @brief Structure at 4x4 block level which has parameters about early 1219 * intra or inter decision 1220 ****************************************************************************** 1221 */ 1222 typedef struct 1223 { 1224 /** 1225 * Final parameter of Intra-Inter early decision for the current 4x4. 1226 * 0 - invalid decision 1227 * 1 - eval intra only 1228 * 2 - eval inter only 1229 * 3 - eval both intra and inter 1230 */ 1231 UWORD8 intra_or_inter; 1232 1233 UWORD8 merge_success; 1234 1235 /** Best mode for the current 4x4 prediction block */ 1236 UWORD8 best_mode; 1237 1238 /** Best mode for the current 4x4 prediction block */ 1239 UWORD8 best_merge_mode; 1240 1241 /** Store SATD at 4*4 level for current layer (L1) */ 1242 WORD32 i4_4x4_satd; 1243 1244 } ihevce_ed_blk_t; //early decision 1245 1246 /* l1 ipe ctb analyze structure */ 1247 /* Contains cu level qp mod related information for all possible cu 1248 sizes (16,32,64 in L0) in a CTB*/ 1249 typedef struct 1250 { 1251 WORD32 i4_sum_4x4_satd[16]; 1252 WORD32 i4_min_4x4_satd[16]; 1253 1254 /* satd for L1_8x8 blocks in L1_32x32 1255 * [16] : num L1_8x8 in L1_32x32 1256 * [2] : 0 - sum of L1_4x4 @ L1_8x8 1257 * - equivalent to transform size of 16x16 @ L0 1258 * 1 - min/median of L1_4x4 @ L1_8x8 1259 * - equivalent to transform size of 8x8 @ L0 1260 */ 1261 WORD32 i4_8x8_satd[16][2]; 1262 1263 /* satd for L1_16x16 blocks in L1_32x32 1264 * [4] : num L1_16x16 in L1_32x32 1265 * [3] : 0 - sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16 1266 * - equivalent to transform size of 32x32 @ L0 1267 * 1 - min/median of (sum of L1_4x4 @ L1_8x8) @ L1_16x16 1268 * - equivalent to transform size of 16x16 @ L0 1269 * 2 - min/median of (min/median of L1_4x4 @ L1_8x8) @ L1_16x16 1270 * - equivalent to transform size of 8x8 @ L0 1271 */ 1272 WORD32 i4_16x16_satd[4][3]; 1273 1274 /* Please note that i4_32x32_satd[0][3] contains sum of all 32x32 */ 1275 /* satd for L1_32x32 blocks in L1_32x32 1276 * [1] : num L1_32x32 in L1_32x32 1277 * [4] : 0 - min/median of (sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16) @ L1_32x32 1278 * - equivalent to transform size of 32x32 @ L0 1279 * 1 - min/median of (sum of L1_4x4 @ L1_8x8) @ L1_32x32 1280 * - equivalent to transform size of 16x16 @ L0 1281 * 2 - min/median of (min/median of L1_4x4 @ L1_8x8) @ L1_32x32 1282 * - equivalent to transform size of 8x8 @ L0 1283 * 3 - sum of (sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16) @ L1_32x32 1284 */ 1285 WORD32 i4_32x32_satd[1][4]; 1286 1287 /*Store SATD at 8x8 level for current layer (L1)*/ 1288 WORD32 i4_best_satd_8x8[16]; 1289 1290 /* EIID: This will be used for early inter intra decisions */ 1291 /*SAD at 8x8 level for current layer (l1) */ 1292 /*Cost based on sad at 8x8 level for current layer (l1) */ 1293 WORD32 i4_best_sad_cost_8x8_l1_ipe[16]; 1294 1295 WORD32 i4_best_sad_8x8_l1_ipe[16]; 1296 /* SAD at 8x8 level for ME. All other cost are IPE cost */ 1297 WORD32 i4_best_sad_cost_8x8_l1_me[16]; 1298 1299 /* SAD at 8x8 level for ME. for given reference */ 1300 WORD32 i4_sad_cost_me_for_ref[16]; 1301 1302 /* SAD at 8x8 level for ME. for given reference */ 1303 WORD32 i4_sad_me_for_ref[16]; 1304 1305 /* SAD at 8x8 level for ME. All other cost are IPE cost */ 1306 WORD32 i4_best_sad_8x8_l1_me[16]; 1307 1308 WORD32 i4_best_sad_8x8_l1_me_for_decide[16]; 1309 1310 /*Mean @ L0 16x16*/ 1311 WORD32 ai4_16x16_mean[16]; 1312 1313 /*Mean @ L0 32x32*/ 1314 WORD32 ai4_32x32_mean[4]; 1315 1316 /*Mean @ L0 64x64*/ 1317 WORD32 i4_64x64_mean; 1318 1319 } ihevce_ed_ctb_l1_t; //early decision 1320 1321 /** 1322 ****************************************************************************** 1323 * @brief 8x8 Intra analyze structure 1324 ****************************************************************************** 1325 */ 1326 typedef struct 1327 { 1328 /** Best intra modes for 8x8 transform. 1329 * Insert 255 in the end to limit number of modes 1330 */ 1331 UWORD8 au1_best_modes_8x8_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1332 1333 /** Best 8x8 intra modes for 4x4 transform 1334 * Insert 255 in the end to limit number of modes 1335 */ 1336 UWORD8 au1_best_modes_4x4_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1337 1338 /** Best 4x4 intra modes 1339 * Insert 255 in the end to limit number of modes 1340 */ 1341 UWORD8 au1_4x4_best_modes[4][MAX_INTRA_CU_CANDIDATES + 1]; 1342 1343 /** flag to indicate if nxn pu mode (different pu at 4x4 level) is enabled */ 1344 UWORD8 b1_enable_nxn : 1; 1345 1346 /** valid cu flag : required for incomplete ctbs at frame boundaries */ 1347 UWORD8 b1_valid_cu : 1; 1348 1349 /** dummy bits */ 1350 UWORD8 b6_reserved : 6; 1351 1352 } intra8_analyse_t; 1353 1354 /** 1355 ****************************************************************************** 1356 * @brief 16x16 Intra analyze structure 1357 ****************************************************************************** 1358 */ 1359 typedef struct 1360 { 1361 /** Best intra modes for 16x16 transform. 1362 * Insert 255 in the end to limit number of modes 1363 */ 1364 UWORD8 au1_best_modes_16x16_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1365 1366 /** Best 16x16 intra modes for 8x8 transform 1367 * Insert 255 in the end to limit number of modes 1368 */ 1369 UWORD8 au1_best_modes_8x8_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1370 1371 /** 8x8 children intra analyze for this 16x16 */ 1372 intra8_analyse_t as_intra8_analyse[4]; 1373 1374 /* indicates if 16x16 is best cu or 8x8 cu */ 1375 UWORD8 b1_split_flag : 1; 1376 1377 /* indicates if 8x8 vs 16x16 rdo evaluation needed */ 1378 /* or only 8x8's rdo evaluation needed */ 1379 UWORD8 b1_merge_flag : 1; 1380 1381 /** 1382 * valid cu flag : required for incomplete ctbs at frame boundaries 1383 * or if CTB size is lower than 32 1384 */ 1385 UWORD8 b1_valid_cu : 1; 1386 1387 /** dummy bits */ 1388 UWORD8 b6_reserved : 5; 1389 1390 } intra16_analyse_t; 1391 1392 /** 1393 ****************************************************************************** 1394 * @brief 32x32 Intra analyze structure 1395 ****************************************************************************** 1396 */ 1397 typedef struct 1398 { 1399 /** Best intra modes for 32x32 transform. 1400 * Insert 255 in the end to limit number of modes 1401 */ 1402 UWORD8 au1_best_modes_32x32_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1403 1404 /** Best 32x32 intra modes for 16x16 transform 1405 * Insert 255 in the end to limit number of modes 1406 */ 1407 UWORD8 au1_best_modes_16x16_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1408 1409 /** 16x16 children intra analyze for this 32x32 */ 1410 intra16_analyse_t as_intra16_analyse[4]; 1411 1412 /* indicates if 32x32 is best cu or 16x16 cu */ 1413 UWORD8 b1_split_flag : 1; 1414 1415 /* indicates if 32x32 vs 16x16 rdo evaluation needed */ 1416 /* or 16x16 vs 8x8 evaluation is needed */ 1417 UWORD8 b1_merge_flag : 1; 1418 1419 /** 1420 * valid cu flag : required for incomplete ctbs at frame boundaries 1421 * or if CTB size is lower than 64 1422 */ 1423 UWORD8 b1_valid_cu : 1; 1424 1425 /** dummy bits */ 1426 UWORD8 b6_reserved : 5; 1427 1428 } intra32_analyse_t; 1429 1430 /** 1431 ****************************************************************************** 1432 * @brief IPE L0 analyze structure for L0 ME to do intra/inter CU decisions 1433 * This is a CTB level structure encapsulating IPE modes, cost at all 1434 * level. IPE also recommemds max intra CU sizes which is required 1435 * by ME for CU size determination in intra dominant CTB 1436 ****************************************************************************** 1437 */ 1438 typedef struct 1439 { 1440 /** Best 64x64 intra modes for 32x32 transform. 1441 * Insert 255 in the end to limit number of modes 1442 */ 1443 UWORD8 au1_best_modes_32x32_tu[MAX_INTRA_CU_CANDIDATES + 1]; 1444 1445 /** 32x32 children intra analyze for this 32x32 */ 1446 intra32_analyse_t as_intra32_analyse[4]; 1447 1448 /* indicates if 64x64 is best CUs or 32x32 CUs */ 1449 UWORD8 u1_split_flag; 1450 1451 /* CTB level best 8x8 intra costs */ 1452 WORD32 ai4_best8x8_intra_cost[MAX_CU_IN_CTB]; 1453 1454 /* CTB level best 16x16 intra costs */ 1455 WORD32 ai4_best16x16_intra_cost[MAX_CU_IN_CTB >> 2]; 1456 1457 /* CTB level best 32x32 intra costs */ 1458 WORD32 ai4_best32x32_intra_cost[MAX_CU_IN_CTB >> 4]; 1459 1460 /* best 64x64 intra cost */ 1461 WORD32 i4_best64x64_intra_cost; 1462 1463 /* 1464 @L0 level 1465 4 => 0 - 32x32 TU in 64x64 CU 1466 1 - 16x16 TU in 64x64 CU 1467 2 - 8x8 TU in 64x64 CU 1468 3 - 64x64 CU 1469 2 => Intra/Inter */ 1470 WORD32 i4_64x64_act_factor[4][2]; 1471 1472 /* 1473 @L0 level 1474 4 => num 32x32 in CTB 1475 3 => 0 - 32x32 TU in 64x64 CU 1476 1 - 16x16 TU in 64x64 CU 1477 2 - 8x8 TU in 64x64 CU 1478 2 => Intra/Inter */ 1479 WORD32 i4_32x32_act_factor[4][3][2]; 1480 1481 /* 1482 @L0 level 1483 16 => num 16x16 in CTB 1484 2 => 0 - 16x16 TU in 64x64 CU 1485 1 - 8x8 TU in 64x64 CU 1486 2 => Intra/Inter */ 1487 WORD32 i4_16x16_act_factor[16][2][2]; 1488 1489 WORD32 nodes_created_in_cu_tree; 1490 1491 cur_ctb_cu_tree_t *ps_cu_tree_root; 1492 1493 WORD32 ai4_8x8_act_factor[16]; 1494 WORD32 ai4_best_sad_8x8_l1_me[MAX_CU_IN_CTB]; 1495 WORD32 ai4_best_sad_8x8_l1_ipe[MAX_CU_IN_CTB]; 1496 WORD32 ai4_best_sad_cost_8x8_l1_me[MAX_CU_IN_CTB]; 1497 WORD32 ai4_best_sad_cost_8x8_l1_ipe[MAX_CU_IN_CTB]; 1498 1499 /*Ctb level accumalated satd*/ 1500 WORD32 i4_ctb_acc_satd; 1501 1502 /*Ctb level accumalated mpm bits*/ 1503 WORD32 i4_ctb_acc_mpm_bits; 1504 1505 } ipe_l0_ctb_analyse_for_me_t; 1506 1507 typedef struct 1508 { 1509 WORD16 i2_mv_x; 1510 WORD16 i2_mv_y; 1511 } global_mv_t; 1512 1513 /** 1514 ****************************************************************************** 1515 * @brief Pre Encode pass and ME pass shared variables and buffers 1516 ****************************************************************************** 1517 */ 1518 typedef struct 1519 { 1520 /** 1521 * Buffer id 1522 */ 1523 WORD32 i4_buf_id; 1524 1525 /** 1526 * Flag will be set to 1 by frame processing thread after receiving flush 1527 * command from application 1528 */ 1529 WORD32 i4_end_flag; 1530 1531 /** frame leve ctb analyse buffer pointer */ 1532 ctb_analyse_t *ps_ctb_analyse; 1533 1534 /** frame level cu analyse buffer pointer for IPE */ 1535 //cu_analyse_t *ps_cu_analyse; 1536 1537 /** current input pointer */ 1538 ihevce_lap_enc_buf_t *ps_curr_inp; 1539 1540 /** current inp buffer id */ 1541 WORD32 curr_inp_buf_id; 1542 1543 /** Slice header parameters */ 1544 slice_header_t s_slice_hdr; 1545 1546 /** sps parameters activated by current slice */ 1547 sps_t *ps_sps; 1548 1549 /** pps parameters activated by current slice */ 1550 pps_t *ps_pps; 1551 1552 /** vps parameters activated by current slice */ 1553 vps_t *ps_vps; 1554 /** Pointer to Penultilate Layer context memory internally has MV bank buff and related params */ 1555 void *pv_me_lyr_ctxt; 1556 1557 /** Pointer to Penultilate Layer NV bank context memory */ 1558 void *pv_me_lyr_bnk_ctxt; 1559 1560 /** Pointer to Penultilate Layer MV bank buff */ 1561 void *pv_me_mv_bank; 1562 1563 /** Pointer to Penultilate Layer reference idx buffer */ 1564 void *pv_me_ref_idx; 1565 /** 1566 * Array to store 8x8 cost (partial 8x8 sad + level adjusted cost) 1567 * The order of storing is raster scan order within CTB and 1568 * CTB order is raster scan within frame. 1569 */ 1570 double *plf_intra_8x8_cost; 1571 1572 /** 1573 * L0 layer ctb anaylse frame level buffer. 1574 * IPE wil populate the cost and best modes at all levels in this buffer 1575 * for every CTB in a frame 1576 */ 1577 // moved to shorter buffer queue 1578 //ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb; 1579 1580 /** Layer L1 buffer pointer */ 1581 ihevce_ed_blk_t *ps_layer1_buf; 1582 1583 /** Layer L2 buffer pointer */ 1584 ihevce_ed_blk_t *ps_layer2_buf; 1585 1586 /*ME reverse map info*/ 1587 UWORD8 *pu1_me_reverse_map_info; 1588 1589 /** Buffer pointer for CTB level information in pre intra pass*/ 1590 ihevce_ed_ctb_l1_t *ps_ed_ctb_l1; 1591 1592 #ifndef DISABLE_SEI 1593 /** vps parameters activated by current slice */ 1594 sei_params_t s_sei; 1595 #endif 1596 1597 /** nal_type for the slice to be encoded */ 1598 WORD32 i4_slice_nal_type; 1599 1600 /** input time stamp in terms of ticks: lower 32 */ 1601 WORD32 i4_inp_timestamp_low; 1602 1603 /** input time stamp in terms of ticks: higher 32 */ 1604 WORD32 i4_inp_timestamp_high; 1605 1606 /** input frame ctxt of app to be retured in output buffer */ 1607 void *pv_app_frm_ctxt; 1608 1609 /** current frm valid flag : 1610 * will be 1 if valid input was processed by frame proc thrd 1611 */ 1612 WORD32 i4_frm_proc_valid_flag; 1613 1614 /** 1615 * Qp to be used for current frame 1616 */ 1617 WORD32 i4_curr_frm_qp; 1618 1619 /** 1620 * Frame level Lambda parameters 1621 */ 1622 frm_lambda_ctxt_t as_lambda_prms[IHEVCE_MAX_NUM_BITRATES]; 1623 1624 /** Frame-levelSATDcost accumalator */ 1625 LWORD64 i8_frame_acc_satd_cost; 1626 1627 /** Frame - L1 coarse me cost accumulated */ 1628 LWORD64 i8_acc_frame_coarse_me_cost; 1629 /** Frame - L1 coarse me cost accumulated */ 1630 //LWORD64 i8_acc_frame_coarse_me_cost_for_ref; 1631 1632 /** Frame - L1 coarse me sad accumulated */ 1633 LWORD64 i8_acc_frame_coarse_me_sad; 1634 1635 /* Averge activity of 4x4 blocks from previous frame 1636 * If L1, maps to 8*8 in L0 1637 */ 1638 WORD32 i4_curr_frame_4x4_avg_act; 1639 1640 WORD32 ai4_mod_factor_derived_by_variance[2]; 1641 1642 float f_strength; 1643 1644 /* Averge activity of 8x8 blocks from previous frame 1645 * If L1, maps to 16*16 in L0 1646 */ 1647 1648 long double ld_curr_frame_8x8_log_avg[2]; 1649 1650 LWORD64 i8_curr_frame_8x8_avg_act[2]; 1651 1652 LWORD64 i8_curr_frame_8x8_sum_act[2]; 1653 1654 WORD32 i4_curr_frame_8x8_sum_act_for_strength[2]; 1655 1656 ULWORD64 u8_curr_frame_8x8_sum_act_sqr; 1657 1658 WORD32 i4_curr_frame_8x8_num_blks[2]; 1659 1660 LWORD64 i8_acc_frame_8x8_sum_act[2]; 1661 LWORD64 i8_acc_frame_8x8_sum_act_sqr; 1662 WORD32 i4_acc_frame_8x8_num_blks[2]; 1663 LWORD64 i8_acc_frame_8x8_sum_act_for_strength; 1664 LWORD64 i8_curr_frame_8x8_sum_act_for_strength; 1665 1666 /* Averge activity of 16x16 blocks from previous frame 1667 * If L1, maps to 32*32 in L0 1668 */ 1669 1670 long double ld_curr_frame_16x16_log_avg[3]; 1671 1672 LWORD64 i8_curr_frame_16x16_avg_act[3]; 1673 1674 LWORD64 i8_curr_frame_16x16_sum_act[3]; 1675 1676 WORD32 i4_curr_frame_16x16_num_blks[3]; 1677 1678 LWORD64 i8_acc_frame_16x16_sum_act[3]; 1679 WORD32 i4_acc_frame_16x16_num_blks[3]; 1680 1681 /* Averge activity of 32x32 blocks from previous frame 1682 * If L1, maps to 64*64 in L0 1683 */ 1684 1685 long double ld_curr_frame_32x32_log_avg[3]; 1686 1687 LWORD64 i8_curr_frame_32x32_avg_act[3]; 1688 1689 global_mv_t s_global_mv[MAX_NUM_REF]; 1690 LWORD64 i8_curr_frame_32x32_sum_act[3]; 1691 1692 WORD32 i4_curr_frame_32x32_num_blks[3]; 1693 1694 LWORD64 i8_acc_frame_32x32_sum_act[3]; 1695 WORD32 i4_acc_frame_32x32_num_blks[3]; 1696 1697 LWORD64 i8_acc_num_blks_high_sad; 1698 1699 LWORD64 i8_total_blks; 1700 1701 WORD32 i4_complexity_percentage; 1702 1703 WORD32 i4_is_high_complex_region; 1704 1705 WORD32 i4_avg_noise_thrshld_4x4; 1706 1707 LWORD64 i8_curr_frame_mean_sum; 1708 WORD32 i4_curr_frame_mean_num_blks; 1709 LWORD64 i8_curr_frame_avg_mean_act; 1710 1711 } pre_enc_me_ctxt_t; 1712 1713 /** 1714 ****************************************************************************** 1715 * @brief buffers from L0 IPE to ME and enc loop 1716 ****************************************************************************** 1717 */ 1718 typedef struct 1719 { 1720 WORD32 i4_size; 1721 1722 ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb; 1723 } pre_enc_L0_ipe_encloop_ctxt_t; 1724 /** 1725 ****************************************************************************** 1726 * @brief Frame process and Entropy coding pass shared variables and buffers 1727 ****************************************************************************** 1728 */ 1729 1730 typedef struct 1731 { 1732 /*PIC level Info*/ 1733 ULWORD64 i8_total_cu; 1734 ULWORD64 i8_total_cu_min_8x8; 1735 ULWORD64 i8_total_pu; 1736 ULWORD64 i8_total_intra_cu; 1737 ULWORD64 i8_total_inter_cu; 1738 ULWORD64 i8_total_skip_cu; 1739 ULWORD64 i8_total_cu_based_on_size[4]; 1740 1741 ULWORD64 i8_total_intra_pu; 1742 ULWORD64 i8_total_merge_pu; 1743 ULWORD64 i8_total_non_skipped_inter_pu; 1744 1745 ULWORD64 i8_total_2nx2n_intra_pu[4]; 1746 ULWORD64 i8_total_nxn_intra_pu; 1747 ULWORD64 i8_total_2nx2n_inter_pu[4]; 1748 ULWORD64 i8_total_smp_inter_pu[4]; 1749 ULWORD64 i8_total_amp_inter_pu[3]; 1750 ULWORD64 i8_total_nxn_inter_pu[3]; 1751 1752 ULWORD64 i8_total_L0_mode; 1753 ULWORD64 i8_total_L1_mode; 1754 ULWORD64 i8_total_BI_mode; 1755 1756 ULWORD64 i8_total_L0_ref_idx[MAX_DPB_SIZE]; 1757 ULWORD64 i8_total_L1_ref_idx[MAX_DPB_SIZE]; 1758 1759 ULWORD64 i8_total_tu; 1760 ULWORD64 i8_total_non_coded_tu; 1761 ULWORD64 i8_total_inter_coded_tu; 1762 ULWORD64 i8_total_intra_coded_tu; 1763 1764 ULWORD64 i8_total_tu_based_on_size[4]; 1765 ULWORD64 i8_total_tu_cu64[4]; 1766 ULWORD64 i8_total_tu_cu32[4]; 1767 ULWORD64 i8_total_tu_cu16[3]; 1768 ULWORD64 i8_total_tu_cu8[2]; 1769 1770 LWORD64 i8_total_qp; 1771 LWORD64 i8_total_qp_min_cu; 1772 WORD32 i4_min_qp; 1773 WORD32 i4_max_qp; 1774 LWORD64 i8_sum_squared_frame_qp; 1775 LWORD64 i8_total_frame_qp; 1776 WORD32 i4_max_frame_qp; 1777 float f_total_buffer_underflow; 1778 float f_total_buffer_overflow; 1779 float f_max_buffer_underflow; 1780 float f_max_buffer_overflow; 1781 1782 UWORD8 i1_num_ref_idx_l0_active; 1783 UWORD8 i1_num_ref_idx_l1_active; 1784 1785 WORD32 i4_ref_poc_l0[MAX_DPB_SIZE]; 1786 WORD32 i4_ref_poc_l1[MAX_DPB_SIZE]; 1787 1788 WORD8 i1_list_entry_l0[MAX_DPB_SIZE]; 1789 DOUBLE i2_luma_weight_l0[MAX_DPB_SIZE]; 1790 WORD16 i2_luma_offset_l0[MAX_DPB_SIZE]; 1791 WORD8 i1_list_entry_l1[MAX_DPB_SIZE]; 1792 DOUBLE i2_luma_weight_l1[MAX_DPB_SIZE]; 1793 WORD16 i2_luma_offset_l1[MAX_DPB_SIZE]; 1794 1795 ULWORD64 u8_bits_estimated_intra; 1796 ULWORD64 u8_bits_estimated_inter; 1797 ULWORD64 u8_bits_estimated_slice_header; 1798 ULWORD64 u8_bits_estimated_sao; 1799 ULWORD64 u8_bits_estimated_split_cu_flag; 1800 ULWORD64 u8_bits_estimated_cu_hdr_bits; 1801 ULWORD64 u8_bits_estimated_split_tu_flag; 1802 ULWORD64 u8_bits_estimated_qp_delta_bits; 1803 ULWORD64 u8_bits_estimated_cbf_luma_bits; 1804 ULWORD64 u8_bits_estimated_cbf_chroma_bits; 1805 1806 ULWORD64 u8_bits_estimated_res_luma_bits; 1807 ULWORD64 u8_bits_estimated_res_chroma_bits; 1808 1809 ULWORD64 u8_bits_estimated_ref_id; 1810 ULWORD64 u8_bits_estimated_mvd; 1811 ULWORD64 u8_bits_estimated_merge_flag; 1812 ULWORD64 u8_bits_estimated_mpm_luma; 1813 ULWORD64 u8_bits_estimated_mpm_chroma; 1814 1815 ULWORD64 u8_total_bits_generated; 1816 ULWORD64 u8_total_bits_vbv; 1817 1818 ULWORD64 u8_total_I_bits_generated; 1819 ULWORD64 u8_total_P_bits_generated; 1820 ULWORD64 u8_total_B_bits_generated; 1821 1822 UWORD32 u4_frame_sad; 1823 UWORD32 u4_frame_intra_sad; 1824 UWORD32 u4_frame_inter_sad; 1825 1826 ULWORD64 i8_frame_cost; 1827 ULWORD64 i8_frame_intra_cost; 1828 ULWORD64 i8_frame_inter_cost; 1829 } s_pic_level_acc_info_t; 1830 1831 #ifndef DISABLE_SEI 1832 typedef struct 1833 { 1834 UWORD32 u4_target_bit_rate_sei_entropy; 1835 UWORD32 u4_buffer_size_sei_entropy; 1836 UWORD32 u4_dbf_entropy; 1837 1838 } s_pic_level_sei_info_t; 1839 #endif 1840 /** 1841 ****************************************************************************** 1842 * @brief ME pass and Main enocde pass shared variables and buffers 1843 ****************************************************************************** 1844 */ 1845 typedef struct 1846 { 1847 /** 1848 * Buffer id 1849 */ 1850 WORD32 i4_buf_id; 1851 1852 /** 1853 * Flag will be set to 1 by frame processing thread after receiving flush 1854 * command from application 1855 */ 1856 WORD32 i4_end_flag; 1857 1858 /** current input pointer */ 1859 ihevce_lap_enc_buf_t *ps_curr_inp; 1860 1861 /** current inp buffer id */ 1862 WORD32 curr_inp_buf_id; 1863 1864 /** current input buffers from ME */ 1865 pre_enc_me_ctxt_t *ps_curr_inp_from_me_prms; 1866 1867 /** current inp buffer id from ME */ 1868 WORD32 curr_inp_from_me_buf_id; 1869 1870 /** current input buffers from L0 IPE */ 1871 pre_enc_L0_ipe_encloop_ctxt_t *ps_curr_inp_from_l0_ipe_prms; 1872 1873 /** current inp buffer id from L0 IPE */ 1874 WORD32 curr_inp_from_l0_ipe_buf_id; 1875 1876 /** Slice header parameters */ 1877 slice_header_t s_slice_hdr; 1878 1879 /** current frm valid flag : 1880 * will be 1 if valid input was processed by frame proc thrd 1881 */ 1882 WORD32 i4_frm_proc_valid_flag; 1883 1884 /** 1885 * Array of reference picture list for ping instance 1886 * 2=> ref_pic_list0 and ref_pic_list1 1887 */ 1888 recon_pic_buf_t as_ref_list[IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2]; 1889 1890 /** 1891 * Array of reference picture list 1892 * 2=> ref_pic_list0 and ref_pic_list1 1893 */ 1894 recon_pic_buf_t *aps_ref_list[IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2]; 1895 1896 /** Job Queue Memory encode */ 1897 job_queue_t *ps_job_q_enc; 1898 1899 /** Array of Job Queue handles of enc group for ping and pong instance*/ 1900 job_queue_handle_t as_job_que_enc_hdls[NUM_ENC_JOBS_QUES]; 1901 1902 /** Array of Job Queue handles of enc group for re-encode*/ 1903 job_queue_handle_t as_job_que_enc_hdls_reenc[NUM_ENC_JOBS_QUES]; 1904 /** frame level me_ctb_data_t buffer pointer 1905 */ 1906 me_ctb_data_t *ps_cur_ctb_me_data; 1907 1908 /** frame level cur_ctb_cu_tree_t buffer pointer for ME 1909 */ 1910 cur_ctb_cu_tree_t *ps_cur_ctb_cu_tree; 1911 1912 /** Pointer to Dep. Mngr for CTBs processed in every row of a frame. 1913 * ME is producer, EncLoop is the consumer 1914 */ 1915 void *pv_dep_mngr_encloop_dep_me; 1916 1917 } me_enc_rdopt_ctxt_t; 1918 1919 #ifndef DISABLE_SEI 1920 typedef struct 1921 { 1922 UWORD32 u4_payload_type; 1923 UWORD32 u4_payload_length; 1924 UWORD8 *pu1_sei_payload; 1925 } sei_payload_t; 1926 #endif 1927 1928 typedef struct 1929 { 1930 /** 1931 * Flag will be set to 1 by frame processing thread after receiving flush 1932 * command from application 1933 */ 1934 WORD32 i4_end_flag; 1935 1936 /** frame level ctb allocation for ctb after aligning to max cu size */ 1937 ctb_enc_loop_out_t *ps_frm_ctb_data; 1938 1939 /** frame level cu allocation for ctb after aligning to max cu size */ 1940 cu_enc_loop_out_t *ps_frm_cu_data; 1941 1942 /** frame level tu allocation for ctb after aligning to max cu size */ 1943 tu_enc_loop_out_t *ps_frm_tu_data; 1944 1945 /** frame level pu allocation for ctb after aligning to max cu size */ 1946 pu_t *ps_frm_pu_data; 1947 1948 /** frame level coeff allocation for ctb after aligning to max cu size */ 1949 void *pv_coeff_data; 1950 1951 /** Slice header parameters */ 1952 slice_header_t s_slice_hdr; 1953 1954 /** sps parameters activated by current slice */ 1955 sps_t *ps_sps; 1956 1957 /** pps parameters activated by current slice */ 1958 pps_t *ps_pps; 1959 1960 /** vps parameters activated by current slice */ 1961 vps_t *ps_vps; 1962 1963 #ifndef DISABLE_SEI 1964 /** vps parameters activated by current slice */ 1965 sei_params_t s_sei; 1966 #endif 1967 1968 /* Flag to indicate if AUD NAL is present */ 1969 WORD8 i1_aud_present_flag; 1970 1971 /* Flag to indicate if EOS NAL is present */ 1972 WORD8 i1_eos_present_flag; 1973 1974 /** nal_type for the slice to be encoded */ 1975 WORD32 i4_slice_nal_type; 1976 1977 /** input time stamp in terms of ticks: lower 32 */ 1978 WORD32 i4_inp_timestamp_low; 1979 1980 /** input time stamp in terms of ticks: higher 32 */ 1981 WORD32 i4_inp_timestamp_high; 1982 1983 /** input frame ctxt of app to be retured in output buffer */ 1984 void *pv_app_frm_ctxt; 1985 1986 /** current frm valid flag : 1987 * will be 1 if valid input was processed by frame proc thrd 1988 */ 1989 WORD32 i4_frm_proc_valid_flag; 1990 1991 /** To support entropy sync the bitstream offset of each CTB row 1992 * is populated in this array any put in slice header in the end 1993 */ 1994 WORD32 ai4_entry_point_offset[MAX_NUM_CTB_ROWS_FRM]; 1995 1996 /** RDopt estimation of bytes generated based on which rc update happens 1997 * 1998 */ 1999 WORD32 i4_rdopt_bits_generated_estimate; 2000 2001 /* These params are passed from enc-threads to entropy thread for 2002 passing params needed for PSNR caclulation and encoding 2003 summary prints */ 2004 DOUBLE lf_luma_mse; 2005 DOUBLE lf_cb_mse; 2006 DOUBLE lf_cr_mse; 2007 2008 DOUBLE lf_luma_ssim; 2009 DOUBLE lf_cb_ssim; 2010 DOUBLE lf_cr_ssim; 2011 2012 WORD32 i4_qp; 2013 WORD32 i4_poc; 2014 WORD32 i4_display_num; 2015 WORD32 i4_pic_type; 2016 2017 /** I-only SCD */ 2018 WORD32 i4_is_I_scenecut; 2019 2020 WORD32 i4_is_non_I_scenecut; 2021 WORD32 i4_sub_pic_level_rc; 2022 2023 WORD32 ai4_frame_bits_estimated; 2024 s_pic_level_acc_info_t s_pic_level_info; 2025 2026 LWORD64 i8_buf_level_bitrate_change; 2027 2028 WORD32 i4_is_end_of_idr_gop; 2029 2030 #ifndef DISABLE_SEI 2031 sei_payload_t as_sei_payload[MAX_NUMBER_OF_SEI_PAYLOAD]; 2032 2033 UWORD32 u4_num_sei_payload; 2034 #endif 2035 /* Flag used only in mres single output case to flush out one res and start with next */ 2036 WORD32 i4_out_flush_flag; 2037 2038 } frm_proc_ent_cod_ctxt_t; 2039 2040 /** 2041 ****************************************************************************** 2042 * @brief ME pass and Main enocde pass shared variables and buffers 2043 ****************************************************************************** 2044 */ 2045 typedef struct 2046 { 2047 /*BitRate ID*/ 2048 WORD32 i4_br_id; 2049 2050 /*Frame ID*/ 2051 WORD32 i4_frm_id; 2052 2053 /*Number of CTB, after ich data is populated*/ 2054 WORD32 i4_ctb_count_in_data; 2055 2056 /*Number of CTB, after ich scale is computed*/ 2057 WORD32 i4_ctb_count_out_scale; 2058 2059 /*Bits estimated for the frame */ 2060 /* For NON-I SCD max buf bits*/ 2061 LWORD64 i8_frame_bits_estimated; 2062 2063 /* Bits consumed till the nctb*/ 2064 LWORD64 i8_nctb_bits_consumed; 2065 2066 /* Bits consumed till the nctb*/ 2067 LWORD64 i8_acc_bits_consumed; 2068 2069 /*Frame level Best of Ipe and ME sad*/ 2070 LWORD64 i8_frame_l1_me_sad; 2071 2072 /*SAD accumalted till NCTB*/ 2073 LWORD64 i8_nctb_l1_me_sad; 2074 2075 /*Frame level IPE sad*/ 2076 LWORD64 i8_frame_l1_ipe_sad; 2077 2078 /*SAD accumalted till NCTB*/ 2079 LWORD64 i8_nctb_l1_ipe_sad; 2080 2081 /*Frame level L0 IPE satd*/ 2082 LWORD64 i8_frame_l0_ipe_satd; 2083 2084 /*L0 SATD accumalted till NCTB*/ 2085 LWORD64 i8_nctb_l0_ipe_satd; 2086 2087 /*Frame level Activity factor acc at 8x8 level */ 2088 LWORD64 i8_frame_l1_activity_fact; 2089 2090 /*NCTB Activity factor acc at 8x8 level */ 2091 LWORD64 i8_nctb_l1_activity_fact; 2092 2093 /*L0 MPM bits accumalted till NCTB*/ 2094 LWORD64 i8_nctb_l0_mpm_bits; 2095 2096 /*Encoder hdr accumalted till NCTB*/ 2097 LWORD64 i8_nctb_hdr_bits_consumed; 2098 2099 } ihevce_sub_pic_rc_ctxt_t; 2100 2101 /** 2102 ****************************************************************************** 2103 * @brief Memoery manager context (stores the memory tables allcoated) 2104 ****************************************************************************** 2105 */ 2106 typedef struct 2107 { 2108 /** 2109 * Total number of memtabs (Modules and system) 2110 * during create time 2111 */ 2112 WORD32 i4_num_create_memtabs; 2113 2114 /** 2115 * Pointer to the mem tabs 2116 * of crate time 2117 */ 2118 iv_mem_rec_t *ps_create_memtab; 2119 2120 /** 2121 * Total number of memtabs Data and control Ques 2122 * during Ques create time 2123 */ 2124 WORD32 i4_num_q_memtabs; 2125 2126 /** 2127 * Pointer to the mem tabs 2128 * of crate time 2129 */ 2130 iv_mem_rec_t *ps_q_memtab; 2131 2132 } enc_mem_mngr_ctxt; 2133 2134 /** 2135 ****************************************************************************** 2136 * @brief Encoder Interafce Queues Context 2137 ****************************************************************************** 2138 */ 2139 typedef struct 2140 { 2141 /** Number of Queues at interface context level */ 2142 WORD32 i4_num_queues; 2143 2144 /** Array of Queues handle */ 2145 void *apv_q_hdl[IHEVCE_MAX_NUM_QUEUES]; 2146 2147 /** Mutex for encuring thread safety of the access of the queues */ 2148 void *pv_q_mutex_hdl; 2149 2150 } enc_q_ctxt_t; 2151 2152 /** 2153 ****************************************************************************** 2154 * @brief Module context of different modules in encoder 2155 ****************************************************************************** 2156 */ 2157 2158 typedef struct 2159 { 2160 /** Motion estimation context pointer */ 2161 void *pv_me_ctxt; 2162 /** Coarse Motion estimation context pointer */ 2163 void *pv_coarse_me_ctxt; 2164 2165 /** Intra Prediction context pointer */ 2166 void *pv_ipe_ctxt; 2167 2168 /** Encode Loop context pointer */ 2169 void *pv_enc_loop_ctxt; 2170 2171 /** Entropy Coding context pointer */ 2172 void *apv_ent_cod_ctxt[IHEVCE_MAX_NUM_BITRATES]; 2173 2174 /** Look Ahead Processing context pointer */ 2175 void *pv_lap_ctxt; 2176 /** Rate control context pointer */ 2177 void *apv_rc_ctxt[IHEVCE_MAX_NUM_BITRATES]; 2178 /** Decomposition pre intra context pointer */ 2179 void *pv_decomp_pre_intra_ctxt; 2180 2181 } module_ctxt_t; 2182 2183 /** 2184 ****************************************************************************** 2185 * @brief Threads semaphore handles 2186 ****************************************************************************** 2187 */ 2188 typedef struct 2189 { 2190 /** LAP semaphore handle */ 2191 void *pv_lap_sem_handle; 2192 2193 /** Encode frame Process semaphore handle */ 2194 void *pv_enc_frm_proc_sem_handle; 2195 2196 /** Pre Encode frame Process semaphore handle */ 2197 void *pv_pre_enc_frm_proc_sem_handle; 2198 2199 /** Entropy coding semaphore handle 2200 One semaphore for each entropy thread, i.e. for each bit-rate instance*/ 2201 void *apv_ent_cod_sem_handle[IHEVCE_MAX_NUM_BITRATES]; 2202 2203 /** 2204 * Semaphore handle corresponding to get free inp frame buff 2205 * function call from app if called in blocking mode 2206 */ 2207 void *pv_inp_data_sem_handle; 2208 2209 /** 2210 * Semaphore handle corresponding to get free inp control command buff 2211 * function call from app if called in blocking mode 2212 */ 2213 void *pv_inp_ctrl_sem_handle; 2214 2215 /** 2216 * Semaphore handle corresponding to get filled out bitstream buff 2217 * function call from app if called in blocking mode 2218 */ 2219 void *apv_out_strm_sem_handle[IHEVCE_MAX_NUM_BITRATES]; 2220 2221 /** 2222 * Semaphore handle corresponding to get filled out recon buff 2223 * function call from app if called in blocking mode 2224 */ 2225 void *apv_out_recon_sem_handle[IHEVCE_MAX_NUM_BITRATES]; 2226 2227 /** 2228 * Semaphore handle corresponding to get filled out control status buff 2229 * function call from app if called in blocking mode 2230 */ 2231 void *pv_out_ctrl_sem_handle; 2232 2233 /** 2234 * Semaphore handle corresponding to get filled out control status buff 2235 * function call from app if called in blocking mode 2236 */ 2237 void *pv_lap_inp_data_sem_hdl; 2238 2239 /** 2240 * Semaphore handle corresponding to get filled out control status buff 2241 * function call from app if called in blocking mode 2242 */ 2243 void *pv_preenc_inp_data_sem_hdl; 2244 2245 /** 2246 * Semaphore handle corresponding to Multi Res Single output case 2247 */ 2248 void *pv_ent_common_mres_sem_hdl; 2249 void *pv_out_common_mres_sem_hdl; 2250 2251 } thrd_que_sem_hdl_t; 2252 2253 /** 2254 ****************************************************************************** 2255 * @brief Frame level structure which has parameters about CTBs 2256 ****************************************************************************** 2257 */ 2258 typedef struct 2259 { 2260 /** CTB size of all CTB in a frame in pixels 2261 * this will be create time value, 2262 * run time change in this value is not supported 2263 */ 2264 WORD32 i4_ctb_size; 2265 2266 /** Minimum CU size of CTB in a frame in pixels 2267 * this will be create time value, 2268 * run time change in this value is not supported 2269 */ 2270 WORD32 i4_min_cu_size; 2271 2272 /** Worst case num CUs in CTB based on i4_ctb_size */ 2273 WORD32 i4_num_cus_in_ctb; 2274 2275 /** Worst case num PUs in CTB based on i4_ctb_size */ 2276 WORD32 i4_num_pus_in_ctb; 2277 2278 /** Worst case num TUs in CTB based on i4_ctb_size */ 2279 WORD32 i4_num_tus_in_ctb; 2280 2281 /** Number of CTBs in horizontal direction 2282 * this is based on run time source width and i4_ctb_size 2283 */ 2284 WORD32 i4_num_ctbs_horz; 2285 2286 /** Number of CTBs in vertical direction 2287 * this is based on run time source height and i4_ctb_size 2288 */ 2289 WORD32 i4_num_ctbs_vert; 2290 2291 /** MAX CUs in horizontal direction 2292 * this is based on run time source width, i4_ctb_size and i4_num_cus_in_ctb 2293 */ 2294 WORD32 i4_max_cus_in_row; 2295 2296 /** MAX PUs in horizontal direction 2297 * this is based on run time source width, i4_ctb_size and i4_num_pus_in_ctb 2298 */ 2299 WORD32 i4_max_pus_in_row; 2300 2301 /** MAX TUs in horizontal direction 2302 * this is based on run time source width, i4_ctb_size and i4_num_tus_in_ctb 2303 */ 2304 WORD32 i4_max_tus_in_row; 2305 2306 /** 2307 * CU aligned picture width (currently aligned to MAX CU size) 2308 * should be modified to be aligned to MIN CU size 2309 */ 2310 2311 WORD32 i4_cu_aligned_pic_wd; 2312 2313 /** 2314 * CU aligned picture height (currently aligned to MAX CU size) 2315 * should be modified to be aligned to MIN CU size 2316 */ 2317 2318 WORD32 i4_cu_aligned_pic_ht; 2319 2320 /* Pointer to a frame level memory, 2321 Stride is = 1 + (num ctbs in a ctb-row) + 1 2322 Hieght is = 1 + (num ctbs in a ctb-col) 2323 Contains tile-id of each ctb */ 2324 WORD32 *pi4_tile_id_map; 2325 2326 /* stride in units of ctb */ 2327 WORD32 i4_tile_id_ctb_map_stride; 2328 2329 } frm_ctb_ctxt_t; 2330 2331 /** 2332 ****************************************************************************** 2333 * @brief ME Job Queue desc 2334 ****************************************************************************** 2335 */ 2336 typedef struct 2337 { 2338 /** Number of output dependencies which need to be set after 2339 * current job is complete, 2340 * should be less than or equal to MAX_OUT_DEP defined in 2341 * ihevce_multi_thrd_structs.h 2342 */ 2343 WORD32 i4_num_output_dep; 2344 2345 /** Array of offsets from the start of output dependent layer's Job Ques 2346 * which are dependent on current Job to be complete 2347 */ 2348 WORD32 ai4_out_dep_unit_off[MAX_OUT_DEP]; 2349 2350 /** Number of input dependencies to be resolved for current job to start 2351 * these many jobs in lower layer should be complete to 2352 * start the current JOB 2353 */ 2354 WORD32 i4_num_inp_dep; 2355 2356 } multi_thrd_me_job_q_prms_t; 2357 2358 /** 2359 * @brief structure in which recon data 2360 * and related parameters are sent from Encoder 2361 */ 2362 typedef struct 2363 { 2364 /** Kept for maintaining backwards compatibility in future */ 2365 WORD32 i4_size; 2366 2367 /** Buffer id for the current buffer */ 2368 WORD32 i4_buf_id; 2369 2370 /** POC of the current buffer */ 2371 WORD32 i4_poc; 2372 2373 /** End flag to communicate this is last frame output from encoder */ 2374 WORD32 i4_end_flag; 2375 2376 /** End flag to communicate encoder that this is the last buffer from application 2377 1 - Last buf, 0 - Not last buffer. No other values are supported. 2378 Application has to set the appropriate value before queing in encoder queue */ 2379 2380 WORD32 i4_is_last_buf; 2381 2382 /** Recon luma buffer pointer */ 2383 void *pv_y_buf; 2384 2385 /** Recon cb buffer pointer */ 2386 void *pv_cb_buf; 2387 2388 /** Recon cr buffer pointer */ 2389 void *pv_cr_buf; 2390 2391 /** Luma size **/ 2392 WORD32 i4_y_pixels; 2393 2394 /** Chroma size **/ 2395 WORD32 i4_uv_pixels; 2396 2397 } iv_enc_recon_data_buffs_t; 2398 2399 /** 2400 ****************************************************************************** 2401 * @brief Multi Thread context structure 2402 ****************************************************************************** 2403 */ 2404 typedef struct 2405 { 2406 /* Flag to indicate to enc and pre-enc thrds that app has sent force end cmd*/ 2407 WORD32 i4_force_end_flag; 2408 2409 /** Force all active threads flag 2410 * This flag will be set to 1 if all Number of cores givento the encoder 2411 * is less than or Equal to MAX_NUM_CORES_SEQ_EXEC. In this mode 2412 * All pre enc threads and enc threads will run of the same cores with 2413 * time sharing ar frame level 2414 */ 2415 WORD32 i4_all_thrds_active_flag; 2416 2417 /** Flag to indicate that core manager has been configured to enable 2418 * sequential execution 2419 */ 2420 WORD32 i4_seq_mode_enabled_flag; 2421 /*-----------------------------------------------------------------------*/ 2422 /*--------- Params related to encode group -----------------------------*/ 2423 /*-----------------------------------------------------------------------*/ 2424 2425 /** Number of processing threads created runtime in encode group */ 2426 WORD32 i4_num_enc_proc_thrds; 2427 2428 /** Number of processing threads active for a given frame 2429 * This value will be monitored at frame level, so as to 2430 * have provsion for increasing / decreasing threads 2431 * based on Load balance b/w stage in encoder 2432 */ 2433 WORD32 i4_num_active_enc_thrds; 2434 2435 /** Mutex for ensuring thread safety of the access of Job queues in encode group */ 2436 void *pv_job_q_mutex_hdl_enc_grp_me; 2437 2438 /** Mutex for ensuring thread safety of the access of Job queues in encode group */ 2439 void *pv_job_q_mutex_hdl_enc_grp_enc_loop; 2440 2441 /** Array of Semaphore handles (for each frame processing threads ) */ 2442 void *apv_enc_thrd_sem_handle[MAX_NUM_FRM_PROC_THRDS_ENC]; 2443 2444 /** Array for ME to export the Job que dependency for all layers */ 2445 multi_thrd_me_job_q_prms_t as_me_job_q_prms[MAX_NUM_HME_LAYERS][MAX_NUM_VERT_UNITS_FRM]; 2446 2447 /* pointer to the mutex handle*/ 2448 void *apv_mutex_handle[MAX_NUM_ME_PARALLEL]; 2449 2450 /* pointer to the mutex handle for frame init*/ 2451 void *apv_mutex_handle_me_end[MAX_NUM_ME_PARALLEL]; 2452 2453 /* pointer to the mutex handle for frame init*/ 2454 void *apv_mutex_handle_frame_init[MAX_NUM_ENC_LOOP_PARALLEL]; 2455 2456 /*pointer to the mutex handle*/ 2457 void *apv_post_enc_mutex_handle[MAX_NUM_ENC_LOOP_PARALLEL]; 2458 2459 /* Flag to indicate that master has done ME init*/ 2460 WORD32 ai4_me_master_done_flag[MAX_NUM_ME_PARALLEL]; 2461 2462 /* Counter to keep track of me num of thrds exiting critical section*/ 2463 WORD32 me_num_thrds_exited[MAX_NUM_ME_PARALLEL]; 2464 2465 /* Flag to indicate that master has done the frame init*/ 2466 WORD32 enc_master_done_frame_init[MAX_NUM_ENC_LOOP_PARALLEL]; 2467 2468 /* Counter to keep track of num of thrds exiting critical section*/ 2469 WORD32 num_thrds_exited[MAX_NUM_ENC_LOOP_PARALLEL]; 2470 2471 /* Counter to keep track of num of thrds exiting critical section for re-encode*/ 2472 WORD32 num_thrds_exited_for_reenc; 2473 2474 /* Array to store the curr qp for ping and pong instance*/ 2475 WORD32 cur_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2476 2477 /* Pointers to store output buffers for ping and pong instance*/ 2478 frm_proc_ent_cod_ctxt_t *ps_curr_out_enc_grp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2479 2480 /* Pointer to store input buffers for me*/ 2481 pre_enc_me_ctxt_t *aps_cur_inp_me_prms[MAX_NUM_ME_PARALLEL]; 2482 2483 /*pointers to store output buffers from me */ 2484 me_enc_rdopt_ctxt_t *aps_cur_out_me_prms[NUM_ME_ENC_BUFS]; 2485 2486 /*pointers to store input buffers to enc-rdopt */ 2487 me_enc_rdopt_ctxt_t *aps_cur_inp_enc_prms[NUM_ME_ENC_BUFS]; 2488 2489 /*Shared memory for Sub Pic rc */ 2490 /*Qscale calulated by sub pic rc bit control for Intra Pic*/ 2491 WORD32 ai4_curr_qp_estimated[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2492 2493 /*Header bits error by sub pic rc bit control*/ 2494 float af_acc_hdr_bits_scale_err[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2495 2496 /*Accumalated ME SAD for NCTB*/ 2497 LWORD64 ai8_nctb_me_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2498 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2499 2500 /*Accumalated IPE SAD for NCTB*/ 2501 LWORD64 ai8_nctb_ipe_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2502 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2503 2504 /*Accumalated L0 IPE SAD for NCTB*/ 2505 LWORD64 ai8_nctb_l0_ipe_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2506 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2507 2508 /*Accumalated Activity Factor for NCTB*/ 2509 LWORD64 ai8_nctb_act_factor[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2510 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2511 2512 /*Accumalated Ctb counter across all threads*/ 2513 WORD32 ai4_ctb_ctr[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2514 2515 /*Bits threshold reached for across all threads*/ 2516 WORD32 ai4_threshold_reached[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2517 2518 /*To hold the Previous In-frame RC chunk QP*/ 2519 WORD32 ai4_prev_chunk_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2520 2521 /*Accumalated Ctb counter across all threads*/ 2522 WORD32 ai4_acc_ctb_ctr[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2523 2524 /*Flag to check if thread is initialized */ 2525 WORD32 ai4_thrd_id_valid_flag[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2526 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2527 2528 /*Accumalated Ctb counter across all threads*/ 2529 //WORD32 ai4_acc_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES][MAX_NUM_FRM_PROC_THRDS_ENC]; 2530 2531 /*Accumalated bits consumed for nctbs across all threads*/ 2532 LWORD64 ai8_nctb_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2533 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2534 2535 /*Accumalated hdr bits consumed for nctbs across all threads*/ 2536 LWORD64 ai8_nctb_hdr_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2537 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2538 2539 /*Accumalated l0 mpm bits consumed for nctbs across all threads*/ 2540 LWORD64 ai8_nctb_mpm_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2541 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2542 2543 /*Accumalated bits consumed for total ctbs across all threads*/ 2544 LWORD64 ai8_acc_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2545 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2546 2547 /*Accumalated bits consumed for total ctbs across all threads*/ 2548 LWORD64 ai8_acc_bits_mul_qs_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] 2549 [MAX_NUM_FRM_PROC_THRDS_ENC]; 2550 2551 /*Qscale calulated by sub pic rc bit control */ 2552 WORD32 ai4_curr_qp_acc[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2553 /* End of Sub pic rc variables */ 2554 2555 /* Pointers to store input (only L0 IPE)*/ 2556 pre_enc_L0_ipe_encloop_ctxt_t *aps_cur_L0_ipe_inp_prms[MAX_NUM_ME_PARALLEL]; 2557 2558 /* Array tp store L0 IPE input buf ids*/ 2559 WORD32 ai4_in_frm_l0_ipe_id[MAX_NUM_ME_PARALLEL]; 2560 2561 /* Array to store output buffer ids for ping and pong instances*/ 2562 WORD32 out_buf_id[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2563 2564 /* Array of pointers to store the recon buf pointers*/ 2565 iv_enc_recon_data_buffs_t *ps_recon_out[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2566 2567 /* Array of pointers to frame recon for ping and pong instances*/ 2568 recon_pic_buf_t *ps_frm_recon[NUM_ME_ENC_BUFS][IHEVCE_MAX_NUM_BITRATES]; 2569 2570 /* Array of recon buffer ids for ping and pong instance*/ 2571 WORD32 recon_buf_id[NUM_ME_ENC_BUFS][IHEVCE_MAX_NUM_BITRATES]; 2572 2573 /* Counter to keep track of num thrds done*/ 2574 WORD32 num_thrds_done; 2575 2576 /* Flags to keep track of dumped ping pong recon buffer*/ 2577 WORD32 is_recon_dumped[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2578 2579 /* Flags to keep track of dumped ping pong output buffer*/ 2580 WORD32 is_out_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2581 2582 /* flag to produce output buffer by the thread who ever is finishing 2583 enc-loop processing first, so that the entropy thread can start processing */ 2584 WORD32 ai4_produce_outbuf[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; 2585 2586 /* Flags to keep track of dumped ping pong input buffer*/ 2587 WORD32 is_in_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL]; 2588 2589 /* Flags to keep track of dumped ping pong L0 IPE to enc buffer*/ 2590 WORD32 is_L0_ipe_in_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL]; 2591 2592 /** Dependency manager for checking whether prev. EncLoop done before 2593 current frame EncLoop starts */ 2594 void *apv_dep_mngr_prev_frame_done[MAX_NUM_ENC_LOOP_PARALLEL]; 2595 2596 /** Dependency manager for checking whether prev. EncLoop done before 2597 re-encode of the current frame */ 2598 void *pv_dep_mngr_prev_frame_enc_done_for_reenc; 2599 2600 /** Dependency manager for checking whether prev. me done before 2601 current frame me starts */ 2602 void *apv_dep_mngr_prev_frame_me_done[MAX_NUM_ME_PARALLEL]; 2603 2604 /** ME coarsest layer JOB queue type */ 2605 WORD32 i4_me_coarsest_lyr_type; 2606 2607 /** number of encloop frames running in parallel */ 2608 WORD32 i4_num_enc_loop_frm_pllel; 2609 2610 /** number of me frames running in parallel */ 2611 WORD32 i4_num_me_frm_pllel; 2612 2613 /*-----------------------------------------------------------------------*/ 2614 /*--------- Params related to pre-enc stage -----------------------------*/ 2615 /*-----------------------------------------------------------------------*/ 2616 2617 /** Number of processing threads created runtime in pre encode group */ 2618 WORD32 i4_num_pre_enc_proc_thrds; 2619 2620 /** Number of processing threads active for a given frame 2621 * This value will be monitored at frame level, so as to 2622 * have provsion for increasing / decreasing threads 2623 * based on Load balance b/w stage in encoder 2624 */ 2625 WORD32 i4_num_active_pre_enc_thrds; 2626 /** number of threads that have done processing the current frame 2627 Use to find out the last thread that is coming out of pre-enc processing 2628 so that the last thread can do de-init of pre-enc stage */ 2629 WORD32 ai4_num_thrds_processed_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2630 2631 /** number of threads that have done processing the current frame 2632 Use to find out the first thread and last inoder to get qp query. As the query 2633 is not read only , the quer should be done only once by thread that comes first 2634 and other threads should get same value*/ 2635 WORD32 ai4_num_thrds_processed_L0_ipe_qp_init[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2636 2637 /** number of threads that have done proessing decomp_intra 2638 Used to find out the last thread that is coming out so that 2639 the last thread can set flag for decomp_pre_intra_finish */ 2640 WORD32 ai4_num_thrds_processed_decomp[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2641 2642 /** number of threads that have done proessing coarse_me 2643 Used to find out the last thread that is coming out so that 2644 the last thread can set flag for coarse_me_finish */ 2645 WORD32 ai4_num_thrds_processed_coarse_me[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2646 2647 /*Flag to indicate if current instance (frame)'s Decomp_pre_intra and Coarse_ME is done. 2648 Used to check if previous frame is done proecessing decom_pre_intra and coarse_me */ 2649 WORD32 ai4_decomp_coarse_me_complete_flag[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2650 2651 /** Dependency manager for checking whether prev. frame decomp_intra 2652 done before current frame decomp_intra starts */ 2653 void *pv_dep_mngr_prev_frame_pre_enc_l1; 2654 2655 /** Dependency manager for checking whether prev. frame L0 IPE done before 2656 current frame L0 IPE starts */ 2657 void *pv_dep_mngr_prev_frame_pre_enc_l0; 2658 2659 /** Dependency manager for checking whether prev. frame coarse_me done before 2660 current frame coarse_me starts */ 2661 void *pv_dep_mngr_prev_frame_pre_enc_coarse_me; 2662 2663 /** flag to indicate if pre_enc_init is done for current frame */ 2664 WORD32 ai4_pre_enc_init_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2665 2666 /** flag to indicate if pre_enc_hme_init is done for current frame */ 2667 WORD32 ai4_pre_enc_hme_init_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2668 2669 /** flag to indicate if pre_enc_deinit is done for current frame */ 2670 WORD32 ai4_pre_enc_deinit_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2671 2672 /** Flag to indicate the end of processing when all the frames are done processing */ 2673 WORD32 ai4_end_flag_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2674 2675 /** Flag to indicate the control blocking mode indicating input command to pre-enc 2676 group should be blocking or unblocking */ 2677 WORD32 i4_ctrl_blocking_mode; 2678 2679 /** Current input pointer */ 2680 ihevce_lap_enc_buf_t *aps_curr_inp_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2681 2682 WORD32 i4_last_inp_buf; 2683 2684 /* buffer id for input buffer */ 2685 WORD32 ai4_in_buf_id_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2686 2687 /** Current output pointer */ 2688 pre_enc_me_ctxt_t *aps_curr_out_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2689 2690 /*Current L0 IPE to enc output pointer */ 2691 pre_enc_L0_ipe_encloop_ctxt_t *ps_L0_IPE_curr_out_pre_enc; 2692 2693 /** buffer id for output buffer */ 2694 WORD32 ai4_out_buf_id_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2695 2696 /** buffer id for L0 IPE enc buffer*/ 2697 WORD32 i4_L0_IPE_out_buf_id; 2698 2699 /** Current picture Qp */ 2700 WORD32 ai4_cur_frame_qp_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2701 2702 /** Decomp layer buffers indicies */ 2703 WORD32 ai4_decomp_lyr_buf_idx[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2704 2705 /*since it is guranteed that cur frame ipe will not start unless prev frame ipe is completly done, 2706 an array of MAX_PRE_ENC_STAGGER might not be required*/ 2707 WORD32 i4_qp_update_l0_ipe; 2708 2709 /** Current picture encoded is the last picture to be encoded flag */ 2710 WORD32 i4_last_pic_flag; 2711 2712 /** Mutex for ensuring thread safety of the access of Job queues in decomp stage */ 2713 void *pv_job_q_mutex_hdl_pre_enc_decomp; 2714 2715 /** Mutex for ensuring thread safety of the access of Job queues in HME group */ 2716 void *pv_job_q_mutex_hdl_pre_enc_hme; 2717 2718 /** Mutex for ensuring thread safety of the access of Job queues in l0 ipe stage */ 2719 void *pv_job_q_mutex_hdl_pre_enc_l0ipe; 2720 2721 /** mutex handle for pre-enc init */ 2722 void *pv_mutex_hdl_pre_enc_init; 2723 2724 /** mutex handle for pre-enc decomp deinit */ 2725 void *pv_mutex_hdl_pre_enc_decomp_deinit; 2726 2727 /** mutex handle for pre enc hme init */ 2728 void *pv_mutex_hdl_pre_enc_hme_init; 2729 2730 /** mutex handle for pre-enc hme deinit */ 2731 void *pv_mutex_hdl_pre_enc_hme_deinit; 2732 2733 /*qp qurey before l0 ipe is done by multiple frame*/ 2734 /** mutex handle for L0 ipe(pre-enc init)*/ 2735 void *pv_mutex_hdl_l0_ipe_init; 2736 2737 /** mutex handle for pre-enc deinit */ 2738 void *pv_mutex_hdl_pre_enc_deinit; 2739 2740 /** Array of Semaphore handles (for each frame processing threads ) */ 2741 void *apv_pre_enc_thrd_sem_handle[MAX_NUM_FRM_PROC_THRDS_ENC]; 2742 /** array which will tell the number of CTB processed in each row, 2743 * used for Row level sync in IPE pass 2744 */ 2745 WORD32 ai4_ctbs_in_row_proc_ipe_pass[MAX_NUM_CTB_ROWS_FRM]; 2746 2747 /** Job Queue Memory pre encode */ 2748 job_queue_t *aps_job_q_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]; 2749 2750 /** Array of Job Queue handles enc group */ 2751 job_queue_handle_t as_job_que_preenc_hdls[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME] 2752 [NUM_PRE_ENC_JOBS_QUES]; 2753 2754 /* accumulate intra sad across all thread to get qp before L0 IPE*/ 2755 WORD32 ai4_intra_satd_acc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME] 2756 [MAX_NUM_FRM_PROC_THRDS_PRE_ENC]; 2757 2758 WORD32 i4_delay_pre_me_btw_l0_ipe; 2759 2760 /*** This variable has the maximum delay between hme and l0ipe ***/ 2761 /*** This is used for wrapping around L0IPE index ***/ 2762 WORD32 i4_max_delay_pre_me_btw_l0_ipe; 2763 2764 /* This is to register the handles of Dep Mngr b/w EncLoop and ME */ 2765 /* This is used to delete the Mngr at the end */ 2766 void *apv_dep_mngr_encloop_dep_me[NUM_ME_ENC_BUFS]; 2767 /*flag to track buffer in me/enc que is produced or not*/ 2768 WORD32 ai4_me_enc_buff_prod_flag[NUM_ME_ENC_BUFS]; 2769 2770 /*out buf que id for me */ 2771 WORD32 ai4_me_out_buf_id[NUM_ME_ENC_BUFS]; 2772 2773 /*in buf que id for enc from me*/ 2774 WORD32 i4_enc_in_buf_id[NUM_ME_ENC_BUFS]; 2775 2776 /* This is used to tell whether the free of recon buffers are done or not */ 2777 WORD32 i4_is_recon_free_done; 2778 2779 /* index for DVSR population */ 2780 WORD32 i4_idx_dvsr_p; 2781 WORD32 aai4_l1_pre_intra_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME] 2782 [(HEVCE_MAX_HEIGHT >> 1) / 8]; 2783 2784 WORD32 i4_rc_l0_qp; 2785 2786 /* Used for mres single out cases. Checks whether a particular resolution is active or passive */ 2787 /* Only one resolution should be active for mres_single_out case */ 2788 WORD32 *pi4_active_res_id; 2789 2790 /** 2791 * Sub Pic bit control mutex lock handle 2792 */ 2793 void *pv_sub_pic_rc_mutex_lock_hdl; 2794 2795 void *pv_sub_pic_rc_for_qp_update_mutex_lock_hdl; 2796 2797 WORD32 i4_encode; 2798 WORD32 i4_in_frame_rc_enabled; 2799 WORD32 i4_num_re_enc; 2800 2801 } multi_thrd_ctxt_t; 2802 2803 /** 2804 * @brief Structure to describe tile params 2805 */ 2806 typedef struct 2807 { 2808 /* flag to indicate tile encoding enabled/disabled */ 2809 WORD32 i4_tiles_enabled_flag; 2810 2811 /* flag to indicate unifrom spacing of tiles */ 2812 WORD32 i4_uniform_spacing_flag; 2813 2814 /* num tiles in a tile-row. num tiles in tile-col */ 2815 WORD32 i4_num_tile_cols; 2816 WORD32 i4_num_tile_rows; 2817 2818 /* Curr tile width and height*/ 2819 WORD32 i4_curr_tile_width; 2820 WORD32 i4_curr_tile_height; 2821 2822 /* Curr tile width and heignt in CTB units*/ 2823 WORD32 i4_curr_tile_wd_in_ctb_unit; 2824 WORD32 i4_curr_tile_ht_in_ctb_unit; 2825 2826 /* frame resolution */ 2827 //WORD32 i4_frame_width; /* encode-width */ 2828 //WORD32 i4_frame_height; /* encode-height */ 2829 2830 /* total num of tiles "in frame" */ 2831 WORD32 i4_num_tiles; 2832 2833 /* Curr tile id. Assigned by raster scan order in a frame */ 2834 WORD32 i4_curr_tile_id; 2835 2836 /* x-pos of first ctb of the slice in ctb */ 2837 /* y-pos of first ctb of the slice in ctb */ 2838 WORD32 i4_first_ctb_x; 2839 WORD32 i4_first_ctb_y; 2840 2841 /* x-pos of first ctb of the slice in samples */ 2842 /* y-pos of first ctb of the slice in samples */ 2843 WORD32 i4_first_sample_x; 2844 WORD32 i4_first_sample_y; 2845 2846 } ihevce_tile_params_t; 2847 2848 /** 2849 ****************************************************************************** 2850 * @brief Encoder context structure 2851 ****************************************************************************** 2852 */ 2853 2854 typedef struct 2855 { 2856 /** 2857 * vps parameters 2858 */ 2859 vps_t as_vps[IHEVCE_MAX_NUM_BITRATES]; 2860 2861 /** 2862 * sps parameters 2863 */ 2864 sps_t as_sps[IHEVCE_MAX_NUM_BITRATES]; 2865 2866 /** 2867 * pps parameters 2868 * Required for each bitrate separately, mainly because 2869 * init qp etc parameters needs to be different for each instance 2870 */ 2871 pps_t as_pps[IHEVCE_MAX_NUM_BITRATES]; 2872 2873 /** 2874 * Rate control mutex lock handle 2875 */ 2876 void *pv_rc_mutex_lock_hdl; 2877 2878 /** frame level cu analyse buffer pointer for ME 2879 * ME will get ps_ctb_analyse structure populated with ps_cu pointers 2880 * pointing to ps_cu_analyse buffer from IPE. 2881 */ 2882 //cu_analyse_t *ps_cu_analyse_inter[PING_PONG_BUF]; 2883 2884 /** 2885 * CTB frame context between encoder (producer) and entropy (consumer) 2886 */ 2887 enc_q_ctxt_t s_enc_ques; 2888 2889 /** 2890 * Encoder memory manager ctxt 2891 */ 2892 enc_mem_mngr_ctxt s_mem_mngr; 2893 2894 /** 2895 * Semaphores of all the threads created in HLE 2896 * and Que handle for buffers b/w frame process and entropy 2897 */ 2898 thrd_que_sem_hdl_t s_thrd_sem_ctxt; 2899 2900 /** 2901 * Reference /recon buffer Que pointer 2902 */ 2903 recon_pic_buf_t **pps_recon_buf_q[IHEVCE_MAX_NUM_BITRATES]; 2904 2905 /** 2906 * Number of buffers in Recon buffer queue 2907 */ 2908 WORD32 ai4_num_buf_recon_q[IHEVCE_MAX_NUM_BITRATES]; 2909 2910 /** 2911 * Reference / recon buffer Que pointer for Pre Encode group 2912 * this will be just a container and no buffers will be allcoated 2913 */ 2914 recon_pic_buf_t **pps_pre_enc_recon_buf_q; 2915 2916 /** 2917 * Number of buffers in Recon buffer queue 2918 */ 2919 WORD32 i4_pre_enc_num_buf_recon_q; 2920 2921 /** 2922 * frame level CTB parameters and worst PU CU and TU in a CTB row 2923 */ 2924 frm_ctb_ctxt_t s_frm_ctb_prms; 2925 2926 /* 2927 * Moudle ctxt pointers of all modules 2928 */ 2929 module_ctxt_t s_module_ctxt; 2930 2931 /* 2932 * LAP static parameters 2933 */ 2934 ihevce_lap_static_params_t s_lap_stat_prms; 2935 2936 /* 2937 * Run time dynamic source params 2938 */ 2939 2940 ihevce_src_params_t s_runtime_src_prms; 2941 2942 /* 2943 *Target params 2944 */ 2945 ihevce_tgt_params_t s_runtime_tgt_params; 2946 2947 /* 2948 * Run time dynamic coding params 2949 */ 2950 ihevce_coding_params_t s_runtime_coding_prms; 2951 2952 /** 2953 * Pointer to static config params 2954 */ 2955 ihevce_static_cfg_params_t *ps_stat_prms; 2956 2957 /** 2958 * the following structure members used for copying recon buf info 2959 * in case of duplicate pics 2960 */ 2961 2962 /** 2963 * Array of reference picture list for pre enc group 2964 * Separate list for ping_pong instnaces 2965 * 2=> ref_pic_list0 and ref_pic_list1 2966 */ 2967 recon_pic_buf_t as_pre_enc_ref_lists[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME][2] 2968 [HEVCE_MAX_REF_PICS * 2]; 2969 2970 /** 2971 * Array of reference picture list for pre enc group 2972 * Separate list for ping_pong instnaces 2973 * 2=> ref_pic_list0 and ref_pic_list1 2974 */ 2975 recon_pic_buf_t *aps_pre_enc_ref_lists[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME][2] 2976 [HEVCE_MAX_REF_PICS * 2]; 2977 2978 /** 2979 * Number of input frames per input queue 2980 */ 2981 WORD32 i4_num_input_buf_per_queue; 2982 2983 /** 2984 * poc of the Clean Random Access(CRA)Ipic 2985 */ 2986 WORD32 i4_cra_poc; 2987 2988 /** Number of ref pics in list 0 for any given frame */ 2989 WORD32 i4_num_ref_l0; 2990 2991 /** Number of ref pics in list 1 for any given frame */ 2992 WORD32 i4_num_ref_l1; 2993 2994 /** Number of active ref pics in list 0 for cur frame */ 2995 WORD32 i4_num_ref_l0_active; 2996 2997 /** Number of active ref pics in list 1 for cur frame */ 2998 WORD32 i4_num_ref_l1_active; 2999 3000 /** Number of ref pics in list 0 for any given frame pre encode stage */ 3001 WORD32 i4_pre_enc_num_ref_l0; 3002 3003 /** Number of ref pics in list 1 for any given frame pre encode stage */ 3004 WORD32 i4_pre_enc_num_ref_l1; 3005 3006 /** Number of active ref pics in list 0 for cur frame pre encode stage */ 3007 WORD32 i4_pre_enc_num_ref_l0_active; 3008 3009 /** Number of active ref pics in list 1 for cur frame pre encode stage */ 3010 WORD32 i4_pre_enc_num_ref_l1_active; 3011 3012 /** 3013 * working mem to be used for frm level activities 3014 * One example is interplation at frame level. This requires memory 3015 * of (max width + 16) * (max_height + 7 + 16 ) * 2 bytes. 3016 * This is so since we generate interp output for max_width + 16 x 3017 * max_height + 16, and then the intermediate output is 16 bit and 3018 * is max_height + 16 + 7 rows 3019 */ 3020 UWORD8 *pu1_frm_lvl_wkg_mem; 3021 3022 /** 3023 * Multi thread processing context 3024 * This memory contains the variables and pointers shared across threads 3025 * in enc-group and pre-enc-group 3026 */ 3027 multi_thrd_ctxt_t s_multi_thrd; 3028 3029 /** I/O Queues created status */ 3030 WORD32 i4_io_queues_created; 3031 3032 WORD32 i4_end_flag; 3033 3034 /** number of bit-rate instances running */ 3035 WORD32 i4_num_bitrates; 3036 3037 /** number of enc frames running in parallel */ 3038 WORD32 i4_num_enc_loop_frm_pllel; 3039 3040 /*ref bitrate id*/ 3041 WORD32 i4_ref_mbr_id; 3042 3043 /* Flag to indicate app, that end of processing has reached */ 3044 WORD32 i4_frame_limit_reached; 3045 3046 /*Structure to store the function selector 3047 * pointers for common and encoder */ 3048 func_selector_t s_func_selector; 3049 3050 /*ref resolution id*/ 3051 WORD32 i4_resolution_id; 3052 3053 /*hle context*/ 3054 void *pv_hle_ctxt; 3055 3056 rc_quant_t s_rc_quant; 3057 /*ME cost of P pic stored for the next ref B pic*/ 3058 //LWORD64 i8_acc_me_cost_of_p_pic_for_b_pic[2]; 3059 3060 UWORD32 u4_cur_pic_encode_cnt; 3061 UWORD32 u4_cur_pic_encode_cnt_dbp; 3062 /*past 2 p pics high complexity status*/ 3063 WORD32 ai4_is_past_pic_complex[2]; 3064 3065 WORD32 i4_is_I_reset_done; 3066 WORD32 i4_past_RC_reset_count; 3067 3068 WORD32 i4_future_RC_reset; 3069 3070 WORD32 i4_past_RC_scd_reset_count; 3071 3072 WORD32 i4_future_RC_scd_reset; 3073 WORD32 i4_poc_reset_values; 3074 3075 /*Place holder to store the length of LAP in first pass*/ 3076 /** Number of frames to look-ahead for RC by - 3077 * counts 2 fields as one frame for interlaced 3078 */ 3079 WORD32 i4_look_ahead_frames_in_first_pass; 3080 3081 WORD32 ai4_mod_factor_derived_by_variance[2]; 3082 float f_strength; 3083 3084 /*for B frames use the avg activity 3085 from the layer 0 (I or P) which is the average over 3086 Lap2 window*/ 3087 LWORD64 ai8_lap2_8x8_avg_act_from_T0[2]; 3088 3089 LWORD64 ai8_lap2_16x16_avg_act_from_T0[3]; 3090 3091 LWORD64 ai8_lap2_32x32_avg_act_from_T0[3]; 3092 3093 /*for B frames use the log of avg activity 3094 from the layer 0 (I or P) which is the average over 3095 Lap2 window*/ 3096 long double ald_lap2_8x8_log_avg_act_from_T0[2]; 3097 3098 long double ald_lap2_16x16_log_avg_act_from_T0[3]; 3099 3100 long double ald_lap2_32x32_log_avg_act_from_T0[3]; 3101 3102 ihevce_tile_params_t *ps_tile_params_base; 3103 3104 WORD32 ai4_column_width_array[MAX_TILE_COLUMNS]; 3105 3106 WORD32 ai4_row_height_array[MAX_TILE_ROWS]; 3107 3108 /* Architecture */ 3109 IV_ARCH_T e_arch_type; 3110 3111 UWORD8 u1_is_popcnt_available; 3112 3113 WORD32 i4_active_scene_num; 3114 3115 WORD32 i4_max_fr_enc_loop_parallel_rc; 3116 WORD32 ai4_rc_query[IHEVCE_MAX_NUM_BITRATES]; 3117 WORD32 i4_active_enc_frame_id; 3118 3119 /** 3120 * LAP interface ctxt pointer 3121 */ 3122 void *pv_lap_interface_ctxt; 3123 3124 /* If enable, enables blu ray compatibility of op*/ 3125 WORD32 i4_blu_ray_spec; 3126 3127 } enc_ctxt_t; 3128 3129 /** 3130 ****************************************************************************** 3131 * @brief This struct contains the inter CTB params needed for the decision 3132 * of the best inter CU results 3133 ****************************************************************************** 3134 */ 3135 typedef struct 3136 { 3137 hme_pred_buf_mngr_t s_pred_buf_mngr; 3138 3139 /** X and y offset of ctb w.r.t. start of pic */ 3140 WORD32 i4_ctb_x_off; 3141 WORD32 i4_ctb_y_off; 3142 3143 /** 3144 * Pred buffer ptr, updated inside subpel refinement process. This 3145 * location passed to the leaf fxn for copying the winner pred buf 3146 */ 3147 UWORD8 **ppu1_pred; 3148 3149 /** Working mem passed to leaf fxns */ 3150 UWORD8 *pu1_wkg_mem; 3151 3152 /** prediction buffer stride fo rleaf fxns to copy the pred winner buf */ 3153 WORD32 i4_pred_stride; 3154 3155 /** Stride of input buf, updated inside subpel fxn */ 3156 WORD32 i4_inp_stride; 3157 3158 /** stride of recon buffer */ 3159 WORD32 i4_rec_stride; 3160 3161 /** Indicates if bi dir is enabled or not */ 3162 WORD32 i4_bidir_enabled; 3163 3164 /** 3165 * Total number of references of current picture which is enocded 3166 */ 3167 UWORD8 u1_num_ref; 3168 3169 /** Recon Pic buffer pointers for L0 list */ 3170 recon_pic_buf_t **pps_rec_list_l0; 3171 3172 /** Recon Pic buffer pointers for L1 list */ 3173 recon_pic_buf_t **pps_rec_list_l1; 3174 3175 /** 3176 * These pointers point to modified input, one each for one ref idx. 3177 * Instead of weighting the reference, we weight the input with inverse 3178 * wt and offset for list 0 and list 1. 3179 */ 3180 UWORD8 *apu1_wt_inp[2][MAX_NUM_REF]; 3181 3182 /* Since ME uses weighted inputs, we use reciprocal of the actual weights */ 3183 /* that are signaled in the bitstream */ 3184 WORD32 *pi4_inv_wt; 3185 WORD32 *pi4_inv_wt_shift_val; 3186 3187 /* Map between L0 Reference indices and LC indices */ 3188 WORD8 *pi1_past_list; 3189 3190 /* Map between L1 Reference indices and LC indices */ 3191 WORD8 *pi1_future_list; 3192 3193 /** 3194 * Points to the non-weighted input data for the current CTB 3195 */ 3196 UWORD8 *pu1_non_wt_inp; 3197 3198 /** 3199 * Store the pred lambda and lamda_qshifts for all the reference indices 3200 */ 3201 WORD32 i4_lamda; 3202 3203 UWORD8 u1_lamda_qshift; 3204 3205 WORD32 wpred_log_wdc; 3206 3207 /** 3208 * Number of active references in l0 3209 */ 3210 UWORD8 u1_num_active_ref_l0; 3211 3212 /** 3213 * Number of active references in l1 3214 */ 3215 UWORD8 u1_num_active_ref_l1; 3216 3217 /** The max_depth for inter tu_tree */ 3218 UWORD8 u1_max_tr_depth; 3219 3220 /** Quality Preset */ 3221 WORD8 i1_quality_preset; 3222 3223 /** SATD or SAD */ 3224 UWORD8 u1_use_satd; 3225 3226 /* Frame level QP */ 3227 WORD32 i4_qstep_ls8; 3228 3229 /* Pointer to an array of PU level src variances */ 3230 UWORD32 *pu4_src_variance; 3231 3232 WORD32 i4_alpha_stim_multiplier; 3233 3234 UWORD8 u1_is_cu_noisy; 3235 3236 ULWORD64 *pu8_part_src_sigmaX; 3237 3238 ULWORD64 *pu8_part_src_sigmaXSquared; 3239 3240 UWORD8 u1_max_2nx2n_tu_recur_cands; 3241 3242 } inter_ctb_prms_t; 3243 3244 /*****************************************************************************/ 3245 /* Extern Variable Declarations */ 3246 /*****************************************************************************/ 3247 extern const double lamda_modifier_for_I_pic[8]; 3248 3249 /*****************************************************************************/ 3250 /* Extern Function Declarations */ 3251 /*****************************************************************************/ 3252 3253 #endif /* _IHEVCE_ENC_STRUCTS_H_ */ 3254