xref: /aosp_15_r20/external/libhevc/encoder/ihevce_recur_bracketing.c (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar  *
3*c83a76b0SSuyog Pawar  * Copyright (C) 2018 The Android Open Source Project
4*c83a76b0SSuyog Pawar  *
5*c83a76b0SSuyog Pawar  * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar  * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar  * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar  *
9*c83a76b0SSuyog Pawar  * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar  *
11*c83a76b0SSuyog Pawar  * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar  * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar  * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar  * limitations under the License.
16*c83a76b0SSuyog Pawar  *
17*c83a76b0SSuyog Pawar  *****************************************************************************
18*c83a76b0SSuyog Pawar  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*c83a76b0SSuyog Pawar */
20*c83a76b0SSuyog Pawar 
21*c83a76b0SSuyog Pawar /*!
22*c83a76b0SSuyog Pawar ******************************************************************************
23*c83a76b0SSuyog Pawar * \file ihevce_recur_bracketing.c
24*c83a76b0SSuyog Pawar *
25*c83a76b0SSuyog Pawar * \brief
26*c83a76b0SSuyog Pawar *    This file contains interface functions of recursive bracketing
27*c83a76b0SSuyog Pawar *    module
28*c83a76b0SSuyog Pawar * \date
29*c83a76b0SSuyog Pawar *    12/02/2012
30*c83a76b0SSuyog Pawar *
31*c83a76b0SSuyog Pawar * \author
32*c83a76b0SSuyog Pawar *    Ittiam
33*c83a76b0SSuyog Pawar *
34*c83a76b0SSuyog Pawar * List of Functions
35*c83a76b0SSuyog Pawar *
36*c83a76b0SSuyog Pawar *
37*c83a76b0SSuyog Pawar ******************************************************************************
38*c83a76b0SSuyog Pawar */
39*c83a76b0SSuyog Pawar 
40*c83a76b0SSuyog Pawar /*****************************************************************************/
41*c83a76b0SSuyog Pawar /* File Includes                                                             */
42*c83a76b0SSuyog Pawar /*****************************************************************************/
43*c83a76b0SSuyog Pawar /* System include files */
44*c83a76b0SSuyog Pawar #include <stdio.h>
45*c83a76b0SSuyog Pawar #include <string.h>
46*c83a76b0SSuyog Pawar #include <stdlib.h>
47*c83a76b0SSuyog Pawar #include <assert.h>
48*c83a76b0SSuyog Pawar #include <stdarg.h>
49*c83a76b0SSuyog Pawar #include <math.h>
50*c83a76b0SSuyog Pawar 
51*c83a76b0SSuyog Pawar /* User include files */
52*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
53*c83a76b0SSuyog Pawar #include "itt_video_api.h"
54*c83a76b0SSuyog Pawar #include "ihevce_api.h"
55*c83a76b0SSuyog Pawar 
56*c83a76b0SSuyog Pawar #include "rc_cntrl_param.h"
57*c83a76b0SSuyog Pawar #include "rc_frame_info_collector.h"
58*c83a76b0SSuyog Pawar #include "rc_look_ahead_params.h"
59*c83a76b0SSuyog Pawar 
60*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
61*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
62*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
63*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
64*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
65*c83a76b0SSuyog Pawar #include "ihevc_chroma_itrans_recon.h"
66*c83a76b0SSuyog Pawar #include "ihevc_chroma_intra_pred.h"
67*c83a76b0SSuyog Pawar #include "ihevc_intra_pred.h"
68*c83a76b0SSuyog Pawar #include "ihevc_inter_pred.h"
69*c83a76b0SSuyog Pawar #include "ihevc_mem_fns.h"
70*c83a76b0SSuyog Pawar #include "ihevc_padding.h"
71*c83a76b0SSuyog Pawar #include "ihevc_weighted_pred.h"
72*c83a76b0SSuyog Pawar #include "ihevc_sao.h"
73*c83a76b0SSuyog Pawar #include "ihevc_resi_trans.h"
74*c83a76b0SSuyog Pawar #include "ihevc_quant_iquant_ssd.h"
75*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
76*c83a76b0SSuyog Pawar 
77*c83a76b0SSuyog Pawar #include "ihevce_defs.h"
78*c83a76b0SSuyog Pawar #include "ihevce_lap_enc_structs.h"
79*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_structs.h"
80*c83a76b0SSuyog Pawar #include "ihevce_me_common_defs.h"
81*c83a76b0SSuyog Pawar #include "ihevce_had_satd.h"
82*c83a76b0SSuyog Pawar #include "ihevce_error_codes.h"
83*c83a76b0SSuyog Pawar #include "ihevce_bitstream.h"
84*c83a76b0SSuyog Pawar #include "ihevce_cabac.h"
85*c83a76b0SSuyog Pawar #include "ihevce_rdoq_macros.h"
86*c83a76b0SSuyog Pawar #include "ihevce_function_selector.h"
87*c83a76b0SSuyog Pawar #include "ihevce_enc_structs.h"
88*c83a76b0SSuyog Pawar #include "ihevce_entropy_structs.h"
89*c83a76b0SSuyog Pawar #include "ihevce_cmn_utils_instr_set_router.h"
90*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_structs.h"
91*c83a76b0SSuyog Pawar #include "ihevce_ipe_instr_set_router.h"
92*c83a76b0SSuyog Pawar #include "ihevce_ipe_structs.h"
93*c83a76b0SSuyog Pawar #include "ihevce_ipe_pass.h"
94*c83a76b0SSuyog Pawar #include "ihevce_recur_bracketing.h"
95*c83a76b0SSuyog Pawar #include "ihevce_nbr_avail.h"
96*c83a76b0SSuyog Pawar #include "ihevc_common_tables.h"
97*c83a76b0SSuyog Pawar #include "ihevce_decomp_pre_intra_structs.h"
98*c83a76b0SSuyog Pawar #include "ihevce_decomp_pre_intra_pass.h"
99*c83a76b0SSuyog Pawar 
100*c83a76b0SSuyog Pawar #include "cast_types.h"
101*c83a76b0SSuyog Pawar #include "osal.h"
102*c83a76b0SSuyog Pawar #include "osal_defaults.h"
103*c83a76b0SSuyog Pawar 
104*c83a76b0SSuyog Pawar /*****************************************************************************/
105*c83a76b0SSuyog Pawar /* Constant Macros                                                           */
106*c83a76b0SSuyog Pawar /*****************************************************************************/
/* Compile-time switch, set to 0; it is not referenced in the visible part of
   this file. NOTE(review): the name suggests level-1/level-2 intra-pred
   debugging - confirm before relying on it. */
107*c83a76b0SSuyog Pawar #define IP_DBG_L1_l2 0
/* Constant 12; it is not referenced in the visible part of this file.
   NOTE(review): presumably a cost bias applied to child CUs during
   parent/child bracketing - confirm against its users. */
108*c83a76b0SSuyog Pawar #define CHILD_BIAS 12
109*c83a76b0SSuyog Pawar 
110*c83a76b0SSuyog Pawar /*****************************************************************************/
111*c83a76b0SSuyog Pawar /* Globals                                                                   */
112*c83a76b0SSuyog Pawar /*****************************************************************************/
113*c83a76b0SSuyog Pawar extern pf_intra_pred g_apf_lum_ip[10];
114*c83a76b0SSuyog Pawar 
115*c83a76b0SSuyog Pawar extern WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES];
116*c83a76b0SSuyog Pawar 
117*c83a76b0SSuyog Pawar UWORD8 gau1_cu_pos_x[64] = { 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7,
118*c83a76b0SSuyog Pawar                              6, 7, 4, 5, 4, 5, 6, 7, 6, 7, 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1,
119*c83a76b0SSuyog Pawar                              2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 4, 5, 4, 5, 6, 7, 6, 7 };
120*c83a76b0SSuyog Pawar 
121*c83a76b0SSuyog Pawar UWORD8 gau1_cu_pos_y[64] = { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 0, 0, 1, 1, 0, 0,
122*c83a76b0SSuyog Pawar                              1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 5, 5, 6, 6, 7, 7,
123*c83a76b0SSuyog Pawar                              6, 6, 7, 7, 4, 4, 5, 5, 4, 4, 5, 5, 6, 6, 7, 7, 6, 6, 7, 7 };
124*c83a76b0SSuyog Pawar 
/* Clears bit number 'bit' of the lvalue 'x' in place and yields the new value.
   Both arguments are parenthesised so that operands such as 'a | b' or a
   conditional expression expand with the intended precedence.
   'x' is evaluated twice, so it must not have side effects. */
#define RESET_BIT(x, bit) ((x) = (x) & ~((WORD32)1 << (bit)))
126*c83a76b0SSuyog Pawar 
127*c83a76b0SSuyog Pawar /*****************************************************************************/
128*c83a76b0SSuyog Pawar /* Function Definitions                                                      */
129*c83a76b0SSuyog Pawar /*****************************************************************************/
130*c83a76b0SSuyog Pawar 
131*c83a76b0SSuyog Pawar /*!
132*c83a76b0SSuyog Pawar ******************************************************************************
133*c83a76b0SSuyog Pawar * \if Function name : ihevce_update_cand_list \endif
134*c83a76b0SSuyog Pawar *
135*c83a76b0SSuyog Pawar * \brief
136*c83a76b0SSuyog Pawar *    Final Candidate list population, nbr flag andd nbr mode update function
137*c83a76b0SSuyog Pawar *
138*c83a76b0SSuyog Pawar * \param[in] ps_row_cu : pointer to cu analyse struct
139*c83a76b0SSuyog Pawar * \param[in] ps_cu_node : pointer to cu node info buffer
140*c83a76b0SSuyog Pawar * \param[in] ps_ed_blk_l1 : pointer to level 1 and 2 decision buffer
141*c83a76b0SSuyog Pawar * \param[in] pu1_cand_mode_list  : pointer to candidate list buffer
142*c83a76b0SSuyog Pawar *
143*c83a76b0SSuyog Pawar * \return
144*c83a76b0SSuyog Pawar *    None
145*c83a76b0SSuyog Pawar *
146*c83a76b0SSuyog Pawar * \author
147*c83a76b0SSuyog Pawar *  Ittiam
148*c83a76b0SSuyog Pawar *
149*c83a76b0SSuyog Pawar *****************************************************************************
150*c83a76b0SSuyog Pawar */
ihevce_update_cand_list(ihevce_ipe_cu_tree_t * ps_cu_node,ihevce_ed_blk_t * ps_ed_blk_l1,ihevce_ipe_ctxt_t * ps_ctxt)151*c83a76b0SSuyog Pawar void ihevce_update_cand_list(
152*c83a76b0SSuyog Pawar     ihevce_ipe_cu_tree_t *ps_cu_node, ihevce_ed_blk_t *ps_ed_blk_l1, ihevce_ipe_ctxt_t *ps_ctxt)
153*c83a76b0SSuyog Pawar {
154*c83a76b0SSuyog Pawar     WORD32 row, col, x, y, size;
155*c83a76b0SSuyog Pawar 
156*c83a76b0SSuyog Pawar     /* Candidate mode Update */
157*c83a76b0SSuyog Pawar     (void)ps_ed_blk_l1;
158*c83a76b0SSuyog Pawar     /* Update CTB mode map for the finalised CU */
159*c83a76b0SSuyog Pawar     x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1;
160*c83a76b0SSuyog Pawar     y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;
161*c83a76b0SSuyog Pawar     size = ps_cu_node->u1_cu_size >> 2;
162*c83a76b0SSuyog Pawar     for(row = y; row < (y + size); row++)
163*c83a76b0SSuyog Pawar     {
164*c83a76b0SSuyog Pawar         for(col = x; col < (x + size); col++)
165*c83a76b0SSuyog Pawar         {
166*c83a76b0SSuyog Pawar             ps_ctxt->au1_ctb_mode_map[row][col] = ps_cu_node->best_mode;
167*c83a76b0SSuyog Pawar         }
168*c83a76b0SSuyog Pawar     }
169*c83a76b0SSuyog Pawar     return;
170*c83a76b0SSuyog Pawar }
171*c83a76b0SSuyog Pawar 
172*c83a76b0SSuyog Pawar /*!
173*c83a76b0SSuyog Pawar ******************************************************************************
174*c83a76b0SSuyog Pawar * \if Function name : ihevce_intra_populate_mode_bits_cost_bracketing \endif
175*c83a76b0SSuyog Pawar *
176*c83a76b0SSuyog Pawar * \brief
177*c83a76b0SSuyog Pawar *    Mpm indx calc function based on left and top available modes
178*c83a76b0SSuyog Pawar *
179*c83a76b0SSuyog Pawar * \param[in] top_intra_mode : Top available intra mode
180*c83a76b0SSuyog Pawar * \param[in] left_intra_mode : Left available intra mode
181*c83a76b0SSuyog Pawar * \param[in] available_top : Top availability flag
182*c83a76b0SSuyog Pawar * \param[in] available_left : Left availability flag
183*c83a76b0SSuyog Pawar * \param[in] cu_pos_y : cu position wrt to CTB
184*c83a76b0SSuyog Pawar * \param[in] mode_bits_cost : pointer to mode bits buffer
185*c83a76b0SSuyog Pawar * \param[in] lambda : Lambda value (SAD/SATD)
186*c83a76b0SSuyog Pawar * \param[in] cand_mode_list  : pointer to candidate list buffer
187*c83a76b0SSuyog Pawar *
188*c83a76b0SSuyog Pawar * \return
189*c83a76b0SSuyog Pawar *    None
190*c83a76b0SSuyog Pawar *
191*c83a76b0SSuyog Pawar * \author
192*c83a76b0SSuyog Pawar *  Ittiam
193*c83a76b0SSuyog Pawar *
194*c83a76b0SSuyog Pawar *****************************************************************************
195*c83a76b0SSuyog Pawar */
ihevce_intra_populate_mode_bits_cost_bracketing(WORD32 top_intra_mode,WORD32 left_intra_mode,WORD32 available_top,WORD32 available_left,WORD32 cu_pos_y,UWORD16 * mode_bits_cost,UWORD16 * mode_bits,WORD32 lambda,WORD32 * cand_mode_list)196*c83a76b0SSuyog Pawar void ihevce_intra_populate_mode_bits_cost_bracketing(
197*c83a76b0SSuyog Pawar     WORD32 top_intra_mode,
198*c83a76b0SSuyog Pawar     WORD32 left_intra_mode,
199*c83a76b0SSuyog Pawar     WORD32 available_top,
200*c83a76b0SSuyog Pawar     WORD32 available_left,
201*c83a76b0SSuyog Pawar     WORD32 cu_pos_y,
202*c83a76b0SSuyog Pawar     UWORD16 *mode_bits_cost,
203*c83a76b0SSuyog Pawar     UWORD16 *mode_bits,
204*c83a76b0SSuyog Pawar     WORD32 lambda,
205*c83a76b0SSuyog Pawar     WORD32 *cand_mode_list)
206*c83a76b0SSuyog Pawar {
207*c83a76b0SSuyog Pawar     /* local variables */
208*c83a76b0SSuyog Pawar     WORD32 i;
209*c83a76b0SSuyog Pawar     WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
210*c83a76b0SSuyog Pawar 
211*c83a76b0SSuyog Pawar     UWORD16 one_bits_cost =
212*c83a76b0SSuyog Pawar         COMPUTE_RATE_COST_CLIP30(4, lambda, (LAMBDA_Q_SHIFT + 1));  //1.5 * lambda
213*c83a76b0SSuyog Pawar     UWORD16 two_bits_cost =
214*c83a76b0SSuyog Pawar         COMPUTE_RATE_COST_CLIP30(6, lambda, (LAMBDA_Q_SHIFT + 1));  //2.5 * lambda
215*c83a76b0SSuyog Pawar     UWORD16 five_bits_cost =
216*c83a76b0SSuyog Pawar         COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1));  //5.5 * lambda
217*c83a76b0SSuyog Pawar 
218*c83a76b0SSuyog Pawar     for(i = 0; i < 35; i++)
219*c83a76b0SSuyog Pawar     {
220*c83a76b0SSuyog Pawar         mode_bits_cost[i] = five_bits_cost;
221*c83a76b0SSuyog Pawar         mode_bits[i] = 5;
222*c83a76b0SSuyog Pawar     }
223*c83a76b0SSuyog Pawar 
224*c83a76b0SSuyog Pawar     /* EIID: set availability flag to zero if modes are invalid.
225*c83a76b0SSuyog Pawar        Required since some CU's might be skipped (though available)
226*c83a76b0SSuyog Pawar        and their modes will be set to 255 (-1)*/
227*c83a76b0SSuyog Pawar     if(35 < top_intra_mode || 0 > top_intra_mode)
228*c83a76b0SSuyog Pawar         available_top = 0;
229*c83a76b0SSuyog Pawar     if(35 < left_intra_mode || 0 > left_intra_mode)
230*c83a76b0SSuyog Pawar         available_left = 0;
231*c83a76b0SSuyog Pawar 
232*c83a76b0SSuyog Pawar     /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
233*c83a76b0SSuyog Pawar     /* N = top */
234*c83a76b0SSuyog Pawar     if(0 == available_top)
235*c83a76b0SSuyog Pawar     {
236*c83a76b0SSuyog Pawar         cand_intra_pred_mode_top = INTRA_DC;
237*c83a76b0SSuyog Pawar     }
238*c83a76b0SSuyog Pawar     /* for neighbour != INTRA, setting DC is done outside */
239*c83a76b0SSuyog Pawar     else if(0 == cu_pos_y) /* It's on the CTB boundary */
240*c83a76b0SSuyog Pawar     {
241*c83a76b0SSuyog Pawar         cand_intra_pred_mode_top = INTRA_DC;
242*c83a76b0SSuyog Pawar     }
243*c83a76b0SSuyog Pawar     else
244*c83a76b0SSuyog Pawar     {
245*c83a76b0SSuyog Pawar         cand_intra_pred_mode_top = top_intra_mode;
246*c83a76b0SSuyog Pawar     }
247*c83a76b0SSuyog Pawar 
248*c83a76b0SSuyog Pawar     /* N = left */
249*c83a76b0SSuyog Pawar     if(0 == available_left)
250*c83a76b0SSuyog Pawar     {
251*c83a76b0SSuyog Pawar         cand_intra_pred_mode_left = INTRA_DC;
252*c83a76b0SSuyog Pawar         //cand_intra_pred_mode_left = cand_intra_pred_mode_top;
253*c83a76b0SSuyog Pawar     }
254*c83a76b0SSuyog Pawar     /* for neighbour != INTRA, setting DC is done outside */
255*c83a76b0SSuyog Pawar     else
256*c83a76b0SSuyog Pawar     {
257*c83a76b0SSuyog Pawar         cand_intra_pred_mode_left = left_intra_mode;
258*c83a76b0SSuyog Pawar     }
259*c83a76b0SSuyog Pawar 
260*c83a76b0SSuyog Pawar     /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
261*c83a76b0SSuyog Pawar     if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
262*c83a76b0SSuyog Pawar     {
263*c83a76b0SSuyog Pawar         if(cand_intra_pred_mode_left < 2)
264*c83a76b0SSuyog Pawar         {
265*c83a76b0SSuyog Pawar             cand_mode_list[0] = INTRA_PLANAR;
266*c83a76b0SSuyog Pawar             cand_mode_list[1] = INTRA_DC;
267*c83a76b0SSuyog Pawar             cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
268*c83a76b0SSuyog Pawar         }
269*c83a76b0SSuyog Pawar         else
270*c83a76b0SSuyog Pawar         {
271*c83a76b0SSuyog Pawar             cand_mode_list[0] = cand_intra_pred_mode_left;
272*c83a76b0SSuyog Pawar             cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
273*c83a76b0SSuyog Pawar             cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
274*c83a76b0SSuyog Pawar         }
275*c83a76b0SSuyog Pawar     }
276*c83a76b0SSuyog Pawar     else
277*c83a76b0SSuyog Pawar     {
278*c83a76b0SSuyog Pawar         if(0 == available_left)
279*c83a76b0SSuyog Pawar         {
280*c83a76b0SSuyog Pawar             cand_mode_list[0] = cand_intra_pred_mode_top;
281*c83a76b0SSuyog Pawar             cand_mode_list[1] = cand_intra_pred_mode_left;
282*c83a76b0SSuyog Pawar         }
283*c83a76b0SSuyog Pawar         else
284*c83a76b0SSuyog Pawar         {
285*c83a76b0SSuyog Pawar             cand_mode_list[0] = cand_intra_pred_mode_left;
286*c83a76b0SSuyog Pawar             cand_mode_list[1] = cand_intra_pred_mode_top;
287*c83a76b0SSuyog Pawar         }
288*c83a76b0SSuyog Pawar         if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
289*c83a76b0SSuyog Pawar            (cand_intra_pred_mode_top != INTRA_PLANAR))
290*c83a76b0SSuyog Pawar         {
291*c83a76b0SSuyog Pawar             cand_mode_list[2] = INTRA_PLANAR;
292*c83a76b0SSuyog Pawar         }
293*c83a76b0SSuyog Pawar         else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
294*c83a76b0SSuyog Pawar         {
295*c83a76b0SSuyog Pawar             cand_mode_list[2] = INTRA_DC;
296*c83a76b0SSuyog Pawar         }
297*c83a76b0SSuyog Pawar         else
298*c83a76b0SSuyog Pawar         {
299*c83a76b0SSuyog Pawar             cand_mode_list[2] = INTRA_ANGULAR(26);
300*c83a76b0SSuyog Pawar         }
301*c83a76b0SSuyog Pawar     }
302*c83a76b0SSuyog Pawar     mode_bits_cost[cand_mode_list[0]] = one_bits_cost;
303*c83a76b0SSuyog Pawar     mode_bits_cost[cand_mode_list[1]] = two_bits_cost;
304*c83a76b0SSuyog Pawar     mode_bits_cost[cand_mode_list[2]] = two_bits_cost;
305*c83a76b0SSuyog Pawar 
306*c83a76b0SSuyog Pawar     mode_bits[cand_mode_list[0]] = 2;
307*c83a76b0SSuyog Pawar     mode_bits[cand_mode_list[1]] = 3;
308*c83a76b0SSuyog Pawar     mode_bits[cand_mode_list[2]] = 3;
309*c83a76b0SSuyog Pawar }
310*c83a76b0SSuyog Pawar 
311*c83a76b0SSuyog Pawar /*!
312*c83a76b0SSuyog Pawar ******************************************************************************
313*c83a76b0SSuyog Pawar * \if Function name : ihevce_pu_calc_4x4_blk \endif
314*c83a76b0SSuyog Pawar *
315*c83a76b0SSuyog Pawar * \brief
316*c83a76b0SSuyog Pawar *    4x4 pu (8x8 CU) mode decision using step 8421 method
317*c83a76b0SSuyog Pawar *
318*c83a76b0SSuyog Pawar * \param[in] ps_cu_node : pointer to cu node info buffer
319*c83a76b0SSuyog Pawar * \param[in] pu1_src : pointer to src pixels
320*c83a76b0SSuyog Pawar * \param[in] src_stride : frm source stride
321*c83a76b0SSuyog Pawar * \param[in] ref : pointer to reference pixels for prediction
322*c83a76b0SSuyog Pawar * \param[in] cand_mode_list  : pointer to candidate list buffer
323*c83a76b0SSuyog Pawar * \param[in] best_costs_4x4  : pointer to 3 best cost buffer
324*c83a76b0SSuyog Pawar * \param[in] best_modes_4x4  : pointer to 3 best mode buffer
325*c83a76b0SSuyog Pawar *
326*c83a76b0SSuyog Pawar * \return
327*c83a76b0SSuyog Pawar *    None
328*c83a76b0SSuyog Pawar *
329*c83a76b0SSuyog Pawar * \author
330*c83a76b0SSuyog Pawar *  Ittiam
331*c83a76b0SSuyog Pawar *
332*c83a76b0SSuyog Pawar *****************************************************************************
333*c83a76b0SSuyog Pawar */
ihevce_pu_calc_4x4_blk(ihevce_ipe_ctxt_t * ps_ctxt,ihevce_ipe_cu_tree_t * ps_cu_node,UWORD8 * pu1_src,WORD32 src_stride,UWORD8 * ref,UWORD16 * mode_bits_cost,WORD32 * best_costs_4x4,UWORD8 * best_modes_4x4,func_selector_t * ps_func_selector)334*c83a76b0SSuyog Pawar void ihevce_pu_calc_4x4_blk(
335*c83a76b0SSuyog Pawar     ihevce_ipe_ctxt_t *ps_ctxt,
336*c83a76b0SSuyog Pawar     ihevce_ipe_cu_tree_t *ps_cu_node,
337*c83a76b0SSuyog Pawar     UWORD8 *pu1_src,
338*c83a76b0SSuyog Pawar     WORD32 src_stride,
339*c83a76b0SSuyog Pawar     UWORD8 *ref,
340*c83a76b0SSuyog Pawar     UWORD16 *mode_bits_cost,
341*c83a76b0SSuyog Pawar     WORD32 *best_costs_4x4,
342*c83a76b0SSuyog Pawar     UWORD8 *best_modes_4x4,
343*c83a76b0SSuyog Pawar     func_selector_t *ps_func_selector)
344*c83a76b0SSuyog Pawar {
345*c83a76b0SSuyog Pawar     WORD16 *pi2_trans_tmp = ps_ctxt->pi2_trans_tmp;
346*c83a76b0SSuyog Pawar     WORD16 *pi2_trans_out = ps_ctxt->pi2_trans_out;
347*c83a76b0SSuyog Pawar     UWORD8 u1_use_satd = ps_ctxt->u1_use_satd;
348*c83a76b0SSuyog Pawar     UWORD8 u1_level_1_refine_on = ps_ctxt->u1_level_1_refine_on;
349*c83a76b0SSuyog Pawar 
350*c83a76b0SSuyog Pawar     WORD32 i, j = 0, i_end;
351*c83a76b0SSuyog Pawar     UWORD8 mode, best_amode = 255;
352*c83a76b0SSuyog Pawar     UWORD8 pred[16];
353*c83a76b0SSuyog Pawar 
354*c83a76b0SSuyog Pawar     UWORD16 sad;
355*c83a76b0SSuyog Pawar     WORD32 sad_cost = 0;
356*c83a76b0SSuyog Pawar     WORD32 best_asad_cost = 0xFFFFF;
357*c83a76b0SSuyog Pawar     WORD32 temp;
358*c83a76b0SSuyog Pawar     UWORD8 modes_to_eval[5];
359*c83a76b0SSuyog Pawar     WORD32 costs_4x4[5];
360*c83a76b0SSuyog Pawar     UWORD8 modes_4x4[5] = { 0, 1, 2, 3, 4 };
361*c83a76b0SSuyog Pawar 
362*c83a76b0SSuyog Pawar     /* LO resolution hence low resolution disable */
363*c83a76b0SSuyog Pawar     WORD32 u1_low_resol = 0;
364*c83a76b0SSuyog Pawar     UWORD8 au1_best_modes[1] = { 0 };
365*c83a76b0SSuyog Pawar     WORD32 ai4_best_sad_costs[1] = { 0 };
366*c83a76b0SSuyog Pawar 
367*c83a76b0SSuyog Pawar     WORD16 *pi2_tmp = &pi2_trans_tmp[0];
368*c83a76b0SSuyog Pawar 
369*c83a76b0SSuyog Pawar     ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list =
370*c83a76b0SSuyog Pawar         &ps_ctxt->s_ipe_optimised_function_list;
371*c83a76b0SSuyog Pawar 
372*c83a76b0SSuyog Pawar     //apf_resd_trns[0] = &ihevc_resi_trans_4x4_ttype1;
373*c83a76b0SSuyog Pawar     //apf_resd_trns[0] = &ihevc_HAD_4x4_8bit;
374*c83a76b0SSuyog Pawar 
375*c83a76b0SSuyog Pawar     for(i = 0; i < 5; i++)
376*c83a76b0SSuyog Pawar     {
377*c83a76b0SSuyog Pawar         costs_4x4[i] = MAX_INTRA_COST_IPE;
378*c83a76b0SSuyog Pawar     }
379*c83a76b0SSuyog Pawar 
380*c83a76b0SSuyog Pawar     ps_ipe_optimised_function_list->pf_ed_4x4_find_best_modes(
381*c83a76b0SSuyog Pawar         pu1_src,
382*c83a76b0SSuyog Pawar         src_stride,
383*c83a76b0SSuyog Pawar         ref,
384*c83a76b0SSuyog Pawar         mode_bits_cost,
385*c83a76b0SSuyog Pawar         au1_best_modes,
386*c83a76b0SSuyog Pawar         ai4_best_sad_costs,
387*c83a76b0SSuyog Pawar         u1_low_resol,
388*c83a76b0SSuyog Pawar         ps_ipe_optimised_function_list->pf_4x4_sad_computer);
389*c83a76b0SSuyog Pawar 
390*c83a76b0SSuyog Pawar     best_amode = au1_best_modes[0];
391*c83a76b0SSuyog Pawar     best_asad_cost = ai4_best_sad_costs[0];
392*c83a76b0SSuyog Pawar 
393*c83a76b0SSuyog Pawar     ASSERT(best_amode != 255);
394*c83a76b0SSuyog Pawar     /* Around best level 4 angular mode, search for best level 2 mode */
395*c83a76b0SSuyog Pawar     modes_to_eval[0] = best_amode - 2;
396*c83a76b0SSuyog Pawar     modes_to_eval[1] = best_amode + 2;
397*c83a76b0SSuyog Pawar     i = 0;
398*c83a76b0SSuyog Pawar     i_end = 2;
399*c83a76b0SSuyog Pawar     if(best_amode == 2)
400*c83a76b0SSuyog Pawar         i = 1;
401*c83a76b0SSuyog Pawar     else if(best_amode == 34)
402*c83a76b0SSuyog Pawar         i_end = 1;
403*c83a76b0SSuyog Pawar     for(; i < i_end; i++)
404*c83a76b0SSuyog Pawar     {
405*c83a76b0SSuyog Pawar         mode = modes_to_eval[i];
406*c83a76b0SSuyog Pawar 
407*c83a76b0SSuyog Pawar         g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode);
408*c83a76b0SSuyog Pawar 
409*c83a76b0SSuyog Pawar         sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(pu1_src, &pred[0], src_stride, 4);
410*c83a76b0SSuyog Pawar 
411*c83a76b0SSuyog Pawar         sad_cost = sad;
412*c83a76b0SSuyog Pawar         sad_cost += mode_bits_cost[mode];
413*c83a76b0SSuyog Pawar 
414*c83a76b0SSuyog Pawar         if(sad_cost < best_asad_cost)
415*c83a76b0SSuyog Pawar         {
416*c83a76b0SSuyog Pawar             best_amode = mode;
417*c83a76b0SSuyog Pawar             best_asad_cost = sad_cost;
418*c83a76b0SSuyog Pawar         }
419*c83a76b0SSuyog Pawar     }
420*c83a76b0SSuyog Pawar 
421*c83a76b0SSuyog Pawar     /* Around best level 2 angular mode, search for best level 1 mode */
422*c83a76b0SSuyog Pawar     /* Also evaluate for non-angular mode */
423*c83a76b0SSuyog Pawar 
424*c83a76b0SSuyog Pawar     i = 0;
425*c83a76b0SSuyog Pawar     /*Level 1 refinement is disabled for ES preset */
426*c83a76b0SSuyog Pawar     if(1 == u1_level_1_refine_on)
427*c83a76b0SSuyog Pawar     {
428*c83a76b0SSuyog Pawar         if(best_amode != 2)
429*c83a76b0SSuyog Pawar             modes_to_eval[i++] = best_amode - 1;
430*c83a76b0SSuyog Pawar         modes_to_eval[i++] = best_amode;
431*c83a76b0SSuyog Pawar     }
432*c83a76b0SSuyog Pawar 
433*c83a76b0SSuyog Pawar     modes_to_eval[i++] = 0;
434*c83a76b0SSuyog Pawar     modes_to_eval[i++] = 1;
435*c83a76b0SSuyog Pawar 
436*c83a76b0SSuyog Pawar     if(1 == u1_level_1_refine_on)
437*c83a76b0SSuyog Pawar     {
438*c83a76b0SSuyog Pawar         if(best_amode != 34)
439*c83a76b0SSuyog Pawar             modes_to_eval[i++] = best_amode + 1;
440*c83a76b0SSuyog Pawar     }
441*c83a76b0SSuyog Pawar     i_end = i;
442*c83a76b0SSuyog Pawar     i = 0;
443*c83a76b0SSuyog Pawar 
444*c83a76b0SSuyog Pawar     for(; i < i_end; i++)
445*c83a76b0SSuyog Pawar     {
446*c83a76b0SSuyog Pawar         mode = modes_to_eval[i];
447*c83a76b0SSuyog Pawar 
448*c83a76b0SSuyog Pawar         g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode);
449*c83a76b0SSuyog Pawar 
450*c83a76b0SSuyog Pawar         /* Hard coding to use SATD */
451*c83a76b0SSuyog Pawar         if(u1_use_satd)
452*c83a76b0SSuyog Pawar         {
453*c83a76b0SSuyog Pawar             ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr(
454*c83a76b0SSuyog Pawar                 pu1_src, &pred[0], (WORD32 *)pi2_tmp, pi2_trans_out, src_stride, 4, 4, NULL_PLANE);
455*c83a76b0SSuyog Pawar 
456*c83a76b0SSuyog Pawar             sad = ihevce_ipe_pass_satd(pi2_trans_out, 4, 4);
457*c83a76b0SSuyog Pawar         }
458*c83a76b0SSuyog Pawar         else
459*c83a76b0SSuyog Pawar         {
460*c83a76b0SSuyog Pawar             sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(
461*c83a76b0SSuyog Pawar                 pu1_src, &pred[0], src_stride, 4);
462*c83a76b0SSuyog Pawar         }
463*c83a76b0SSuyog Pawar         sad_cost = sad;
464*c83a76b0SSuyog Pawar         sad_cost += mode_bits_cost[mode];
465*c83a76b0SSuyog Pawar 
466*c83a76b0SSuyog Pawar         costs_4x4[i] = sad_cost;
467*c83a76b0SSuyog Pawar     }
468*c83a76b0SSuyog Pawar 
469*c83a76b0SSuyog Pawar     /* Arrange the reference array in ascending order */
470*c83a76b0SSuyog Pawar     for(i = 0; i < (i_end - 1); i++)
471*c83a76b0SSuyog Pawar     {
472*c83a76b0SSuyog Pawar         for(j = i + 1; j < i_end; j++)
473*c83a76b0SSuyog Pawar         {
474*c83a76b0SSuyog Pawar             if(costs_4x4[i] > costs_4x4[j])
475*c83a76b0SSuyog Pawar             {
476*c83a76b0SSuyog Pawar                 temp = costs_4x4[i];
477*c83a76b0SSuyog Pawar                 costs_4x4[i] = costs_4x4[j];
478*c83a76b0SSuyog Pawar                 costs_4x4[j] = temp;
479*c83a76b0SSuyog Pawar 
480*c83a76b0SSuyog Pawar                 temp = modes_4x4[i];
481*c83a76b0SSuyog Pawar                 modes_4x4[i] = modes_4x4[j];
482*c83a76b0SSuyog Pawar                 modes_4x4[j] = temp;
483*c83a76b0SSuyog Pawar             }
484*c83a76b0SSuyog Pawar         }
485*c83a76b0SSuyog Pawar     }
486*c83a76b0SSuyog Pawar     for(i = 0; i < 3; i++)
487*c83a76b0SSuyog Pawar     {
488*c83a76b0SSuyog Pawar         best_costs_4x4[i] = costs_4x4[i];
489*c83a76b0SSuyog Pawar         best_modes_4x4[i] = modes_to_eval[modes_4x4[i]];
490*c83a76b0SSuyog Pawar     }
491*c83a76b0SSuyog Pawar 
492*c83a76b0SSuyog Pawar     {
493*c83a76b0SSuyog Pawar         ps_cu_node->best_mode = best_modes_4x4[0];
494*c83a76b0SSuyog Pawar         ps_cu_node->best_cost = best_costs_4x4[0];
495*c83a76b0SSuyog Pawar         ps_cu_node->best_satd = best_costs_4x4[0] - mode_bits_cost[ps_cu_node->best_mode];
496*c83a76b0SSuyog Pawar     }
497*c83a76b0SSuyog Pawar }
498*c83a76b0SSuyog Pawar 
499*c83a76b0SSuyog Pawar /*!
500*c83a76b0SSuyog Pawar ******************************************************************************
501*c83a76b0SSuyog Pawar * \if Function name : ihevce_pu_calc_8x8_blk \endif
502*c83a76b0SSuyog Pawar *
503*c83a76b0SSuyog Pawar * \brief
504*c83a76b0SSuyog Pawar *    4x4 pu (8x8 CU) mode decision loop using step 8421 method
505*c83a76b0SSuyog Pawar *
506*c83a76b0SSuyog Pawar * \param[in] ps_curr_src : pointer to src pixels struct
507*c83a76b0SSuyog Pawar * \param[in] ps_ctxt : pointer to IPE context struct
508*c83a76b0SSuyog Pawar * \param[in] ps_cu_node : pointer to cu node info buffer
509*c83a76b0SSuyog Pawar *
510*c83a76b0SSuyog Pawar * \return
511*c83a76b0SSuyog Pawar *    None
512*c83a76b0SSuyog Pawar *
513*c83a76b0SSuyog Pawar * \author
514*c83a76b0SSuyog Pawar *  Ittiam
515*c83a76b0SSuyog Pawar *
516*c83a76b0SSuyog Pawar *****************************************************************************
517*c83a76b0SSuyog Pawar */
ihevce_pu_calc_8x8_blk(iv_enc_yuv_buf_t * ps_curr_src,ihevce_ipe_ctxt_t * ps_ctxt,ihevce_ipe_cu_tree_t * ps_cu_node,func_selector_t * ps_func_selector)518*c83a76b0SSuyog Pawar void ihevce_pu_calc_8x8_blk(
519*c83a76b0SSuyog Pawar     iv_enc_yuv_buf_t *ps_curr_src,
520*c83a76b0SSuyog Pawar     ihevce_ipe_ctxt_t *ps_ctxt,
521*c83a76b0SSuyog Pawar     ihevce_ipe_cu_tree_t *ps_cu_node,
522*c83a76b0SSuyog Pawar     func_selector_t *ps_func_selector)
523*c83a76b0SSuyog Pawar {
524*c83a76b0SSuyog Pawar     WORD32 i, j;
525*c83a76b0SSuyog Pawar     WORD32 nbr_flags;
526*c83a76b0SSuyog Pawar     nbr_avail_flags_t s_nbr;
527*c83a76b0SSuyog Pawar     WORD32 trans_size = ps_cu_node->ps_parent->u1_cu_size >> 1;
528*c83a76b0SSuyog Pawar 
529*c83a76b0SSuyog Pawar     UWORD8 *pu1_src_4x4;
530*c83a76b0SSuyog Pawar     WORD32 xA, xB, yA, yB;
531*c83a76b0SSuyog Pawar     //WORD32 x, y, size;
532*c83a76b0SSuyog Pawar     WORD32 top_intra_mode;
533*c83a76b0SSuyog Pawar     WORD32 left_intra_mode;
534*c83a76b0SSuyog Pawar     //    WORD8 *top_intra_mode_ptr;
535*c83a76b0SSuyog Pawar     //  WORD8 *left_intra_mode_ptr;
536*c83a76b0SSuyog Pawar     UWORD8 *pu1_orig;
537*c83a76b0SSuyog Pawar     WORD32 src_strd = ps_curr_src->i4_y_strd;
538*c83a76b0SSuyog Pawar 
539*c83a76b0SSuyog Pawar     WORD32 cu_pos_x = ps_cu_node->ps_parent->u2_x0 << 1;
540*c83a76b0SSuyog Pawar     WORD32 cu_pos_y = ps_cu_node->ps_parent->u2_y0 << 1;
541*c83a76b0SSuyog Pawar     ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
542*c83a76b0SSuyog Pawar 
543*c83a76b0SSuyog Pawar     ihevc_intra_pred_luma_ref_substitution_fptr =
544*c83a76b0SSuyog Pawar         ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
545*c83a76b0SSuyog Pawar 
546*c83a76b0SSuyog Pawar     pu1_orig = (UWORD8 *)(ps_curr_src->pv_y_buf) +
547*c83a76b0SSuyog Pawar                ((ps_cu_node->ps_parent->u2_y0 << 3) * src_strd) +
548*c83a76b0SSuyog Pawar                (ps_cu_node->ps_parent->u2_x0 << 3);
549*c83a76b0SSuyog Pawar     for(i = 0; i < 2; i++)
550*c83a76b0SSuyog Pawar     {
551*c83a76b0SSuyog Pawar         for(j = 0; j < 2; j++)
552*c83a76b0SSuyog Pawar         {
553*c83a76b0SSuyog Pawar             WORD32 cand_mode_list[3];
554*c83a76b0SSuyog Pawar             pu1_src_4x4 = pu1_orig + (i * trans_size * src_strd) + (j * trans_size);
555*c83a76b0SSuyog Pawar             /* get the neighbour availability flags */
556*c83a76b0SSuyog Pawar             nbr_flags = ihevce_get_nbr_intra(
557*c83a76b0SSuyog Pawar                 &s_nbr,
558*c83a76b0SSuyog Pawar                 ps_ctxt->pu1_ctb_nbr_map,
559*c83a76b0SSuyog Pawar                 ps_ctxt->i4_nbr_map_strd,
560*c83a76b0SSuyog Pawar                 cu_pos_x + ((j) * (trans_size >> 2)),
561*c83a76b0SSuyog Pawar                 cu_pos_y + ((i) * (trans_size >> 2)),
562*c83a76b0SSuyog Pawar                 trans_size >> 2);
563*c83a76b0SSuyog Pawar 
564*c83a76b0SSuyog Pawar             /* call the function which populates sad cost for all the modes */
565*c83a76b0SSuyog Pawar             xA = ((ps_cu_node->ps_parent->u2_x0 << 3) >> 2) + j;
566*c83a76b0SSuyog Pawar             yA = ((ps_cu_node->ps_parent->u2_y0 << 3) >> 2) + 1 + i;
567*c83a76b0SSuyog Pawar             xB = xA + 1;
568*c83a76b0SSuyog Pawar             yB = yA - 1;
569*c83a76b0SSuyog Pawar             left_intra_mode = ps_ctxt->au1_ctb_mode_map[yA][xA];
570*c83a76b0SSuyog Pawar             top_intra_mode = ps_ctxt->au1_ctb_mode_map[yB][xB];
571*c83a76b0SSuyog Pawar 
572*c83a76b0SSuyog Pawar             ihevce_intra_populate_mode_bits_cost_bracketing(
573*c83a76b0SSuyog Pawar                 top_intra_mode,
574*c83a76b0SSuyog Pawar                 left_intra_mode,
575*c83a76b0SSuyog Pawar                 s_nbr.u1_top_avail,
576*c83a76b0SSuyog Pawar                 s_nbr.u1_left_avail,
577*c83a76b0SSuyog Pawar                 ps_cu_node->ps_parent->u2_y0,
578*c83a76b0SSuyog Pawar                 &ps_ctxt->au2_mode_bits_cost_8x8pu[i * 2 + j][0],
579*c83a76b0SSuyog Pawar                 &ps_ctxt->au2_mode_bits_8x8_pu[0],
580*c83a76b0SSuyog Pawar                 ps_ctxt->i4_ol_sad_lambda,
581*c83a76b0SSuyog Pawar                 cand_mode_list);
582*c83a76b0SSuyog Pawar 
583*c83a76b0SSuyog Pawar             /* call the function which populates ref data for intra predicion */
584*c83a76b0SSuyog Pawar             ihevc_intra_pred_luma_ref_substitution_fptr(
585*c83a76b0SSuyog Pawar                 pu1_src_4x4 - src_strd - 1,
586*c83a76b0SSuyog Pawar                 pu1_src_4x4 - src_strd,
587*c83a76b0SSuyog Pawar                 pu1_src_4x4 - 1,
588*c83a76b0SSuyog Pawar                 src_strd,
589*c83a76b0SSuyog Pawar                 4,
590*c83a76b0SSuyog Pawar                 nbr_flags,
591*c83a76b0SSuyog Pawar                 &ps_ctxt->au1_ref_8x8pu[i * 2 + j][0],
592*c83a76b0SSuyog Pawar                 0);
593*c83a76b0SSuyog Pawar 
594*c83a76b0SSuyog Pawar             ihevce_pu_calc_4x4_blk(
595*c83a76b0SSuyog Pawar                 ps_ctxt,
596*c83a76b0SSuyog Pawar                 ps_cu_node->ps_sub_cu[(i * 2) + j],
597*c83a76b0SSuyog Pawar                 pu1_src_4x4,
598*c83a76b0SSuyog Pawar                 src_strd,
599*c83a76b0SSuyog Pawar                 &ps_ctxt->au1_ref_8x8pu[i * 2 + j][0],
600*c83a76b0SSuyog Pawar                 &ps_ctxt->au2_mode_bits_cost_8x8pu[i * 2 + j][0],
601*c83a76b0SSuyog Pawar                 &ps_cu_node->ps_sub_cu[(i * 2) + j]->au4_best_cost_1tu[0],
602*c83a76b0SSuyog Pawar                 &ps_cu_node->ps_sub_cu[(i * 2) + j]->au1_best_mode_1tu[0],
603*c83a76b0SSuyog Pawar                 ps_func_selector);
604*c83a76b0SSuyog Pawar 
605*c83a76b0SSuyog Pawar             /*&au4_cost_4x4[i*2 + j][0],
606*c83a76b0SSuyog Pawar                 &au1_modes_4x4[i*2 + j][0]);*/ //TTODO : mode will change for the four partition
607*c83a76b0SSuyog Pawar 
608*c83a76b0SSuyog Pawar             ihevce_set_nbr_map(
609*c83a76b0SSuyog Pawar                 ps_ctxt->pu1_ctb_nbr_map,
610*c83a76b0SSuyog Pawar                 ps_ctxt->i4_nbr_map_strd,
611*c83a76b0SSuyog Pawar                 cu_pos_x + ((j) * (trans_size >> 2)),
612*c83a76b0SSuyog Pawar                 cu_pos_y + ((i) * (trans_size >> 2)),
613*c83a76b0SSuyog Pawar                 (trans_size >> 2),
614*c83a76b0SSuyog Pawar                 1);
615*c83a76b0SSuyog Pawar 
616*c83a76b0SSuyog Pawar             xA = ((ps_cu_node->ps_parent->u2_x0 << 3) >> 2) + 1 + j;
617*c83a76b0SSuyog Pawar             yA = ((ps_cu_node->ps_parent->u2_y0 << 3) >> 2) + 1 + i;
618*c83a76b0SSuyog Pawar             ps_ctxt->au1_ctb_mode_map[yA][xA] = ps_cu_node->ps_sub_cu[i * 2 + j]->best_mode;
619*c83a76b0SSuyog Pawar             ps_cu_node->ps_sub_cu[i * 2 + j]->u2_mode_bits_cost =
620*c83a76b0SSuyog Pawar                 ps_ctxt->au2_mode_bits_8x8_pu[ps_cu_node->ps_sub_cu[i * 2 + j]->best_mode];
621*c83a76b0SSuyog Pawar         }
622*c83a76b0SSuyog Pawar     }
623*c83a76b0SSuyog Pawar }
624*c83a76b0SSuyog Pawar 
625*c83a76b0SSuyog Pawar /*!
626*c83a76b0SSuyog Pawar ******************************************************************************
627*c83a76b0SSuyog Pawar * \if Function name : ihevce_bracketing_analysis \endif
628*c83a76b0SSuyog Pawar *
629*c83a76b0SSuyog Pawar * \brief
630*c83a76b0SSuyog Pawar *    Interface function that evaluates the MAX CU and the MAX - 1 CU, with the MAX CU
631*c83a76b0SSuyog Pawar *    size info decided by the coarse resolution mode decision. Compares the SATD/SAD
632*c83a76b0SSuyog Pawar *    cost between the 2 CUs to determine the actual CU size and best 3 modes to be given to rdopt
633*c83a76b0SSuyog Pawar *
634*c83a76b0SSuyog Pawar * \param[in] ps_ctxt : pointer to IPE context struct
635*c83a76b0SSuyog Pawar * \param[in] ps_cu_node : pointer to cu node info buffer
636*c83a76b0SSuyog Pawar * \param[in] ps_curr_src : pointer to src pixels struct
637*c83a76b0SSuyog Pawar * \param[in] ps_ctb_out : pointer to ip ctb out struct
638*c83a76b0SSuyog Pawar * \param[in] ps_row_cu : pointer to cu analyse struct (currently commented out of the signature)
639*c83a76b0SSuyog Pawar * \param[in] ps_ed_l1_ctb : pointer to level 1 early deci struct
640*c83a76b0SSuyog Pawar * \param[in] ps_ed_l2_ctb : pointer to level 2 early deci struct
641*c83a76b0SSuyog Pawar * \param[in] ps_l0_ipe_out_ctb : pointer to ipe_l0_ctb_analyse_for_me_t struct
642*c83a76b0SSuyog Pawar *
643*c83a76b0SSuyog Pawar * \return
644*c83a76b0SSuyog Pawar *    None
645*c83a76b0SSuyog Pawar *
646*c83a76b0SSuyog Pawar * \author
647*c83a76b0SSuyog Pawar *  Ittiam
648*c83a76b0SSuyog Pawar *
649*c83a76b0SSuyog Pawar *****************************************************************************
650*c83a76b0SSuyog Pawar */
ihevce_bracketing_analysis(ihevce_ipe_ctxt_t * ps_ctxt,ihevce_ipe_cu_tree_t * ps_cu_node,iv_enc_yuv_buf_t * ps_curr_src,ctb_analyse_t * ps_ctb_out,ihevce_ed_blk_t * ps_ed_l1_ctb,ihevce_ed_blk_t * ps_ed_l2_ctb,ihevce_ed_ctb_l1_t * ps_ed_ctb_l1,ipe_l0_ctb_analyse_for_me_t * ps_l0_ipe_out_ctb)651*c83a76b0SSuyog Pawar void ihevce_bracketing_analysis(
652*c83a76b0SSuyog Pawar     ihevce_ipe_ctxt_t *ps_ctxt,
653*c83a76b0SSuyog Pawar     ihevce_ipe_cu_tree_t *ps_cu_node,
654*c83a76b0SSuyog Pawar     iv_enc_yuv_buf_t *ps_curr_src,
655*c83a76b0SSuyog Pawar     ctb_analyse_t *ps_ctb_out,
656*c83a76b0SSuyog Pawar     //cu_analyse_t         *ps_row_cu,
657*c83a76b0SSuyog Pawar     ihevce_ed_blk_t *ps_ed_l1_ctb,
658*c83a76b0SSuyog Pawar     ihevce_ed_blk_t *ps_ed_l2_ctb,
659*c83a76b0SSuyog Pawar     ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
660*c83a76b0SSuyog Pawar     ipe_l0_ctb_analyse_for_me_t *ps_l0_ipe_out_ctb)
661*c83a76b0SSuyog Pawar {
662*c83a76b0SSuyog Pawar     WORD32 cu_pos_x = 0;
663*c83a76b0SSuyog Pawar     WORD32 cu_pos_y = 0;
664*c83a76b0SSuyog Pawar 
665*c83a76b0SSuyog Pawar     UWORD8 u1_curr_ctb_wdt = ps_cu_node->u1_width;
666*c83a76b0SSuyog Pawar     UWORD8 u1_curr_ctb_hgt = ps_cu_node->u1_height;
667*c83a76b0SSuyog Pawar     WORD32 num_8x8_blks_x = (u1_curr_ctb_wdt >> 3);
668*c83a76b0SSuyog Pawar     WORD32 num_8x8_blks_y = (u1_curr_ctb_hgt >> 3);
669*c83a76b0SSuyog Pawar 
670*c83a76b0SSuyog Pawar     ihevce_ed_blk_t *ps_ed_blk_l1 = ps_ed_l1_ctb;
671*c83a76b0SSuyog Pawar     ihevce_ed_blk_t *ps_ed_blk_l2 = ps_ed_l2_ctb;
672*c83a76b0SSuyog Pawar 
673*c83a76b0SSuyog Pawar     WORD32 i;
674*c83a76b0SSuyog Pawar     WORD32 cand_mode_list[3];
675*c83a76b0SSuyog Pawar     //cu_analyse_t *ps_curr_cu = ps_row_cu;
676*c83a76b0SSuyog Pawar     WORD32 blk_cnt = 0;
677*c83a76b0SSuyog Pawar     WORD32 j = 0;
678*c83a76b0SSuyog Pawar     WORD32 merge_32x32_l1, merge_32x32_l2;
679*c83a76b0SSuyog Pawar 
680*c83a76b0SSuyog Pawar     WORD32 i4_skip_intra_eval_32x32_l1;
681*c83a76b0SSuyog Pawar     //EIID: flag indicating number of 16x16 blocks to be skipped for intra evaluation within 32x32 block
682*c83a76b0SSuyog Pawar 
683*c83a76b0SSuyog Pawar     WORD32 parent_cost = 0;
684*c83a76b0SSuyog Pawar     WORD32 child_cost[4] = { 0 };
685*c83a76b0SSuyog Pawar     WORD32 child_cost_least = 0;
686*c83a76b0SSuyog Pawar     WORD32 child_satd[4] = { 0 };
687*c83a76b0SSuyog Pawar     WORD32 x, y, size;
688*c83a76b0SSuyog Pawar     WORD32 merge_64x64 = 1;
689*c83a76b0SSuyog Pawar     UWORD8 au1_best_32x32_modes[4];
690*c83a76b0SSuyog Pawar     WORD32 au4_best_32x32_cost[4];
691*c83a76b0SSuyog Pawar     WORD32 parent_best_mode;
692*c83a76b0SSuyog Pawar     UWORD8 best_mode;
693*c83a76b0SSuyog Pawar 
694*c83a76b0SSuyog Pawar     WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset;
695*c83a76b0SSuyog Pawar     /* flag to control 1CU-4TU modes based on quality preset                */
696*c83a76b0SSuyog Pawar     /* if set 1CU-4TU are explicity evaluated else 1CU-1TU modes are copied */
697*c83a76b0SSuyog Pawar     WORD32 i4_enable_1cu_4tu = (i4_quality_preset == IHEVCE_QUALITY_P2) ||
698*c83a76b0SSuyog Pawar                                (i4_quality_preset == IHEVCE_QUALITY_P0);
699*c83a76b0SSuyog Pawar 
700*c83a76b0SSuyog Pawar     /* flag to control 4CU-16TU mode based on quality preset                */
701*c83a76b0SSuyog Pawar     /* if set 4CU-16TU are explicity evaluated else 4CU-4TU modes are copied*/
702*c83a76b0SSuyog Pawar     WORD32 i4_enable_4cu_16tu = (i4_quality_preset == IHEVCE_QUALITY_P2) ||
703*c83a76b0SSuyog Pawar                                 (i4_quality_preset == IHEVCE_QUALITY_P0);
704*c83a76b0SSuyog Pawar 
705*c83a76b0SSuyog Pawar     WORD32 i4_mod_factor_num, i4_mod_factor_den = QP_MOD_FACTOR_DEN;  //2;
706*c83a76b0SSuyog Pawar     float f_strength;
707*c83a76b0SSuyog Pawar     /* Accumalte satd */
708*c83a76b0SSuyog Pawar     LWORD64 i8_frame_acc_satd_cost = 0, i8_frame_acc_satd_by_modqp_q10 = 0;
709*c83a76b0SSuyog Pawar     WORD32 i4_ctb_acc_satd = 0;
710*c83a76b0SSuyog Pawar 
711*c83a76b0SSuyog Pawar     /* Accumalate Mode bits cost */
712*c83a76b0SSuyog Pawar     LWORD64 i8_frame_acc_mode_bits_cost = 0;
713*c83a76b0SSuyog Pawar 
714*c83a76b0SSuyog Pawar     /* Step2 is bypassed for parent, uses children modes*/
715*c83a76b0SSuyog Pawar     WORD32 step2_bypass = 1;
716*c83a76b0SSuyog Pawar 
717*c83a76b0SSuyog Pawar     if(1 == ps_ctxt->u1_disable_child_cu_decide)
718*c83a76b0SSuyog Pawar         step2_bypass = 0;
719*c83a76b0SSuyog Pawar 
720*c83a76b0SSuyog Pawar     ps_cu_node->ps_parent = ps_ctxt->ps_ipe_cu_tree;
721*c83a76b0SSuyog Pawar     for(i = 0; i < 4; i++)
722*c83a76b0SSuyog Pawar     {
723*c83a76b0SSuyog Pawar         ps_cu_node->ps_sub_cu[i] = ps_ctxt->ps_ipe_cu_tree + 1 + i;
724*c83a76b0SSuyog Pawar     }
725*c83a76b0SSuyog Pawar 
726*c83a76b0SSuyog Pawar     /* Loop for all 8x8 block in a CTB */
727*c83a76b0SSuyog Pawar     ps_ctb_out->u4_cu_split_flags = 0x1;
728*c83a76b0SSuyog Pawar 
729*c83a76b0SSuyog Pawar     /* Initialize intra 64x64, 32x32 and 16x16 costs to max value */
730*c83a76b0SSuyog Pawar     for(i = 0; i < (MAX_CU_IN_CTB >> 4); i++)
731*c83a76b0SSuyog Pawar     {
732*c83a76b0SSuyog Pawar         ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i] = MAX_INTRA_COST_IPE;
733*c83a76b0SSuyog Pawar     }
734*c83a76b0SSuyog Pawar 
735*c83a76b0SSuyog Pawar     for(i = 0; i < (MAX_CU_IN_CTB >> 2); i++)
736*c83a76b0SSuyog Pawar     {
737*c83a76b0SSuyog Pawar         ps_l0_ipe_out_ctb->ai4_best16x16_intra_cost[i] = MAX_INTRA_COST_IPE;
738*c83a76b0SSuyog Pawar     }
739*c83a76b0SSuyog Pawar 
740*c83a76b0SSuyog Pawar     for(i = 0; i < (MAX_CU_IN_CTB); i++)
741*c83a76b0SSuyog Pawar     {
742*c83a76b0SSuyog Pawar         ps_l0_ipe_out_ctb->ai4_best8x8_intra_cost[i] = MAX_INTRA_COST_IPE;
743*c83a76b0SSuyog Pawar     }
744*c83a76b0SSuyog Pawar 
745*c83a76b0SSuyog Pawar     ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = MAX_INTRA_COST_IPE;
746*c83a76b0SSuyog Pawar 
747*c83a76b0SSuyog Pawar     /* by default 64x64 modes are set to default values DC and Planar */
748*c83a76b0SSuyog Pawar     ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[0] = 0;
749*c83a76b0SSuyog Pawar     ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[1] = 1;
750*c83a76b0SSuyog Pawar     ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[2] = 255;
751*c83a76b0SSuyog Pawar 
752*c83a76b0SSuyog Pawar     /* by default 64x4 split is set to 1 */
753*c83a76b0SSuyog Pawar     ps_l0_ipe_out_ctb->u1_split_flag = 1;
754*c83a76b0SSuyog Pawar 
755*c83a76b0SSuyog Pawar     /* Modulation factor calculated based on spatial variance instead of hardcoded val*/
756*c83a76b0SSuyog Pawar     i4_mod_factor_num = ps_ctxt->ai4_mod_factor_derived_by_variance[1];  //16;
757*c83a76b0SSuyog Pawar 
758*c83a76b0SSuyog Pawar     f_strength = ps_ctxt->f_strength;
759*c83a76b0SSuyog Pawar 
760*c83a76b0SSuyog Pawar     /* ------------------------------------------------ */
761*c83a76b0SSuyog Pawar     /* populate the early decisions done by L1 analysis */
762*c83a76b0SSuyog Pawar     /* ------------------------------------------------ */
763*c83a76b0SSuyog Pawar     for(i = 0; i < (MAX_CU_IN_CTB >> 2); i++)
764*c83a76b0SSuyog Pawar     {
765*c83a76b0SSuyog Pawar         ps_l0_ipe_out_ctb->ai4_best_sad_8x8_l1_ipe[i] = ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[i];
766*c83a76b0SSuyog Pawar         ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_ipe[i] = ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[i];
767*c83a76b0SSuyog Pawar         ps_l0_ipe_out_ctb->ai4_best_sad_8x8_l1_me[i] = ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[i];
768*c83a76b0SSuyog Pawar         ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_me[i] = ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_me[i];
769*c83a76b0SSuyog Pawar     }
770*c83a76b0SSuyog Pawar 
771*c83a76b0SSuyog Pawar     /* Init CTB level accumalated SATD and MPM bits */
772*c83a76b0SSuyog Pawar     ps_l0_ipe_out_ctb->i4_ctb_acc_satd = 0;
773*c83a76b0SSuyog Pawar     ps_l0_ipe_out_ctb->i4_ctb_acc_mpm_bits = 0;
774*c83a76b0SSuyog Pawar 
775*c83a76b0SSuyog Pawar     /* ------------------------------------------------ */
776*c83a76b0SSuyog Pawar     /* Loop over all the blocks in current CTB          */
777*c83a76b0SSuyog Pawar     /* ------------------------------------------------ */
778*c83a76b0SSuyog Pawar     {
779*c83a76b0SSuyog Pawar         /* 64 8x8 blocks should be encountered for the do,while loop to exit */
780*c83a76b0SSuyog Pawar         do
781*c83a76b0SSuyog Pawar         {
782*c83a76b0SSuyog Pawar             intra32_analyse_t *ps_intra32_analyse;
783*c83a76b0SSuyog Pawar             intra16_analyse_t *ps_intra16_analyse;
784*c83a76b0SSuyog Pawar             WORD32 *pi4_intra_32_cost;
785*c83a76b0SSuyog Pawar             WORD32 *pi4_intra_16_cost;
786*c83a76b0SSuyog Pawar             WORD32 *pi4_intra_8_cost;
787*c83a76b0SSuyog Pawar             WORD32 merge_16x16_l1;
788*c83a76b0SSuyog Pawar 
789*c83a76b0SSuyog Pawar             /* Given the blk_cnt, get the CU's top-left 8x8 block's x and y positions within the CTB */
790*c83a76b0SSuyog Pawar             cu_pos_x = gau1_cu_pos_x[blk_cnt];
791*c83a76b0SSuyog Pawar             cu_pos_y = gau1_cu_pos_y[blk_cnt];
792*c83a76b0SSuyog Pawar 
793*c83a76b0SSuyog Pawar             /* default value for 32x32 best mode - blk_cnt increases by 16 for each 32x32 */
794*c83a76b0SSuyog Pawar             au1_best_32x32_modes[blk_cnt >> 4] = 255;
795*c83a76b0SSuyog Pawar 
796*c83a76b0SSuyog Pawar             /* get the corresponding intra 32 analyse pointer  use (blk_cnt / 16) */
797*c83a76b0SSuyog Pawar             /* blk cnt is in terms of 8x8 units so a 32x32 will have 16 8x8 units */
798*c83a76b0SSuyog Pawar             ps_intra32_analyse = &ps_l0_ipe_out_ctb->as_intra32_analyse[blk_cnt >> 4];
799*c83a76b0SSuyog Pawar 
800*c83a76b0SSuyog Pawar             /* get the corresponding intra 16 analyse pointer use (blk_cnt & 0xF / 4)*/
801*c83a76b0SSuyog Pawar             /* blk cnt is in terms of 8x8 units so a 16x16 will have 4 8x8 units */
802*c83a76b0SSuyog Pawar             ps_intra16_analyse = &ps_intra32_analyse->as_intra16_analyse[(blk_cnt & 0xF) >> 2];
803*c83a76b0SSuyog Pawar 
804*c83a76b0SSuyog Pawar             /* Line below assumes min_cu_size of 8 - checks whether CU starts are within picture */
805*c83a76b0SSuyog Pawar             if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y))
806*c83a76b0SSuyog Pawar             {
807*c83a76b0SSuyog Pawar                 /* Reset to zero for every cu decision */
808*c83a76b0SSuyog Pawar                 merge_32x32_l1 = 0;
809*c83a76b0SSuyog Pawar 
810*c83a76b0SSuyog Pawar                 child_cost_least = 0;
811*c83a76b0SSuyog Pawar 
812*c83a76b0SSuyog Pawar                 /* At L2, each 4x4 corresponds to 16x16 at L0. Every 4 16x16 stores a merge_success flag */
813*c83a76b0SSuyog Pawar                 ps_ed_blk_l2 = ps_ed_l2_ctb + (blk_cnt >> 2);
814*c83a76b0SSuyog Pawar 
815*c83a76b0SSuyog Pawar                 pi4_intra_32_cost = &ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[blk_cnt >> 4];
816*c83a76b0SSuyog Pawar 
817*c83a76b0SSuyog Pawar                 /* by default 32x32 modes are set to default values DC and Planar */
818*c83a76b0SSuyog Pawar                 ps_intra32_analyse->au1_best_modes_32x32_tu[0] = 0;
819*c83a76b0SSuyog Pawar                 ps_intra32_analyse->au1_best_modes_32x32_tu[1] = 1;
820*c83a76b0SSuyog Pawar                 ps_intra32_analyse->au1_best_modes_32x32_tu[2] = 255;
821*c83a76b0SSuyog Pawar 
822*c83a76b0SSuyog Pawar                 /* By default 32x32 split is set to 1 */
823*c83a76b0SSuyog Pawar                 ps_intra32_analyse->b1_split_flag = 1;
824*c83a76b0SSuyog Pawar 
825*c83a76b0SSuyog Pawar                 ps_intra32_analyse->au1_best_modes_16x16_tu[0] = 0;
826*c83a76b0SSuyog Pawar                 ps_intra32_analyse->au1_best_modes_16x16_tu[1] = 1;
827*c83a76b0SSuyog Pawar                 ps_intra32_analyse->au1_best_modes_16x16_tu[2] = 255;
828*c83a76b0SSuyog Pawar 
829*c83a76b0SSuyog Pawar                 /* 16x16 cost & 8x8 cost are stored in Raster scan order */
830*c83a76b0SSuyog Pawar                 /* stride of 16x16 buffer is MAX_CU_IN_CTB_ROW >> 1      */
831*c83a76b0SSuyog Pawar                 /* stride of 8x8 buffer is MAX_CU_IN_CTB_ROW             */
832*c83a76b0SSuyog Pawar                 {
833*c83a76b0SSuyog Pawar                     WORD32 pos_x_8x8, pos_y_8x8;
834*c83a76b0SSuyog Pawar 
835*c83a76b0SSuyog Pawar                     pos_x_8x8 = gau1_cu_pos_x[blk_cnt];
836*c83a76b0SSuyog Pawar                     pos_y_8x8 = gau1_cu_pos_y[blk_cnt];
837*c83a76b0SSuyog Pawar 
838*c83a76b0SSuyog Pawar                     pi4_intra_16_cost = &ps_l0_ipe_out_ctb->ai4_best16x16_intra_cost[0];
839*c83a76b0SSuyog Pawar 
840*c83a76b0SSuyog Pawar                     pi4_intra_16_cost +=
841*c83a76b0SSuyog Pawar                         ((pos_x_8x8 >> 1) + ((pos_y_8x8 >> 1) * (MAX_CU_IN_CTB_ROW >> 1)));
842*c83a76b0SSuyog Pawar 
843*c83a76b0SSuyog Pawar                     pi4_intra_8_cost = &ps_l0_ipe_out_ctb->ai4_best8x8_intra_cost[0];
844*c83a76b0SSuyog Pawar 
845*c83a76b0SSuyog Pawar                     pi4_intra_8_cost += (pos_x_8x8 + (pos_y_8x8 * MAX_CU_IN_CTB_ROW));
846*c83a76b0SSuyog Pawar                 }
847*c83a76b0SSuyog Pawar 
848*c83a76b0SSuyog Pawar                 merge_32x32_l1 = 0;
849*c83a76b0SSuyog Pawar                 merge_32x32_l2 = 0;
850*c83a76b0SSuyog Pawar                 i4_skip_intra_eval_32x32_l1 = 0;
851*c83a76b0SSuyog Pawar 
852*c83a76b0SSuyog Pawar                 /* Enable 16x16 merge iff sufficient 8x8 blocks remain in the current CTB */
853*c83a76b0SSuyog Pawar                 merge_16x16_l1 = 0;
854*c83a76b0SSuyog Pawar                 if(((num_8x8_blks_x - cu_pos_x) >= 2) && ((num_8x8_blks_y - cu_pos_y) >= 2))
855*c83a76b0SSuyog Pawar                 {
856*c83a76b0SSuyog Pawar #if !ENABLE_UNIFORM_CU_SIZE_8x8
857*c83a76b0SSuyog Pawar                     merge_16x16_l1 = ps_ed_blk_l1->merge_success;
858*c83a76b0SSuyog Pawar #else
859*c83a76b0SSuyog Pawar                     merge_16x16_l1 = 0;
860*c83a76b0SSuyog Pawar #endif
861*c83a76b0SSuyog Pawar                 }
862*c83a76b0SSuyog Pawar 
863*c83a76b0SSuyog Pawar                 /* Enable 32x32 merge iff sufficient 8x8 blocks remain in the current CTB */
864*c83a76b0SSuyog Pawar                 if(((num_8x8_blks_x - cu_pos_x) >= 4) && ((num_8x8_blks_y - cu_pos_y) >= 4))
865*c83a76b0SSuyog Pawar                 {
866*c83a76b0SSuyog Pawar                     /* Check 4 flags of L1(8x8) say merge */
867*c83a76b0SSuyog Pawar                     for(i = 0; i < 4; i++)
868*c83a76b0SSuyog Pawar                     {
869*c83a76b0SSuyog Pawar                         merge_32x32_l1 += (ps_ed_blk_l1 + (i * 4))->merge_success;
870*c83a76b0SSuyog Pawar 
871*c83a76b0SSuyog Pawar                         //EIDD: num 16x16 blocks for which inter_intra flag says eval only inter, i.e. skip intra eval
872*c83a76b0SSuyog Pawar                         i4_skip_intra_eval_32x32_l1 +=
873*c83a76b0SSuyog Pawar                             ((ps_ed_blk_l1 + (i * 4))->intra_or_inter == 2) ? 1 : 0;
874*c83a76b0SSuyog Pawar                     }
875*c83a76b0SSuyog Pawar 
876*c83a76b0SSuyog Pawar #if !ENABLE_UNIFORM_CU_SIZE_8x8
877*c83a76b0SSuyog Pawar                     /* Check 1 flag from L2(16x16) say merge */
878*c83a76b0SSuyog Pawar                     merge_32x32_l2 = ps_ed_blk_l2->merge_success;
879*c83a76b0SSuyog Pawar #else
880*c83a76b0SSuyog Pawar                     merge_32x32_l1 = 0;
881*c83a76b0SSuyog Pawar                     merge_32x32_l2 = 0;
882*c83a76b0SSuyog Pawar #endif
883*c83a76b0SSuyog Pawar                 }
884*c83a76b0SSuyog Pawar 
885*c83a76b0SSuyog Pawar #if DISABLE_L2_IPE_IN_PB_L1_IN_B
886*c83a76b0SSuyog Pawar                 if((i4_quality_preset == IHEVCE_QUALITY_P6) && (ps_ctxt->i4_slice_type != ISLICE))
887*c83a76b0SSuyog Pawar                 {
888*c83a76b0SSuyog Pawar                     merge_32x32_l2 = 0;
889*c83a76b0SSuyog Pawar                     ps_ed_blk_l2->merge_success = 0;
890*c83a76b0SSuyog Pawar                 }
891*c83a76b0SSuyog Pawar #endif
892*c83a76b0SSuyog Pawar 
893*c83a76b0SSuyog Pawar                 ps_intra32_analyse->b1_valid_cu = 1;
894*c83a76b0SSuyog Pawar 
895*c83a76b0SSuyog Pawar                 /* If Merge success from all 4 L1 and L2, max CU size 32x32 is chosen */
896*c83a76b0SSuyog Pawar                 /* EIID: if all blocks to be skipped then skip entire 32x32 for intra eval,
897*c83a76b0SSuyog Pawar                 if no blocks to be skipped then eval entire 32x32,
898*c83a76b0SSuyog Pawar                 else break the merge and go to 16x16 level eval */
899*c83a76b0SSuyog Pawar                 if((merge_32x32_l1 == 4) && merge_32x32_l2 &&
900*c83a76b0SSuyog Pawar                    ((i4_skip_intra_eval_32x32_l1 == 0) ||
901*c83a76b0SSuyog Pawar                     (i4_skip_intra_eval_32x32_l1 == 4))  //comment this line to disable break-merge
902*c83a76b0SSuyog Pawar                 )
903*c83a76b0SSuyog Pawar                 {
904*c83a76b0SSuyog Pawar #if IP_DBG_L1_l2
905*c83a76b0SSuyog Pawar                     /* Populate params for 32x32 block analysis */
906*c83a76b0SSuyog Pawar                     ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE;
907*c83a76b0SSuyog Pawar 
908*c83a76b0SSuyog Pawar                     ps_cu_node->ps_parent->u1_cu_size = 32;
909*c83a76b0SSuyog Pawar                     ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */
910*c83a76b0SSuyog Pawar                     ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */
911*c83a76b0SSuyog Pawar                     ps_cu_node->ps_parent->best_mode = ps_ed_blk_l2->best_merge_mode;
912*c83a76b0SSuyog Pawar                     /* CU size 32x32 and fill the final cu params */
913*c83a76b0SSuyog Pawar 
914*c83a76b0SSuyog Pawar                     ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
915*c83a76b0SSuyog Pawar 
916*c83a76b0SSuyog Pawar                     /* Increment pointers */
917*c83a76b0SSuyog Pawar                     ps_ed_blk_l1 += 16;
918*c83a76b0SSuyog Pawar                     blk_cnt += 16;
919*c83a76b0SSuyog Pawar                     ps_row_cu++;
920*c83a76b0SSuyog Pawar                     merge_64x64 &= 1;
921*c83a76b0SSuyog Pawar #else
922*c83a76b0SSuyog Pawar 
923*c83a76b0SSuyog Pawar                     /* EIID: dont evaluate if all 4 blocks at L1 said inter is winning*/
924*c83a76b0SSuyog Pawar                     if(4 == i4_skip_intra_eval_32x32_l1 && (ps_ctxt->i4_slice_type != ISLICE))
925*c83a76b0SSuyog Pawar                     {
926*c83a76b0SSuyog Pawar                         WORD32 i4_local_ctr1, i4_local_ctr2;
927*c83a76b0SSuyog Pawar 
928*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE;
929*c83a76b0SSuyog Pawar 
930*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->u1_cu_size = 32;
931*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->u2_x0 =
932*c83a76b0SSuyog Pawar                             gau1_cu_pos_x[blk_cnt]; /* Populate properly */
933*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->u2_y0 =
934*c83a76b0SSuyog Pawar                             gau1_cu_pos_y[blk_cnt]; /* Populate properly */
935*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->best_mode =
936*c83a76b0SSuyog Pawar                             INTRA_DC;  //ps_ed_blk_l2->best_merge_mode;
937*c83a76b0SSuyog Pawar                         /* CU size 32x32 and fill the final cu params */
938*c83a76b0SSuyog Pawar 
939*c83a76b0SSuyog Pawar                         /* fill in the first modes as invalid */
940*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC;
941*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->au1_best_mode_1tu[1] =
942*c83a76b0SSuyog Pawar                             INTRA_DC;  //for safery. Since update_cand_list will set num_modes as 3
943*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC;
944*c83a76b0SSuyog Pawar 
945*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC;
946*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC;
947*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC;
948*c83a76b0SSuyog Pawar 
949*c83a76b0SSuyog Pawar                         ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
950*c83a76b0SSuyog Pawar 
951*c83a76b0SSuyog Pawar                         //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0;
952*c83a76b0SSuyog Pawar                         //ps_row_cu->u1_num_intra_rdopt_cands = 0;
953*c83a76b0SSuyog Pawar 
954*c83a76b0SSuyog Pawar                         ps_intra32_analyse->b1_valid_cu = 0;
955*c83a76b0SSuyog Pawar                         ps_intra32_analyse->b1_split_flag = 0;
956*c83a76b0SSuyog Pawar                         ps_intra32_analyse->b1_merge_flag = 0;
957*c83a76b0SSuyog Pawar                         /*memset (&ps_intra32_analyse->au1_best_modes_32x32_tu,
958*c83a76b0SSuyog Pawar                         255,
959*c83a76b0SSuyog Pawar                         NUM_BEST_MODES);
960*c83a76b0SSuyog Pawar                         memset (&ps_intra32_analyse->au1_best_modes_16x16_tu,
961*c83a76b0SSuyog Pawar                         255,
962*c83a76b0SSuyog Pawar                         NUM_BEST_MODES);*/
963*c83a76b0SSuyog Pawar                         //set only first mode since if it's 255. it wont go ahead
964*c83a76b0SSuyog Pawar                         ps_intra32_analyse->au1_best_modes_32x32_tu[0] = 255;
965*c83a76b0SSuyog Pawar                         ps_intra32_analyse->au1_best_modes_16x16_tu[0] = 255;
966*c83a76b0SSuyog Pawar 
967*c83a76b0SSuyog Pawar                         *pi4_intra_32_cost = MAX_INTRA_COST_IPE;
968*c83a76b0SSuyog Pawar 
969*c83a76b0SSuyog Pawar                         /*since ME will start evaluating from bottom up, set the lower
970*c83a76b0SSuyog Pawar                         cu size data invalid */
971*c83a76b0SSuyog Pawar                         for(i4_local_ctr1 = 0; i4_local_ctr1 < 4; i4_local_ctr1++)
972*c83a76b0SSuyog Pawar                         {
973*c83a76b0SSuyog Pawar                             WORD32 *pi4_intra_8_cost_curr16;
974*c83a76b0SSuyog Pawar 
975*c83a76b0SSuyog Pawar                             ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
976*c83a76b0SSuyog Pawar                                 .au1_best_modes_16x16_tu[0] = 255;
977*c83a76b0SSuyog Pawar                             ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
978*c83a76b0SSuyog Pawar                                 .au1_best_modes_8x8_tu[0] = 255;
979*c83a76b0SSuyog Pawar                             ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_merge_flag = 0;
980*c83a76b0SSuyog Pawar                             ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_valid_cu = 0;
981*c83a76b0SSuyog Pawar                             ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_split_flag = 0;
982*c83a76b0SSuyog Pawar 
983*c83a76b0SSuyog Pawar                             pi4_intra_16_cost
984*c83a76b0SSuyog Pawar                                 [(i4_local_ctr1 & 1) + ((MAX_CU_IN_CTB_ROW >> 1) *
985*c83a76b0SSuyog Pawar                                                         (i4_local_ctr1 >> 1))] = MAX_INTRA_COST_IPE;
986*c83a76b0SSuyog Pawar 
987*c83a76b0SSuyog Pawar                             pi4_intra_8_cost_curr16 = pi4_intra_8_cost + ((i4_local_ctr1 & 1) << 1);
988*c83a76b0SSuyog Pawar                             pi4_intra_8_cost_curr16 +=
989*c83a76b0SSuyog Pawar                                 ((i4_local_ctr1 >> 1) << 1) * MAX_CU_IN_CTB_ROW;
990*c83a76b0SSuyog Pawar 
991*c83a76b0SSuyog Pawar                             for(i4_local_ctr2 = 0; i4_local_ctr2 < 4; i4_local_ctr2++)
992*c83a76b0SSuyog Pawar                             {
993*c83a76b0SSuyog Pawar                                 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
994*c83a76b0SSuyog Pawar                                     .as_intra8_analyse[i4_local_ctr2]
995*c83a76b0SSuyog Pawar                                     .au1_4x4_best_modes[0][0] = 255;
996*c83a76b0SSuyog Pawar                                 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
997*c83a76b0SSuyog Pawar                                     .as_intra8_analyse[i4_local_ctr2]
998*c83a76b0SSuyog Pawar                                     .au1_4x4_best_modes[1][0] = 255;
999*c83a76b0SSuyog Pawar                                 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1000*c83a76b0SSuyog Pawar                                     .as_intra8_analyse[i4_local_ctr2]
1001*c83a76b0SSuyog Pawar                                     .au1_4x4_best_modes[2][0] = 255;
1002*c83a76b0SSuyog Pawar                                 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1003*c83a76b0SSuyog Pawar                                     .as_intra8_analyse[i4_local_ctr2]
1004*c83a76b0SSuyog Pawar                                     .au1_4x4_best_modes[3][0] = 255;
1005*c83a76b0SSuyog Pawar                                 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1006*c83a76b0SSuyog Pawar                                     .as_intra8_analyse[i4_local_ctr2]
1007*c83a76b0SSuyog Pawar                                     .au1_best_modes_8x8_tu[0] = 255;
1008*c83a76b0SSuyog Pawar                                 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1009*c83a76b0SSuyog Pawar                                     .as_intra8_analyse[i4_local_ctr2]
1010*c83a76b0SSuyog Pawar                                     .au1_best_modes_4x4_tu[0] = 255;
1011*c83a76b0SSuyog Pawar                                 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1012*c83a76b0SSuyog Pawar                                     .as_intra8_analyse[i4_local_ctr2]
1013*c83a76b0SSuyog Pawar                                     .b1_valid_cu = 0;
1014*c83a76b0SSuyog Pawar 
1015*c83a76b0SSuyog Pawar                                 pi4_intra_8_cost_curr16
1016*c83a76b0SSuyog Pawar                                     [(i4_local_ctr2 & 1) +
1017*c83a76b0SSuyog Pawar                                      (MAX_CU_IN_CTB_ROW * (i4_local_ctr2 >> 1))] =
1018*c83a76b0SSuyog Pawar                                         MAX_INTRA_COST_IPE;
1019*c83a76b0SSuyog Pawar                             }
1020*c83a76b0SSuyog Pawar                         }
1021*c83a76b0SSuyog Pawar 
1022*c83a76b0SSuyog Pawar                         /* set neighbours even if intra is not evaluated, since source is always available. */
1023*c83a76b0SSuyog Pawar                         ihevce_set_nbr_map(
1024*c83a76b0SSuyog Pawar                             ps_ctxt->pu1_ctb_nbr_map,
1025*c83a76b0SSuyog Pawar                             ps_ctxt->i4_nbr_map_strd,
1026*c83a76b0SSuyog Pawar                             ps_cu_node->ps_parent->u2_x0 << 1,
1027*c83a76b0SSuyog Pawar                             ps_cu_node->ps_parent->u2_y0 << 1,
1028*c83a76b0SSuyog Pawar                             (ps_cu_node->ps_parent->u1_cu_size >> 2),
1029*c83a76b0SSuyog Pawar                             1);
1030*c83a76b0SSuyog Pawar 
1031*c83a76b0SSuyog Pawar                         /* cost accumulation of best cu size candidate */
1032*c83a76b0SSuyog Pawar                         /*i8_frame_acc_satd_cost += parent_cost;*/
1033*c83a76b0SSuyog Pawar 
1034*c83a76b0SSuyog Pawar                         /* Mode bits cost accumulation for best cu size and cu mode */
1035*c83a76b0SSuyog Pawar                         /*i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost;*/
1036*c83a76b0SSuyog Pawar 
1037*c83a76b0SSuyog Pawar                         /*satd/mod_qp accumulation of best cu */
1038*c83a76b0SSuyog Pawar                         /*i8_frame_acc_satd_by_modqp_q10 += ((LWORD64)ps_cu_node->ps_parent->best_satd << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3))/i4_q_scale_q3_mod;*/
1039*c83a76b0SSuyog Pawar 
1040*c83a76b0SSuyog Pawar                         /* Increment pointers */
1041*c83a76b0SSuyog Pawar                         ps_ed_blk_l1 += 16;
1042*c83a76b0SSuyog Pawar                         blk_cnt += 16;
1043*c83a76b0SSuyog Pawar                         //ps_row_cu++;
1044*c83a76b0SSuyog Pawar                         merge_64x64 = 0;
1045*c83a76b0SSuyog Pawar 
1046*c83a76b0SSuyog Pawar                         /* increment for stat purpose only. Increment is valid only on single thread */
1047*c83a76b0SSuyog Pawar                         ps_ctxt->u4_num_16x16_skips_at_L0_IPE += 4;
1048*c83a76b0SSuyog Pawar                     }
1049*c83a76b0SSuyog Pawar                     else
1050*c83a76b0SSuyog Pawar                     {
1051*c83a76b0SSuyog Pawar                         /* Re-evaluation of 4 16x16 blocks at 8x8 prediction level */
1052*c83a76b0SSuyog Pawar                         //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
1053*c83a76b0SSuyog Pawar 
1054*c83a76b0SSuyog Pawar                         if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) &&
1055*c83a76b0SSuyog Pawar                            (ps_ctxt->i4_slice_type == PSLICE))
1056*c83a76b0SSuyog Pawar                         {
1057*c83a76b0SSuyog Pawar                             ps_ctxt->u1_disable_child_cu_decide = 1;
1058*c83a76b0SSuyog Pawar                             step2_bypass = 0;
1059*c83a76b0SSuyog Pawar                         }
1060*c83a76b0SSuyog Pawar 
1061*c83a76b0SSuyog Pawar                         /* Based on the flag, Child modes decision can be disabled*/
1062*c83a76b0SSuyog Pawar                         if(0 == ps_ctxt->u1_disable_child_cu_decide)
1063*c83a76b0SSuyog Pawar                         {
1064*c83a76b0SSuyog Pawar                             for(j = 0; j < 4; j++)
1065*c83a76b0SSuyog Pawar                             {
1066*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_sub_cu[j]->u2_x0 =
1067*c83a76b0SSuyog Pawar                                     gau1_cu_pos_x[blk_cnt + (j * 4)]; /* Populate properly */
1068*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_sub_cu[j]->u2_y0 =
1069*c83a76b0SSuyog Pawar                                     gau1_cu_pos_y[blk_cnt + (j * 4)]; /* Populate properly */
1070*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_sub_cu[j]->u1_cu_size = 16;
1071*c83a76b0SSuyog Pawar 
1072*c83a76b0SSuyog Pawar                                 {
1073*c83a76b0SSuyog Pawar                                     WORD32 best_ang_mode =
1074*c83a76b0SSuyog Pawar                                         (ps_ed_blk_l1 + (j * 4))->best_merge_mode;
1075*c83a76b0SSuyog Pawar 
1076*c83a76b0SSuyog Pawar                                     if(best_ang_mode < 2)
1077*c83a76b0SSuyog Pawar                                         best_ang_mode = 26;
1078*c83a76b0SSuyog Pawar 
1079*c83a76b0SSuyog Pawar                                     ihevce_mode_eval_filtering(
1080*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_sub_cu[j],
1081*c83a76b0SSuyog Pawar                                         ps_cu_node,
1082*c83a76b0SSuyog Pawar                                         ps_ctxt,
1083*c83a76b0SSuyog Pawar                                         ps_curr_src,
1084*c83a76b0SSuyog Pawar                                         best_ang_mode,
1085*c83a76b0SSuyog Pawar                                         &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0],
1086*c83a76b0SSuyog Pawar                                         &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1087*c83a76b0SSuyog Pawar                                         !step2_bypass,
1088*c83a76b0SSuyog Pawar                                         1);
1089*c83a76b0SSuyog Pawar 
1090*c83a76b0SSuyog Pawar                                     if(i4_enable_4cu_16tu)
1091*c83a76b0SSuyog Pawar                                     {
1092*c83a76b0SSuyog Pawar                                         ihevce_mode_eval_filtering(
1093*c83a76b0SSuyog Pawar                                             ps_cu_node->ps_sub_cu[j],
1094*c83a76b0SSuyog Pawar                                             ps_cu_node,
1095*c83a76b0SSuyog Pawar                                             ps_ctxt,
1096*c83a76b0SSuyog Pawar                                             ps_curr_src,
1097*c83a76b0SSuyog Pawar                                             best_ang_mode,
1098*c83a76b0SSuyog Pawar                                             &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1099*c83a76b0SSuyog Pawar                                             &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1100*c83a76b0SSuyog Pawar                                             !step2_bypass,
1101*c83a76b0SSuyog Pawar                                             0);
1102*c83a76b0SSuyog Pawar                                     }
1103*c83a76b0SSuyog Pawar                                     else
1104*c83a76b0SSuyog Pawar                                     {
1105*c83a76b0SSuyog Pawar                                         /* 4TU not evaluated :  4tu modes set same as 1tu modes */
1106*c83a76b0SSuyog Pawar                                         memcpy(
1107*c83a76b0SSuyog Pawar                                             &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1108*c83a76b0SSuyog Pawar                                             &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1109*c83a76b0SSuyog Pawar                                             NUM_BEST_MODES);
1110*c83a76b0SSuyog Pawar 
1111*c83a76b0SSuyog Pawar                                         /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
1112*c83a76b0SSuyog Pawar                                         memcpy(
1113*c83a76b0SSuyog Pawar                                             &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1114*c83a76b0SSuyog Pawar                                             &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0],
1115*c83a76b0SSuyog Pawar                                             NUM_BEST_MODES * sizeof(WORD32));
1116*c83a76b0SSuyog Pawar                                     }
1117*c83a76b0SSuyog Pawar 
1118*c83a76b0SSuyog Pawar                                     child_cost[j] =
1119*c83a76b0SSuyog Pawar                                         MIN(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1120*c83a76b0SSuyog Pawar                                             ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]);
1121*c83a76b0SSuyog Pawar 
1122*c83a76b0SSuyog Pawar                                     /* Child cost is sum of costs at 16x16 level  */
1123*c83a76b0SSuyog Pawar                                     child_cost_least += child_cost[j];
1124*c83a76b0SSuyog Pawar 
1125*c83a76b0SSuyog Pawar                                     /* Select the best mode to be populated as top and left nbr depending on the
1126*c83a76b0SSuyog Pawar                                     4tu and 1tu cost */
1127*c83a76b0SSuyog Pawar                                     if(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0] >
1128*c83a76b0SSuyog Pawar                                        ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0])
1129*c83a76b0SSuyog Pawar                                     {
1130*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_sub_cu[j]->best_mode =
1131*c83a76b0SSuyog Pawar                                             ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0];
1132*c83a76b0SSuyog Pawar                                     }
1133*c83a76b0SSuyog Pawar                                     else
1134*c83a76b0SSuyog Pawar                                     {
1135*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_sub_cu[j]->best_mode =
1136*c83a76b0SSuyog Pawar                                             ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0];
1137*c83a76b0SSuyog Pawar                                     }
1138*c83a76b0SSuyog Pawar 
1139*c83a76b0SSuyog Pawar                                     { /* Update the CTB nodes only for MAX - 1 CU nodes */
1140*c83a76b0SSuyog Pawar                                         WORD32 xA, yA, row, col;
1141*c83a76b0SSuyog Pawar                                         xA = ((ps_cu_node->ps_sub_cu[j]->u2_x0 << 3) >> 2) + 1;
1142*c83a76b0SSuyog Pawar                                         yA = ((ps_cu_node->ps_sub_cu[j]->u2_y0 << 3) >> 2) + 1;
1143*c83a76b0SSuyog Pawar                                         size = ps_cu_node->ps_sub_cu[j]->u1_cu_size >> 2;
1144*c83a76b0SSuyog Pawar                                         for(row = yA; row < (yA + size); row++)
1145*c83a76b0SSuyog Pawar                                         {
1146*c83a76b0SSuyog Pawar                                             for(col = xA; col < (xA + size); col++)
1147*c83a76b0SSuyog Pawar                                             {
1148*c83a76b0SSuyog Pawar                                                 ps_ctxt->au1_ctb_mode_map[row][col] =
1149*c83a76b0SSuyog Pawar                                                     ps_cu_node->ps_sub_cu[j]->best_mode;
1150*c83a76b0SSuyog Pawar                                             }
1151*c83a76b0SSuyog Pawar                                         }
1152*c83a76b0SSuyog Pawar                                     }
1153*c83a76b0SSuyog Pawar                                 }
1154*c83a76b0SSuyog Pawar 
1155*c83a76b0SSuyog Pawar                                 /*Child SATD cost*/
1156*c83a76b0SSuyog Pawar                                 child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd;
1157*c83a76b0SSuyog Pawar 
1158*c83a76b0SSuyog Pawar                                 /* store the child 16x16 costs */
1159*c83a76b0SSuyog Pawar                                 pi4_intra_16_cost[(j & 1) + ((MAX_CU_IN_CTB_ROW >> 1) * (j >> 1))] =
1160*c83a76b0SSuyog Pawar                                     child_cost[j];
1161*c83a76b0SSuyog Pawar 
1162*c83a76b0SSuyog Pawar                                 /* set the CU valid flag */
1163*c83a76b0SSuyog Pawar                                 ps_intra16_analyse[j].b1_valid_cu = 1;
1164*c83a76b0SSuyog Pawar 
1165*c83a76b0SSuyog Pawar                                 /* All 16x16 merge is valid, if Cu 32x32 is chosen */
1166*c83a76b0SSuyog Pawar                                 /* To be reset, if CU 64x64 is chosen */
1167*c83a76b0SSuyog Pawar                                 ps_intra16_analyse[j].b1_merge_flag = 1;
1168*c83a76b0SSuyog Pawar 
1169*c83a76b0SSuyog Pawar                                 /* storing the modes to intra 16 analyse */
1170*c83a76b0SSuyog Pawar                                 /* store the best 16x16 modes 8x8 tu */
1171*c83a76b0SSuyog Pawar                                 memcpy(
1172*c83a76b0SSuyog Pawar                                     &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0],
1173*c83a76b0SSuyog Pawar                                     &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1174*c83a76b0SSuyog Pawar                                     sizeof(UWORD8) * (NUM_BEST_MODES));
1175*c83a76b0SSuyog Pawar                                 ps_intra16_analyse[j].au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255;
1176*c83a76b0SSuyog Pawar 
1177*c83a76b0SSuyog Pawar                                 /* store the best 16x16 modes 16x16 tu */
1178*c83a76b0SSuyog Pawar                                 memcpy(
1179*c83a76b0SSuyog Pawar                                     &ps_intra16_analyse[j].au1_best_modes_16x16_tu[0],
1180*c83a76b0SSuyog Pawar                                     &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1181*c83a76b0SSuyog Pawar                                     sizeof(UWORD8) * (NUM_BEST_MODES));
1182*c83a76b0SSuyog Pawar                                 ps_intra16_analyse[j].au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255;
1183*c83a76b0SSuyog Pawar 
1184*c83a76b0SSuyog Pawar                                 /* divide the 16x16 costs (pro rating) to 4 8x8 costs */
1185*c83a76b0SSuyog Pawar                                 /* store the same 16x16 modes as 4 8x8 child modes    */
1186*c83a76b0SSuyog Pawar                                 {
1187*c83a76b0SSuyog Pawar                                     WORD32 idx_8x8;
1188*c83a76b0SSuyog Pawar                                     WORD32 *pi4_intra_8_cost_curr16;
1189*c83a76b0SSuyog Pawar                                     intra8_analyse_t *ps_intra8_analyse;
1190*c83a76b0SSuyog Pawar 
1191*c83a76b0SSuyog Pawar                                     pi4_intra_8_cost_curr16 = pi4_intra_8_cost + ((j & 1) << 1);
1192*c83a76b0SSuyog Pawar                                     pi4_intra_8_cost_curr16 += ((j >> 1) << 1) * MAX_CU_IN_CTB_ROW;
1193*c83a76b0SSuyog Pawar 
1194*c83a76b0SSuyog Pawar                                     for(idx_8x8 = 0; idx_8x8 < 4; idx_8x8++)
1195*c83a76b0SSuyog Pawar                                     {
1196*c83a76b0SSuyog Pawar                                         pi4_intra_8_cost_curr16
1197*c83a76b0SSuyog Pawar                                             [(idx_8x8 & 1) + (MAX_CU_IN_CTB_ROW * (idx_8x8 >> 1))] =
1198*c83a76b0SSuyog Pawar                                                 (child_cost[j] + 3) >> 2;
1199*c83a76b0SSuyog Pawar 
1200*c83a76b0SSuyog Pawar                                         ps_intra8_analyse =
1201*c83a76b0SSuyog Pawar                                             &ps_intra16_analyse[j].as_intra8_analyse[idx_8x8];
1202*c83a76b0SSuyog Pawar 
1203*c83a76b0SSuyog Pawar                                         ps_intra8_analyse->b1_enable_nxn = 0;
1204*c83a76b0SSuyog Pawar                                         ps_intra8_analyse->b1_valid_cu = 1;
1205*c83a76b0SSuyog Pawar 
1206*c83a76b0SSuyog Pawar                                         /* store the best 8x8 modes 8x8 tu */
1207*c83a76b0SSuyog Pawar                                         memcpy(
1208*c83a76b0SSuyog Pawar                                             &ps_intra8_analyse->au1_best_modes_8x8_tu[0],
1209*c83a76b0SSuyog Pawar                                             &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0],
1210*c83a76b0SSuyog Pawar                                             sizeof(UWORD8) * (NUM_BEST_MODES + 1));
1211*c83a76b0SSuyog Pawar 
1212*c83a76b0SSuyog Pawar                                         /* store the best 8x8 modes 4x4 tu */
1213*c83a76b0SSuyog Pawar                                         memcpy(
1214*c83a76b0SSuyog Pawar                                             &ps_intra8_analyse->au1_best_modes_4x4_tu[0],
1215*c83a76b0SSuyog Pawar                                             &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0],
1216*c83a76b0SSuyog Pawar                                             sizeof(UWORD8) * (NUM_BEST_MODES + 1));
1217*c83a76b0SSuyog Pawar 
1218*c83a76b0SSuyog Pawar                                         /* NXN modes not evaluated hence set to 255 */
1219*c83a76b0SSuyog Pawar                                         memset(
1220*c83a76b0SSuyog Pawar                                             &ps_intra8_analyse->au1_4x4_best_modes[0][0],
1221*c83a76b0SSuyog Pawar                                             255,
1222*c83a76b0SSuyog Pawar                                             sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1));
1223*c83a76b0SSuyog Pawar                                     }
1224*c83a76b0SSuyog Pawar                                 }
1225*c83a76b0SSuyog Pawar                             }
1226*c83a76b0SSuyog Pawar 
1227*c83a76b0SSuyog Pawar                             ihevce_set_nbr_map(
1228*c83a76b0SSuyog Pawar                                 ps_ctxt->pu1_ctb_nbr_map,
1229*c83a76b0SSuyog Pawar                                 ps_ctxt->i4_nbr_map_strd,
1230*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_sub_cu[0]->u2_x0 << 1,
1231*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_sub_cu[0]->u2_y0 << 1,
1232*c83a76b0SSuyog Pawar                                 (ps_cu_node->ps_sub_cu[0]->u1_cu_size >> 1),
1233*c83a76b0SSuyog Pawar                                 0);
1234*c83a76b0SSuyog Pawar                         }
1235*c83a76b0SSuyog Pawar #if 1  //DISBLE_CHILD_CU_EVAL_L0_IPE //1
1236*c83a76b0SSuyog Pawar                         else
1237*c83a76b0SSuyog Pawar                         {
1238*c83a76b0SSuyog Pawar                             for(j = 0; j < 4; j++)
1239*c83a76b0SSuyog Pawar                             {
1240*c83a76b0SSuyog Pawar                                 WORD32 idx_8x8;
1241*c83a76b0SSuyog Pawar                                 intra8_analyse_t *ps_intra8_analyse;
1242*c83a76b0SSuyog Pawar                                 ps_intra16_analyse[j].au1_best_modes_8x8_tu[0] = 255;
1243*c83a76b0SSuyog Pawar                                 ps_intra16_analyse[j].au1_best_modes_16x16_tu[0] = 255;
1244*c83a76b0SSuyog Pawar 
1245*c83a76b0SSuyog Pawar                                 ps_intra16_analyse[j].b1_valid_cu = 0;
1246*c83a76b0SSuyog Pawar 
1247*c83a76b0SSuyog Pawar                                 for(idx_8x8 = 0; idx_8x8 < 4; idx_8x8++)
1248*c83a76b0SSuyog Pawar                                 {
1249*c83a76b0SSuyog Pawar                                     ps_intra8_analyse =
1250*c83a76b0SSuyog Pawar                                         &ps_intra16_analyse[j].as_intra8_analyse[idx_8x8];
1251*c83a76b0SSuyog Pawar 
1252*c83a76b0SSuyog Pawar                                     ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255;
1253*c83a76b0SSuyog Pawar                                     ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255;
1254*c83a76b0SSuyog Pawar 
1255*c83a76b0SSuyog Pawar                                     ps_intra8_analyse->b1_enable_nxn = 0;
1256*c83a76b0SSuyog Pawar                                     ps_intra8_analyse->b1_valid_cu = 0;
1257*c83a76b0SSuyog Pawar 
1258*c83a76b0SSuyog Pawar                                     /* NXN modes not evaluated hence set to 255 */
1259*c83a76b0SSuyog Pawar                                     memset(
1260*c83a76b0SSuyog Pawar                                         &ps_intra8_analyse->au1_4x4_best_modes[0][0],
1261*c83a76b0SSuyog Pawar                                         255,
1262*c83a76b0SSuyog Pawar                                         sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1));
1263*c83a76b0SSuyog Pawar                                 }
1264*c83a76b0SSuyog Pawar                             }
1265*c83a76b0SSuyog Pawar 
1266*c83a76b0SSuyog Pawar                             child_cost_least = MAX_INTRA_COST_IPE;
1267*c83a76b0SSuyog Pawar                         }
1268*c83a76b0SSuyog Pawar #endif
1269*c83a76b0SSuyog Pawar 
1270*c83a76b0SSuyog Pawar                         /* Populate params for 32x32 block analysis */
1271*c83a76b0SSuyog Pawar 
1272*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->u1_cu_size = 32;
1273*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->u2_x0 =
1274*c83a76b0SSuyog Pawar                             gau1_cu_pos_x[blk_cnt]; /* Populate properly */
1275*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->u2_y0 =
1276*c83a76b0SSuyog Pawar                             gau1_cu_pos_y[blk_cnt]; /* Populate properly */
1277*c83a76b0SSuyog Pawar 
1278*c83a76b0SSuyog Pawar                         /* Re-evaluation for 32x32 parent block at 16x16 prediction level */
1279*c83a76b0SSuyog Pawar                         //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
1280*c83a76b0SSuyog Pawar 
1281*c83a76b0SSuyog Pawar                         {
1282*c83a76b0SSuyog Pawar                             /* Eval for TUSize = CuSize */
1283*c83a76b0SSuyog Pawar                             ihevce_mode_eval_filtering(
1284*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent,
1285*c83a76b0SSuyog Pawar                                 ps_cu_node,
1286*c83a76b0SSuyog Pawar                                 ps_ctxt,
1287*c83a76b0SSuyog Pawar                                 ps_curr_src,
1288*c83a76b0SSuyog Pawar                                 26,
1289*c83a76b0SSuyog Pawar                                 &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
1290*c83a76b0SSuyog Pawar                                 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1291*c83a76b0SSuyog Pawar                                 step2_bypass,
1292*c83a76b0SSuyog Pawar                                 1);
1293*c83a76b0SSuyog Pawar 
1294*c83a76b0SSuyog Pawar                             if(i4_enable_1cu_4tu)
1295*c83a76b0SSuyog Pawar                             {
1296*c83a76b0SSuyog Pawar                                 /* Eval for TUSize = CuSize/2 */
1297*c83a76b0SSuyog Pawar                                 ihevce_mode_eval_filtering(
1298*c83a76b0SSuyog Pawar                                     ps_cu_node->ps_parent,
1299*c83a76b0SSuyog Pawar                                     ps_cu_node,
1300*c83a76b0SSuyog Pawar                                     ps_ctxt,
1301*c83a76b0SSuyog Pawar                                     ps_curr_src,
1302*c83a76b0SSuyog Pawar                                     26,
1303*c83a76b0SSuyog Pawar                                     &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1304*c83a76b0SSuyog Pawar                                     &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1305*c83a76b0SSuyog Pawar                                     step2_bypass,
1306*c83a76b0SSuyog Pawar                                     0);
1307*c83a76b0SSuyog Pawar                             }
1308*c83a76b0SSuyog Pawar                             else
1309*c83a76b0SSuyog Pawar                             {
1310*c83a76b0SSuyog Pawar                                 /* 4TU not evaluated :  4tu modes set same as 1tu modes */
1311*c83a76b0SSuyog Pawar                                 memcpy(
1312*c83a76b0SSuyog Pawar                                     &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1313*c83a76b0SSuyog Pawar                                     &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1314*c83a76b0SSuyog Pawar                                     NUM_BEST_MODES);
1315*c83a76b0SSuyog Pawar 
1316*c83a76b0SSuyog Pawar                                 /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
1317*c83a76b0SSuyog Pawar                                 memcpy(
1318*c83a76b0SSuyog Pawar                                     &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1319*c83a76b0SSuyog Pawar                                     &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
1320*c83a76b0SSuyog Pawar                                     NUM_BEST_MODES * sizeof(WORD32));
1321*c83a76b0SSuyog Pawar                             }
1322*c83a76b0SSuyog Pawar                         }
1323*c83a76b0SSuyog Pawar 
1324*c83a76b0SSuyog Pawar                         ps_ctxt->u1_disable_child_cu_decide = 0;
1325*c83a76b0SSuyog Pawar                         step2_bypass = 1;
1326*c83a76b0SSuyog Pawar 
1327*c83a76b0SSuyog Pawar                         /* Update parent cost */
1328*c83a76b0SSuyog Pawar                         parent_cost =
1329*c83a76b0SSuyog Pawar                             MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1330*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->au4_best_cost_1tu[0]);
1331*c83a76b0SSuyog Pawar 
1332*c83a76b0SSuyog Pawar                         /* Select the best mode to be populated as top and left nbr depending on the
1333*c83a76b0SSuyog Pawar                         4tu and 1tu cost */
1334*c83a76b0SSuyog Pawar                         if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] >
1335*c83a76b0SSuyog Pawar                            ps_cu_node->ps_parent->au4_best_cost_1tu[0])
1336*c83a76b0SSuyog Pawar                         {
1337*c83a76b0SSuyog Pawar                             ps_cu_node->ps_parent->best_mode =
1338*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->au1_best_mode_1tu[0];
1339*c83a76b0SSuyog Pawar                         }
1340*c83a76b0SSuyog Pawar                         else
1341*c83a76b0SSuyog Pawar                         {
1342*c83a76b0SSuyog Pawar                             ps_cu_node->ps_parent->best_mode =
1343*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->au1_best_mode_4tu[0];
1344*c83a76b0SSuyog Pawar                         }
1345*c83a76b0SSuyog Pawar 
1346*c83a76b0SSuyog Pawar                         /* store the 32x32 cost */
1347*c83a76b0SSuyog Pawar                         *pi4_intra_32_cost = parent_cost;
1348*c83a76b0SSuyog Pawar 
1349*c83a76b0SSuyog Pawar                         /* set the CU valid flag */
1350*c83a76b0SSuyog Pawar                         ps_intra32_analyse->b1_valid_cu = 1;
1351*c83a76b0SSuyog Pawar 
1352*c83a76b0SSuyog Pawar                         ps_intra32_analyse->b1_merge_flag = 1;
1353*c83a76b0SSuyog Pawar 
1354*c83a76b0SSuyog Pawar                         /* storing the modes to intra 32 analyse */
1355*c83a76b0SSuyog Pawar                         {
1356*c83a76b0SSuyog Pawar                             /* store the best 32x32 modes 16x16 tu */
1357*c83a76b0SSuyog Pawar                             memcpy(
1358*c83a76b0SSuyog Pawar                                 &ps_intra32_analyse->au1_best_modes_16x16_tu[0],
1359*c83a76b0SSuyog Pawar                                 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1360*c83a76b0SSuyog Pawar                                 sizeof(UWORD8) * (NUM_BEST_MODES));
1361*c83a76b0SSuyog Pawar                             ps_intra32_analyse->au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255;
1362*c83a76b0SSuyog Pawar 
1363*c83a76b0SSuyog Pawar                             /* store the best 32x32 modes 32x32 tu */
1364*c83a76b0SSuyog Pawar                             memcpy(
1365*c83a76b0SSuyog Pawar                                 &ps_intra32_analyse->au1_best_modes_32x32_tu[0],
1366*c83a76b0SSuyog Pawar                                 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1367*c83a76b0SSuyog Pawar                                 sizeof(UWORD8) * (NUM_BEST_MODES));
1368*c83a76b0SSuyog Pawar                             ps_intra32_analyse->au1_best_modes_32x32_tu[NUM_BEST_MODES] = 255;
1369*c83a76b0SSuyog Pawar                         }
1370*c83a76b0SSuyog Pawar                         parent_best_mode = ps_cu_node->ps_parent->best_mode;
1371*c83a76b0SSuyog Pawar                         if((parent_cost <=
1372*c83a76b0SSuyog Pawar                             child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >>
1373*c83a76b0SSuyog Pawar                                                 LAMBDA_Q_SHIFT)))  //|| identical_modes)
1374*c83a76b0SSuyog Pawar                         {
1375*c83a76b0SSuyog Pawar                             WORD32 i4_q_scale_q3_mod;
1376*c83a76b0SSuyog Pawar                             UWORD8 u1_cu_possible_qp;
1377*c83a76b0SSuyog Pawar                             WORD32 i4_act_factor;
1378*c83a76b0SSuyog Pawar 
1379*c83a76b0SSuyog Pawar                             /* CU size 32x32 and fill the final cu params */
1380*c83a76b0SSuyog Pawar 
1381*c83a76b0SSuyog Pawar                             ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
1382*c83a76b0SSuyog Pawar 
1383*c83a76b0SSuyog Pawar                             if((IHEVCE_QUALITY_P3 > i4_quality_preset))
1384*c83a76b0SSuyog Pawar                             {
1385*c83a76b0SSuyog Pawar                                 for(i = 0; i < 4; i++)
1386*c83a76b0SSuyog Pawar                                 {
1387*c83a76b0SSuyog Pawar                                     intra8_analyse_t *ps_intra8_analyse;
1388*c83a76b0SSuyog Pawar                                     ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i];
1389*c83a76b0SSuyog Pawar                                     for(j = 0; j < 4; j++)
1390*c83a76b0SSuyog Pawar                                     {
1391*c83a76b0SSuyog Pawar                                         /* Populate best 3 nxn modes */
1392*c83a76b0SSuyog Pawar                                         ps_intra8_analyse->au1_4x4_best_modes[j][0] =
1393*c83a76b0SSuyog Pawar                                             ps_cu_node->ps_sub_cu[i]->au1_best_mode_4tu[0];
1394*c83a76b0SSuyog Pawar                                         ps_intra8_analyse->au1_4x4_best_modes[j][1] =
1395*c83a76b0SSuyog Pawar                                             ps_cu_node->ps_sub_cu[i]
1396*c83a76b0SSuyog Pawar                                                 ->au1_best_mode_4tu[1];  //(ps_ed + 1)->best_mode;
1397*c83a76b0SSuyog Pawar                                         ps_intra8_analyse->au1_4x4_best_modes[j][2] =
1398*c83a76b0SSuyog Pawar                                             ps_cu_node->ps_sub_cu[i]
1399*c83a76b0SSuyog Pawar                                                 ->au1_best_mode_4tu[2];  //(ps_ed + 2)->best_mode;
1400*c83a76b0SSuyog Pawar                                         ps_intra8_analyse->au1_4x4_best_modes[j][3] = 255;
1401*c83a76b0SSuyog Pawar                                     }
1402*c83a76b0SSuyog Pawar                                 }
1403*c83a76b0SSuyog Pawar                             }
1404*c83a76b0SSuyog Pawar                             /* store the 32x32 non split flag */
1405*c83a76b0SSuyog Pawar                             ps_intra32_analyse->b1_split_flag = 0;
1406*c83a76b0SSuyog Pawar                             ps_intra32_analyse->as_intra16_analyse[0].b1_split_flag = 0;
1407*c83a76b0SSuyog Pawar                             ps_intra32_analyse->as_intra16_analyse[1].b1_split_flag = 0;
1408*c83a76b0SSuyog Pawar                             ps_intra32_analyse->as_intra16_analyse[2].b1_split_flag = 0;
1409*c83a76b0SSuyog Pawar                             ps_intra32_analyse->as_intra16_analyse[3].b1_split_flag = 0;
1410*c83a76b0SSuyog Pawar 
1411*c83a76b0SSuyog Pawar                             au1_best_32x32_modes[blk_cnt >> 4] =
1412*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->au1_best_mode_1tu[0];
1413*c83a76b0SSuyog Pawar 
1414*c83a76b0SSuyog Pawar                             au4_best_32x32_cost[blk_cnt >> 4] =
1415*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->au4_best_cost_1tu[0];
1416*c83a76b0SSuyog Pawar                             /*As 32*32 has won, pick L2 8x8 qp which maps
1417*c83a76b0SSuyog Pawar                             to L0 32x32 Qp*/
1418*c83a76b0SSuyog Pawar                             ASSERT(((blk_cnt >> 4) & 3) == (blk_cnt >> 4));
1419*c83a76b0SSuyog Pawar                             ASSERT(ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0] != -2);
1420*c83a76b0SSuyog Pawar                             u1_cu_possible_qp = ihevce_cu_level_qp_mod(
1421*c83a76b0SSuyog Pawar                                 ps_ctxt->i4_qscale,
1422*c83a76b0SSuyog Pawar                                 ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0],
1423*c83a76b0SSuyog Pawar                                 ps_ctxt->ld_curr_frame_16x16_log_avg[0],
1424*c83a76b0SSuyog Pawar                                 f_strength,
1425*c83a76b0SSuyog Pawar                                 &i4_act_factor,
1426*c83a76b0SSuyog Pawar                                 &i4_q_scale_q3_mod,
1427*c83a76b0SSuyog Pawar                                 ps_ctxt->ps_rc_quant_ctxt);
1428*c83a76b0SSuyog Pawar                             /* cost accumulation of best cu size candidate */
1429*c83a76b0SSuyog Pawar                             i8_frame_acc_satd_cost += parent_cost;
1430*c83a76b0SSuyog Pawar 
1431*c83a76b0SSuyog Pawar                             /* satd and mpm bits accumulation of best cu size candidate */
1432*c83a76b0SSuyog Pawar                             i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd;
1433*c83a76b0SSuyog Pawar 
1434*c83a76b0SSuyog Pawar                             /* Mode bits cost accumulation for best cu size and cu mode */
1435*c83a76b0SSuyog Pawar                             i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost;
1436*c83a76b0SSuyog Pawar 
1437*c83a76b0SSuyog Pawar                             /*satd/mod_qp accumulation of best cu */
1438*c83a76b0SSuyog Pawar                             i8_frame_acc_satd_by_modqp_q10 +=
1439*c83a76b0SSuyog Pawar                                 ((LWORD64)ps_cu_node->ps_parent->best_satd
1440*c83a76b0SSuyog Pawar                                  << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
1441*c83a76b0SSuyog Pawar                                 i4_q_scale_q3_mod;
1442*c83a76b0SSuyog Pawar 
1443*c83a76b0SSuyog Pawar                             /* Increment pointers */
1444*c83a76b0SSuyog Pawar                             ps_ed_blk_l1 += 16;
1445*c83a76b0SSuyog Pawar                             blk_cnt += 16;
1446*c83a76b0SSuyog Pawar                             //ps_row_cu++;
1447*c83a76b0SSuyog Pawar                             merge_64x64 &= 1;
1448*c83a76b0SSuyog Pawar                         }
1449*c83a76b0SSuyog Pawar                         else
1450*c83a76b0SSuyog Pawar                         {
1451*c83a76b0SSuyog Pawar                             /* store the 32x32 split flag */
1452*c83a76b0SSuyog Pawar                             ps_intra32_analyse->b1_split_flag = 1;
1453*c83a76b0SSuyog Pawar 
1454*c83a76b0SSuyog Pawar                             /* CU size 16x16 and fill the final cu params for all 4 blocks */
1455*c83a76b0SSuyog Pawar                             for(j = 0; j < 4; j++)
1456*c83a76b0SSuyog Pawar                             {
1457*c83a76b0SSuyog Pawar                                 WORD32 i4_q_scale_q3_mod;
1458*c83a76b0SSuyog Pawar                                 UWORD8 u1_cu_possible_qp;
1459*c83a76b0SSuyog Pawar                                 WORD32 i4_act_factor;
1460*c83a76b0SSuyog Pawar 
1461*c83a76b0SSuyog Pawar                                 /* Set CU split flag */
1462*c83a76b0SSuyog Pawar                                 ASSERT(blk_cnt % 4 == 0);
1463*c83a76b0SSuyog Pawar 
1464*c83a76b0SSuyog Pawar                                 ihevce_update_cand_list(
1465*c83a76b0SSuyog Pawar                                     ps_cu_node->ps_sub_cu[j], ps_ed_blk_l1, ps_ctxt);
1466*c83a76b0SSuyog Pawar 
1467*c83a76b0SSuyog Pawar                                 /* store the 16x16 non split flag  */
1468*c83a76b0SSuyog Pawar                                 ps_intra16_analyse[j].b1_split_flag = 0;
1469*c83a76b0SSuyog Pawar 
1470*c83a76b0SSuyog Pawar                                 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
1471*c83a76b0SSuyog Pawar                                 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0] != -2);
1472*c83a76b0SSuyog Pawar                                 /*As 16*16 has won, pick L1 8x8 qp which maps
1473*c83a76b0SSuyog Pawar                                 to L0 16x16 Qp*/
1474*c83a76b0SSuyog Pawar                                 u1_cu_possible_qp = ihevce_cu_level_qp_mod(
1475*c83a76b0SSuyog Pawar                                     ps_ctxt->i4_qscale,
1476*c83a76b0SSuyog Pawar                                     ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0],
1477*c83a76b0SSuyog Pawar                                     ps_ctxt->ld_curr_frame_8x8_log_avg[0],
1478*c83a76b0SSuyog Pawar                                     f_strength,
1479*c83a76b0SSuyog Pawar                                     &i4_act_factor,
1480*c83a76b0SSuyog Pawar                                     &i4_q_scale_q3_mod,
1481*c83a76b0SSuyog Pawar                                     ps_ctxt->ps_rc_quant_ctxt);
1482*c83a76b0SSuyog Pawar 
1483*c83a76b0SSuyog Pawar                                 /*accum satd/qp for all child block*/
1484*c83a76b0SSuyog Pawar                                 i8_frame_acc_satd_by_modqp_q10 +=
1485*c83a76b0SSuyog Pawar                                     ((LWORD64)child_satd[j]
1486*c83a76b0SSuyog Pawar                                      << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
1487*c83a76b0SSuyog Pawar                                     i4_q_scale_q3_mod;
1488*c83a76b0SSuyog Pawar 
1489*c83a76b0SSuyog Pawar                                 /* Accumulate mode bits for all child blocks */
1490*c83a76b0SSuyog Pawar                                 i8_frame_acc_mode_bits_cost +=
1491*c83a76b0SSuyog Pawar                                     ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost;
1492*c83a76b0SSuyog Pawar 
1493*c83a76b0SSuyog Pawar                                 /* satd and mpm bits accumulation of best cu size candidate */
1494*c83a76b0SSuyog Pawar                                 i4_ctb_acc_satd += child_satd[j];
1495*c83a76b0SSuyog Pawar 
1496*c83a76b0SSuyog Pawar                                 /* Increment pointers */
1497*c83a76b0SSuyog Pawar                                 //ps_row_cu++;
1498*c83a76b0SSuyog Pawar                                 ps_ed_blk_l1 += 4;
1499*c83a76b0SSuyog Pawar                                 blk_cnt += 4;
1500*c83a76b0SSuyog Pawar                             }
1501*c83a76b0SSuyog Pawar 
1502*c83a76b0SSuyog Pawar                             /* cost accumulation of best cu size candidate */
1503*c83a76b0SSuyog Pawar                             i8_frame_acc_satd_cost += child_cost_least;
1504*c83a76b0SSuyog Pawar 
1505*c83a76b0SSuyog Pawar                             /* 64x64 merge is not possible */
1506*c83a76b0SSuyog Pawar                             merge_64x64 = 0;
1507*c83a76b0SSuyog Pawar                         }
1508*c83a76b0SSuyog Pawar 
1509*c83a76b0SSuyog Pawar                         //ps_ed_blk_l2 += 4;
1510*c83a76b0SSuyog Pawar 
1511*c83a76b0SSuyog Pawar                     }  //end of EIID's else
1512*c83a76b0SSuyog Pawar #endif
1513*c83a76b0SSuyog Pawar                 }
1514*c83a76b0SSuyog Pawar                 /* If Merge success for L1 max CU size 16x16 is chosen */
1515*c83a76b0SSuyog Pawar                 else if(merge_16x16_l1)
1516*c83a76b0SSuyog Pawar                 {
1517*c83a76b0SSuyog Pawar #if IP_DBG_L1_l2
1518*c83a76b0SSuyog Pawar                     ps_cu_node->ps_parent->u1_cu_size = 16;
1519*c83a76b0SSuyog Pawar                     ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */
1520*c83a76b0SSuyog Pawar                     ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */
1521*c83a76b0SSuyog Pawar                     ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_merge_mode;
1522*c83a76b0SSuyog Pawar                     ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
1523*c83a76b0SSuyog Pawar 
1524*c83a76b0SSuyog Pawar                     blk_cnt += 4;
1525*c83a76b0SSuyog Pawar                     ps_ed_blk_l1 += 4;
1526*c83a76b0SSuyog Pawar                     ps_row_cu++;
1527*c83a76b0SSuyog Pawar                     merge_64x64 = 0;
1528*c83a76b0SSuyog Pawar #else
1529*c83a76b0SSuyog Pawar 
1530*c83a76b0SSuyog Pawar                     /*EIID: evaluate only if L1 early-inter-intra decision is not favouring inter*/
1531*c83a76b0SSuyog Pawar                     /* enable this only in B pictures */
1532*c83a76b0SSuyog Pawar                     if(ps_ed_blk_l1->intra_or_inter == 2 && (ps_ctxt->i4_slice_type != ISLICE))
1533*c83a76b0SSuyog Pawar                     {
1534*c83a76b0SSuyog Pawar                         WORD32 i4_q_scale_q3_mod, i4_local_ctr;
1535*c83a76b0SSuyog Pawar                         WORD8 i1_cu_possible_qp;
1536*c83a76b0SSuyog Pawar                         WORD32 i4_act_factor;
1537*c83a76b0SSuyog Pawar                         /* make cost infinity. */
1538*c83a76b0SSuyog Pawar                         /* make modes invalid */
1539*c83a76b0SSuyog Pawar                         /* update loop variables */
1540*c83a76b0SSuyog Pawar                         /* set other output variables */
1541*c83a76b0SSuyog Pawar                         /* dont set neighbour flag so that next blocks wont access this cu */
1542*c83a76b0SSuyog Pawar                         /* what happens to ctb_mode_map?? */
1543*c83a76b0SSuyog Pawar 
1544*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->u1_cu_size = 16;
1545*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->u2_x0 =
1546*c83a76b0SSuyog Pawar                             gau1_cu_pos_x[blk_cnt]; /* Populate properly */
1547*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->u2_y0 =
1548*c83a76b0SSuyog Pawar                             gau1_cu_pos_y[blk_cnt]; /* Populate properly */
1549*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->best_mode =
1550*c83a76b0SSuyog Pawar                             INTRA_DC;  //ps_ed_blk_l1->best_merge_mode;
1551*c83a76b0SSuyog Pawar 
1552*c83a76b0SSuyog Pawar                         /* fill in the first modes as invalid */
1553*c83a76b0SSuyog Pawar 
1554*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC;
1555*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->au1_best_mode_1tu[1] =
1556*c83a76b0SSuyog Pawar                             INTRA_DC;  //for safery. Since update_cand_list will set num_modes as 3
1557*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC;
1558*c83a76b0SSuyog Pawar 
1559*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC;
1560*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC;
1561*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC;
1562*c83a76b0SSuyog Pawar 
1563*c83a76b0SSuyog Pawar                         ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
1564*c83a76b0SSuyog Pawar 
1565*c83a76b0SSuyog Pawar                         //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0;
1566*c83a76b0SSuyog Pawar                         //ps_row_cu->u1_num_intra_rdopt_cands = 0;
1567*c83a76b0SSuyog Pawar 
1568*c83a76b0SSuyog Pawar                         ps_intra32_analyse->b1_split_flag = 1;
1569*c83a76b0SSuyog Pawar                         ps_intra32_analyse->b1_merge_flag = 0;
1570*c83a76b0SSuyog Pawar 
1571*c83a76b0SSuyog Pawar                         ps_intra16_analyse->b1_valid_cu = 0;
1572*c83a76b0SSuyog Pawar                         ps_intra16_analyse->b1_split_flag = 0;
1573*c83a76b0SSuyog Pawar                         ps_intra16_analyse->b1_merge_flag = 1;
1574*c83a76b0SSuyog Pawar                         //memset (&ps_intra16_analyse->au1_best_modes_16x16_tu,
1575*c83a76b0SSuyog Pawar                         //  255,
1576*c83a76b0SSuyog Pawar                         //  NUM_BEST_MODES);
1577*c83a76b0SSuyog Pawar                         //memset (&ps_intra16_analyse->au1_best_modes_8x8_tu,
1578*c83a76b0SSuyog Pawar                         //  255,
1579*c83a76b0SSuyog Pawar                         //  NUM_BEST_MODES);
1580*c83a76b0SSuyog Pawar                         //set only first mode since if it's 255. it wont go ahead
1581*c83a76b0SSuyog Pawar                         ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 255;
1582*c83a76b0SSuyog Pawar                         ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 255;
1583*c83a76b0SSuyog Pawar                         *pi4_intra_16_cost = MAX_INTRA_COST_IPE;
1584*c83a76b0SSuyog Pawar 
1585*c83a76b0SSuyog Pawar                         /*since ME will start evaluating from bottom up, set the lower
1586*c83a76b0SSuyog Pawar                         cu size data invalid */
1587*c83a76b0SSuyog Pawar                         for(i4_local_ctr = 0; i4_local_ctr < 4; i4_local_ctr++)
1588*c83a76b0SSuyog Pawar                         {
1589*c83a76b0SSuyog Pawar                             ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1590*c83a76b0SSuyog Pawar                                 .au1_4x4_best_modes[0][0] = 255;
1591*c83a76b0SSuyog Pawar                             ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1592*c83a76b0SSuyog Pawar                                 .au1_4x4_best_modes[1][0] = 255;
1593*c83a76b0SSuyog Pawar                             ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1594*c83a76b0SSuyog Pawar                                 .au1_4x4_best_modes[2][0] = 255;
1595*c83a76b0SSuyog Pawar                             ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1596*c83a76b0SSuyog Pawar                                 .au1_4x4_best_modes[3][0] = 255;
1597*c83a76b0SSuyog Pawar                             ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1598*c83a76b0SSuyog Pawar                                 .au1_best_modes_8x8_tu[0] = 255;
1599*c83a76b0SSuyog Pawar                             ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1600*c83a76b0SSuyog Pawar                                 .au1_best_modes_4x4_tu[0] = 255;
1601*c83a76b0SSuyog Pawar 
1602*c83a76b0SSuyog Pawar                             pi4_intra_8_cost
1603*c83a76b0SSuyog Pawar                                 [(i4_local_ctr & 1) + (MAX_CU_IN_CTB_ROW * (i4_local_ctr >> 1))] =
1604*c83a76b0SSuyog Pawar                                     MAX_INTRA_COST_IPE;
1605*c83a76b0SSuyog Pawar                         }
1606*c83a76b0SSuyog Pawar 
1607*c83a76b0SSuyog Pawar                         /* set neighbours even if intra is not evaluated, since source is always available. */
1608*c83a76b0SSuyog Pawar                         ihevce_set_nbr_map(
1609*c83a76b0SSuyog Pawar                             ps_ctxt->pu1_ctb_nbr_map,
1610*c83a76b0SSuyog Pawar                             ps_ctxt->i4_nbr_map_strd,
1611*c83a76b0SSuyog Pawar                             ps_cu_node->ps_parent->u2_x0 << 1,
1612*c83a76b0SSuyog Pawar                             ps_cu_node->ps_parent->u2_y0 << 1,
1613*c83a76b0SSuyog Pawar                             (ps_cu_node->ps_parent->u1_cu_size >> 2),
1614*c83a76b0SSuyog Pawar                             1);
1615*c83a76b0SSuyog Pawar 
1616*c83a76b0SSuyog Pawar                         //what happens to RC variables??
1617*c83a76b0SSuyog Pawar                         /* run only constant Qp */
1618*c83a76b0SSuyog Pawar                         ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
1619*c83a76b0SSuyog Pawar                         ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0] != -2);
1620*c83a76b0SSuyog Pawar                         i1_cu_possible_qp = ihevce_cu_level_qp_mod(
1621*c83a76b0SSuyog Pawar                             ps_ctxt->i4_qscale,
1622*c83a76b0SSuyog Pawar                             ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0],
1623*c83a76b0SSuyog Pawar                             ps_ctxt->ld_curr_frame_8x8_log_avg[0],
1624*c83a76b0SSuyog Pawar                             f_strength,
1625*c83a76b0SSuyog Pawar                             &i4_act_factor,
1626*c83a76b0SSuyog Pawar                             &i4_q_scale_q3_mod,
1627*c83a76b0SSuyog Pawar                             ps_ctxt->ps_rc_quant_ctxt);
1628*c83a76b0SSuyog Pawar 
1629*c83a76b0SSuyog Pawar                         /* cost accumulation of best cu size candidate */
1630*c83a76b0SSuyog Pawar                         i8_frame_acc_satd_cost += 0;  //parent_cost;  //incorrect accumulation
1631*c83a76b0SSuyog Pawar 
1632*c83a76b0SSuyog Pawar                         /*satd/mod_qp accumulation of best cu */
1633*c83a76b0SSuyog Pawar                         i8_frame_acc_satd_by_modqp_q10 += 0;  //incorrect accumulation
1634*c83a76b0SSuyog Pawar                         //((LWORD64)ps_cu_node->ps_parent->best_satd << SATD_BY_ACT_Q_FAC)/i4_q_scale_q3_mod;
1635*c83a76b0SSuyog Pawar 
1636*c83a76b0SSuyog Pawar                         /* Accumulate mode bits for all child blocks */
1637*c83a76b0SSuyog Pawar                         i8_frame_acc_mode_bits_cost +=
1638*c83a76b0SSuyog Pawar                             0;  //ps_cu_node->ps_parent->u2_mode_bits_cost;
1639*c83a76b0SSuyog Pawar                         //incorrect accumulation
1640*c83a76b0SSuyog Pawar 
1641*c83a76b0SSuyog Pawar                         blk_cnt += 4;
1642*c83a76b0SSuyog Pawar                         ps_ed_blk_l1 += 4;
1643*c83a76b0SSuyog Pawar                         //ps_row_cu++;
1644*c83a76b0SSuyog Pawar                         merge_64x64 = 0;
1645*c83a76b0SSuyog Pawar 
1646*c83a76b0SSuyog Pawar                         /* increment for stat purpose only. Increment is valid only on single thread */
1647*c83a76b0SSuyog Pawar                         ps_ctxt->u4_num_16x16_skips_at_L0_IPE += 1;
1648*c83a76b0SSuyog Pawar                     }
1649*c83a76b0SSuyog Pawar                     else
1650*c83a76b0SSuyog Pawar                     {
1651*c83a76b0SSuyog Pawar                         /* 64x64 merge is not possible */
1652*c83a76b0SSuyog Pawar                         merge_64x64 = 0;
1653*c83a76b0SSuyog Pawar 
1654*c83a76b0SSuyog Pawar                         /* set the 32x32 split flag to 1 */
1655*c83a76b0SSuyog Pawar                         ps_intra32_analyse->b1_split_flag = 1;
1656*c83a76b0SSuyog Pawar 
1657*c83a76b0SSuyog Pawar                         ps_intra32_analyse->b1_merge_flag = 0;
1658*c83a76b0SSuyog Pawar 
1659*c83a76b0SSuyog Pawar                         ps_intra16_analyse->b1_merge_flag = 1;
1660*c83a76b0SSuyog Pawar 
1661*c83a76b0SSuyog Pawar                         if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) &&
1662*c83a76b0SSuyog Pawar                            (ps_ctxt->i4_slice_type == PSLICE))
1663*c83a76b0SSuyog Pawar                         {
1664*c83a76b0SSuyog Pawar                             ps_ctxt->u1_disable_child_cu_decide = 1;
1665*c83a76b0SSuyog Pawar                             step2_bypass = 0;
1666*c83a76b0SSuyog Pawar                         }
1667*c83a76b0SSuyog Pawar                         //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
1668*c83a76b0SSuyog Pawar                         /* Based on the flag, Child modes decision can be disabled*/
1669*c83a76b0SSuyog Pawar                         if(0 == ps_ctxt->u1_disable_child_cu_decide)
1670*c83a76b0SSuyog Pawar                         {
1671*c83a76b0SSuyog Pawar                             for(j = 0; j < 4; j++)
1672*c83a76b0SSuyog Pawar                             {
1673*c83a76b0SSuyog Pawar                                 intra8_analyse_t *ps_intra8_analyse;
1674*c83a76b0SSuyog Pawar                                 WORD32 best_ang_mode = (ps_ed_blk_l1 + j)->best_mode;
1675*c83a76b0SSuyog Pawar 
1676*c83a76b0SSuyog Pawar                                 if(best_ang_mode < 2)
1677*c83a76b0SSuyog Pawar                                     best_ang_mode = 26;
1678*c83a76b0SSuyog Pawar 
1679*c83a76b0SSuyog Pawar                                 //ps_cu_node->ps_sub_cu[j]->best_cost = MAX_INTRA_COST_IPE;
1680*c83a76b0SSuyog Pawar                                 //ps_cu_node->ps_sub_cu[j]->best_mode = (ps_ed_blk_l1 + j)->best_mode;
1681*c83a76b0SSuyog Pawar 
1682*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_sub_cu[j]->u2_x0 =
1683*c83a76b0SSuyog Pawar                                     gau1_cu_pos_x[blk_cnt + j]; /* Populate properly */
1684*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_sub_cu[j]->u2_y0 =
1685*c83a76b0SSuyog Pawar                                     gau1_cu_pos_y[blk_cnt + j]; /* Populate properly */
1686*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_sub_cu[j]->u1_cu_size = 8;
1687*c83a76b0SSuyog Pawar 
1688*c83a76b0SSuyog Pawar                                 ihevce_mode_eval_filtering(
1689*c83a76b0SSuyog Pawar                                     ps_cu_node->ps_sub_cu[j],
1690*c83a76b0SSuyog Pawar                                     ps_cu_node,
1691*c83a76b0SSuyog Pawar                                     ps_ctxt,
1692*c83a76b0SSuyog Pawar                                     ps_curr_src,
1693*c83a76b0SSuyog Pawar                                     best_ang_mode,
1694*c83a76b0SSuyog Pawar                                     &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0],
1695*c83a76b0SSuyog Pawar                                     &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1696*c83a76b0SSuyog Pawar                                     !step2_bypass,
1697*c83a76b0SSuyog Pawar                                     1);
1698*c83a76b0SSuyog Pawar 
1699*c83a76b0SSuyog Pawar                                 if(i4_enable_4cu_16tu)
1700*c83a76b0SSuyog Pawar                                 {
1701*c83a76b0SSuyog Pawar                                     ihevce_mode_eval_filtering(
1702*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_sub_cu[j],
1703*c83a76b0SSuyog Pawar                                         ps_cu_node,
1704*c83a76b0SSuyog Pawar                                         ps_ctxt,
1705*c83a76b0SSuyog Pawar                                         ps_curr_src,
1706*c83a76b0SSuyog Pawar                                         best_ang_mode,
1707*c83a76b0SSuyog Pawar                                         &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1708*c83a76b0SSuyog Pawar                                         &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1709*c83a76b0SSuyog Pawar                                         !step2_bypass,
1710*c83a76b0SSuyog Pawar                                         0);
1711*c83a76b0SSuyog Pawar                                 }
1712*c83a76b0SSuyog Pawar                                 else
1713*c83a76b0SSuyog Pawar                                 {
1714*c83a76b0SSuyog Pawar                                     /* 4TU not evaluated :  4tu modes set same as 1tu modes */
1715*c83a76b0SSuyog Pawar                                     memcpy(
1716*c83a76b0SSuyog Pawar                                         &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1717*c83a76b0SSuyog Pawar                                         &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1718*c83a76b0SSuyog Pawar                                         NUM_BEST_MODES);
1719*c83a76b0SSuyog Pawar 
1720*c83a76b0SSuyog Pawar                                     /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
1721*c83a76b0SSuyog Pawar                                     memcpy(
1722*c83a76b0SSuyog Pawar                                         &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1723*c83a76b0SSuyog Pawar                                         &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0],
1724*c83a76b0SSuyog Pawar                                         NUM_BEST_MODES * sizeof(WORD32));
1725*c83a76b0SSuyog Pawar                                 }
1726*c83a76b0SSuyog Pawar 
1727*c83a76b0SSuyog Pawar                                 child_cost[j] =
1728*c83a76b0SSuyog Pawar                                     MIN(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1729*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]);
1730*c83a76b0SSuyog Pawar 
1731*c83a76b0SSuyog Pawar                                 child_cost_least += child_cost[j];
1732*c83a76b0SSuyog Pawar 
1733*c83a76b0SSuyog Pawar                                 /* Select the best mode to be populated as top and left nbr depending on the
1734*c83a76b0SSuyog Pawar                                 4tu and 1tu cost */
1735*c83a76b0SSuyog Pawar                                 if(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0] >
1736*c83a76b0SSuyog Pawar                                    ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0])
1737*c83a76b0SSuyog Pawar                                 {
1738*c83a76b0SSuyog Pawar                                     ps_cu_node->ps_sub_cu[j]->best_mode =
1739*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0];
1740*c83a76b0SSuyog Pawar                                 }
1741*c83a76b0SSuyog Pawar                                 else
1742*c83a76b0SSuyog Pawar                                 {
1743*c83a76b0SSuyog Pawar                                     ps_cu_node->ps_sub_cu[j]->best_mode =
1744*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0];
1745*c83a76b0SSuyog Pawar                                 }
1746*c83a76b0SSuyog Pawar                                 { /* Update the CTB nodes only for MAX - 1 CU nodes */
1747*c83a76b0SSuyog Pawar                                     WORD32 xA, yA, row, col;
1748*c83a76b0SSuyog Pawar                                     xA = ((ps_cu_node->ps_sub_cu[j]->u2_x0 << 3) >> 2) + 1;
1749*c83a76b0SSuyog Pawar                                     yA = ((ps_cu_node->ps_sub_cu[j]->u2_y0 << 3) >> 2) + 1;
1750*c83a76b0SSuyog Pawar                                     size = ps_cu_node->ps_sub_cu[j]->u1_cu_size >> 2;
1751*c83a76b0SSuyog Pawar                                     for(row = yA; row < (yA + size); row++)
1752*c83a76b0SSuyog Pawar                                     {
1753*c83a76b0SSuyog Pawar                                         for(col = xA; col < (xA + size); col++)
1754*c83a76b0SSuyog Pawar                                         {
1755*c83a76b0SSuyog Pawar                                             ps_ctxt->au1_ctb_mode_map[row][col] =
1756*c83a76b0SSuyog Pawar                                                 ps_cu_node->ps_sub_cu[j]->best_mode;
1757*c83a76b0SSuyog Pawar                                         }
1758*c83a76b0SSuyog Pawar                                     }
1759*c83a76b0SSuyog Pawar                                 }
1760*c83a76b0SSuyog Pawar 
1761*c83a76b0SSuyog Pawar                                 /*collect individual child satd for final SATD/qp accum*/
1762*c83a76b0SSuyog Pawar                                 child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd;
1763*c83a76b0SSuyog Pawar 
1764*c83a76b0SSuyog Pawar                                 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j];
1765*c83a76b0SSuyog Pawar 
1766*c83a76b0SSuyog Pawar                                 /* store the child 8x8 costs */
1767*c83a76b0SSuyog Pawar                                 pi4_intra_8_cost[(j & 1) + (MAX_CU_IN_CTB_ROW * (j >> 1))] =
1768*c83a76b0SSuyog Pawar                                     child_cost[j];
1769*c83a76b0SSuyog Pawar 
1770*c83a76b0SSuyog Pawar                                 /* set the CU valid flag */
1771*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->b1_valid_cu = 1;
1772*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->b1_enable_nxn = 0;
1773*c83a76b0SSuyog Pawar 
1774*c83a76b0SSuyog Pawar                                 /* storing the modes to intra8  analyse */
1775*c83a76b0SSuyog Pawar 
1776*c83a76b0SSuyog Pawar                                 /* store the best 8x8 modes 8x8 tu */
1777*c83a76b0SSuyog Pawar                                 memcpy(
1778*c83a76b0SSuyog Pawar                                     &ps_intra8_analyse->au1_best_modes_8x8_tu[0],
1779*c83a76b0SSuyog Pawar                                     &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1780*c83a76b0SSuyog Pawar                                     sizeof(UWORD8) * (NUM_BEST_MODES));
1781*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255;
1782*c83a76b0SSuyog Pawar 
1783*c83a76b0SSuyog Pawar                                 /* store the best 8x8 modes 4x4 tu */
1784*c83a76b0SSuyog Pawar                                 memcpy(
1785*c83a76b0SSuyog Pawar                                     &ps_intra8_analyse->au1_best_modes_4x4_tu[0],
1786*c83a76b0SSuyog Pawar                                     &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1787*c83a76b0SSuyog Pawar                                     sizeof(UWORD8) * (NUM_BEST_MODES));
1788*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->au1_best_modes_4x4_tu[NUM_BEST_MODES] = 255;
1789*c83a76b0SSuyog Pawar 
1790*c83a76b0SSuyog Pawar                                 /* NXN modes not evaluated hence set to 255 */
1791*c83a76b0SSuyog Pawar                                 memset(
1792*c83a76b0SSuyog Pawar                                     &ps_intra8_analyse->au1_4x4_best_modes[0][0],
1793*c83a76b0SSuyog Pawar                                     255,
1794*c83a76b0SSuyog Pawar                                     sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1));
1795*c83a76b0SSuyog Pawar                             }
1796*c83a76b0SSuyog Pawar 
1797*c83a76b0SSuyog Pawar                             ihevce_set_nbr_map(
1798*c83a76b0SSuyog Pawar                                 ps_ctxt->pu1_ctb_nbr_map,
1799*c83a76b0SSuyog Pawar                                 ps_ctxt->i4_nbr_map_strd,
1800*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_sub_cu[0]->u2_x0 << 1,
1801*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_sub_cu[0]->u2_y0 << 1,
1802*c83a76b0SSuyog Pawar                                 (ps_cu_node->ps_sub_cu[0]->u1_cu_size >> 1),
1803*c83a76b0SSuyog Pawar                                 0);
1804*c83a76b0SSuyog Pawar                         }
1805*c83a76b0SSuyog Pawar #if 1  //DISBLE_CHILD_CU_EVAL_L0_IPE //1
1806*c83a76b0SSuyog Pawar                         else
1807*c83a76b0SSuyog Pawar                         {
1808*c83a76b0SSuyog Pawar                             for(j = 0; j < 4; j++)
1809*c83a76b0SSuyog Pawar                             {
1810*c83a76b0SSuyog Pawar                                 intra8_analyse_t *ps_intra8_analyse;
1811*c83a76b0SSuyog Pawar                                 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j];
1812*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255;
1813*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255;
1814*c83a76b0SSuyog Pawar                                 /* NXN modes not evaluated hence set to 255 */
1815*c83a76b0SSuyog Pawar                                 memset(
1816*c83a76b0SSuyog Pawar                                     &ps_intra8_analyse->au1_4x4_best_modes[0][0],
1817*c83a76b0SSuyog Pawar                                     255,
1818*c83a76b0SSuyog Pawar                                     sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1));
1819*c83a76b0SSuyog Pawar 
1820*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->b1_valid_cu = 0;
1821*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->b1_enable_nxn = 0;
1822*c83a76b0SSuyog Pawar                             }
1823*c83a76b0SSuyog Pawar                             child_cost_least = MAX_INTRA_COST_IPE;
1824*c83a76b0SSuyog Pawar                         }
1825*c83a76b0SSuyog Pawar #endif
1826*c83a76b0SSuyog Pawar                         //ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode;
1827*c83a76b0SSuyog Pawar                         //ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE;
1828*c83a76b0SSuyog Pawar 
1829*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->u1_cu_size = 16;
1830*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->u2_x0 =
1831*c83a76b0SSuyog Pawar                             gau1_cu_pos_x[blk_cnt]; /* Populate properly */
1832*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->u2_y0 =
1833*c83a76b0SSuyog Pawar                             gau1_cu_pos_y[blk_cnt]; /* Populate properly */
1834*c83a76b0SSuyog Pawar 
1835*c83a76b0SSuyog Pawar                         //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
1836*c83a76b0SSuyog Pawar 
1837*c83a76b0SSuyog Pawar                         /* Eval for TUSize = CuSize */
1838*c83a76b0SSuyog Pawar                         ihevce_mode_eval_filtering(
1839*c83a76b0SSuyog Pawar                             ps_cu_node->ps_parent,
1840*c83a76b0SSuyog Pawar                             ps_cu_node,
1841*c83a76b0SSuyog Pawar                             ps_ctxt,
1842*c83a76b0SSuyog Pawar                             ps_curr_src,
1843*c83a76b0SSuyog Pawar                             26,
1844*c83a76b0SSuyog Pawar                             &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
1845*c83a76b0SSuyog Pawar                             &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1846*c83a76b0SSuyog Pawar                             step2_bypass,
1847*c83a76b0SSuyog Pawar                             1);
1848*c83a76b0SSuyog Pawar 
1849*c83a76b0SSuyog Pawar                         if(i4_enable_1cu_4tu)
1850*c83a76b0SSuyog Pawar                         {
1851*c83a76b0SSuyog Pawar                             /* Eval for TUSize = CuSize/2 */
1852*c83a76b0SSuyog Pawar                             ihevce_mode_eval_filtering(
1853*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent,
1854*c83a76b0SSuyog Pawar                                 ps_cu_node,
1855*c83a76b0SSuyog Pawar                                 ps_ctxt,
1856*c83a76b0SSuyog Pawar                                 ps_curr_src,
1857*c83a76b0SSuyog Pawar                                 26,
1858*c83a76b0SSuyog Pawar                                 &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1859*c83a76b0SSuyog Pawar                                 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1860*c83a76b0SSuyog Pawar                                 step2_bypass,
1861*c83a76b0SSuyog Pawar                                 0);
1862*c83a76b0SSuyog Pawar                         }
1863*c83a76b0SSuyog Pawar                         else
1864*c83a76b0SSuyog Pawar                         {
1865*c83a76b0SSuyog Pawar                             /* 4TU not evaluated :  4tu modes set same as 1tu modes */
1866*c83a76b0SSuyog Pawar                             memcpy(
1867*c83a76b0SSuyog Pawar                                 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1868*c83a76b0SSuyog Pawar                                 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1869*c83a76b0SSuyog Pawar                                 NUM_BEST_MODES);
1870*c83a76b0SSuyog Pawar 
1871*c83a76b0SSuyog Pawar                             /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
1872*c83a76b0SSuyog Pawar                             memcpy(
1873*c83a76b0SSuyog Pawar                                 &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1874*c83a76b0SSuyog Pawar                                 &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
1875*c83a76b0SSuyog Pawar                                 NUM_BEST_MODES * sizeof(WORD32));
1876*c83a76b0SSuyog Pawar                         }
1877*c83a76b0SSuyog Pawar 
1878*c83a76b0SSuyog Pawar                         ps_ctxt->u1_disable_child_cu_decide = 0;
1879*c83a76b0SSuyog Pawar                         step2_bypass = 1;
1880*c83a76b0SSuyog Pawar 
1881*c83a76b0SSuyog Pawar                         /* Update parent cost */
1882*c83a76b0SSuyog Pawar                         parent_cost =
1883*c83a76b0SSuyog Pawar                             MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1884*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->au4_best_cost_1tu[0]);
1885*c83a76b0SSuyog Pawar 
1886*c83a76b0SSuyog Pawar                         /* Select the best mode to be populated as top and left nbr depending on the
1887*c83a76b0SSuyog Pawar                         4tu and 1tu cost */
1888*c83a76b0SSuyog Pawar                         if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] >
1889*c83a76b0SSuyog Pawar                            ps_cu_node->ps_parent->au4_best_cost_1tu[0])
1890*c83a76b0SSuyog Pawar                         {
1891*c83a76b0SSuyog Pawar                             ps_cu_node->ps_parent->best_mode =
1892*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->au1_best_mode_1tu[0];
1893*c83a76b0SSuyog Pawar                         }
1894*c83a76b0SSuyog Pawar                         else
1895*c83a76b0SSuyog Pawar                         {
1896*c83a76b0SSuyog Pawar                             ps_cu_node->ps_parent->best_mode =
1897*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->au1_best_mode_4tu[0];
1898*c83a76b0SSuyog Pawar                         }
1899*c83a76b0SSuyog Pawar 
1900*c83a76b0SSuyog Pawar                         /* store the 16x16 cost */
1901*c83a76b0SSuyog Pawar                         *pi4_intra_16_cost = parent_cost;
1902*c83a76b0SSuyog Pawar 
1903*c83a76b0SSuyog Pawar                         /* accumulate the 32x32 cost */
1904*c83a76b0SSuyog Pawar                         if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost)
1905*c83a76b0SSuyog Pawar                         {
1906*c83a76b0SSuyog Pawar                             *pi4_intra_32_cost = parent_cost;
1907*c83a76b0SSuyog Pawar                         }
1908*c83a76b0SSuyog Pawar                         else
1909*c83a76b0SSuyog Pawar                         {
1910*c83a76b0SSuyog Pawar                             *pi4_intra_32_cost += parent_cost;
1911*c83a76b0SSuyog Pawar                         }
1912*c83a76b0SSuyog Pawar 
1913*c83a76b0SSuyog Pawar                         /* set the CU valid flag */
1914*c83a76b0SSuyog Pawar                         ps_intra16_analyse->b1_valid_cu = 1;
1915*c83a76b0SSuyog Pawar 
1916*c83a76b0SSuyog Pawar                         /* storing the modes to intra 16 analyse */
1917*c83a76b0SSuyog Pawar                         {
1918*c83a76b0SSuyog Pawar                             /* store the best 16x16 modes 16x16 tu */
1919*c83a76b0SSuyog Pawar                             memcpy(
1920*c83a76b0SSuyog Pawar                                 &ps_intra16_analyse->au1_best_modes_16x16_tu[0],
1921*c83a76b0SSuyog Pawar                                 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1922*c83a76b0SSuyog Pawar                                 sizeof(UWORD8) * NUM_BEST_MODES);
1923*c83a76b0SSuyog Pawar                             ps_intra16_analyse->au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255;
1924*c83a76b0SSuyog Pawar 
1925*c83a76b0SSuyog Pawar                             /* store the best 16x16 modes 8x8 tu */
1926*c83a76b0SSuyog Pawar                             memcpy(
1927*c83a76b0SSuyog Pawar                                 &ps_intra16_analyse->au1_best_modes_8x8_tu[0],
1928*c83a76b0SSuyog Pawar                                 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1929*c83a76b0SSuyog Pawar                                 sizeof(UWORD8) * NUM_BEST_MODES);
1930*c83a76b0SSuyog Pawar                             ps_intra16_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255;
1931*c83a76b0SSuyog Pawar                         }
1932*c83a76b0SSuyog Pawar 
1933*c83a76b0SSuyog Pawar                         parent_best_mode = ps_cu_node->ps_parent->best_mode;
1934*c83a76b0SSuyog Pawar                         if(parent_cost <=
1935*c83a76b0SSuyog Pawar                            child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >>
1936*c83a76b0SSuyog Pawar                                                LAMBDA_Q_SHIFT))  //|| identical_modes)
1937*c83a76b0SSuyog Pawar                         {
1938*c83a76b0SSuyog Pawar                             WORD32 i4_q_scale_q3_mod;
1939*c83a76b0SSuyog Pawar                             WORD8 i1_cu_possible_qp;
1940*c83a76b0SSuyog Pawar                             WORD32 i4_act_factor;
1941*c83a76b0SSuyog Pawar                             //choose parent CU
1942*c83a76b0SSuyog Pawar 
1943*c83a76b0SSuyog Pawar                             ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
1944*c83a76b0SSuyog Pawar 
1945*c83a76b0SSuyog Pawar                             /* set the 16x16 non split flag */
1946*c83a76b0SSuyog Pawar                             ps_intra16_analyse->b1_split_flag = 0;
1947*c83a76b0SSuyog Pawar 
1948*c83a76b0SSuyog Pawar                             /*As 16*16 has won, pick L1 8x8 qp which maps
1949*c83a76b0SSuyog Pawar                             to L0 16x16 Qp*/
1950*c83a76b0SSuyog Pawar                             ASSERT(((blk_cnt >> 4) & 3) == (blk_cnt >> 4));
1951*c83a76b0SSuyog Pawar                             ASSERT(ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0] != -2);
1952*c83a76b0SSuyog Pawar                             i1_cu_possible_qp = ihevce_cu_level_qp_mod(
1953*c83a76b0SSuyog Pawar                                 ps_ctxt->i4_qscale,
1954*c83a76b0SSuyog Pawar                                 ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0],
1955*c83a76b0SSuyog Pawar                                 ps_ctxt->ld_curr_frame_8x8_log_avg[0],
1956*c83a76b0SSuyog Pawar                                 f_strength,
1957*c83a76b0SSuyog Pawar                                 &i4_act_factor,
1958*c83a76b0SSuyog Pawar                                 &i4_q_scale_q3_mod,
1959*c83a76b0SSuyog Pawar                                 ps_ctxt->ps_rc_quant_ctxt);
1960*c83a76b0SSuyog Pawar 
1961*c83a76b0SSuyog Pawar                             /* cost accumulation of best cu size candidate */
1962*c83a76b0SSuyog Pawar                             i8_frame_acc_satd_cost += parent_cost;
1963*c83a76b0SSuyog Pawar 
1964*c83a76b0SSuyog Pawar                             /* satd and mpm bits accumulation of best cu size candidate */
1965*c83a76b0SSuyog Pawar                             i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd;
1966*c83a76b0SSuyog Pawar 
1967*c83a76b0SSuyog Pawar                             /*satd/mod_qp accumulation of best cu */
1968*c83a76b0SSuyog Pawar                             i8_frame_acc_satd_by_modqp_q10 +=
1969*c83a76b0SSuyog Pawar                                 ((LWORD64)ps_cu_node->ps_parent->best_satd
1970*c83a76b0SSuyog Pawar                                  << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
1971*c83a76b0SSuyog Pawar                                 i4_q_scale_q3_mod;
1972*c83a76b0SSuyog Pawar 
1973*c83a76b0SSuyog Pawar                             /* Accumulate mode bits of the parent CU */
1974*c83a76b0SSuyog Pawar                             i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost;
1975*c83a76b0SSuyog Pawar 
1976*c83a76b0SSuyog Pawar                             blk_cnt += 4;
1977*c83a76b0SSuyog Pawar                             ps_ed_blk_l1 += 4;
1978*c83a76b0SSuyog Pawar                             //ps_row_cu++;
1979*c83a76b0SSuyog Pawar                         }
1980*c83a76b0SSuyog Pawar                         else
1981*c83a76b0SSuyog Pawar                         {
1982*c83a76b0SSuyog Pawar                             //choose child CU
1983*c83a76b0SSuyog Pawar                             WORD8 i1_cu_possible_qp;
1984*c83a76b0SSuyog Pawar                             WORD32 i4_act_factor;
1985*c83a76b0SSuyog Pawar                             WORD32 i4_q_scale_q3_mod;
1986*c83a76b0SSuyog Pawar 
1987*c83a76b0SSuyog Pawar                             ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
1988*c83a76b0SSuyog Pawar                             ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][1] != -2);
1989*c83a76b0SSuyog Pawar                             i1_cu_possible_qp = ihevce_cu_level_qp_mod(
1990*c83a76b0SSuyog Pawar                                 ps_ctxt->i4_qscale,
1991*c83a76b0SSuyog Pawar                                 ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][1],
1992*c83a76b0SSuyog Pawar                                 ps_ctxt->ld_curr_frame_8x8_log_avg[1],
1993*c83a76b0SSuyog Pawar                                 f_strength,
1994*c83a76b0SSuyog Pawar                                 &i4_act_factor,
1995*c83a76b0SSuyog Pawar                                 &i4_q_scale_q3_mod,
1996*c83a76b0SSuyog Pawar                                 ps_ctxt->ps_rc_quant_ctxt);
1997*c83a76b0SSuyog Pawar 
1998*c83a76b0SSuyog Pawar                             /* set the 16x16 split flag */
1999*c83a76b0SSuyog Pawar                             ps_intra16_analyse->b1_split_flag = 1;
2000*c83a76b0SSuyog Pawar 
2001*c83a76b0SSuyog Pawar                             for(j = 0; j < 4; j++)
2002*c83a76b0SSuyog Pawar                             {
2003*c83a76b0SSuyog Pawar                                 ihevce_update_cand_list(
2004*c83a76b0SSuyog Pawar                                     ps_cu_node->ps_sub_cu[j], ps_ed_blk_l1, ps_ctxt);
2005*c83a76b0SSuyog Pawar 
2006*c83a76b0SSuyog Pawar                                 if((IHEVCE_QUALITY_P3 > i4_quality_preset))
2007*c83a76b0SSuyog Pawar                                 {
2008*c83a76b0SSuyog Pawar                                     WORD32 k;
2009*c83a76b0SSuyog Pawar                                     intra8_analyse_t *ps_intra8_analyse;
2010*c83a76b0SSuyog Pawar                                     ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j];
2011*c83a76b0SSuyog Pawar 
2012*c83a76b0SSuyog Pawar                                     for(k = 0; k < 4; k++)
2013*c83a76b0SSuyog Pawar                                     {
2014*c83a76b0SSuyog Pawar                                         /* Populate best 3 nxn modes */
2015*c83a76b0SSuyog Pawar                                         ps_intra8_analyse->au1_4x4_best_modes[k][0] =
2016*c83a76b0SSuyog Pawar                                             ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0];
2017*c83a76b0SSuyog Pawar                                         ps_intra8_analyse->au1_4x4_best_modes[k][1] =
2018*c83a76b0SSuyog Pawar                                             ps_cu_node->ps_sub_cu[j]
2019*c83a76b0SSuyog Pawar                                                 ->au1_best_mode_4tu[1];  //(ps_ed + 1)->best_mode;
2020*c83a76b0SSuyog Pawar                                         ps_intra8_analyse->au1_4x4_best_modes[k][2] =
2021*c83a76b0SSuyog Pawar                                             ps_cu_node->ps_sub_cu[j]
2022*c83a76b0SSuyog Pawar                                                 ->au1_best_mode_4tu[2];  //(ps_ed + 2)->best_mode;
2023*c83a76b0SSuyog Pawar                                         ps_intra8_analyse->au1_4x4_best_modes[k][3] = 255;
2024*c83a76b0SSuyog Pawar                                     }
2025*c83a76b0SSuyog Pawar                                 }
2026*c83a76b0SSuyog Pawar                                 /*accum satd/qp for all child block*/
2027*c83a76b0SSuyog Pawar                                 i8_frame_acc_satd_by_modqp_q10 +=
2028*c83a76b0SSuyog Pawar                                     ((LWORD64)child_satd[j]
2029*c83a76b0SSuyog Pawar                                      << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
2030*c83a76b0SSuyog Pawar                                     i4_q_scale_q3_mod;
2031*c83a76b0SSuyog Pawar 
2032*c83a76b0SSuyog Pawar                                 /* Accumulate mode bits for all child blocks */
2033*c83a76b0SSuyog Pawar                                 i8_frame_acc_mode_bits_cost +=
2034*c83a76b0SSuyog Pawar                                     ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost;
2035*c83a76b0SSuyog Pawar 
2036*c83a76b0SSuyog Pawar                                 /* satd and mpm bits accumulation of best cu size candidate */
2037*c83a76b0SSuyog Pawar                                 i4_ctb_acc_satd += child_satd[j];
2038*c83a76b0SSuyog Pawar 
2039*c83a76b0SSuyog Pawar                                 blk_cnt += 1;
2040*c83a76b0SSuyog Pawar                                 ps_ed_blk_l1 += 1;
2041*c83a76b0SSuyog Pawar                                 //ps_row_cu++;
2042*c83a76b0SSuyog Pawar                             }
2043*c83a76b0SSuyog Pawar 
2044*c83a76b0SSuyog Pawar                             /* cost accumulation of best cu size candidate */
2045*c83a76b0SSuyog Pawar                             i8_frame_acc_satd_cost += child_cost_least;
2046*c83a76b0SSuyog Pawar                         }
2047*c83a76b0SSuyog Pawar 
2048*c83a76b0SSuyog Pawar                     }  //else of EIID
2049*c83a76b0SSuyog Pawar #endif
2050*c83a76b0SSuyog Pawar                 }  // if(merge_16x16_l1)
2051*c83a76b0SSuyog Pawar                 /* MAX CU SIZE 8x8 */
2052*c83a76b0SSuyog Pawar                 else
2053*c83a76b0SSuyog Pawar                 {
2054*c83a76b0SSuyog Pawar #if IP_DBG_L1_l2
2055*c83a76b0SSuyog Pawar                     for(i = 0; i < 4; i++)
2056*c83a76b0SSuyog Pawar                     {
2057*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->u1_cu_size = 8;
2058*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->u2_x0 =
2059*c83a76b0SSuyog Pawar                             gau1_cu_pos_x[blk_cnt]; /* Populate properly */
2060*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->u2_y0 =
2061*c83a76b0SSuyog Pawar                             gau1_cu_pos_y[blk_cnt]; /* Populate properly */
2062*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode;
2063*c83a76b0SSuyog Pawar 
2064*c83a76b0SSuyog Pawar                         ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
2065*c83a76b0SSuyog Pawar                         blk_cnt++;
2066*c83a76b0SSuyog Pawar                         ps_ed_blk_l1++;
2067*c83a76b0SSuyog Pawar                         ps_row_cu++;
2068*c83a76b0SSuyog Pawar                         merge_64x64 = 0;
2069*c83a76b0SSuyog Pawar                     }
2070*c83a76b0SSuyog Pawar #else
2071*c83a76b0SSuyog Pawar 
2072*c83a76b0SSuyog Pawar                     /* EIID: Skip all four 8x8 blocks if the L1 decision says skip intra */
2073*c83a76b0SSuyog Pawar                     if(ps_ed_blk_l1->intra_or_inter == 2 && (ps_ctxt->i4_slice_type != ISLICE))
2074*c83a76b0SSuyog Pawar                     {
2075*c83a76b0SSuyog Pawar                         WORD32 i4_q_scale_q3_mod;
2076*c83a76b0SSuyog Pawar                         WORD8 i1_cu_possible_qp;
2077*c83a76b0SSuyog Pawar                         WORD32 i4_act_factor;
2078*c83a76b0SSuyog Pawar 
2079*c83a76b0SSuyog Pawar                         merge_64x64 = 0;
2080*c83a76b0SSuyog Pawar 
2081*c83a76b0SSuyog Pawar                         ps_intra32_analyse->b1_merge_flag = 0;
2082*c83a76b0SSuyog Pawar 
2083*c83a76b0SSuyog Pawar                         ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 255;
2084*c83a76b0SSuyog Pawar                         ps_intra16_analyse->au1_best_modes_8x8_tu[1] = 255;
2085*c83a76b0SSuyog Pawar                         ps_intra16_analyse->au1_best_modes_8x8_tu[2] = 255;
2086*c83a76b0SSuyog Pawar 
2087*c83a76b0SSuyog Pawar                         ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 255;
2088*c83a76b0SSuyog Pawar                         ps_intra16_analyse->au1_best_modes_16x16_tu[1] = 255;
2089*c83a76b0SSuyog Pawar                         ps_intra16_analyse->au1_best_modes_16x16_tu[2] = 255;
2090*c83a76b0SSuyog Pawar                         ps_intra16_analyse->b1_split_flag = 1;
2091*c83a76b0SSuyog Pawar                         ps_intra16_analyse->b1_valid_cu = 0;
2092*c83a76b0SSuyog Pawar                         ps_intra16_analyse->b1_merge_flag = 0;
2093*c83a76b0SSuyog Pawar 
2094*c83a76b0SSuyog Pawar                         for(i = 0; i < 4; i++)
2095*c83a76b0SSuyog Pawar                         {
2096*c83a76b0SSuyog Pawar                             intra8_analyse_t *ps_intra8_analyse;
2097*c83a76b0SSuyog Pawar                             WORD32 ctr_sub_cu;
2098*c83a76b0SSuyog Pawar 
2099*c83a76b0SSuyog Pawar                             cu_pos_x = gau1_cu_pos_x[blk_cnt];
2100*c83a76b0SSuyog Pawar                             cu_pos_y = gau1_cu_pos_y[blk_cnt];
2101*c83a76b0SSuyog Pawar 
2102*c83a76b0SSuyog Pawar                             if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y))
2103*c83a76b0SSuyog Pawar                             {
2104*c83a76b0SSuyog Pawar                                 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i];
2105*c83a76b0SSuyog Pawar 
2106*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->b1_valid_cu = 0;
2107*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->b1_enable_nxn = 0;
2108*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->au1_4x4_best_modes[0][0] = 255;
2109*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->au1_4x4_best_modes[1][0] = 255;
2110*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->au1_4x4_best_modes[2][0] = 255;
2111*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->au1_4x4_best_modes[3][0] = 255;
2112*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255;
2113*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255;
2114*c83a76b0SSuyog Pawar 
2115*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->u1_cu_size = 8;
2116*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->u2_x0 =
2117*c83a76b0SSuyog Pawar                                     gau1_cu_pos_x[blk_cnt]; /* Populate properly */
2118*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->u2_y0 =
2119*c83a76b0SSuyog Pawar                                     gau1_cu_pos_y[blk_cnt]; /* Populate properly */
2120*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->best_mode =
2121*c83a76b0SSuyog Pawar                                     INTRA_DC;  //ps_ed_blk_l1->best_mode;
2122*c83a76b0SSuyog Pawar 
2123*c83a76b0SSuyog Pawar                                 /* fill in the first three 1tu and 4tu best modes with INTRA_DC */
2124*c83a76b0SSuyog Pawar 
2125*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC;
2126*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->au1_best_mode_1tu[1] =
2127*c83a76b0SSuyog Pawar                                     INTRA_DC;  //for safery. Since update_cand_list will set num_modes as 3
2128*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC;
2129*c83a76b0SSuyog Pawar 
2130*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC;
2131*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC;
2132*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC;
2133*c83a76b0SSuyog Pawar 
2134*c83a76b0SSuyog Pawar                                 ihevce_update_cand_list(
2135*c83a76b0SSuyog Pawar                                     ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
2136*c83a76b0SSuyog Pawar 
2137*c83a76b0SSuyog Pawar                                 //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0;
2138*c83a76b0SSuyog Pawar                                 //ps_row_cu->u1_num_intra_rdopt_cands = 0;
2139*c83a76b0SSuyog Pawar 
2140*c83a76b0SSuyog Pawar                                 for(ctr_sub_cu = 0; ctr_sub_cu < 4; ctr_sub_cu++)
2141*c83a76b0SSuyog Pawar                                 {
2142*c83a76b0SSuyog Pawar                                     ps_cu_node->ps_sub_cu[ctr_sub_cu]->au1_best_mode_1tu[0] =
2143*c83a76b0SSuyog Pawar                                         INTRA_DC;
2144*c83a76b0SSuyog Pawar                                     ps_cu_node->ps_sub_cu[ctr_sub_cu]->au1_best_mode_4tu[0] =
2145*c83a76b0SSuyog Pawar                                         INTRA_DC;
2146*c83a76b0SSuyog Pawar                                     ps_cu_node->ps_sub_cu[ctr_sub_cu]->au4_best_cost_1tu[0] =
2147*c83a76b0SSuyog Pawar                                         MAX_INTRA_COST_IPE;
2148*c83a76b0SSuyog Pawar 
2149*c83a76b0SSuyog Pawar                                     ps_cu_node->ps_sub_cu[ctr_sub_cu]->au4_best_cost_4tu[0] =
2150*c83a76b0SSuyog Pawar                                         MAX_INTRA_COST_IPE;
2151*c83a76b0SSuyog Pawar                                     ps_cu_node->ps_sub_cu[ctr_sub_cu]->best_cost =
2152*c83a76b0SSuyog Pawar                                         MAX_INTRA_COST_IPE;
2153*c83a76b0SSuyog Pawar                                 }
2154*c83a76b0SSuyog Pawar 
2155*c83a76b0SSuyog Pawar                                 pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] =
2156*c83a76b0SSuyog Pawar                                     MAX_INTRA_COST_IPE;
2157*c83a76b0SSuyog Pawar 
2158*c83a76b0SSuyog Pawar                                 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
2159*c83a76b0SSuyog Pawar                                 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1] != -2);
2160*c83a76b0SSuyog Pawar                                 i1_cu_possible_qp = ihevce_cu_level_qp_mod(
2161*c83a76b0SSuyog Pawar                                     ps_ctxt->i4_qscale,
2162*c83a76b0SSuyog Pawar                                     ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1],
2163*c83a76b0SSuyog Pawar                                     ps_ctxt->ld_curr_frame_8x8_log_avg[1],
2164*c83a76b0SSuyog Pawar                                     f_strength,
2165*c83a76b0SSuyog Pawar                                     &i4_act_factor,
2166*c83a76b0SSuyog Pawar                                     &i4_q_scale_q3_mod,
2167*c83a76b0SSuyog Pawar                                     ps_ctxt->ps_rc_quant_ctxt);
2168*c83a76b0SSuyog Pawar 
2169*c83a76b0SSuyog Pawar                                 /* set neighbours even if intra is not evaluated, since source is always available. */
2170*c83a76b0SSuyog Pawar                                 ihevce_set_nbr_map(
2171*c83a76b0SSuyog Pawar                                     ps_ctxt->pu1_ctb_nbr_map,
2172*c83a76b0SSuyog Pawar                                     ps_ctxt->i4_nbr_map_strd,
2173*c83a76b0SSuyog Pawar                                     ps_cu_node->ps_parent->u2_x0 << 1,
2174*c83a76b0SSuyog Pawar                                     ps_cu_node->ps_parent->u2_y0 << 1,
2175*c83a76b0SSuyog Pawar                                     (ps_cu_node->ps_parent->u1_cu_size >> 2),
2176*c83a76b0SSuyog Pawar                                     1);
2177*c83a76b0SSuyog Pawar 
2178*c83a76b0SSuyog Pawar                                 //ps_row_cu++;
2179*c83a76b0SSuyog Pawar                             }
2180*c83a76b0SSuyog Pawar                             blk_cnt++;
2181*c83a76b0SSuyog Pawar                             ps_ed_blk_l1++;
2182*c83a76b0SSuyog Pawar                         }
2183*c83a76b0SSuyog Pawar                     }
2184*c83a76b0SSuyog Pawar                     else
2185*c83a76b0SSuyog Pawar                     {
2186*c83a76b0SSuyog Pawar                         //cu_intra_cand_t *ps_cu_intra_cand;
2187*c83a76b0SSuyog Pawar                         WORD8 i1_cu_possible_qp;
2188*c83a76b0SSuyog Pawar                         WORD32 i4_act_factor;
2189*c83a76b0SSuyog Pawar                         WORD32 i4_q_scale_q3_mod;
2190*c83a76b0SSuyog Pawar 
2191*c83a76b0SSuyog Pawar                         ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
2192*c83a76b0SSuyog Pawar                         ASSERT(ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1] != -2);
2193*c83a76b0SSuyog Pawar                         i1_cu_possible_qp = ihevce_cu_level_qp_mod(
2194*c83a76b0SSuyog Pawar                             ps_ctxt->i4_qscale,
2195*c83a76b0SSuyog Pawar                             ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1],
2196*c83a76b0SSuyog Pawar                             ps_ctxt->ld_curr_frame_8x8_log_avg[1],
2197*c83a76b0SSuyog Pawar                             f_strength,
2198*c83a76b0SSuyog Pawar                             &i4_act_factor,
2199*c83a76b0SSuyog Pawar                             &i4_q_scale_q3_mod,
2200*c83a76b0SSuyog Pawar                             ps_ctxt->ps_rc_quant_ctxt);
2201*c83a76b0SSuyog Pawar 
2202*c83a76b0SSuyog Pawar                         /* 64x64 merge is not possible */
2203*c83a76b0SSuyog Pawar                         merge_64x64 = 0;
2204*c83a76b0SSuyog Pawar 
2205*c83a76b0SSuyog Pawar                         ps_intra32_analyse->b1_merge_flag = 0;
2206*c83a76b0SSuyog Pawar 
2207*c83a76b0SSuyog Pawar                         ps_intra16_analyse->b1_merge_flag = 0;
2208*c83a76b0SSuyog Pawar 
2209*c83a76b0SSuyog Pawar                         /* by default 16x16 modes are set to default values DC and Planar */
2210*c83a76b0SSuyog Pawar                         ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 0;
2211*c83a76b0SSuyog Pawar                         ps_intra16_analyse->au1_best_modes_8x8_tu[1] = 1;
2212*c83a76b0SSuyog Pawar                         ps_intra16_analyse->au1_best_modes_8x8_tu[2] = 255;
2213*c83a76b0SSuyog Pawar 
2214*c83a76b0SSuyog Pawar                         ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 0;
2215*c83a76b0SSuyog Pawar                         ps_intra16_analyse->au1_best_modes_16x16_tu[1] = 1;
2216*c83a76b0SSuyog Pawar                         ps_intra16_analyse->au1_best_modes_16x16_tu[2] = 255;
2217*c83a76b0SSuyog Pawar                         ps_intra16_analyse->b1_split_flag = 1;
2218*c83a76b0SSuyog Pawar                         ps_intra16_analyse->b1_valid_cu = 1;
2219*c83a76b0SSuyog Pawar 
2220*c83a76b0SSuyog Pawar                         for(i = 0; i < 4; i++)
2221*c83a76b0SSuyog Pawar                         {
2222*c83a76b0SSuyog Pawar                             intra8_analyse_t *ps_intra8_analyse;
2223*c83a76b0SSuyog Pawar                             cu_pos_x = gau1_cu_pos_x[blk_cnt];
2224*c83a76b0SSuyog Pawar                             cu_pos_y = gau1_cu_pos_y[blk_cnt];
2225*c83a76b0SSuyog Pawar                             if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y))
2226*c83a76b0SSuyog Pawar                             {
2227*c83a76b0SSuyog Pawar                                 //ps_cu_intra_cand = &ps_row_cu->s_cu_intra_cand;
2228*c83a76b0SSuyog Pawar                                 //ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE;
2229*c83a76b0SSuyog Pawar 
2230*c83a76b0SSuyog Pawar                                 //ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode;
2231*c83a76b0SSuyog Pawar 
2232*c83a76b0SSuyog Pawar                                 child_cost_least = 0;
2233*c83a76b0SSuyog Pawar 
2234*c83a76b0SSuyog Pawar                                 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i];
2235*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->u1_cu_size = 8;
2236*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->u2_x0 =
2237*c83a76b0SSuyog Pawar                                     gau1_cu_pos_x[blk_cnt]; /* Populate properly */
2238*c83a76b0SSuyog Pawar                                 ps_cu_node->ps_parent->u2_y0 =
2239*c83a76b0SSuyog Pawar                                     gau1_cu_pos_y[blk_cnt]; /* Populate properly */
2240*c83a76b0SSuyog Pawar 
2241*c83a76b0SSuyog Pawar                                 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
2242*c83a76b0SSuyog Pawar 
2243*c83a76b0SSuyog Pawar                                 /*EARLY DECISION 8x8 block */
2244*c83a76b0SSuyog Pawar                                 ihevce_pu_calc_8x8_blk(
2245*c83a76b0SSuyog Pawar                                     ps_curr_src, ps_ctxt, ps_cu_node, ps_ctxt->ps_func_selector);
2246*c83a76b0SSuyog Pawar                                 for(j = 0; j < 4; j++)
2247*c83a76b0SSuyog Pawar                                 {
2248*c83a76b0SSuyog Pawar                                     child_cost_least += ps_cu_node->ps_sub_cu[j]->best_cost;
2249*c83a76b0SSuyog Pawar                                     child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd;
2250*c83a76b0SSuyog Pawar                                 }
2251*c83a76b0SSuyog Pawar 
2252*c83a76b0SSuyog Pawar                                 /* Based on the flag, CU = 4TU modes decision can be disabled, CU = 4PU is retained */
2253*c83a76b0SSuyog Pawar                                 if(0 == ps_ctxt->u1_disable_child_cu_decide)
2254*c83a76b0SSuyog Pawar                                 {
2255*c83a76b0SSuyog Pawar                                     ihevce_set_nbr_map(
2256*c83a76b0SSuyog Pawar                                         ps_ctxt->pu1_ctb_nbr_map,
2257*c83a76b0SSuyog Pawar                                         ps_ctxt->i4_nbr_map_strd,
2258*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_parent->u2_x0 << 1,
2259*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_parent->u2_y0 << 1,
2260*c83a76b0SSuyog Pawar                                         (ps_cu_node->ps_parent->u1_cu_size >> 2),
2261*c83a76b0SSuyog Pawar                                         0);
2262*c83a76b0SSuyog Pawar 
2263*c83a76b0SSuyog Pawar                                     //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
2264*c83a76b0SSuyog Pawar 
2265*c83a76b0SSuyog Pawar                                     /* Eval for TUSize = CuSize */
2266*c83a76b0SSuyog Pawar                                     ihevce_mode_eval_filtering(
2267*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_parent,
2268*c83a76b0SSuyog Pawar                                         ps_cu_node,
2269*c83a76b0SSuyog Pawar                                         ps_ctxt,
2270*c83a76b0SSuyog Pawar                                         ps_curr_src,
2271*c83a76b0SSuyog Pawar                                         26,
2272*c83a76b0SSuyog Pawar                                         &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
2273*c83a76b0SSuyog Pawar                                         &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
2274*c83a76b0SSuyog Pawar                                         step2_bypass,
2275*c83a76b0SSuyog Pawar                                         1);
2276*c83a76b0SSuyog Pawar 
2277*c83a76b0SSuyog Pawar                                     if(i4_enable_1cu_4tu)
2278*c83a76b0SSuyog Pawar                                     {
2279*c83a76b0SSuyog Pawar                                         /* Eval for TUSize = CuSize/2 */
2280*c83a76b0SSuyog Pawar                                         ihevce_mode_eval_filtering(
2281*c83a76b0SSuyog Pawar                                             ps_cu_node->ps_parent,
2282*c83a76b0SSuyog Pawar                                             ps_cu_node,
2283*c83a76b0SSuyog Pawar                                             ps_ctxt,
2284*c83a76b0SSuyog Pawar                                             ps_curr_src,
2285*c83a76b0SSuyog Pawar                                             26,
2286*c83a76b0SSuyog Pawar                                             &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
2287*c83a76b0SSuyog Pawar                                             &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
2288*c83a76b0SSuyog Pawar                                             step2_bypass,
2289*c83a76b0SSuyog Pawar                                             0);
2290*c83a76b0SSuyog Pawar                                     }
2291*c83a76b0SSuyog Pawar                                     else
2292*c83a76b0SSuyog Pawar                                     {
2293*c83a76b0SSuyog Pawar                                         /* 4TU not evaluated :  4tu modes set same as 1tu modes */
2294*c83a76b0SSuyog Pawar                                         memcpy(
2295*c83a76b0SSuyog Pawar                                             &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
2296*c83a76b0SSuyog Pawar                                             &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
2297*c83a76b0SSuyog Pawar                                             NUM_BEST_MODES);
2298*c83a76b0SSuyog Pawar 
2299*c83a76b0SSuyog Pawar                                         /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
2300*c83a76b0SSuyog Pawar                                         memcpy(
2301*c83a76b0SSuyog Pawar                                             &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
2302*c83a76b0SSuyog Pawar                                             &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
2303*c83a76b0SSuyog Pawar                                             NUM_BEST_MODES * sizeof(WORD32));
2304*c83a76b0SSuyog Pawar                                     }
2305*c83a76b0SSuyog Pawar 
2306*c83a76b0SSuyog Pawar                                     /* Update parent cost */
2307*c83a76b0SSuyog Pawar                                     parent_cost =
2308*c83a76b0SSuyog Pawar                                         MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0],
2309*c83a76b0SSuyog Pawar                                             ps_cu_node->ps_parent->au4_best_cost_1tu[0]);
2310*c83a76b0SSuyog Pawar 
2311*c83a76b0SSuyog Pawar                                     /* Select the best mode to be populated as top and left nbr depending on the
2312*c83a76b0SSuyog Pawar                             4tu and 1tu cost */
2313*c83a76b0SSuyog Pawar                                     if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] >
2314*c83a76b0SSuyog Pawar                                        ps_cu_node->ps_parent->au4_best_cost_1tu[0])
2315*c83a76b0SSuyog Pawar                                     {
2316*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_parent->best_mode =
2317*c83a76b0SSuyog Pawar                                             ps_cu_node->ps_parent->au1_best_mode_1tu[0];
2318*c83a76b0SSuyog Pawar                                     }
2319*c83a76b0SSuyog Pawar                                     else
2320*c83a76b0SSuyog Pawar                                     {
2321*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_parent->best_mode =
2322*c83a76b0SSuyog Pawar                                             ps_cu_node->ps_parent->au1_best_mode_4tu[0];
2323*c83a76b0SSuyog Pawar                                     }
2324*c83a76b0SSuyog Pawar                                 }
2325*c83a76b0SSuyog Pawar 
2326*c83a76b0SSuyog Pawar                                 /* set the CU valid flag */
2327*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->b1_valid_cu = 1;
2328*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->b1_enable_nxn = 0;
2329*c83a76b0SSuyog Pawar 
2330*c83a76b0SSuyog Pawar                                 /* storing the modes to intra 8 analyse */
2331*c83a76b0SSuyog Pawar 
2332*c83a76b0SSuyog Pawar                                 /* store the best 8x8 modes 8x8 tu */
2333*c83a76b0SSuyog Pawar                                 memcpy(
2334*c83a76b0SSuyog Pawar                                     &ps_intra8_analyse->au1_best_modes_8x8_tu[0],
2335*c83a76b0SSuyog Pawar                                     &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
2336*c83a76b0SSuyog Pawar                                     sizeof(UWORD8) * (NUM_BEST_MODES));
2337*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255;
2338*c83a76b0SSuyog Pawar 
2339*c83a76b0SSuyog Pawar                                 /* store the best 8x8 modes 4x4 tu */
2340*c83a76b0SSuyog Pawar                                 memcpy(
2341*c83a76b0SSuyog Pawar                                     &ps_intra8_analyse->au1_best_modes_4x4_tu[0],
2342*c83a76b0SSuyog Pawar                                     &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
2343*c83a76b0SSuyog Pawar                                     sizeof(UWORD8) * (NUM_BEST_MODES));
2344*c83a76b0SSuyog Pawar                                 ps_intra8_analyse->au1_best_modes_4x4_tu[NUM_BEST_MODES] = 255;
2345*c83a76b0SSuyog Pawar 
2346*c83a76b0SSuyog Pawar                                 /*As 8*8 has won, pick L1 4x4 qp which is equal to
2347*c83a76b0SSuyog Pawar                                 L1 8x8 Qp*/
2348*c83a76b0SSuyog Pawar                                 //ps_row_cu->u1_cu_possible_qp[0] = u1_cu_possible_qp;
2349*c83a76b0SSuyog Pawar                                 //ps_row_cu->i4_act_factor[0][1] = i4_act_factor;
2350*c83a76b0SSuyog Pawar 
2351*c83a76b0SSuyog Pawar                                 parent_best_mode = ps_cu_node->ps_parent->best_mode;
2352*c83a76b0SSuyog Pawar                                 if(parent_cost <=
2353*c83a76b0SSuyog Pawar                                    child_cost_least +
2354*c83a76b0SSuyog Pawar                                        (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >> LAMBDA_Q_SHIFT))
2355*c83a76b0SSuyog Pawar                                 {
2356*c83a76b0SSuyog Pawar                                     /*CU = 4TU */
2357*c83a76b0SSuyog Pawar                                     ihevce_update_cand_list(
2358*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
2359*c83a76b0SSuyog Pawar 
2360*c83a76b0SSuyog Pawar                                     /* store the child 8x8 costs */
2361*c83a76b0SSuyog Pawar                                     pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] =
2362*c83a76b0SSuyog Pawar                                         parent_cost;
2363*c83a76b0SSuyog Pawar 
2364*c83a76b0SSuyog Pawar                                     /* cost accumalation of best cu size candiate */
2365*c83a76b0SSuyog Pawar                                     /* cost accumulation of best cu size candidate */
2366*c83a76b0SSuyog Pawar 
2367*c83a76b0SSuyog Pawar                                     /*satd/mod_qp accumulation of best cu */
2368*c83a76b0SSuyog Pawar                                     i8_frame_acc_satd_by_modqp_q10 +=
2369*c83a76b0SSuyog Pawar                                         ((LWORD64)ps_cu_node->ps_parent->best_satd
2370*c83a76b0SSuyog Pawar                                          << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
2371*c83a76b0SSuyog Pawar                                         i4_q_scale_q3_mod;
2372*c83a76b0SSuyog Pawar 
2373*c83a76b0SSuyog Pawar                                     /* Accumalate mode bits for all child blocks */
2374*c83a76b0SSuyog Pawar                                     i8_frame_acc_mode_bits_cost +=
2375*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_parent->u2_mode_bits_cost;
2376*c83a76b0SSuyog Pawar 
2377*c83a76b0SSuyog Pawar                                     /* satd and mpm bits accumalation of best cu size candiate */
2378*c83a76b0SSuyog Pawar                                     i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd;
2379*c83a76b0SSuyog Pawar 
2380*c83a76b0SSuyog Pawar                                     /* accumulate the 16x16 cost*/
2381*c83a76b0SSuyog Pawar                                     if(MAX_INTRA_COST_IPE == *pi4_intra_16_cost)
2382*c83a76b0SSuyog Pawar                                     {
2383*c83a76b0SSuyog Pawar                                         *pi4_intra_16_cost = parent_cost;
2384*c83a76b0SSuyog Pawar                                     }
2385*c83a76b0SSuyog Pawar                                     else
2386*c83a76b0SSuyog Pawar                                     {
2387*c83a76b0SSuyog Pawar                                         *pi4_intra_16_cost += parent_cost;
2388*c83a76b0SSuyog Pawar                                     }
2389*c83a76b0SSuyog Pawar 
2390*c83a76b0SSuyog Pawar                                     /* accumulate the 32x32 cost*/
2391*c83a76b0SSuyog Pawar                                     if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost)
2392*c83a76b0SSuyog Pawar                                     {
2393*c83a76b0SSuyog Pawar                                         *pi4_intra_32_cost = parent_cost;
2394*c83a76b0SSuyog Pawar                                     }
2395*c83a76b0SSuyog Pawar                                     else
2396*c83a76b0SSuyog Pawar                                     {
2397*c83a76b0SSuyog Pawar                                         *pi4_intra_32_cost += parent_cost;
2398*c83a76b0SSuyog Pawar                                     }
2399*c83a76b0SSuyog Pawar                                 }
2400*c83a76b0SSuyog Pawar                                 else
2401*c83a76b0SSuyog Pawar                                 {
2402*c83a76b0SSuyog Pawar                                     /*CU = 4PU*/
2403*c83a76b0SSuyog Pawar                                     //ps_row_cu->b3_cu_pos_x = (UWORD8) ps_cu_node->ps_parent->u2_x0;
2404*c83a76b0SSuyog Pawar                                     //ps_row_cu->b3_cu_pos_y = (UWORD8) ps_cu_node->ps_parent->u2_y0;
2405*c83a76b0SSuyog Pawar                                     //ps_row_cu->u1_cu_size  = ps_cu_node->ps_parent->u1_cu_size;
2406*c83a76b0SSuyog Pawar 
2407*c83a76b0SSuyog Pawar                                     /* store the child 8x8 costs with 4x4 pu summed cost */
2408*c83a76b0SSuyog Pawar                                     pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] =
2409*c83a76b0SSuyog Pawar                                         (child_cost_least);
2410*c83a76b0SSuyog Pawar 
2411*c83a76b0SSuyog Pawar                                     /* accumulate the 16x16 cost*/
2412*c83a76b0SSuyog Pawar                                     if(MAX_INTRA_COST_IPE == *pi4_intra_16_cost)
2413*c83a76b0SSuyog Pawar                                     {
2414*c83a76b0SSuyog Pawar                                         *pi4_intra_16_cost = child_cost_least;
2415*c83a76b0SSuyog Pawar                                     }
2416*c83a76b0SSuyog Pawar                                     else
2417*c83a76b0SSuyog Pawar                                     {
2418*c83a76b0SSuyog Pawar                                         *pi4_intra_16_cost += child_cost_least;
2419*c83a76b0SSuyog Pawar                                     }
2420*c83a76b0SSuyog Pawar 
2421*c83a76b0SSuyog Pawar                                     /* cost accumalation of best cu size candiate */
2422*c83a76b0SSuyog Pawar                                     i8_frame_acc_satd_cost += child_cost_least;
2423*c83a76b0SSuyog Pawar 
2424*c83a76b0SSuyog Pawar                                     for(j = 0; j < 4; j++)
2425*c83a76b0SSuyog Pawar                                     {
2426*c83a76b0SSuyog Pawar                                         /*satd/qp accumualtion*/
2427*c83a76b0SSuyog Pawar                                         i8_frame_acc_satd_by_modqp_q10 +=
2428*c83a76b0SSuyog Pawar                                             ((LWORD64)child_satd[j]
2429*c83a76b0SSuyog Pawar                                              << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
2430*c83a76b0SSuyog Pawar                                             i4_q_scale_q3_mod;
2431*c83a76b0SSuyog Pawar 
2432*c83a76b0SSuyog Pawar                                         /* Accumalate mode bits for all child blocks */
2433*c83a76b0SSuyog Pawar                                         i8_frame_acc_mode_bits_cost +=
2434*c83a76b0SSuyog Pawar                                             ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost;
2435*c83a76b0SSuyog Pawar 
2436*c83a76b0SSuyog Pawar                                         /* satd and mpm bits accumalation of best cu size candiate */
2437*c83a76b0SSuyog Pawar                                         i4_ctb_acc_satd += child_satd[j];
2438*c83a76b0SSuyog Pawar                                     }
2439*c83a76b0SSuyog Pawar 
2440*c83a76b0SSuyog Pawar                                     /* accumulate the 32x32 cost*/
2441*c83a76b0SSuyog Pawar                                     if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost)
2442*c83a76b0SSuyog Pawar                                     {
2443*c83a76b0SSuyog Pawar                                         *pi4_intra_32_cost = child_cost_least;
2444*c83a76b0SSuyog Pawar                                     }
2445*c83a76b0SSuyog Pawar                                     else
2446*c83a76b0SSuyog Pawar                                     {
2447*c83a76b0SSuyog Pawar                                         *pi4_intra_32_cost += child_cost_least;
2448*c83a76b0SSuyog Pawar                                     }
2449*c83a76b0SSuyog Pawar 
2450*c83a76b0SSuyog Pawar                                     ps_intra8_analyse->b1_enable_nxn = 1;
2451*c83a76b0SSuyog Pawar 
2452*c83a76b0SSuyog Pawar                                     /* Insert the best 8x8 modes unconditionally */
2453*c83a76b0SSuyog Pawar 
2454*c83a76b0SSuyog Pawar                                     x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1;
2455*c83a76b0SSuyog Pawar                                     y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;
2456*c83a76b0SSuyog Pawar                                     size = ps_cu_node->u1_cu_size >> 2;
2457*c83a76b0SSuyog Pawar 
2458*c83a76b0SSuyog Pawar                                     ps_ctxt->au1_ctb_mode_map[y][x] =
2459*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_sub_cu[0]->best_mode;
2460*c83a76b0SSuyog Pawar                                     ps_ctxt->au1_ctb_mode_map[y][x + 1] =
2461*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_sub_cu[1]->best_mode;
2462*c83a76b0SSuyog Pawar                                     ps_ctxt->au1_ctb_mode_map[y + 1][x] =
2463*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_sub_cu[2]->best_mode;
2464*c83a76b0SSuyog Pawar                                     ps_ctxt->au1_ctb_mode_map[y + 1][x + 1] =
2465*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_sub_cu[3]->best_mode;
2466*c83a76b0SSuyog Pawar                                 }
2467*c83a76b0SSuyog Pawar                                 /* NXN mode population */
2468*c83a76b0SSuyog Pawar                                 for(j = 0; j < 4; j++)
2469*c83a76b0SSuyog Pawar                                 {
2470*c83a76b0SSuyog Pawar                                     cand_mode_list[0] =
2471*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0];
2472*c83a76b0SSuyog Pawar                                     cand_mode_list[1] =
2473*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[1];
2474*c83a76b0SSuyog Pawar                                     cand_mode_list[2] =
2475*c83a76b0SSuyog Pawar                                         ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[2];
2476*c83a76b0SSuyog Pawar 
2477*c83a76b0SSuyog Pawar                                     if(1)
2478*c83a76b0SSuyog Pawar                                     {
2479*c83a76b0SSuyog Pawar                                         /* Populate best 3 nxn modes */
2480*c83a76b0SSuyog Pawar                                         ps_intra8_analyse->au1_4x4_best_modes[j][0] =
2481*c83a76b0SSuyog Pawar                                             cand_mode_list[0];
2482*c83a76b0SSuyog Pawar                                         ps_intra8_analyse->au1_4x4_best_modes[j][1] =
2483*c83a76b0SSuyog Pawar                                             cand_mode_list[1];  //(ps_ed + 1)->best_mode;
2484*c83a76b0SSuyog Pawar                                         ps_intra8_analyse->au1_4x4_best_modes[j][2] =
2485*c83a76b0SSuyog Pawar                                             cand_mode_list[2];  //(ps_ed + 2)->best_mode;
2486*c83a76b0SSuyog Pawar                                         ps_intra8_analyse->au1_4x4_best_modes[j][3] = 255;
2487*c83a76b0SSuyog Pawar 
2488*c83a76b0SSuyog Pawar                                         //memcpy(ps_intra8_analyse->au1_4x4_best_modes[j], ps_row_cu->s_cu_intra_cand.au1_intra_luma_modes_nxn[j], 4);
2489*c83a76b0SSuyog Pawar                                     }
2490*c83a76b0SSuyog Pawar                                     /* For HQ, all 35 modes to be used for RDOPT, removed from here for memory clean-up */
2491*c83a76b0SSuyog Pawar 
2492*c83a76b0SSuyog Pawar                                     else /* IHEVCE_QUALITY_P0 == i4_quality_preset */
2493*c83a76b0SSuyog Pawar                                     {
2494*c83a76b0SSuyog Pawar                                         /* To indicate to enc loop that NXN is enabled in HIGH QUALITY for CU 8x8 */
2495*c83a76b0SSuyog Pawar                                         ps_intra8_analyse->au1_4x4_best_modes[j][0] = 0;
2496*c83a76b0SSuyog Pawar                                     }
2497*c83a76b0SSuyog Pawar 
2498*c83a76b0SSuyog Pawar                                     ps_intra8_analyse
2499*c83a76b0SSuyog Pawar                                         ->au1_4x4_best_modes[j][MAX_INTRA_CU_CANDIDATES] = 255;
2500*c83a76b0SSuyog Pawar                                 }
2501*c83a76b0SSuyog Pawar 
2502*c83a76b0SSuyog Pawar                                 //ps_row_cu++;
2503*c83a76b0SSuyog Pawar                             }
2504*c83a76b0SSuyog Pawar                             else
2505*c83a76b0SSuyog Pawar                             {
2506*c83a76b0SSuyog Pawar                                 /* For Incomplete CTB, 16x16 is not valid */
2507*c83a76b0SSuyog Pawar                                 ps_intra16_analyse->b1_valid_cu = 0;
2508*c83a76b0SSuyog Pawar                             }
2509*c83a76b0SSuyog Pawar                             blk_cnt++;
2510*c83a76b0SSuyog Pawar                             ps_ed_blk_l1++;
2511*c83a76b0SSuyog Pawar                         }
2512*c83a76b0SSuyog Pawar                         //ps_ed_blk_l2 ++;
2513*c83a76b0SSuyog Pawar                     }  //else of EIID
2514*c83a76b0SSuyog Pawar #endif
2515*c83a76b0SSuyog Pawar                 }
2516*c83a76b0SSuyog Pawar             }
2517*c83a76b0SSuyog Pawar             else
2518*c83a76b0SSuyog Pawar             {
2519*c83a76b0SSuyog Pawar                 /* For incomplete CTB, init valid CU to 0 */
2520*c83a76b0SSuyog Pawar                 ps_ed_blk_l1++;
2521*c83a76b0SSuyog Pawar                 ps_intra32_analyse->b1_valid_cu = 0;
2522*c83a76b0SSuyog Pawar                 ps_intra16_analyse[0].b1_valid_cu = 0;
2523*c83a76b0SSuyog Pawar                 blk_cnt++;
2524*c83a76b0SSuyog Pawar                 merge_64x64 = 0;
2525*c83a76b0SSuyog Pawar             }
2526*c83a76b0SSuyog Pawar         } while(blk_cnt != MAX_CTB_SIZE);
2527*c83a76b0SSuyog Pawar         /* if 64x64 merge is possible then check for 32x32 having same best modes */
2528*c83a76b0SSuyog Pawar         if(1 == merge_64x64)
2529*c83a76b0SSuyog Pawar         {
2530*c83a76b0SSuyog Pawar             WORD32 act_mode = au1_best_32x32_modes[0];
2531*c83a76b0SSuyog Pawar 
2532*c83a76b0SSuyog Pawar             ps_ed_blk_l2 = ps_ed_l2_ctb;
2533*c83a76b0SSuyog Pawar             best_mode = ps_ed_blk_l2->best_mode;
2534*c83a76b0SSuyog Pawar             merge_64x64 =
2535*c83a76b0SSuyog Pawar                 ((act_mode == au1_best_32x32_modes[0]) + (act_mode == au1_best_32x32_modes[1]) +
2536*c83a76b0SSuyog Pawar                      (act_mode == au1_best_32x32_modes[2]) +
2537*c83a76b0SSuyog Pawar                      (act_mode == au1_best_32x32_modes[3]) ==
2538*c83a76b0SSuyog Pawar                  4);
2539*c83a76b0SSuyog Pawar             if(merge_64x64 == 1)
2540*c83a76b0SSuyog Pawar                 best_mode = au1_best_32x32_modes[0];
2541*c83a76b0SSuyog Pawar             else
2542*c83a76b0SSuyog Pawar                 best_mode = ps_ed_blk_l2->best_mode;
2543*c83a76b0SSuyog Pawar             /* All 32x32 costs are accumulated to 64x64 cost */
2544*c83a76b0SSuyog Pawar             ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = 0;
2545*c83a76b0SSuyog Pawar             for(i = 0; i < 4; i++)
2546*c83a76b0SSuyog Pawar             {
2547*c83a76b0SSuyog Pawar                 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost +=
2548*c83a76b0SSuyog Pawar                     ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i];
2549*c83a76b0SSuyog Pawar             }
2550*c83a76b0SSuyog Pawar 
2551*c83a76b0SSuyog Pawar             /* If the best modes of the four 32x32 blocks are not all the same */
2552*c83a76b0SSuyog Pawar             if(0 == merge_64x64)
2553*c83a76b0SSuyog Pawar             {
2554*c83a76b0SSuyog Pawar                 /* Compute child cost for 64x64 as the sum of the four 32x32 best costs */
2555*c83a76b0SSuyog Pawar                 WORD32 child_cost_64x64 = au4_best_32x32_cost[0] + au4_best_32x32_cost[1] +
2556*c83a76b0SSuyog Pawar                                           au4_best_32x32_cost[2] + au4_best_32x32_cost[3];
2557*c83a76b0SSuyog Pawar                 WORD32 cost = MAX_INTRA_COST_IPE;
2558*c83a76b0SSuyog Pawar 
2559*c83a76b0SSuyog Pawar                 WORD32 best_mode_temp = 0;
2560*c83a76b0SSuyog Pawar                 /*Compute 64x64 cost for each mode of 32x32*/
2561*c83a76b0SSuyog Pawar                 for(i = 0; i < 4; i++)
2562*c83a76b0SSuyog Pawar                 {
2563*c83a76b0SSuyog Pawar                     WORD32 mode = au1_best_32x32_modes[i];
2564*c83a76b0SSuyog Pawar                     if(mode < 2)
2565*c83a76b0SSuyog Pawar                         mode = 26;
2566*c83a76b0SSuyog Pawar                     ps_cu_node->ps_parent->u1_cu_size = 64;
2567*c83a76b0SSuyog Pawar                     ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[0]; /* Populate properly */
2568*c83a76b0SSuyog Pawar                     ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[0]; /* Populate properly */
2569*c83a76b0SSuyog Pawar 
2570*c83a76b0SSuyog Pawar                     ihevce_set_nbr_map(
2571*c83a76b0SSuyog Pawar                         ps_ctxt->pu1_ctb_nbr_map,
2572*c83a76b0SSuyog Pawar                         ps_ctxt->i4_nbr_map_strd,
2573*c83a76b0SSuyog Pawar                         (ps_cu_node->ps_parent->u2_x0 << 1),
2574*c83a76b0SSuyog Pawar                         (ps_cu_node->ps_parent->u2_y0 << 1),
2575*c83a76b0SSuyog Pawar                         (ps_cu_node->ps_parent->u1_cu_size >> 2),
2576*c83a76b0SSuyog Pawar                         0);
2577*c83a76b0SSuyog Pawar 
2578*c83a76b0SSuyog Pawar                     ihevce_mode_eval_filtering(
2579*c83a76b0SSuyog Pawar                         ps_cu_node->ps_parent,
2580*c83a76b0SSuyog Pawar                         ps_cu_node,
2581*c83a76b0SSuyog Pawar                         ps_ctxt,
2582*c83a76b0SSuyog Pawar                         ps_curr_src,
2583*c83a76b0SSuyog Pawar                         mode,
2584*c83a76b0SSuyog Pawar                         &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
2585*c83a76b0SSuyog Pawar                         &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
2586*c83a76b0SSuyog Pawar                         !step2_bypass,
2587*c83a76b0SSuyog Pawar                         0);
2588*c83a76b0SSuyog Pawar 
2589*c83a76b0SSuyog Pawar                     parent_cost = ps_cu_node->ps_parent->best_cost;
2590*c83a76b0SSuyog Pawar                     if(cost > parent_cost)
2591*c83a76b0SSuyog Pawar                     {
2592*c83a76b0SSuyog Pawar                         cost = parent_cost;
2593*c83a76b0SSuyog Pawar                         best_mode_temp = ps_cu_node->ps_parent->best_mode;
2594*c83a76b0SSuyog Pawar                     }
2595*c83a76b0SSuyog Pawar                 }
2596*c83a76b0SSuyog Pawar                 if(cost < child_cost_64x64)
2597*c83a76b0SSuyog Pawar                 {
2598*c83a76b0SSuyog Pawar                     merge_64x64 = 1;
2599*c83a76b0SSuyog Pawar                     best_mode = best_mode_temp;
2600*c83a76b0SSuyog Pawar 
2601*c83a76b0SSuyog Pawar                     /* Update 64x64 cost if CU 64x64 is chosen  */
2602*c83a76b0SSuyog Pawar                     ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = cost;
2603*c83a76b0SSuyog Pawar 
2604*c83a76b0SSuyog Pawar                     /* Accumulate the least cost for CU 64x64 */
2605*c83a76b0SSuyog Pawar                     i8_frame_acc_satd_cost = cost;
2606*c83a76b0SSuyog Pawar                     i8_frame_acc_mode_bits_cost = ps_cu_node->ps_parent->u2_mode_bits_cost;
2607*c83a76b0SSuyog Pawar 
2608*c83a76b0SSuyog Pawar                     /* satd and mpm bits accumalation of best cu size candiate */
2609*c83a76b0SSuyog Pawar                     i4_ctb_acc_satd = ps_cu_node->ps_parent->best_satd;
2610*c83a76b0SSuyog Pawar                 }
2611*c83a76b0SSuyog Pawar             }
2612*c83a76b0SSuyog Pawar         }
2613*c83a76b0SSuyog Pawar 
2614*c83a76b0SSuyog Pawar         if(merge_64x64)
2615*c83a76b0SSuyog Pawar         {
2616*c83a76b0SSuyog Pawar             WORD32 i, j;
2617*c83a76b0SSuyog Pawar             intra32_analyse_t *ps_intra32_analyse;
2618*c83a76b0SSuyog Pawar             intra16_analyse_t *ps_intra16_analyse;
2619*c83a76b0SSuyog Pawar             WORD32 row, col;
2620*c83a76b0SSuyog Pawar             WORD32 i4_q_scale_q3_mod;
2621*c83a76b0SSuyog Pawar             WORD8 i1_cu_possible_qp;
2622*c83a76b0SSuyog Pawar             WORD32 i4_act_factor;
2623*c83a76b0SSuyog Pawar             //ps_row_cu = ps_curr_cu;
2624*c83a76b0SSuyog Pawar             ps_ctb_out->u4_cu_split_flags = 0x0;
2625*c83a76b0SSuyog Pawar             ps_ed_blk_l1 = ps_ed_l1_ctb;
2626*c83a76b0SSuyog Pawar             ps_ed_blk_l2 = ps_ed_l2_ctb;
2627*c83a76b0SSuyog Pawar 
2628*c83a76b0SSuyog Pawar             ps_l0_ipe_out_ctb->u1_split_flag = 0;
2629*c83a76b0SSuyog Pawar 
2630*c83a76b0SSuyog Pawar             /* If CU size of 64x64 is chosen, disbale all the 16x16 flag*/
2631*c83a76b0SSuyog Pawar             for(i = 0; i < 4; i++)
2632*c83a76b0SSuyog Pawar             {
2633*c83a76b0SSuyog Pawar                 /* get the corresponding intra 32 analyse pointer  use (blk_cnt / 16) */
2634*c83a76b0SSuyog Pawar                 /* blk cnt is in terms of 8x8 units so a 32x32 will have 16 8x8 units */
2635*c83a76b0SSuyog Pawar                 ps_intra32_analyse = &ps_l0_ipe_out_ctb->as_intra32_analyse[i];
2636*c83a76b0SSuyog Pawar 
2637*c83a76b0SSuyog Pawar                 for(j = 0; j < 4; j++)
2638*c83a76b0SSuyog Pawar                 {
2639*c83a76b0SSuyog Pawar                     /* get the corresponding intra 16 analyse pointer use (blk_cnt & 0xF / 4)*/
2640*c83a76b0SSuyog Pawar                     /* blk cnt is in terms of 8x8 units so a 16x16 will have 4 8x8 units */
2641*c83a76b0SSuyog Pawar                     ps_intra16_analyse = &ps_intra32_analyse->as_intra16_analyse[j];
2642*c83a76b0SSuyog Pawar                     ps_intra16_analyse->b1_merge_flag = 0;
2643*c83a76b0SSuyog Pawar                 }
2644*c83a76b0SSuyog Pawar             }
2645*c83a76b0SSuyog Pawar 
2646*c83a76b0SSuyog Pawar             /* CU size 64x64 and fill the final cu params */
2647*c83a76b0SSuyog Pawar             //ps_row_cu->b3_cu_pos_x = gau1_cu_pos_x[0];
2648*c83a76b0SSuyog Pawar             //ps_row_cu->b3_cu_pos_y = gau1_cu_pos_y[0];
2649*c83a76b0SSuyog Pawar             //ps_row_cu->u1_cu_size  = 64;
2650*c83a76b0SSuyog Pawar 
2651*c83a76b0SSuyog Pawar             /* Candidate mode Update */
2652*c83a76b0SSuyog Pawar             cand_mode_list[0] = best_mode;
2653*c83a76b0SSuyog Pawar             if(cand_mode_list[0] > 1)
2654*c83a76b0SSuyog Pawar             {
2655*c83a76b0SSuyog Pawar                 if(cand_mode_list[0] == 2)
2656*c83a76b0SSuyog Pawar                 {
2657*c83a76b0SSuyog Pawar                     cand_mode_list[1] = 34;
2658*c83a76b0SSuyog Pawar                     cand_mode_list[2] = 3;
2659*c83a76b0SSuyog Pawar                 }
2660*c83a76b0SSuyog Pawar                 else if(cand_mode_list[0] == 34)
2661*c83a76b0SSuyog Pawar                 {
2662*c83a76b0SSuyog Pawar                     cand_mode_list[1] = 2;
2663*c83a76b0SSuyog Pawar                     cand_mode_list[2] = 33;
2664*c83a76b0SSuyog Pawar                 }
2665*c83a76b0SSuyog Pawar                 else
2666*c83a76b0SSuyog Pawar                 {
2667*c83a76b0SSuyog Pawar                     cand_mode_list[1] = cand_mode_list[0] - 1;
2668*c83a76b0SSuyog Pawar                     cand_mode_list[2] = cand_mode_list[0] + 1;
2669*c83a76b0SSuyog Pawar                 }
2670*c83a76b0SSuyog Pawar                 //cand_mode_list[1] = ps_ed_blk_l1->nang_attr.best_mode;
2671*c83a76b0SSuyog Pawar                 //cand_mode_list[2] = ps_ed_blk_l1->ang_attr.best_mode;
2672*c83a76b0SSuyog Pawar             }
2673*c83a76b0SSuyog Pawar             else
2674*c83a76b0SSuyog Pawar             {
2675*c83a76b0SSuyog Pawar                 cand_mode_list[0] = 0;
2676*c83a76b0SSuyog Pawar                 cand_mode_list[1] = 1;
2677*c83a76b0SSuyog Pawar                 cand_mode_list[2] = 26;
2678*c83a76b0SSuyog Pawar                 //cand_mode_list[2] = ps_ed_blk_l1->nang_attr.best_mode;
2679*c83a76b0SSuyog Pawar             }
2680*c83a76b0SSuyog Pawar 
2681*c83a76b0SSuyog Pawar             /* All 32x32 costs are accumalated to 64x64 cost */
2682*c83a76b0SSuyog Pawar             ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = 0;
2683*c83a76b0SSuyog Pawar             for(i = 0; i < 4; i++)
2684*c83a76b0SSuyog Pawar             {
2685*c83a76b0SSuyog Pawar                 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost +=
2686*c83a76b0SSuyog Pawar                     ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i];
2687*c83a76b0SSuyog Pawar             }
2688*c83a76b0SSuyog Pawar             /* by default 64x64 modes are set to default values DC and Planar */
2689*c83a76b0SSuyog Pawar             ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[0] = cand_mode_list[0];
2690*c83a76b0SSuyog Pawar             ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[1] = cand_mode_list[1];
2691*c83a76b0SSuyog Pawar             ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[2] = cand_mode_list[2];
2692*c83a76b0SSuyog Pawar             ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[3] = 255;
2693*c83a76b0SSuyog Pawar 
2694*c83a76b0SSuyog Pawar             /* Update CTB mode map for the finalised CU */
2695*c83a76b0SSuyog Pawar             x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1;
2696*c83a76b0SSuyog Pawar             y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;
2697*c83a76b0SSuyog Pawar             size = ps_cu_node->u1_cu_size >> 2;
2698*c83a76b0SSuyog Pawar 
2699*c83a76b0SSuyog Pawar             for(row = y; row < (y + size); row++)
2700*c83a76b0SSuyog Pawar             {
2701*c83a76b0SSuyog Pawar                 for(col = x; col < (x + size); col++)
2702*c83a76b0SSuyog Pawar                 {
2703*c83a76b0SSuyog Pawar                     ps_ctxt->au1_ctb_mode_map[row][col] = best_mode;
2704*c83a76b0SSuyog Pawar                 }
2705*c83a76b0SSuyog Pawar             }
2706*c83a76b0SSuyog Pawar 
2707*c83a76b0SSuyog Pawar             ihevce_set_nbr_map(
2708*c83a76b0SSuyog Pawar                 ps_ctxt->pu1_ctb_nbr_map,
2709*c83a76b0SSuyog Pawar                 ps_ctxt->i4_nbr_map_strd,
2710*c83a76b0SSuyog Pawar                 (ps_cu_node->u2_x0 << 1),
2711*c83a76b0SSuyog Pawar                 (ps_cu_node->u2_y0 << 1),
2712*c83a76b0SSuyog Pawar                 (ps_cu_node->u1_cu_size >> 2),
2713*c83a76b0SSuyog Pawar                 1);
2714*c83a76b0SSuyog Pawar 
2715*c83a76b0SSuyog Pawar             /*As 64*64 has won, pick L1 32x32 qp*/
2716*c83a76b0SSuyog Pawar             //ASSERT(((blk_cnt>>6) & 0xF) == (blk_cnt>>6));
2717*c83a76b0SSuyog Pawar             //ASSERT((blk_cnt>>6) == 0);
2718*c83a76b0SSuyog Pawar             ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][0] != -2);
2719*c83a76b0SSuyog Pawar             i1_cu_possible_qp = ihevce_cu_level_qp_mod(
2720*c83a76b0SSuyog Pawar                 ps_ctxt->i4_qscale,
2721*c83a76b0SSuyog Pawar                 ps_ed_ctb_l1->i4_32x32_satd[0][0],
2722*c83a76b0SSuyog Pawar                 ps_ctxt->ld_curr_frame_32x32_log_avg[0],
2723*c83a76b0SSuyog Pawar                 f_strength,
2724*c83a76b0SSuyog Pawar                 &i4_act_factor,
2725*c83a76b0SSuyog Pawar                 &i4_q_scale_q3_mod,
2726*c83a76b0SSuyog Pawar                 ps_ctxt->ps_rc_quant_ctxt);
2727*c83a76b0SSuyog Pawar 
2728*c83a76b0SSuyog Pawar             i8_frame_acc_satd_by_modqp_q10 =
2729*c83a76b0SSuyog Pawar                 (i8_frame_acc_satd_cost << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
2730*c83a76b0SSuyog Pawar                 i4_q_scale_q3_mod;
2731*c83a76b0SSuyog Pawar             /* Increment pointers */
2732*c83a76b0SSuyog Pawar             ps_ed_blk_l1 += 64;
2733*c83a76b0SSuyog Pawar             ps_ed_blk_l2 += 16;
2734*c83a76b0SSuyog Pawar             //ps_row_cu++;
2735*c83a76b0SSuyog Pawar         }
2736*c83a76b0SSuyog Pawar     }
2737*c83a76b0SSuyog Pawar 
2738*c83a76b0SSuyog Pawar     //ps_ctb_out->u1_num_cus_in_ctb = (UWORD8)(ps_row_cu - ps_curr_cu);
2739*c83a76b0SSuyog Pawar 
2740*c83a76b0SSuyog Pawar     {
2741*c83a76b0SSuyog Pawar         WORD32 i4_i, i4_j;
2742*c83a76b0SSuyog Pawar         WORD32 dummy;
2743*c83a76b0SSuyog Pawar         WORD8 i1_cu_qp;
2744*c83a76b0SSuyog Pawar         (void)i1_cu_qp;
2745*c83a76b0SSuyog Pawar         /*MAM_VAR_L1*/
2746*c83a76b0SSuyog Pawar         for(i4_j = 0; i4_j < 2; i4_j++)
2747*c83a76b0SSuyog Pawar         {
2748*c83a76b0SSuyog Pawar             i4_mod_factor_num = ps_ctxt->ai4_mod_factor_derived_by_variance[i4_j];
2749*c83a76b0SSuyog Pawar             f_strength = ps_ctxt->f_strength;
2750*c83a76b0SSuyog Pawar 
2751*c83a76b0SSuyog Pawar             //i4_mod_factor_num = 4;
2752*c83a76b0SSuyog Pawar 
2753*c83a76b0SSuyog Pawar             ps_ed_blk_l1 = ps_ed_l1_ctb;
2754*c83a76b0SSuyog Pawar             ps_ed_blk_l2 = ps_ed_l2_ctb;
2755*c83a76b0SSuyog Pawar             //ps_row_cu = ps_curr_cu;
2756*c83a76b0SSuyog Pawar 
2757*c83a76b0SSuyog Pawar             /*Valid only for complete CTB */
2758*c83a76b0SSuyog Pawar             if((64 == u1_curr_ctb_wdt) && (64 == u1_curr_ctb_hgt))
2759*c83a76b0SSuyog Pawar             {
2760*c83a76b0SSuyog Pawar                 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][0] != -2);
2761*c83a76b0SSuyog Pawar                 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][1] != -2);
2762*c83a76b0SSuyog Pawar                 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][2] != -2);
2763*c83a76b0SSuyog Pawar                 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][3] != -2);
2764*c83a76b0SSuyog Pawar 
2765*c83a76b0SSuyog Pawar                 i1_cu_qp = ihevce_cu_level_qp_mod(
2766*c83a76b0SSuyog Pawar                     ps_ctxt->i4_qscale,
2767*c83a76b0SSuyog Pawar                     ps_ed_ctb_l1->i4_32x32_satd[0][0],
2768*c83a76b0SSuyog Pawar                     ps_ctxt->ld_curr_frame_32x32_log_avg[0],
2769*c83a76b0SSuyog Pawar                     f_strength,
2770*c83a76b0SSuyog Pawar                     &ps_l0_ipe_out_ctb->i4_64x64_act_factor[0][i4_j],
2771*c83a76b0SSuyog Pawar                     &dummy,
2772*c83a76b0SSuyog Pawar                     ps_ctxt->ps_rc_quant_ctxt);
2773*c83a76b0SSuyog Pawar 
2774*c83a76b0SSuyog Pawar                 i1_cu_qp = ihevce_cu_level_qp_mod(
2775*c83a76b0SSuyog Pawar                     ps_ctxt->i4_qscale,
2776*c83a76b0SSuyog Pawar                     ps_ed_ctb_l1->i4_32x32_satd[0][1],
2777*c83a76b0SSuyog Pawar                     ps_ctxt->ld_curr_frame_32x32_log_avg[1],
2778*c83a76b0SSuyog Pawar                     f_strength,
2779*c83a76b0SSuyog Pawar                     &ps_l0_ipe_out_ctb->i4_64x64_act_factor[1][i4_j],
2780*c83a76b0SSuyog Pawar                     &dummy,
2781*c83a76b0SSuyog Pawar                     ps_ctxt->ps_rc_quant_ctxt);
2782*c83a76b0SSuyog Pawar                 i1_cu_qp = ihevce_cu_level_qp_mod(
2783*c83a76b0SSuyog Pawar                     ps_ctxt->i4_qscale,
2784*c83a76b0SSuyog Pawar                     ps_ed_ctb_l1->i4_32x32_satd[0][2],
2785*c83a76b0SSuyog Pawar                     ps_ctxt->ld_curr_frame_32x32_log_avg[2],
2786*c83a76b0SSuyog Pawar                     f_strength,
2787*c83a76b0SSuyog Pawar                     &ps_l0_ipe_out_ctb->i4_64x64_act_factor[2][i4_j],
2788*c83a76b0SSuyog Pawar                     &dummy,
2789*c83a76b0SSuyog Pawar                     ps_ctxt->ps_rc_quant_ctxt);
2790*c83a76b0SSuyog Pawar 
2791*c83a76b0SSuyog Pawar                 i1_cu_qp = ihevce_cu_level_qp_mod(
2792*c83a76b0SSuyog Pawar                     ps_ctxt->i4_qscale,
2793*c83a76b0SSuyog Pawar                     ps_ed_ctb_l1->i4_32x32_satd[0][3],
2794*c83a76b0SSuyog Pawar                     2.0 + ps_ctxt->ld_curr_frame_16x16_log_avg[0],
2795*c83a76b0SSuyog Pawar                     f_strength,
2796*c83a76b0SSuyog Pawar                     &ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j],
2797*c83a76b0SSuyog Pawar                     &dummy,
2798*c83a76b0SSuyog Pawar                     ps_ctxt->ps_rc_quant_ctxt);
2799*c83a76b0SSuyog Pawar 
2800*c83a76b0SSuyog Pawar                 ASSERT(ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j] > 0);
2801*c83a76b0SSuyog Pawar             }
2802*c83a76b0SSuyog Pawar             else
2803*c83a76b0SSuyog Pawar             {
2804*c83a76b0SSuyog Pawar                 ps_l0_ipe_out_ctb->i4_64x64_act_factor[0][i4_j] = 1024;
2805*c83a76b0SSuyog Pawar                 ps_l0_ipe_out_ctb->i4_64x64_act_factor[1][i4_j] = 1024;
2806*c83a76b0SSuyog Pawar                 ps_l0_ipe_out_ctb->i4_64x64_act_factor[2][i4_j] = 1024;
2807*c83a76b0SSuyog Pawar                 ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j] = 1024;
2808*c83a76b0SSuyog Pawar             }
2809*c83a76b0SSuyog Pawar 
2810*c83a76b0SSuyog Pawar             /*Store the 8x8 Qps from L2 (in raster order) as output of intra prediction
2811*c83a76b0SSuyog Pawar             for the usage by ME*/
2812*c83a76b0SSuyog Pawar 
2813*c83a76b0SSuyog Pawar             {
2814*c83a76b0SSuyog Pawar                 WORD32 pos_x_32, pos_y_32, pos;
2815*c83a76b0SSuyog Pawar                 //WORD32 i4_incomplete_ctb_val_8;
2816*c83a76b0SSuyog Pawar                 pos_x_32 = u1_curr_ctb_wdt / 16;
2817*c83a76b0SSuyog Pawar                 pos_y_32 = u1_curr_ctb_hgt / 16;
2818*c83a76b0SSuyog Pawar 
2819*c83a76b0SSuyog Pawar                 pos = (pos_x_32 < pos_y_32) ? pos_x_32 : pos_y_32;
2820*c83a76b0SSuyog Pawar 
2821*c83a76b0SSuyog Pawar                 for(i4_i = 0; i4_i < 4; i4_i++)
2822*c83a76b0SSuyog Pawar                 {
2823*c83a76b0SSuyog Pawar                     if(i4_i < pos)
2824*c83a76b0SSuyog Pawar                     {
2825*c83a76b0SSuyog Pawar                         ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][0] != -2);
2826*c83a76b0SSuyog Pawar                         ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][1] != -2);
2827*c83a76b0SSuyog Pawar                         ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][2] != -2);
2828*c83a76b0SSuyog Pawar                         i1_cu_qp = ihevce_cu_level_qp_mod(
2829*c83a76b0SSuyog Pawar                             ps_ctxt->i4_qscale,
2830*c83a76b0SSuyog Pawar                             ps_ed_ctb_l1->i4_16x16_satd[i4_i][0],
2831*c83a76b0SSuyog Pawar                             ps_ctxt->ld_curr_frame_16x16_log_avg[0],
2832*c83a76b0SSuyog Pawar                             f_strength,
2833*c83a76b0SSuyog Pawar                             &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][0][i4_j],
2834*c83a76b0SSuyog Pawar                             &dummy,
2835*c83a76b0SSuyog Pawar                             ps_ctxt->ps_rc_quant_ctxt);
2836*c83a76b0SSuyog Pawar                         i1_cu_qp = ihevce_cu_level_qp_mod(
2837*c83a76b0SSuyog Pawar                             ps_ctxt->i4_qscale,
2838*c83a76b0SSuyog Pawar                             ps_ed_ctb_l1->i4_16x16_satd[i4_i][1],
2839*c83a76b0SSuyog Pawar                             ps_ctxt->ld_curr_frame_16x16_log_avg[1],
2840*c83a76b0SSuyog Pawar                             f_strength,
2841*c83a76b0SSuyog Pawar                             &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][1][i4_j],
2842*c83a76b0SSuyog Pawar                             &dummy,
2843*c83a76b0SSuyog Pawar                             ps_ctxt->ps_rc_quant_ctxt);
2844*c83a76b0SSuyog Pawar                         i1_cu_qp = ihevce_cu_level_qp_mod(
2845*c83a76b0SSuyog Pawar                             ps_ctxt->i4_qscale,
2846*c83a76b0SSuyog Pawar                             ps_ed_ctb_l1->i4_16x16_satd[i4_i][2],
2847*c83a76b0SSuyog Pawar                             ps_ctxt->ld_curr_frame_16x16_log_avg[2],
2848*c83a76b0SSuyog Pawar                             f_strength,
2849*c83a76b0SSuyog Pawar                             &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][2][i4_j],
2850*c83a76b0SSuyog Pawar                             &dummy,
2851*c83a76b0SSuyog Pawar                             ps_ctxt->ps_rc_quant_ctxt);
2852*c83a76b0SSuyog Pawar                     }
2853*c83a76b0SSuyog Pawar                     else
2854*c83a76b0SSuyog Pawar                     {
2855*c83a76b0SSuyog Pawar                         /*For incomplete CTB */
2856*c83a76b0SSuyog Pawar                         ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][0][i4_j] = 1024;
2857*c83a76b0SSuyog Pawar                         ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][1][i4_j] = 1024;
2858*c83a76b0SSuyog Pawar                         ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][2][i4_j] = 1024;
2859*c83a76b0SSuyog Pawar                     }
2860*c83a76b0SSuyog Pawar                 }
2861*c83a76b0SSuyog Pawar             }
2862*c83a76b0SSuyog Pawar 
2863*c83a76b0SSuyog Pawar             /*Store the 8x8 Qps from L1 (in raster order) as output of intra prediction
2864*c83a76b0SSuyog Pawar             for the usage by ME*/
2865*c83a76b0SSuyog Pawar             {
2866*c83a76b0SSuyog Pawar                 WORD32 pos_x_16, pos_y_16, pos;
2867*c83a76b0SSuyog Pawar                 //WORD32 i4_incomplete_ctb_val_8;
2868*c83a76b0SSuyog Pawar                 pos_x_16 = u1_curr_ctb_wdt / 4;
2869*c83a76b0SSuyog Pawar                 pos_y_16 = u1_curr_ctb_hgt / 4;
2870*c83a76b0SSuyog Pawar 
2871*c83a76b0SSuyog Pawar                 pos = (pos_x_16 < pos_y_16) ? pos_x_16 : pos_y_16;
2872*c83a76b0SSuyog Pawar                 for(i4_i = 0; i4_i < 16; i4_i++)
2873*c83a76b0SSuyog Pawar                 {
2874*c83a76b0SSuyog Pawar                     if(i4_i < pos)
2875*c83a76b0SSuyog Pawar                     {
2876*c83a76b0SSuyog Pawar                         ASSERT(ps_ed_ctb_l1->i4_8x8_satd[i4_i][0] != -2);
2877*c83a76b0SSuyog Pawar                         ASSERT(ps_ed_ctb_l1->i4_8x8_satd[i4_i][1] != -2);
2878*c83a76b0SSuyog Pawar                         i1_cu_qp = ihevce_cu_level_qp_mod(
2879*c83a76b0SSuyog Pawar                             ps_ctxt->i4_qscale,
2880*c83a76b0SSuyog Pawar                             ps_ed_ctb_l1->i4_8x8_satd[i4_i][0],
2881*c83a76b0SSuyog Pawar                             ps_ctxt->ld_curr_frame_8x8_log_avg[0],
2882*c83a76b0SSuyog Pawar                             f_strength,
2883*c83a76b0SSuyog Pawar                             &ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][0][i4_j],
2884*c83a76b0SSuyog Pawar                             &dummy,
2885*c83a76b0SSuyog Pawar                             ps_ctxt->ps_rc_quant_ctxt);
2886*c83a76b0SSuyog Pawar                         i1_cu_qp = ihevce_cu_level_qp_mod(
2887*c83a76b0SSuyog Pawar                             ps_ctxt->i4_qscale,
2888*c83a76b0SSuyog Pawar                             ps_ed_ctb_l1->i4_8x8_satd[i4_i][1],
2889*c83a76b0SSuyog Pawar                             ps_ctxt->ld_curr_frame_8x8_log_avg[1],
2890*c83a76b0SSuyog Pawar                             f_strength,
2891*c83a76b0SSuyog Pawar                             &ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][1][i4_j],
2892*c83a76b0SSuyog Pawar                             &dummy,
2893*c83a76b0SSuyog Pawar                             ps_ctxt->ps_rc_quant_ctxt);
2894*c83a76b0SSuyog Pawar                     }
2895*c83a76b0SSuyog Pawar                     else
2896*c83a76b0SSuyog Pawar                     {
2897*c83a76b0SSuyog Pawar                         /*For incomplete CTB */
2898*c83a76b0SSuyog Pawar                         ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][0][i4_j] = 1024;
2899*c83a76b0SSuyog Pawar                         ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][1][i4_j] = 1024;
2900*c83a76b0SSuyog Pawar                     }
2901*c83a76b0SSuyog Pawar                 }
2902*c83a76b0SSuyog Pawar             }
2903*c83a76b0SSuyog Pawar         }  //for loop
2904*c83a76b0SSuyog Pawar 
2905*c83a76b0SSuyog Pawar         /* Accumalate the cost of ctb to the total cost */
2906*c83a76b0SSuyog Pawar         ps_ctxt->i8_frame_acc_satd_cost += i8_frame_acc_satd_cost;
2907*c83a76b0SSuyog Pawar         ps_ctxt->i8_frame_acc_satd_by_modqp_q10 += i8_frame_acc_satd_by_modqp_q10;
2908*c83a76b0SSuyog Pawar 
2909*c83a76b0SSuyog Pawar         ps_ctxt->i8_frame_acc_mode_bits_cost += i8_frame_acc_mode_bits_cost;
2910*c83a76b0SSuyog Pawar 
2911*c83a76b0SSuyog Pawar         /* satd and mpm bits accumalation of best cu size candiate for the ctb */
2912*c83a76b0SSuyog Pawar         ps_l0_ipe_out_ctb->i4_ctb_acc_satd = i4_ctb_acc_satd;
2913*c83a76b0SSuyog Pawar         ps_l0_ipe_out_ctb->i4_ctb_acc_mpm_bits = i8_frame_acc_mode_bits_cost;
2914*c83a76b0SSuyog Pawar 
2915*c83a76b0SSuyog Pawar         ps_ctxt->i8_frame_acc_satd += i4_ctb_acc_satd;
2916*c83a76b0SSuyog Pawar     }
2917*c83a76b0SSuyog Pawar 
2918*c83a76b0SSuyog Pawar     {
2919*c83a76b0SSuyog Pawar         WORD32 ctr_8x8;
2920*c83a76b0SSuyog Pawar         for(ctr_8x8 = 0; ctr_8x8 < (MAX_CU_IN_CTB >> 2); ctr_8x8++)
2921*c83a76b0SSuyog Pawar         {
2922*c83a76b0SSuyog Pawar             /*Accumalate activity factor for Intra and Inter*/
2923*c83a76b0SSuyog Pawar             if(ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_ipe[ctr_8x8] <
2924*c83a76b0SSuyog Pawar                ps_ed_ctb_l1->i4_sad_me_for_ref[ctr_8x8])
2925*c83a76b0SSuyog Pawar             {
2926*c83a76b0SSuyog Pawar                 ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8] =
2927*c83a76b0SSuyog Pawar                     ps_l0_ipe_out_ctb->i4_16x16_act_factor[ctr_8x8][1][0];
2928*c83a76b0SSuyog Pawar             }
2929*c83a76b0SSuyog Pawar             else
2930*c83a76b0SSuyog Pawar             {
2931*c83a76b0SSuyog Pawar                 ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8] =
2932*c83a76b0SSuyog Pawar                     ps_l0_ipe_out_ctb->i4_16x16_act_factor[ctr_8x8][1][0];
2933*c83a76b0SSuyog Pawar             }
2934*c83a76b0SSuyog Pawar 
2935*c83a76b0SSuyog Pawar             /*Accumalate activity factor at frame level*/
2936*c83a76b0SSuyog Pawar             ps_ctxt->i8_frame_acc_act_factor += ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8];
2937*c83a76b0SSuyog Pawar         }
2938*c83a76b0SSuyog Pawar     }
2939*c83a76b0SSuyog Pawar     return;
2940*c83a76b0SSuyog Pawar }
2941*c83a76b0SSuyog Pawar 
/*!
******************************************************************************
* \if Function name : ihevce_nxn_sad_computer \endif
*
* \brief
*    Computes the sum of absolute differences (SAD) between an input block
*    and a reference block, both trans_size x trans_size samples
*
* \param[in] pu1_inp : pointer to the top-left sample of the input block
* \param[in] i4_inp_stride : offset between successive rows of pu1_inp
* \param[in] pu1_ref : pointer to the top-left sample of the reference block
* \param[in] i4_ref_stride : offset between successive rows of pu1_ref
* \param[in] trans_size : number of rows and of columns that are compared
*
* \return
*    Accumulated SAD over the block (0 when trans_size is not positive)
*
*****************************************************************************
*/
WORD32 ihevce_nxn_sad_computer(
    UWORD8 *pu1_inp, WORD32 i4_inp_stride, UWORD8 *pu1_ref, WORD32 i4_ref_stride, WORD32 trans_size)
{
    WORD32 i4_sad_acc = 0;
    WORD32 row = 0;

    /* The block is square, so trans_size bounds both the rows and columns */
    while(row < trans_size)
    {
        WORD32 col;

        for(col = 0; col < trans_size; col++)
        {
            /* Widen both samples before subtracting so the difference is signed */
            i4_sad_acc += (ABS(((WORD32)pu1_inp[col] - (WORD32)pu1_ref[col])));
        }

        /* Step each buffer to its next row using its own stride */
        pu1_inp += i4_inp_stride;
        pu1_ref += i4_ref_stride;
        row++;
    }

    return i4_sad_acc;
}
2963*c83a76b0SSuyog Pawar 
2964*c83a76b0SSuyog Pawar /*!
2965*c83a76b0SSuyog Pawar ******************************************************************************
2966*c83a76b0SSuyog Pawar * \if Function name : ihevce_mode_eval_filtering \endif
2967*c83a76b0SSuyog Pawar *
2968*c83a76b0SSuyog Pawar * \brief
2969*c83a76b0SSuyog Pawar *    Evaluates best 3 modes for the given CU size with probable modes from,
2970*c83a76b0SSuyog Pawar *    early decision structure, mpm candidates and dc, planar mode
2971*c83a76b0SSuyog Pawar *
2972*c83a76b0SSuyog Pawar * \param[in] ps_cu_node : pointer to MAX cu node info buffer
2973*c83a76b0SSuyog Pawar * \param[in] ps_child_cu_node : pointer to (MAX - 1) cu node info buffer
2974*c83a76b0SSuyog Pawar * \param[in] ps_ctxt : pointer to IPE context struct
2975*c83a76b0SSuyog Pawar * \param[in] ps_curr_src : pointer to src pixels struct
2976*c83a76b0SSuyog Pawar * \param[in] best_amode : best angular mode from l1 layer or
2977*c83a76b0SSuyog Pawar                             from (MAX - 1) CU mode
2978*c83a76b0SSuyog Pawar * \param[in] best_costs_4x4  : pointer to 3 best cost buffer
2979*c83a76b0SSuyog Pawar * \param[in] best_modes_4x4  : pointer to 3 best mode buffer
2980*c83a76b0SSuyog Pawar * \param[in] step2_bypass : if 0, (MAX - 1) CU is evaluated
2981*c83a76b0SSuyog Pawar *                           if 1, (MAX CU) suggested is evaluated
2982*c83a76b0SSuyog Pawar * \param[in] tu_eq_cu     : indicates if tu size is same as cu or cu/2
2983*c83a76b0SSuyog Pawar *
2984*c83a76b0SSuyog Pawar * \return
2985*c83a76b0SSuyog Pawar *    None
2986*c83a76b0SSuyog Pawar *
2987*c83a76b0SSuyog Pawar * \author
2988*c83a76b0SSuyog Pawar *  Ittiam
2989*c83a76b0SSuyog Pawar *
2990*c83a76b0SSuyog Pawar *****************************************************************************
2991*c83a76b0SSuyog Pawar */
ihevce_mode_eval_filtering(ihevce_ipe_cu_tree_t * ps_cu_node,ihevce_ipe_cu_tree_t * ps_child_cu_node,ihevce_ipe_ctxt_t * ps_ctxt,iv_enc_yuv_buf_t * ps_curr_src,WORD32 best_amode,WORD32 * best_costs_4x4,UWORD8 * best_modes_4x4,WORD32 step2_bypass,WORD32 tu_eq_cu)2992*c83a76b0SSuyog Pawar void ihevce_mode_eval_filtering(
2993*c83a76b0SSuyog Pawar     ihevce_ipe_cu_tree_t *ps_cu_node,
2994*c83a76b0SSuyog Pawar     ihevce_ipe_cu_tree_t *ps_child_cu_node,
2995*c83a76b0SSuyog Pawar     ihevce_ipe_ctxt_t *ps_ctxt,
2996*c83a76b0SSuyog Pawar     iv_enc_yuv_buf_t *ps_curr_src,
2997*c83a76b0SSuyog Pawar     WORD32 best_amode,
2998*c83a76b0SSuyog Pawar     WORD32 *best_costs_4x4,
2999*c83a76b0SSuyog Pawar     UWORD8 *best_modes_4x4,
3000*c83a76b0SSuyog Pawar     WORD32 step2_bypass,
3001*c83a76b0SSuyog Pawar     WORD32 tu_eq_cu)
3002*c83a76b0SSuyog Pawar {
3003*c83a76b0SSuyog Pawar     UWORD8 *pu1_origin, *pu1_orig;
3004*c83a76b0SSuyog Pawar     WORD32 src_strd = ps_curr_src->i4_y_strd;
3005*c83a76b0SSuyog Pawar     WORD32 nbr_flags;
3006*c83a76b0SSuyog Pawar     nbr_avail_flags_t s_nbr;
3007*c83a76b0SSuyog Pawar     WORD32 trans_size = tu_eq_cu ? ps_cu_node->u1_cu_size : ps_cu_node->u1_cu_size >> 1;
3008*c83a76b0SSuyog Pawar     WORD32 num_tu_in_x = tu_eq_cu ? 1 : 2;
3009*c83a76b0SSuyog Pawar     WORD32 num_tu_in_y = tu_eq_cu ? 1 : 2;
3010*c83a76b0SSuyog Pawar     UWORD8 mode;
3011*c83a76b0SSuyog Pawar 
3012*c83a76b0SSuyog Pawar     WORD32 cost_ang_mode = MAX_INTRA_COST_IPE;
3013*c83a76b0SSuyog Pawar     WORD32 filter_flag;
3014*c83a76b0SSuyog Pawar     WORD32 cost_amode_step2[7] = { 0 };
3015*c83a76b0SSuyog Pawar     /*WORD32 best_sad[5];  // NOTE_A01: Not getting consumed at present */
3016*c83a76b0SSuyog Pawar     WORD32 sad = 0;
3017*c83a76b0SSuyog Pawar     WORD32 cu_pos_x, cu_pos_y;
3018*c83a76b0SSuyog Pawar     WORD32 temp;
3019*c83a76b0SSuyog Pawar     WORD32 i = 0, j, k, i_end, z;
3020*c83a76b0SSuyog Pawar     //WORD32 row, col, size;
3021*c83a76b0SSuyog Pawar     UWORD8 *pu1_ref;
3022*c83a76b0SSuyog Pawar     WORD32 xA, yA, xB, yB;
3023*c83a76b0SSuyog Pawar     WORD32 top_intra_mode;
3024*c83a76b0SSuyog Pawar     WORD32 left_intra_mode;
3025*c83a76b0SSuyog Pawar     UWORD8 *pu1_ref_orig = &ps_ctxt->au1_ref_samples[0];
3026*c83a76b0SSuyog Pawar     UWORD8 *pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0];
3027*c83a76b0SSuyog Pawar 
3028*c83a76b0SSuyog Pawar     UWORD8 modes_4x4[5] = { 0, 1, 2, 3, 4 };
3029*c83a76b0SSuyog Pawar     WORD32 count;
3030*c83a76b0SSuyog Pawar 
3031*c83a76b0SSuyog Pawar     pf_ipe_res_trans_had apf_resd_trns_had[4];
3032*c83a76b0SSuyog Pawar 
3033*c83a76b0SSuyog Pawar     WORD32 cand_mode_satd_list[3];
3034*c83a76b0SSuyog Pawar     ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
3035*c83a76b0SSuyog Pawar 
3036*c83a76b0SSuyog Pawar     ihevc_intra_pred_luma_ref_substitution_fptr =
3037*c83a76b0SSuyog Pawar         ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
3038*c83a76b0SSuyog Pawar 
3039*c83a76b0SSuyog Pawar     apf_resd_trns_had[0] = ps_ctxt->s_cmn_opt_func.pf_HAD_4x4_8bit;
3040*c83a76b0SSuyog Pawar     apf_resd_trns_had[1] = ps_ctxt->s_cmn_opt_func.pf_HAD_8x8_8bit;
3041*c83a76b0SSuyog Pawar     apf_resd_trns_had[2] = ps_ctxt->s_cmn_opt_func.pf_HAD_16x16_8bit;
3042*c83a76b0SSuyog Pawar     apf_resd_trns_had[3] = ps_ctxt->s_cmn_opt_func.pf_HAD_32x32_8bit;
3043*c83a76b0SSuyog Pawar 
3044*c83a76b0SSuyog Pawar     /* initialize modes_to_eval as zero */
3045*c83a76b0SSuyog Pawar     memset(&ps_ctxt->au1_modes_to_eval, 0, MAX_NUM_IP_MODES);
3046*c83a76b0SSuyog Pawar 
3047*c83a76b0SSuyog Pawar     /* Compute the Parent Cost */
3048*c83a76b0SSuyog Pawar 
3049*c83a76b0SSuyog Pawar     /* Pointer to top-left of the CU - y0,x0 in 8x8 granularity */
3050*c83a76b0SSuyog Pawar     pu1_orig = (UWORD8 *)(ps_curr_src->pv_y_buf) + ((ps_cu_node->u2_y0 << 3) * src_strd) +
3051*c83a76b0SSuyog Pawar                (ps_cu_node->u2_x0 << 3);
3052*c83a76b0SSuyog Pawar 
3053*c83a76b0SSuyog Pawar     /* Get position of CU within CTB at 4x4 granularity */
3054*c83a76b0SSuyog Pawar     cu_pos_x = ps_cu_node->u2_x0 << 1;
3055*c83a76b0SSuyog Pawar     cu_pos_y = ps_cu_node->u2_y0 << 1;
3056*c83a76b0SSuyog Pawar 
3057*c83a76b0SSuyog Pawar     /* get the neighbour availability flags */
3058*c83a76b0SSuyog Pawar     ihevce_get_only_nbr_flag(
3059*c83a76b0SSuyog Pawar         &s_nbr,
3060*c83a76b0SSuyog Pawar         ps_ctxt->pu1_ctb_nbr_map,
3061*c83a76b0SSuyog Pawar         ps_ctxt->i4_nbr_map_strd,
3062*c83a76b0SSuyog Pawar         cu_pos_x,
3063*c83a76b0SSuyog Pawar         cu_pos_y,
3064*c83a76b0SSuyog Pawar         trans_size >> 2,
3065*c83a76b0SSuyog Pawar         trans_size >> 2);
3066*c83a76b0SSuyog Pawar 
3067*c83a76b0SSuyog Pawar     /* Traverse for all 4 child blocks in the parent block */
3068*c83a76b0SSuyog Pawar     xA = (ps_cu_node->u2_x0 << 3) >> 2;
3069*c83a76b0SSuyog Pawar     yA = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;
3070*c83a76b0SSuyog Pawar     xB = xA + 1;
3071*c83a76b0SSuyog Pawar     yB = yA - 1;
3072*c83a76b0SSuyog Pawar     left_intra_mode = ps_ctxt->au1_ctb_mode_map[yA][xA];
3073*c83a76b0SSuyog Pawar     top_intra_mode = ps_ctxt->au1_ctb_mode_map[yB][xB];
3074*c83a76b0SSuyog Pawar     /* call the function which populates sad cost for all the modes */
3075*c83a76b0SSuyog Pawar 
3076*c83a76b0SSuyog Pawar     ihevce_intra_populate_mode_bits_cost_bracketing(
3077*c83a76b0SSuyog Pawar         top_intra_mode,
3078*c83a76b0SSuyog Pawar         left_intra_mode,
3079*c83a76b0SSuyog Pawar         s_nbr.u1_top_avail,
3080*c83a76b0SSuyog Pawar         s_nbr.u1_left_avail,
3081*c83a76b0SSuyog Pawar         ps_cu_node->u2_y0,
3082*c83a76b0SSuyog Pawar         &ps_ctxt->au2_mode_bits_satd_cost[0],
3083*c83a76b0SSuyog Pawar         &ps_ctxt->au2_mode_bits_satd[0],
3084*c83a76b0SSuyog Pawar         ps_ctxt->i4_ol_satd_lambda,
3085*c83a76b0SSuyog Pawar         cand_mode_satd_list);
3086*c83a76b0SSuyog Pawar 
3087*c83a76b0SSuyog Pawar     for(k = 0; k < num_tu_in_y; k++)
3088*c83a76b0SSuyog Pawar     {
3089*c83a76b0SSuyog Pawar         for(j = 0; j < num_tu_in_x; j++)
3090*c83a76b0SSuyog Pawar         {
3091*c83a76b0SSuyog Pawar             /* get the neighbour availability flags */
3092*c83a76b0SSuyog Pawar             nbr_flags = ihevce_get_nbr_intra(
3093*c83a76b0SSuyog Pawar                 &s_nbr,
3094*c83a76b0SSuyog Pawar                 ps_ctxt->pu1_ctb_nbr_map,
3095*c83a76b0SSuyog Pawar                 ps_ctxt->i4_nbr_map_strd,
3096*c83a76b0SSuyog Pawar                 cu_pos_x + ((j) * (trans_size >> 2)),
3097*c83a76b0SSuyog Pawar                 cu_pos_y + ((k) * (trans_size >> 2)),
3098*c83a76b0SSuyog Pawar                 trans_size >> 2);
3099*c83a76b0SSuyog Pawar 
3100*c83a76b0SSuyog Pawar             pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size);
3101*c83a76b0SSuyog Pawar 
3102*c83a76b0SSuyog Pawar             /* Create reference samples array */
3103*c83a76b0SSuyog Pawar             ihevc_intra_pred_luma_ref_substitution_fptr(
3104*c83a76b0SSuyog Pawar                 pu1_origin - src_strd - 1,
3105*c83a76b0SSuyog Pawar                 pu1_origin - src_strd,
3106*c83a76b0SSuyog Pawar                 pu1_origin - 1,
3107*c83a76b0SSuyog Pawar                 src_strd,
3108*c83a76b0SSuyog Pawar                 trans_size,
3109*c83a76b0SSuyog Pawar                 nbr_flags,
3110*c83a76b0SSuyog Pawar                 pu1_ref_orig,
3111*c83a76b0SSuyog Pawar                 0);
3112*c83a76b0SSuyog Pawar 
3113*c83a76b0SSuyog Pawar             /* Perform reference samples filtering */
3114*c83a76b0SSuyog Pawar             ihevce_intra_pred_ref_filtering(pu1_ref_orig, trans_size, pu1_ref_filt);
3115*c83a76b0SSuyog Pawar 
3116*c83a76b0SSuyog Pawar             ihevce_set_nbr_map(
3117*c83a76b0SSuyog Pawar                 ps_ctxt->pu1_ctb_nbr_map,
3118*c83a76b0SSuyog Pawar                 ps_ctxt->i4_nbr_map_strd,
3119*c83a76b0SSuyog Pawar                 cu_pos_x + ((j) * (trans_size >> 2)),
3120*c83a76b0SSuyog Pawar                 cu_pos_y + ((k) * (trans_size >> 2)),
3121*c83a76b0SSuyog Pawar                 (trans_size >> 2),
3122*c83a76b0SSuyog Pawar                 1);
3123*c83a76b0SSuyog Pawar 
3124*c83a76b0SSuyog Pawar             pu1_ref_orig += (4 * MAX_CTB_SIZE + 1);
3125*c83a76b0SSuyog Pawar             pu1_ref_filt += (4 * MAX_CTB_SIZE + 1);
3126*c83a76b0SSuyog Pawar         }
3127*c83a76b0SSuyog Pawar     }
3128*c83a76b0SSuyog Pawar 
3129*c83a76b0SSuyog Pawar     /* Revaluation for angular mode */
3130*c83a76b0SSuyog Pawar     //if(ps_ed_blk->ang_attr.mode_present == 1)
3131*c83a76b0SSuyog Pawar     //if(((best_amode & 0x1) != 1))
3132*c83a76b0SSuyog Pawar 
3133*c83a76b0SSuyog Pawar     {
3134*c83a76b0SSuyog Pawar         WORD32 u1_trans_idx = trans_size >> 3;
3135*c83a76b0SSuyog Pawar         if(trans_size == 32)
3136*c83a76b0SSuyog Pawar             u1_trans_idx = 3;
3137*c83a76b0SSuyog Pawar         //best_amode = ps_ed_blk->ang_attr.best_mode;
3138*c83a76b0SSuyog Pawar 
3139*c83a76b0SSuyog Pawar         i = 0;
3140*c83a76b0SSuyog Pawar         if(!step2_bypass)
3141*c83a76b0SSuyog Pawar         {
3142*c83a76b0SSuyog Pawar             /* Around best level 4 angular mode, search for best level 2 mode */
3143*c83a76b0SSuyog Pawar             ASSERT((best_amode >= 2) && (best_amode <= 34));
3144*c83a76b0SSuyog Pawar 
3145*c83a76b0SSuyog Pawar             if(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P3)
3146*c83a76b0SSuyog Pawar             {
3147*c83a76b0SSuyog Pawar                 if(best_amode >= 4)
3148*c83a76b0SSuyog Pawar                     ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode - 2;
3149*c83a76b0SSuyog Pawar             }
3150*c83a76b0SSuyog Pawar 
3151*c83a76b0SSuyog Pawar             ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode;
3152*c83a76b0SSuyog Pawar 
3153*c83a76b0SSuyog Pawar             if(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P3)
3154*c83a76b0SSuyog Pawar             {
3155*c83a76b0SSuyog Pawar                 if(best_amode <= 32)
3156*c83a76b0SSuyog Pawar                     ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode + 2;
3157*c83a76b0SSuyog Pawar             }
3158*c83a76b0SSuyog Pawar         }
3159*c83a76b0SSuyog Pawar         else
3160*c83a76b0SSuyog Pawar         {
3161*c83a76b0SSuyog Pawar             ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[0]->best_mode;
3162*c83a76b0SSuyog Pawar             ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[1]->best_mode;
3163*c83a76b0SSuyog Pawar             ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[2]->best_mode;
3164*c83a76b0SSuyog Pawar             ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[3]->best_mode;
3165*c83a76b0SSuyog Pawar         }
3166*c83a76b0SSuyog Pawar 
3167*c83a76b0SSuyog Pawar         /* Add the left and top MPM modes for computation*/
3168*c83a76b0SSuyog Pawar 
3169*c83a76b0SSuyog Pawar         ps_ctxt->au1_modes_to_eval_temp[i++] = cand_mode_satd_list[0];
3170*c83a76b0SSuyog Pawar         ps_ctxt->au1_modes_to_eval_temp[i++] = cand_mode_satd_list[1];
3171*c83a76b0SSuyog Pawar 
3172*c83a76b0SSuyog Pawar         i_end = i;
3173*c83a76b0SSuyog Pawar         count = 0;
3174*c83a76b0SSuyog Pawar 
3175*c83a76b0SSuyog Pawar         /*Remove duplicate modes from modes_to_eval_temp[] */
3176*c83a76b0SSuyog Pawar         for(j = 0; j < i_end; j++)
3177*c83a76b0SSuyog Pawar         {
3178*c83a76b0SSuyog Pawar             for(k = 0; k < count; k++)
3179*c83a76b0SSuyog Pawar             {
3180*c83a76b0SSuyog Pawar                 if(ps_ctxt->au1_modes_to_eval_temp[j] == ps_ctxt->au1_modes_to_eval[k])
3181*c83a76b0SSuyog Pawar                     break;
3182*c83a76b0SSuyog Pawar             }
3183*c83a76b0SSuyog Pawar             if((k == count) && (ps_ctxt->au1_modes_to_eval_temp[j] > 1))
3184*c83a76b0SSuyog Pawar             {
3185*c83a76b0SSuyog Pawar                 ps_ctxt->au1_modes_to_eval[count] = ps_ctxt->au1_modes_to_eval_temp[j];
3186*c83a76b0SSuyog Pawar                 count++;
3187*c83a76b0SSuyog Pawar             }
3188*c83a76b0SSuyog Pawar         }
3189*c83a76b0SSuyog Pawar         i_end = count;
3190*c83a76b0SSuyog Pawar         if(count == 0)
3191*c83a76b0SSuyog Pawar         {
3192*c83a76b0SSuyog Pawar             ps_ctxt->au1_modes_to_eval[0] = 26;
3193*c83a76b0SSuyog Pawar             i_end = 1;
3194*c83a76b0SSuyog Pawar         }
3195*c83a76b0SSuyog Pawar 
3196*c83a76b0SSuyog Pawar         for(i = 0; i < i_end; i++)
3197*c83a76b0SSuyog Pawar         {
3198*c83a76b0SSuyog Pawar             pu1_ref_orig = &ps_ctxt->au1_ref_samples[0];
3199*c83a76b0SSuyog Pawar             pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0];
3200*c83a76b0SSuyog Pawar 
3201*c83a76b0SSuyog Pawar             mode = ps_ctxt->au1_modes_to_eval[i];
3202*c83a76b0SSuyog Pawar             ASSERT((mode >= 2) && (mode <= 34));
3203*c83a76b0SSuyog Pawar             cost_amode_step2[i] = ps_ctxt->au2_mode_bits_satd_cost[mode];
3204*c83a76b0SSuyog Pawar             filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(trans_size) - 2));
3205*c83a76b0SSuyog Pawar 
3206*c83a76b0SSuyog Pawar             for(k = 0; k < num_tu_in_y; k++)
3207*c83a76b0SSuyog Pawar             {
3208*c83a76b0SSuyog Pawar                 for(j = 0; j < num_tu_in_x; j++)
3209*c83a76b0SSuyog Pawar                 {
3210*c83a76b0SSuyog Pawar                     pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size);
3211*c83a76b0SSuyog Pawar 
3212*c83a76b0SSuyog Pawar                     if(0 == filter_flag)
3213*c83a76b0SSuyog Pawar                         pu1_ref = pu1_ref_orig;
3214*c83a76b0SSuyog Pawar                     else
3215*c83a76b0SSuyog Pawar                         pu1_ref = pu1_ref_filt;
3216*c83a76b0SSuyog Pawar 
3217*c83a76b0SSuyog Pawar                     g_apf_lum_ip[g_i4_ip_funcs[mode]](
3218*c83a76b0SSuyog Pawar                         pu1_ref, 0, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size, mode);
3219*c83a76b0SSuyog Pawar 
3220*c83a76b0SSuyog Pawar                     if(ps_ctxt->u1_use_satd)
3221*c83a76b0SSuyog Pawar                     {
3222*c83a76b0SSuyog Pawar                         sad = apf_resd_trns_had[u1_trans_idx](
3223*c83a76b0SSuyog Pawar                             pu1_origin,
3224*c83a76b0SSuyog Pawar                             ps_curr_src->i4_y_strd,
3225*c83a76b0SSuyog Pawar                             &ps_ctxt->au1_pred_samples[0],
3226*c83a76b0SSuyog Pawar                             trans_size,
3227*c83a76b0SSuyog Pawar                             NULL,
3228*c83a76b0SSuyog Pawar                             0
3229*c83a76b0SSuyog Pawar 
3230*c83a76b0SSuyog Pawar                         );
3231*c83a76b0SSuyog Pawar                     }
3232*c83a76b0SSuyog Pawar                     else
3233*c83a76b0SSuyog Pawar                     {
3234*c83a76b0SSuyog Pawar                         sad = ps_ctxt->s_ipe_optimised_function_list.pf_nxn_sad_computer(
3235*c83a76b0SSuyog Pawar                             pu1_origin,
3236*c83a76b0SSuyog Pawar                             ps_curr_src->i4_y_strd,
3237*c83a76b0SSuyog Pawar                             &ps_ctxt->au1_pred_samples[0],
3238*c83a76b0SSuyog Pawar                             trans_size,
3239*c83a76b0SSuyog Pawar                             trans_size);
3240*c83a76b0SSuyog Pawar                     }
3241*c83a76b0SSuyog Pawar 
3242*c83a76b0SSuyog Pawar                     cost_amode_step2[i] += sad;
3243*c83a76b0SSuyog Pawar 
3244*c83a76b0SSuyog Pawar                     pu1_ref_orig += (4 * MAX_CTB_SIZE + 1);
3245*c83a76b0SSuyog Pawar                     pu1_ref_filt += (4 * MAX_CTB_SIZE + 1);
3246*c83a76b0SSuyog Pawar                 }
3247*c83a76b0SSuyog Pawar             }
3248*c83a76b0SSuyog Pawar         }
3249*c83a76b0SSuyog Pawar         best_amode = ps_ctxt->au1_modes_to_eval[0];
3250*c83a76b0SSuyog Pawar         /*Init cost indx */
3251*c83a76b0SSuyog Pawar         cost_ang_mode = MAX_INTRA_COST_IPE;  //cost_amode_step2[0];
3252*c83a76b0SSuyog Pawar         for(z = 0; z < i_end; z++)
3253*c83a76b0SSuyog Pawar         {
3254*c83a76b0SSuyog Pawar             /* Least cost of all 3 angles are stored in cost_amode_step2[0] and corr. mode*/
3255*c83a76b0SSuyog Pawar             if(cost_ang_mode >= cost_amode_step2[z])
3256*c83a76b0SSuyog Pawar             {
3257*c83a76b0SSuyog Pawar                 if(cost_ang_mode == cost_amode_step2[z])
3258*c83a76b0SSuyog Pawar                 {
3259*c83a76b0SSuyog Pawar                     if(best_amode > ps_ctxt->au1_modes_to_eval[z])
3260*c83a76b0SSuyog Pawar                         best_amode = ps_ctxt->au1_modes_to_eval[z];
3261*c83a76b0SSuyog Pawar                 }
3262*c83a76b0SSuyog Pawar                 else
3263*c83a76b0SSuyog Pawar                 {
3264*c83a76b0SSuyog Pawar                     best_amode = ps_ctxt->au1_modes_to_eval[z];
3265*c83a76b0SSuyog Pawar                 }
3266*c83a76b0SSuyog Pawar                 cost_ang_mode = cost_amode_step2[z];
3267*c83a76b0SSuyog Pawar             }
3268*c83a76b0SSuyog Pawar         }
3269*c83a76b0SSuyog Pawar 
3270*c83a76b0SSuyog Pawar         /*Modify mode bits for the angular modes */
3271*c83a76b0SSuyog Pawar     }
3272*c83a76b0SSuyog Pawar 
3273*c83a76b0SSuyog Pawar     {
3274*c83a76b0SSuyog Pawar         /* Step - I modification */
3275*c83a76b0SSuyog Pawar         ASSERT((best_amode >= 2) && (best_amode <= 34));
3276*c83a76b0SSuyog Pawar         i_end = 0;
3277*c83a76b0SSuyog Pawar         z = 0;
3278*c83a76b0SSuyog Pawar 
3279*c83a76b0SSuyog Pawar         /* Around best level 3 angular mode, search for best level 1 mode */
3280*c83a76b0SSuyog Pawar         ps_ctxt->au1_modes_to_eval[i_end++] = 0;
3281*c83a76b0SSuyog Pawar         ps_ctxt->au1_modes_to_eval[i_end++] = 1;
3282*c83a76b0SSuyog Pawar 
3283*c83a76b0SSuyog Pawar         if(best_amode != 2)
3284*c83a76b0SSuyog Pawar             ps_ctxt->au1_modes_to_eval[i_end++] = best_amode - 1;
3285*c83a76b0SSuyog Pawar 
3286*c83a76b0SSuyog Pawar         ps_ctxt->au1_modes_to_eval[i_end++] = best_amode;
3287*c83a76b0SSuyog Pawar 
3288*c83a76b0SSuyog Pawar         if(best_amode != 34)
3289*c83a76b0SSuyog Pawar             ps_ctxt->au1_modes_to_eval[i_end++] = best_amode + 1;
3290*c83a76b0SSuyog Pawar 
3291*c83a76b0SSuyog Pawar         /* Inserting step_2's best mode at last to avoid
3292*c83a76b0SSuyog Pawar         recalculation of it's SATD cost */
3293*c83a76b0SSuyog Pawar 
3294*c83a76b0SSuyog Pawar         //ps_ctxt->au1_modes_to_eval[i_end] = best_amode; //Bugfix: HSAD compared with SAD
3295*c83a76b0SSuyog Pawar         //cost_amode_step2[i_end] = cost_ang_mode;
3296*c83a76b0SSuyog Pawar 
3297*c83a76b0SSuyog Pawar         /*best_sad[i_end] = cost_ang_mode
3298*c83a76b0SSuyog Pawar                 - mode_bits_satd_cost[best_amode]; //See NOTE_A01 above */
3299*c83a76b0SSuyog Pawar 
3300*c83a76b0SSuyog Pawar         cost_ang_mode = MAX_INTRA_COST_IPE; /* Init cost */
3301*c83a76b0SSuyog Pawar 
3302*c83a76b0SSuyog Pawar         for(i = 0; i < i_end; i++)
3303*c83a76b0SSuyog Pawar         {
3304*c83a76b0SSuyog Pawar             WORD32 u1_trans_idx = trans_size >> 3;
3305*c83a76b0SSuyog Pawar             if(trans_size == 32)
3306*c83a76b0SSuyog Pawar                 u1_trans_idx = 3;
3307*c83a76b0SSuyog Pawar             pu1_ref_orig = &ps_ctxt->au1_ref_samples[0];
3308*c83a76b0SSuyog Pawar             pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0];
3309*c83a76b0SSuyog Pawar 
3310*c83a76b0SSuyog Pawar             /*best_sad[i] = 0; //See NOTE_A01 above */
3311*c83a76b0SSuyog Pawar             mode = ps_ctxt->au1_modes_to_eval[i];
3312*c83a76b0SSuyog Pawar             cost_amode_step2[i] = ps_ctxt->au2_mode_bits_satd_cost[mode];
3313*c83a76b0SSuyog Pawar             filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(trans_size) - 2));
3314*c83a76b0SSuyog Pawar 
3315*c83a76b0SSuyog Pawar             for(k = 0; k < num_tu_in_y; k++)
3316*c83a76b0SSuyog Pawar             {
3317*c83a76b0SSuyog Pawar                 for(j = 0; j < num_tu_in_x; j++)
3318*c83a76b0SSuyog Pawar                 {
3319*c83a76b0SSuyog Pawar                     pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size);
3320*c83a76b0SSuyog Pawar 
3321*c83a76b0SSuyog Pawar                     if(0 == filter_flag)
3322*c83a76b0SSuyog Pawar                         pu1_ref = pu1_ref_orig;
3323*c83a76b0SSuyog Pawar                     else
3324*c83a76b0SSuyog Pawar                         pu1_ref = pu1_ref_filt;
3325*c83a76b0SSuyog Pawar 
3326*c83a76b0SSuyog Pawar                     g_apf_lum_ip[g_i4_ip_funcs[mode]](
3327*c83a76b0SSuyog Pawar                         pu1_ref, 0, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size, mode);
3328*c83a76b0SSuyog Pawar 
3329*c83a76b0SSuyog Pawar                     //if(trans_size != 4)
3330*c83a76b0SSuyog Pawar                     {
3331*c83a76b0SSuyog Pawar                         sad = apf_resd_trns_had[u1_trans_idx](
3332*c83a76b0SSuyog Pawar                             pu1_origin,
3333*c83a76b0SSuyog Pawar                             ps_curr_src->i4_y_strd,
3334*c83a76b0SSuyog Pawar                             &ps_ctxt->au1_pred_samples[0],
3335*c83a76b0SSuyog Pawar                             trans_size,
3336*c83a76b0SSuyog Pawar                             NULL,
3337*c83a76b0SSuyog Pawar                             0);
3338*c83a76b0SSuyog Pawar                     }
3339*c83a76b0SSuyog Pawar 
3340*c83a76b0SSuyog Pawar                     /*accumualting SATD though name says it is sad*/
3341*c83a76b0SSuyog Pawar                     cost_amode_step2[i] += sad;
3342*c83a76b0SSuyog Pawar                     /*best_sad[i] +=sad; //See NOTE_A01 above */
3343*c83a76b0SSuyog Pawar                     pu1_ref_orig += (4 * MAX_CTB_SIZE + 1);
3344*c83a76b0SSuyog Pawar                     pu1_ref_filt += (4 * MAX_CTB_SIZE + 1);
3345*c83a76b0SSuyog Pawar                 }
3346*c83a76b0SSuyog Pawar             }
3347*c83a76b0SSuyog Pawar         }
3348*c83a76b0SSuyog Pawar         /* Updating i_end for the step_2's inserted mode*/
3349*c83a76b0SSuyog Pawar         //        i_end++;
3350*c83a76b0SSuyog Pawar 
3351*c83a76b0SSuyog Pawar         /* Arrange the reference array in ascending order */
3352*c83a76b0SSuyog Pawar 
3353*c83a76b0SSuyog Pawar         for(i = 0; i < (i_end - 1); i++)
3354*c83a76b0SSuyog Pawar         {
3355*c83a76b0SSuyog Pawar             for(j = i + 1; j < i_end; j++)
3356*c83a76b0SSuyog Pawar             {
3357*c83a76b0SSuyog Pawar                 if(cost_amode_step2[i] > cost_amode_step2[j])
3358*c83a76b0SSuyog Pawar                 {
3359*c83a76b0SSuyog Pawar                     temp = cost_amode_step2[i];
3360*c83a76b0SSuyog Pawar                     cost_amode_step2[i] = cost_amode_step2[j];
3361*c83a76b0SSuyog Pawar                     cost_amode_step2[j] = temp;
3362*c83a76b0SSuyog Pawar 
3363*c83a76b0SSuyog Pawar                     temp = modes_4x4[i];
3364*c83a76b0SSuyog Pawar                     modes_4x4[i] = modes_4x4[j];
3365*c83a76b0SSuyog Pawar                     modes_4x4[j] = temp;
3366*c83a76b0SSuyog Pawar                 }
3367*c83a76b0SSuyog Pawar             }
3368*c83a76b0SSuyog Pawar         }
3369*c83a76b0SSuyog Pawar 
3370*c83a76b0SSuyog Pawar         /* Least cost of all 3 angles are stored in cost_amode_step2[0] and corr. mode*/
3371*c83a76b0SSuyog Pawar         best_amode = ps_ctxt->au1_modes_to_eval[modes_4x4[0]];
3372*c83a76b0SSuyog Pawar         cost_ang_mode = cost_amode_step2[0];
3373*c83a76b0SSuyog Pawar         ps_cu_node->best_satd = cost_ang_mode - ps_ctxt->au2_mode_bits_satd_cost[best_amode];
3374*c83a76b0SSuyog Pawar         ps_cu_node->best_cost = cost_amode_step2[0];
3375*c83a76b0SSuyog Pawar         ps_cu_node->best_mode = ps_ctxt->au1_modes_to_eval[modes_4x4[0]];
3376*c83a76b0SSuyog Pawar         ps_cu_node->best_satd =
3377*c83a76b0SSuyog Pawar             ps_cu_node->best_cost - ps_ctxt->au2_mode_bits_satd_cost[ps_cu_node->best_mode];
3378*c83a76b0SSuyog Pawar 
3379*c83a76b0SSuyog Pawar         /*Accumalate best mode bits cost for RC*/
3380*c83a76b0SSuyog Pawar         ps_cu_node->u2_mode_bits_cost = ps_ctxt->au2_mode_bits_satd[ps_cu_node->best_mode];
3381*c83a76b0SSuyog Pawar 
3382*c83a76b0SSuyog Pawar         /* Store the best three candidates */
3383*c83a76b0SSuyog Pawar         for(i = 0; i < 3; i++)
3384*c83a76b0SSuyog Pawar         {
3385*c83a76b0SSuyog Pawar             best_costs_4x4[i] = cost_amode_step2[i];
3386*c83a76b0SSuyog Pawar             best_modes_4x4[i] = ps_ctxt->au1_modes_to_eval[modes_4x4[i]];
3387*c83a76b0SSuyog Pawar         }
3388*c83a76b0SSuyog Pawar     }
3389*c83a76b0SSuyog Pawar 
3390*c83a76b0SSuyog Pawar     return;
3391*c83a76b0SSuyog Pawar }
3392