xref: /aosp_15_r20/external/libhevc/decoder/ihevcd_get_mv.c (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
/******************************************************************************
*
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
/**
 *******************************************************************************
 * @file
 *  ihevcd_get_mv.c
 *
 * @brief
 *  Contains functions to compute motion vectors
 *
 * @author
 *  Ittiam
 *
 * @par List of Functions:
 * - ihevcd_get_mv_ctb()
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
/*****************************************************************************/
/* File Includes                                                             */
/*****************************************************************************/
40*c83a76b0SSuyog Pawar 
41*c83a76b0SSuyog Pawar #include <stdio.h>
42*c83a76b0SSuyog Pawar #include <stddef.h>
43*c83a76b0SSuyog Pawar #include <stdlib.h>
44*c83a76b0SSuyog Pawar #include <string.h>
45*c83a76b0SSuyog Pawar 
46*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
47*c83a76b0SSuyog Pawar #include "iv.h"
48*c83a76b0SSuyog Pawar #include "ivd.h"
49*c83a76b0SSuyog Pawar #include "ihevcd_cxa.h"
50*c83a76b0SSuyog Pawar #include "ithread.h"
51*c83a76b0SSuyog Pawar 
52*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
53*c83a76b0SSuyog Pawar #include "ihevc_debug.h"
54*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
55*c83a76b0SSuyog Pawar #include "ihevc_macros.h"
56*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
57*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
58*c83a76b0SSuyog Pawar #include "ihevc_disp_mgr.h"
59*c83a76b0SSuyog Pawar #include "ihevc_buf_mgr.h"
60*c83a76b0SSuyog Pawar #include "ihevc_dpb_mgr.h"
61*c83a76b0SSuyog Pawar 
62*c83a76b0SSuyog Pawar #include "ihevcd_defs.h"
63*c83a76b0SSuyog Pawar #include "ihevcd_function_selector.h"
64*c83a76b0SSuyog Pawar #include "ihevcd_structs.h"
65*c83a76b0SSuyog Pawar #include "ihevcd_error.h"
66*c83a76b0SSuyog Pawar #include "ihevcd_nal.h"
67*c83a76b0SSuyog Pawar #include "ihevcd_bitstream.h"
68*c83a76b0SSuyog Pawar #include "ihevcd_fmt_conv.h"
69*c83a76b0SSuyog Pawar #include "ihevcd_job_queue.h"
70*c83a76b0SSuyog Pawar #include "ihevcd_debug.h"
71*c83a76b0SSuyog Pawar #include "ihevcd_mv_merge.h"
72*c83a76b0SSuyog Pawar #include "ihevcd_mv_pred.h"
73*c83a76b0SSuyog Pawar #include "ihevcd_profile.h"
74*c83a76b0SSuyog Pawar /**
75*c83a76b0SSuyog Pawar  *******************************************************************************
76*c83a76b0SSuyog Pawar  *
77*c83a76b0SSuyog Pawar  * @brief
78*c83a76b0SSuyog Pawar  * This function computes and stores MV's of all the PU's in CTB
79*c83a76b0SSuyog Pawar  *
80*c83a76b0SSuyog Pawar  * @par Description:
81*c83a76b0SSuyog Pawar  * MV's of a PU will be stored in PU structure. MV computation can be merge or mv pred
82*c83a76b0SSuyog Pawar  *
83*c83a76b0SSuyog Pawar  * @param[in] ps_proc
84*c83a76b0SSuyog Pawar  * processor context
85*c83a76b0SSuyog Pawar  *
86*c83a76b0SSuyog Pawar  * @param[in] pi4_ctb_top_pu_idx
87*c83a76b0SSuyog Pawar  * Pointer to ctb top PU indices
88*c83a76b0SSuyog Pawar  *
89*c83a76b0SSuyog Pawar  * @param[in] pi4_ctb_left_pu_idx
90*c83a76b0SSuyog Pawar  * Pointer to ctb left PU indices
91*c83a76b0SSuyog Pawar  *
92*c83a76b0SSuyog Pawar  * @param[in] pi4_ctb_top_left_pu_idx
93*c83a76b0SSuyog Pawar  * Pointer to ctb top left PU indices
94*c83a76b0SSuyog Pawar  *
95*c83a76b0SSuyog Pawar  * @returns
96*c83a76b0SSuyog Pawar  * number of PU's per ctb
97*c83a76b0SSuyog Pawar  *
98*c83a76b0SSuyog Pawar  * @remarks
99*c83a76b0SSuyog Pawar  *
100*c83a76b0SSuyog Pawar  *
101*c83a76b0SSuyog Pawar  *******************************************************************************
102*c83a76b0SSuyog Pawar  */
103*c83a76b0SSuyog Pawar 
ihevcd_get_mv_ctb(mv_ctxt_t * ps_mv_ctxt,UWORD32 * pu4_ctb_top_pu_idx,UWORD32 * pu4_ctb_left_pu_idx,UWORD32 * pu4_ctb_top_left_pu_idx)104*c83a76b0SSuyog Pawar WORD32 ihevcd_get_mv_ctb(mv_ctxt_t *ps_mv_ctxt,
105*c83a76b0SSuyog Pawar                          UWORD32 *pu4_ctb_top_pu_idx,
106*c83a76b0SSuyog Pawar                          UWORD32 *pu4_ctb_left_pu_idx,
107*c83a76b0SSuyog Pawar                          UWORD32 *pu4_ctb_top_left_pu_idx)
108*c83a76b0SSuyog Pawar {
109*c83a76b0SSuyog Pawar 
110*c83a76b0SSuyog Pawar     WORD32 i;
111*c83a76b0SSuyog Pawar     sps_t *ps_sps;
112*c83a76b0SSuyog Pawar     pps_t *ps_pps;
113*c83a76b0SSuyog Pawar     pu_t *ps_pu;
114*c83a76b0SSuyog Pawar     tile_t *ps_tile;
115*c83a76b0SSuyog Pawar     UWORD8 *pu1_pic_pu_map_ctb;
116*c83a76b0SSuyog Pawar     WORD32 num_minpu_in_ctb;
117*c83a76b0SSuyog Pawar     WORD32 ctb_start_pu_idx;
118*c83a76b0SSuyog Pawar     UWORD32 *pu4_top_pu_idx, *pu4_left_pu_idx, *pu4_top_left_pu_idx;
119*c83a76b0SSuyog Pawar     WORD32 pu_x_in_4x4, pu_y_in_4x4;
120*c83a76b0SSuyog Pawar     WORD32 pu_x_in_4x4_single_mcl, pu_y_in_4x4_single_mcl;
121*c83a76b0SSuyog Pawar     pu_mv_t s_pred_mv;
122*c83a76b0SSuyog Pawar     WORD32 ctb_size, ctb_size_in_min_pu;
123*c83a76b0SSuyog Pawar     WORD32 num_pu_per_ctb, pu_wd, pu_ht, pu_cnt;
124*c83a76b0SSuyog Pawar     WORD32  pu_wd_single_mcl, pu_ht_single_mcl;
125*c83a76b0SSuyog Pawar     UWORD32 au4_nbr_avail[MAX_CTB_SIZE / MIN_PU_SIZE
126*c83a76b0SSuyog Pawar                     + 2 /* Top nbr + bot nbr */];
127*c83a76b0SSuyog Pawar     UWORD32 *pu4_nbr_pu_idx/* (Left + ctb_size + right ) * (top + ctb_size + bottom) */;
128*c83a76b0SSuyog Pawar     WORD32 top_avail_bits;
129*c83a76b0SSuyog Pawar     UWORD8 u1_lb_avail, u1_l_avail, u1_t_avail, u1_tr_avail, u1_tl_avail;
130*c83a76b0SSuyog Pawar     WORD32 nbr_pu_idx_strd;
131*c83a76b0SSuyog Pawar     WORD32 cb_size;
132*c83a76b0SSuyog Pawar     WORD32 single_mcl_flag;
133*c83a76b0SSuyog Pawar 
134*c83a76b0SSuyog Pawar     PROFILE_DISABLE_MV_PREDICTION();
135*c83a76b0SSuyog Pawar     ps_sps = ps_mv_ctxt->ps_sps;
136*c83a76b0SSuyog Pawar     ps_pps = ps_mv_ctxt->ps_pps;
137*c83a76b0SSuyog Pawar     ps_pu = ps_mv_ctxt->ps_pu;
138*c83a76b0SSuyog Pawar     ps_tile = ps_mv_ctxt->ps_tile;
139*c83a76b0SSuyog Pawar 
140*c83a76b0SSuyog Pawar     pu4_nbr_pu_idx = ps_mv_ctxt->pu4_pic_pu_idx_map;
141*c83a76b0SSuyog Pawar 
142*c83a76b0SSuyog Pawar     ctb_size = (1 << ps_sps->i1_log2_ctb_size);
143*c83a76b0SSuyog Pawar 
144*c83a76b0SSuyog Pawar     ctb_size_in_min_pu = (ctb_size / MIN_PU_SIZE);
145*c83a76b0SSuyog Pawar 
146*c83a76b0SSuyog Pawar     num_minpu_in_ctb = ctb_size_in_min_pu * ctb_size_in_min_pu;
147*c83a76b0SSuyog Pawar     pu1_pic_pu_map_ctb = ps_mv_ctxt->pu1_pic_pu_map + (ps_mv_ctxt->i4_ctb_x + ps_mv_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb) * num_minpu_in_ctb;
148*c83a76b0SSuyog Pawar 
149*c83a76b0SSuyog Pawar     num_pu_per_ctb = ps_mv_ctxt->i4_ctb_pu_cnt;
150*c83a76b0SSuyog Pawar     ctb_start_pu_idx = ps_mv_ctxt->i4_ctb_start_pu_idx;
151*c83a76b0SSuyog Pawar     nbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
152*c83a76b0SSuyog Pawar 
153*c83a76b0SSuyog Pawar     {
154*c83a76b0SSuyog Pawar         /* Updating the initial availability map */
155*c83a76b0SSuyog Pawar         WORD32 i;
156*c83a76b0SSuyog Pawar         UWORD32 u4_left_ctb_avail, u4_top_lt_ctb_avail, u4_top_rt_ctb_avail,
157*c83a76b0SSuyog Pawar                         u4_top_ctb_avail;
158*c83a76b0SSuyog Pawar 
159*c83a76b0SSuyog Pawar         u4_left_ctb_avail = ps_mv_ctxt->u1_left_ctb_avail;
160*c83a76b0SSuyog Pawar         u4_top_lt_ctb_avail = ps_mv_ctxt->u1_top_lt_ctb_avail;
161*c83a76b0SSuyog Pawar         u4_top_ctb_avail = ps_mv_ctxt->u1_top_ctb_avail;
162*c83a76b0SSuyog Pawar         u4_top_rt_ctb_avail = ps_mv_ctxt->u1_top_rt_ctb_avail;
163*c83a76b0SSuyog Pawar 
164*c83a76b0SSuyog Pawar         /* Initializing the availability array */
165*c83a76b0SSuyog Pawar         memset(au4_nbr_avail, 0,
166*c83a76b0SSuyog Pawar                (MAX_CTB_SIZE / MIN_PU_SIZE + 2) * sizeof(UWORD32));
167*c83a76b0SSuyog Pawar         /* Initializing the availability array with CTB level availability flags */
168*c83a76b0SSuyog Pawar         {
169*c83a76b0SSuyog Pawar             WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples
170*c83a76b0SSuyog Pawar                             - (ps_mv_ctxt->i4_ctb_y << ps_sps->i1_log2_ctb_size);
171*c83a76b0SSuyog Pawar             WORD32 ctb_size_left = MIN(ctb_size, rows_remaining);
172*c83a76b0SSuyog Pawar             for(i = 0; i < ctb_size_left / MIN_PU_SIZE; i++)
173*c83a76b0SSuyog Pawar             {
174*c83a76b0SSuyog Pawar                 au4_nbr_avail[i + 1] = (u4_left_ctb_avail << 31);
175*c83a76b0SSuyog Pawar             }
176*c83a76b0SSuyog Pawar         }
177*c83a76b0SSuyog Pawar         au4_nbr_avail[0] |= ((u4_top_rt_ctb_avail << 31)
178*c83a76b0SSuyog Pawar                         >> (1 + ctb_size_in_min_pu)); /* 1+ctb_size/4 position bit pos from msb */
179*c83a76b0SSuyog Pawar 
180*c83a76b0SSuyog Pawar         au4_nbr_avail[0] |= (u4_top_lt_ctb_avail << 31);
181*c83a76b0SSuyog Pawar         {
182*c83a76b0SSuyog Pawar             WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples
183*c83a76b0SSuyog Pawar                             - (ps_mv_ctxt->i4_ctb_x << ps_sps->i1_log2_ctb_size);
184*c83a76b0SSuyog Pawar             WORD32 ctb_size_top = MIN(ctb_size, cols_remaining);
185*c83a76b0SSuyog Pawar             WORD32 shift = (31 - (ctb_size / MIN_TU_SIZE));
186*c83a76b0SSuyog Pawar 
187*c83a76b0SSuyog Pawar             /* ctb_size_top gives number of valid pixels remaining in the current row */
188*c83a76b0SSuyog Pawar             /* Since we need pattern of 1's starting from the MSB, an additional shift */
189*c83a76b0SSuyog Pawar             /* is needed */
190*c83a76b0SSuyog Pawar             shift += ((ctb_size - ctb_size_top) / MIN_TU_SIZE);
191*c83a76b0SSuyog Pawar 
192*c83a76b0SSuyog Pawar             top_avail_bits = ((1 << (ctb_size_top / MIN_PU_SIZE)) - 1) << shift;
193*c83a76b0SSuyog Pawar         }
194*c83a76b0SSuyog Pawar 
195*c83a76b0SSuyog Pawar         au4_nbr_avail[0] |= ((u4_top_ctb_avail == 1) ? top_avail_bits : 0x0);
196*c83a76b0SSuyog Pawar         /* Starting from msb 2nd bit to (1+ctb_size/4) bit, set 1 if top avail,or 0 */
197*c83a76b0SSuyog Pawar 
198*c83a76b0SSuyog Pawar     }
199*c83a76b0SSuyog Pawar 
200*c83a76b0SSuyog Pawar     {
201*c83a76b0SSuyog Pawar         /* In case of a  tile boundary, left and top arrays must change*/
202*c83a76b0SSuyog Pawar         /*Left*/
203*c83a76b0SSuyog Pawar         /* If start of tile row*/
204*c83a76b0SSuyog Pawar         if(((ps_tile->u1_pos_x) == (ps_mv_ctxt->i4_ctb_x)) && (ps_mv_ctxt->i4_ctb_x != 0))
205*c83a76b0SSuyog Pawar         {
206*c83a76b0SSuyog Pawar             WORD32 index_pic_map;
207*c83a76b0SSuyog Pawar             WORD32 ctb_pu_idx;
208*c83a76b0SSuyog Pawar             UWORD8 *pu1_pic_pu_map;
209*c83a76b0SSuyog Pawar 
210*c83a76b0SSuyog Pawar             /* Goto the left ctb which belongs to another tile */
211*c83a76b0SSuyog Pawar             index_pic_map = ((ps_mv_ctxt->i4_ctb_x - 1) + ps_mv_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
212*c83a76b0SSuyog Pawar             ctb_pu_idx = ps_mv_ctxt->pu4_pic_pu_idx[index_pic_map];
213*c83a76b0SSuyog Pawar             index_pic_map *= num_minpu_in_ctb;
214*c83a76b0SSuyog Pawar 
215*c83a76b0SSuyog Pawar             /*Replicate the PUs of the last column of the left ctb*/
216*c83a76b0SSuyog Pawar             pu1_pic_pu_map = ps_mv_ctxt->pu1_pic_pu_map + index_pic_map + ctb_size_in_min_pu - 1;
217*c83a76b0SSuyog Pawar             for(i = 0; i < ctb_size_in_min_pu; i++)
218*c83a76b0SSuyog Pawar             {
219*c83a76b0SSuyog Pawar                 /* Left neighbors change*/
220*c83a76b0SSuyog Pawar                 pu4_ctb_left_pu_idx[i] = ctb_pu_idx + (WORD32)*pu1_pic_pu_map;
221*c83a76b0SSuyog Pawar                 pu1_pic_pu_map = pu1_pic_pu_map + ctb_size_in_min_pu;
222*c83a76b0SSuyog Pawar             }
223*c83a76b0SSuyog Pawar 
224*c83a76b0SSuyog Pawar 
225*c83a76b0SSuyog Pawar             index_pic_map = ((ps_mv_ctxt->i4_ctb_x - 1) + (ps_mv_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
226*c83a76b0SSuyog Pawar             ctb_pu_idx = ps_mv_ctxt->pu4_pic_pu_idx[index_pic_map];
227*c83a76b0SSuyog Pawar             index_pic_map *= num_minpu_in_ctb;
228*c83a76b0SSuyog Pawar             index_pic_map += (num_minpu_in_ctb - 1);
229*c83a76b0SSuyog Pawar             pu4_ctb_top_left_pu_idx[0] = ctb_pu_idx + pu1_pic_pu_map[index_pic_map];
230*c83a76b0SSuyog Pawar         }
231*c83a76b0SSuyog Pawar         /*Top*/
232*c83a76b0SSuyog Pawar         /* If start of tile column*/
233*c83a76b0SSuyog Pawar         if(((ps_tile->u1_pos_y) == (ps_mv_ctxt->i4_ctb_y)) && (ps_mv_ctxt->i4_ctb_y != 0))
234*c83a76b0SSuyog Pawar         {
235*c83a76b0SSuyog Pawar             WORD32 index_pic_map;
236*c83a76b0SSuyog Pawar             WORD32 ctb_pu_idx;
237*c83a76b0SSuyog Pawar             UWORD8 *pu1_pic_pu_map;
238*c83a76b0SSuyog Pawar 
239*c83a76b0SSuyog Pawar             /* Goto the top ctb which belongs to another tile */
240*c83a76b0SSuyog Pawar             index_pic_map =  (ps_mv_ctxt->i4_ctb_x) + ((ps_mv_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
241*c83a76b0SSuyog Pawar             ctb_pu_idx = ps_mv_ctxt->pu4_pic_pu_idx[index_pic_map];
242*c83a76b0SSuyog Pawar             index_pic_map *= num_minpu_in_ctb;
243*c83a76b0SSuyog Pawar 
244*c83a76b0SSuyog Pawar             /*Replicate the PUs of the last row of the top ctb*/
245*c83a76b0SSuyog Pawar             pu1_pic_pu_map = ps_mv_ctxt->pu1_pic_pu_map + index_pic_map + (ctb_size_in_min_pu * (ctb_size_in_min_pu - 1));
246*c83a76b0SSuyog Pawar             for(i = 0; i < ctb_size_in_min_pu; i++)
247*c83a76b0SSuyog Pawar             {
248*c83a76b0SSuyog Pawar                 /* Top neighbors change*/
249*c83a76b0SSuyog Pawar                 pu4_ctb_top_pu_idx[i] = ctb_pu_idx + (WORD32)*pu1_pic_pu_map;
250*c83a76b0SSuyog Pawar                 pu1_pic_pu_map++;
251*c83a76b0SSuyog Pawar             }
252*c83a76b0SSuyog Pawar         }
253*c83a76b0SSuyog Pawar 
254*c83a76b0SSuyog Pawar         /* Updating the initial neighbor pu idx map */
255*c83a76b0SSuyog Pawar         /* Initializing the availability array with CTB level availability flags */
256*c83a76b0SSuyog Pawar         /* 16x16 array for holding pu info of the ctb, wrt the frame pu count*/
257*c83a76b0SSuyog Pawar         for(i = 0; i < ctb_size_in_min_pu; i++)
258*c83a76b0SSuyog Pawar         {
259*c83a76b0SSuyog Pawar             /* Left */
260*c83a76b0SSuyog Pawar             pu4_nbr_pu_idx[(i + 1) * nbr_pu_idx_strd] = pu4_ctb_left_pu_idx[i];
261*c83a76b0SSuyog Pawar             /* Top */
262*c83a76b0SSuyog Pawar             pu4_nbr_pu_idx[i + 1] = pu4_ctb_top_pu_idx[i];
263*c83a76b0SSuyog Pawar         }
264*c83a76b0SSuyog Pawar         /* Top right */
265*c83a76b0SSuyog Pawar         pu4_nbr_pu_idx[1 + ctb_size_in_min_pu] = pu4_ctb_top_pu_idx[ctb_size_in_min_pu];
266*c83a76b0SSuyog Pawar 
267*c83a76b0SSuyog Pawar         /* Top left */
268*c83a76b0SSuyog Pawar         pu4_nbr_pu_idx[0] = pu4_ctb_top_left_pu_idx[0];
269*c83a76b0SSuyog Pawar 
270*c83a76b0SSuyog Pawar     }
271*c83a76b0SSuyog Pawar 
272*c83a76b0SSuyog Pawar     /* CTB level MV pred */
273*c83a76b0SSuyog Pawar     for(pu_cnt = 0; pu_cnt < num_pu_per_ctb; pu_cnt++, ps_pu++)
274*c83a76b0SSuyog Pawar     {
275*c83a76b0SSuyog Pawar         pu_ht = (ps_pu->b4_ht + 1) << 2;
276*c83a76b0SSuyog Pawar         pu_wd = (ps_pu->b4_wd + 1) << 2;
277*c83a76b0SSuyog Pawar 
278*c83a76b0SSuyog Pawar         pu_ht_single_mcl = pu_ht;
279*c83a76b0SSuyog Pawar         pu_wd_single_mcl = pu_wd;
280*c83a76b0SSuyog Pawar 
281*c83a76b0SSuyog Pawar         pu_x_in_4x4 = ps_pu->b4_pos_x;
282*c83a76b0SSuyog Pawar         pu_y_in_4x4 = ps_pu->b4_pos_y;
283*c83a76b0SSuyog Pawar 
284*c83a76b0SSuyog Pawar         pu_x_in_4x4_single_mcl = pu_x_in_4x4;
285*c83a76b0SSuyog Pawar         pu_y_in_4x4_single_mcl = pu_y_in_4x4;
286*c83a76b0SSuyog Pawar 
287*c83a76b0SSuyog Pawar         /*******************************************/
288*c83a76b0SSuyog Pawar         /* Neighbor location: Graphical indication */
289*c83a76b0SSuyog Pawar         /*                                         */
290*c83a76b0SSuyog Pawar         /*          B2 _____________B1 B0          */
291*c83a76b0SSuyog Pawar         /*            |               |            */
292*c83a76b0SSuyog Pawar         /*            |               |            */
293*c83a76b0SSuyog Pawar         /*            |               |            */
294*c83a76b0SSuyog Pawar         /*            |      PU     ht|            */
295*c83a76b0SSuyog Pawar         /*            |               |            */
296*c83a76b0SSuyog Pawar         /*            |               |            */
297*c83a76b0SSuyog Pawar         /*          A1|______wd_______|            */
298*c83a76b0SSuyog Pawar         /*          A0                             */
299*c83a76b0SSuyog Pawar         /*                                         */
300*c83a76b0SSuyog Pawar         /*******************************************/
301*c83a76b0SSuyog Pawar         /* Below code is for merge mode, where if single_mcl_flag == 1,
302*c83a76b0SSuyog Pawar          * all the prediction units of the current coding unit share a
303*c83a76b0SSuyog Pawar          * single merge candidate list, which is identical to the
304*c83a76b0SSuyog Pawar          * merge candidate list of the 2Nx2N prediction unit.
305*c83a76b0SSuyog Pawar          */
306*c83a76b0SSuyog Pawar         single_mcl_flag = 0;
307*c83a76b0SSuyog Pawar         if(1 == ps_pu->b1_merge_flag)
308*c83a76b0SSuyog Pawar         {
309*c83a76b0SSuyog Pawar             cb_size = MAX(pu_wd_single_mcl, pu_ht_single_mcl);
310*c83a76b0SSuyog Pawar             cb_size = MAX(cb_size,
311*c83a76b0SSuyog Pawar                           (1 << ps_sps->i1_log2_min_coding_block_size));
312*c83a76b0SSuyog Pawar             if((ps_pps->i1_log2_parallel_merge_level > 2) && cb_size == 8 && (pu_wd_single_mcl != pu_ht_single_mcl))
313*c83a76b0SSuyog Pawar             {
314*c83a76b0SSuyog Pawar                 single_mcl_flag = 1;
315*c83a76b0SSuyog Pawar                 if((PART_Nx2N == ps_pu->b3_part_mode) && (1 == ps_pu->b2_part_idx))
316*c83a76b0SSuyog Pawar                 {
317*c83a76b0SSuyog Pawar                     pu_x_in_4x4_single_mcl = pu_x_in_4x4_single_mcl - 1;
318*c83a76b0SSuyog Pawar                 }
319*c83a76b0SSuyog Pawar                 else if((PART_2NxN == ps_pu->b3_part_mode) && (1 == ps_pu->b2_part_idx))
320*c83a76b0SSuyog Pawar                 {
321*c83a76b0SSuyog Pawar                     pu_y_in_4x4_single_mcl = pu_y_in_4x4_single_mcl - 1;
322*c83a76b0SSuyog Pawar                 }
323*c83a76b0SSuyog Pawar                 pu_ht_single_mcl = 8;
324*c83a76b0SSuyog Pawar                 pu_wd_single_mcl = 8;
325*c83a76b0SSuyog Pawar             }
326*c83a76b0SSuyog Pawar         }
327*c83a76b0SSuyog Pawar         pu4_top_pu_idx = &pu4_nbr_pu_idx[(1 + pu_x_in_4x4_single_mcl)
328*c83a76b0SSuyog Pawar                         + (1 + pu_y_in_4x4_single_mcl - 1) * nbr_pu_idx_strd];
329*c83a76b0SSuyog Pawar         pu4_top_left_pu_idx = pu4_top_pu_idx - 1;
330*c83a76b0SSuyog Pawar         pu4_left_pu_idx = pu4_top_pu_idx - 1 + nbr_pu_idx_strd;
331*c83a76b0SSuyog Pawar 
332*c83a76b0SSuyog Pawar         /* Get neibhbor availability */
333*c83a76b0SSuyog Pawar         {
334*c83a76b0SSuyog Pawar             u1_lb_avail = (au4_nbr_avail[1 + pu_y_in_4x4_single_mcl + pu_ht_single_mcl / MIN_PU_SIZE]
335*c83a76b0SSuyog Pawar                             >> (31 - (1 + pu_x_in_4x4_single_mcl - 1))) & 1;
336*c83a76b0SSuyog Pawar             u1_l_avail = (au4_nbr_avail[1 + pu_y_in_4x4_single_mcl]
337*c83a76b0SSuyog Pawar                             >> (31 - (1 + pu_x_in_4x4_single_mcl - 1))) & 1;
338*c83a76b0SSuyog Pawar             u1_t_avail = (au4_nbr_avail[1 + pu_y_in_4x4_single_mcl - 1]
339*c83a76b0SSuyog Pawar                             >> (31 - (1 + pu_x_in_4x4_single_mcl))) & 1;
340*c83a76b0SSuyog Pawar             u1_tr_avail = (au4_nbr_avail[1 + pu_y_in_4x4_single_mcl - 1]
341*c83a76b0SSuyog Pawar                             >> (31 - (1 + pu_x_in_4x4_single_mcl + pu_wd_single_mcl / MIN_PU_SIZE)))
342*c83a76b0SSuyog Pawar                             & 1;
343*c83a76b0SSuyog Pawar             u1_tl_avail = (au4_nbr_avail[1 + pu_y_in_4x4_single_mcl - 1]
344*c83a76b0SSuyog Pawar                             >> (31 - (1 + pu_x_in_4x4_single_mcl - 1))) & 1;
345*c83a76b0SSuyog Pawar         }
346*c83a76b0SSuyog Pawar         if(ps_pu->b1_intra_flag == 0)
347*c83a76b0SSuyog Pawar         {
348*c83a76b0SSuyog Pawar             if(ps_pu->b1_merge_flag == 0)
349*c83a76b0SSuyog Pawar             {
350*c83a76b0SSuyog Pawar                 WORD32 pred_flag_l0, pred_flag_l1;
351*c83a76b0SSuyog Pawar                 WORD32 tmp_x, tmp_y, mvd_x, mvd_y, mvp_x, mvp_y;
352*c83a76b0SSuyog Pawar                 WORD32 two_pow_16, two_pow_15;
353*c83a76b0SSuyog Pawar 
354*c83a76b0SSuyog Pawar                 ihevcd_mv_pred(ps_mv_ctxt, pu4_top_pu_idx, pu4_left_pu_idx,
355*c83a76b0SSuyog Pawar                                pu4_top_left_pu_idx, nbr_pu_idx_strd,
356*c83a76b0SSuyog Pawar                                ps_pu, u1_lb_avail, u1_l_avail,
357*c83a76b0SSuyog Pawar                                u1_tr_avail, u1_t_avail, u1_tl_avail,
358*c83a76b0SSuyog Pawar                                &s_pred_mv);
359*c83a76b0SSuyog Pawar 
360*c83a76b0SSuyog Pawar                 pred_flag_l0 = (ps_pu->b2_pred_mode != PRED_L1);
361*c83a76b0SSuyog Pawar                 pred_flag_l1 = (ps_pu->b2_pred_mode != PRED_L0);
362*c83a76b0SSuyog Pawar 
363*c83a76b0SSuyog Pawar                 two_pow_16 = (1 << 16);
364*c83a76b0SSuyog Pawar                 two_pow_15 = (1 << 15);
365*c83a76b0SSuyog Pawar 
366*c83a76b0SSuyog Pawar                 /* L0 MV */
367*c83a76b0SSuyog Pawar                 if(pred_flag_l0)
368*c83a76b0SSuyog Pawar                 {
369*c83a76b0SSuyog Pawar                     mvp_x = s_pred_mv.s_l0_mv.i2_mvx;
370*c83a76b0SSuyog Pawar                     mvp_y = s_pred_mv.s_l0_mv.i2_mvy;
371*c83a76b0SSuyog Pawar                     mvd_x = ps_pu->mv.s_l0_mv.i2_mvx;
372*c83a76b0SSuyog Pawar                     mvd_y = ps_pu->mv.s_l0_mv.i2_mvy;
373*c83a76b0SSuyog Pawar 
374*c83a76b0SSuyog Pawar                     tmp_x = (mvp_x + mvd_x + two_pow_16) & (two_pow_16 - 1);
375*c83a76b0SSuyog Pawar                     tmp_x = tmp_x >= two_pow_15 ?
376*c83a76b0SSuyog Pawar                                     (tmp_x - two_pow_16) : tmp_x;
377*c83a76b0SSuyog Pawar                     ps_pu->mv.s_l0_mv.i2_mvx = tmp_x;
378*c83a76b0SSuyog Pawar                     tmp_y = (mvp_y + mvd_y + two_pow_16) & (two_pow_16 - 1);
379*c83a76b0SSuyog Pawar                     tmp_y = tmp_y >= two_pow_15 ?
380*c83a76b0SSuyog Pawar                                     (tmp_y - two_pow_16) : tmp_y;
381*c83a76b0SSuyog Pawar                     ps_pu->mv.s_l0_mv.i2_mvy = tmp_y;
382*c83a76b0SSuyog Pawar                 }
383*c83a76b0SSuyog Pawar                 /* L1 MV */
384*c83a76b0SSuyog Pawar                 if(pred_flag_l1)
385*c83a76b0SSuyog Pawar                 {
386*c83a76b0SSuyog Pawar                     mvp_x = s_pred_mv.s_l1_mv.i2_mvx;
387*c83a76b0SSuyog Pawar                     mvp_y = s_pred_mv.s_l1_mv.i2_mvy;
388*c83a76b0SSuyog Pawar                     mvd_x = ps_pu->mv.s_l1_mv.i2_mvx;
389*c83a76b0SSuyog Pawar                     mvd_y = ps_pu->mv.s_l1_mv.i2_mvy;
390*c83a76b0SSuyog Pawar 
391*c83a76b0SSuyog Pawar                     tmp_x = (mvp_x + mvd_x + two_pow_16) & (two_pow_16 - 1);
392*c83a76b0SSuyog Pawar                     tmp_x = tmp_x >= two_pow_15 ?
393*c83a76b0SSuyog Pawar                                     (tmp_x - two_pow_16) : tmp_x;
394*c83a76b0SSuyog Pawar                     ps_pu->mv.s_l1_mv.i2_mvx = tmp_x;
395*c83a76b0SSuyog Pawar                     tmp_y = (mvp_y + mvd_y + two_pow_16) & (two_pow_16 - 1);
396*c83a76b0SSuyog Pawar                     tmp_y = tmp_y >= two_pow_15 ?
397*c83a76b0SSuyog Pawar                                     (tmp_y - two_pow_16) : tmp_y;
398*c83a76b0SSuyog Pawar                     ps_pu->mv.s_l1_mv.i2_mvy = tmp_y;
399*c83a76b0SSuyog Pawar                 }
400*c83a76b0SSuyog Pawar             }
401*c83a76b0SSuyog Pawar             else
402*c83a76b0SSuyog Pawar             {
403*c83a76b0SSuyog Pawar                 WORD32 part_mode;
404*c83a76b0SSuyog Pawar                 WORD32 part_idx;
405*c83a76b0SSuyog Pawar                 part_mode = ps_pu->b3_part_mode;
406*c83a76b0SSuyog Pawar                 //TODO: Get part_idx
407*c83a76b0SSuyog Pawar                 part_idx = ps_pu->b2_part_idx;
408*c83a76b0SSuyog Pawar 
409*c83a76b0SSuyog Pawar                 ihevcd_mv_merge(ps_mv_ctxt, pu4_top_pu_idx, pu4_left_pu_idx,
410*c83a76b0SSuyog Pawar                                 nbr_pu_idx_strd, ps_pu, part_mode,
411*c83a76b0SSuyog Pawar                                 part_idx, pu_wd_single_mcl, pu_ht_single_mcl,
412*c83a76b0SSuyog Pawar                                 pu_x_in_4x4_single_mcl << 2, pu_y_in_4x4_single_mcl << 2,
413*c83a76b0SSuyog Pawar                                 single_mcl_flag, u1_lb_avail, u1_l_avail, u1_tr_avail,
414*c83a76b0SSuyog Pawar                                 u1_t_avail, u1_tl_avail);
415*c83a76b0SSuyog Pawar 
416*c83a76b0SSuyog Pawar                 if(PRED_BI == ps_pu->b2_pred_mode)
417*c83a76b0SSuyog Pawar                 {
418*c83a76b0SSuyog Pawar                     if(((ps_pu->b3_part_mode == PART_2NxN) && (pu_wd == 8))
419*c83a76b0SSuyog Pawar                                     || ((ps_pu->b3_part_mode == PART_Nx2N)
420*c83a76b0SSuyog Pawar                                                     && (pu_ht == 8)))
421*c83a76b0SSuyog Pawar                     {
422*c83a76b0SSuyog Pawar                         ps_pu->b2_pred_mode = PRED_L0;
423*c83a76b0SSuyog Pawar                     }
424*c83a76b0SSuyog Pawar                 }
425*c83a76b0SSuyog Pawar             }
426*c83a76b0SSuyog Pawar         }
427*c83a76b0SSuyog Pawar 
428*c83a76b0SSuyog Pawar         {
429*c83a76b0SSuyog Pawar             slice_header_t *ps_slice_hdr;
430*c83a76b0SSuyog Pawar             pic_buf_t *ps_pic_buf_l0, *ps_pic_buf_l1;
431*c83a76b0SSuyog Pawar             ps_slice_hdr = ps_mv_ctxt->ps_slice_hdr;
432*c83a76b0SSuyog Pawar             ps_pic_buf_l0 = (pic_buf_t *)((ps_slice_hdr->as_ref_pic_list0[ps_pu->mv.i1_l0_ref_idx].pv_pic_buf));
433*c83a76b0SSuyog Pawar             ps_pic_buf_l1 = (pic_buf_t *)((ps_slice_hdr->as_ref_pic_list1[ps_pu->mv.i1_l1_ref_idx].pv_pic_buf));
434*c83a76b0SSuyog Pawar             ps_pu->mv.i1_l0_ref_pic_buf_id = ps_pic_buf_l0->u1_buf_id;
435*c83a76b0SSuyog Pawar             if(BSLICE == ps_slice_hdr->i1_slice_type)
436*c83a76b0SSuyog Pawar             {
437*c83a76b0SSuyog Pawar                 ps_pu->mv.i1_l1_ref_pic_buf_id = ps_pic_buf_l1->u1_buf_id;
438*c83a76b0SSuyog Pawar             }
439*c83a76b0SSuyog Pawar         }
440*c83a76b0SSuyog Pawar 
441*c83a76b0SSuyog Pawar         /* Neighbor availability inside CTB */
442*c83a76b0SSuyog Pawar         /* 1bit per 4x4. Indicates whether that 4x4 block has been reconstructed(avialable) */
443*c83a76b0SSuyog Pawar         /* Used for neighbor availability in intra pred */
444*c83a76b0SSuyog Pawar         {
445*c83a76b0SSuyog Pawar             WORD32 trans_in_min_tu;
446*c83a76b0SSuyog Pawar             UWORD32 cur_tu_in_bits;
447*c83a76b0SSuyog Pawar             UWORD32 cur_tu_avail_flag;
448*c83a76b0SSuyog Pawar 
449*c83a76b0SSuyog Pawar             trans_in_min_tu = pu_wd / MIN_PU_SIZE;
450*c83a76b0SSuyog Pawar             cur_tu_in_bits = (1 << trans_in_min_tu) - 1;
451*c83a76b0SSuyog Pawar             cur_tu_in_bits = cur_tu_in_bits << (32 - trans_in_min_tu);
452*c83a76b0SSuyog Pawar 
453*c83a76b0SSuyog Pawar             cur_tu_avail_flag = cur_tu_in_bits >> (pu_x_in_4x4 + 1);
454*c83a76b0SSuyog Pawar 
455*c83a76b0SSuyog Pawar             for(i = 0; i < pu_ht / MIN_PU_SIZE; i++)
456*c83a76b0SSuyog Pawar                 au4_nbr_avail[1 + pu_y_in_4x4 + i] |= cur_tu_avail_flag;
457*c83a76b0SSuyog Pawar         }
458*c83a76b0SSuyog Pawar 
459*c83a76b0SSuyog Pawar         /* Neighbor PU idx update inside CTB */
460*c83a76b0SSuyog Pawar         /* 1byte per 4x4. Indicates the PU idx that 4x4 block belongs to */
461*c83a76b0SSuyog Pawar 
462*c83a76b0SSuyog Pawar         {
463*c83a76b0SSuyog Pawar             WORD32 row, col;
464*c83a76b0SSuyog Pawar             UWORD32 cur_pu_idx;
465*c83a76b0SSuyog Pawar             WORD32 offset;
466*c83a76b0SSuyog Pawar             cur_pu_idx = ctb_start_pu_idx + pu_cnt;
467*c83a76b0SSuyog Pawar 
468*c83a76b0SSuyog Pawar             offset = (1 + pu_x_in_4x4 + 0) + (1 + pu_y_in_4x4 + 0) * nbr_pu_idx_strd;
469*c83a76b0SSuyog Pawar 
470*c83a76b0SSuyog Pawar             for(row = 0; row < pu_ht / MIN_PU_SIZE; row++)
471*c83a76b0SSuyog Pawar             {
472*c83a76b0SSuyog Pawar                 for(col = 0; col < pu_wd / MIN_PU_SIZE; col++)
473*c83a76b0SSuyog Pawar                 {
474*c83a76b0SSuyog Pawar                     pu4_nbr_pu_idx[offset + col] = cur_pu_idx;
475*c83a76b0SSuyog Pawar                 }
476*c83a76b0SSuyog Pawar                 offset += nbr_pu_idx_strd;
477*c83a76b0SSuyog Pawar             }
478*c83a76b0SSuyog Pawar         }
479*c83a76b0SSuyog Pawar 
480*c83a76b0SSuyog Pawar     }
481*c83a76b0SSuyog Pawar 
482*c83a76b0SSuyog Pawar     /* Updating Top and Left pointers */
483*c83a76b0SSuyog Pawar     {
484*c83a76b0SSuyog Pawar         WORD32 offset_top, offset_left;
485*c83a76b0SSuyog Pawar 
486*c83a76b0SSuyog Pawar         offset_left = ctb_size_in_min_pu + (0 + 1) * nbr_pu_idx_strd;
487*c83a76b0SSuyog Pawar         offset_top = ctb_size_in_min_pu * nbr_pu_idx_strd + 0 + 1;
488*c83a76b0SSuyog Pawar 
489*c83a76b0SSuyog Pawar         /* Top Left */
490*c83a76b0SSuyog Pawar         /* saving top left before updating top ptr, as updating top ptr will overwrite the top left for the next ctb */
491*c83a76b0SSuyog Pawar         pu4_ctb_top_left_pu_idx[0] = pu4_ctb_top_pu_idx[ctb_size_in_min_pu - 1];
492*c83a76b0SSuyog Pawar 
493*c83a76b0SSuyog Pawar         for(i = 0; i < ctb_size_in_min_pu; i++)
494*c83a76b0SSuyog Pawar         {
495*c83a76b0SSuyog Pawar             /* Left */
496*c83a76b0SSuyog Pawar             /* Last column of au4_nbr_pu_idx */
497*c83a76b0SSuyog Pawar             pu4_ctb_left_pu_idx[i] = pu4_nbr_pu_idx[offset_left];
498*c83a76b0SSuyog Pawar             /* Top */
499*c83a76b0SSuyog Pawar             /* Last row of au4_nbr_pu_idx */
500*c83a76b0SSuyog Pawar             pu4_ctb_top_pu_idx[i] = pu4_nbr_pu_idx[offset_top];
501*c83a76b0SSuyog Pawar 
502*c83a76b0SSuyog Pawar             offset_left += nbr_pu_idx_strd;
503*c83a76b0SSuyog Pawar             offset_top += 1;
504*c83a76b0SSuyog Pawar         }
505*c83a76b0SSuyog Pawar     }
506*c83a76b0SSuyog Pawar 
507*c83a76b0SSuyog Pawar     /* Updating the CTB level PU idx (Used for collocated MV pred)*/
508*c83a76b0SSuyog Pawar     {
509*c83a76b0SSuyog Pawar         WORD32 ctb_row, ctb_col, index_pic_map, index_nbr_map;
510*c83a76b0SSuyog Pawar         WORD32 first_pu_of_ctb;
511*c83a76b0SSuyog Pawar         first_pu_of_ctb = pu4_nbr_pu_idx[1 + nbr_pu_idx_strd];
512*c83a76b0SSuyog Pawar         UWORD32 cur_ctb_ht_in_min_pu = MIN(((ps_sps->i2_pic_height_in_luma_samples
513*c83a76b0SSuyog Pawar                     - (ps_mv_ctxt->i4_ctb_y << ps_sps->i1_log2_ctb_size)) / MIN_PU_SIZE), ctb_size_in_min_pu);
514*c83a76b0SSuyog Pawar         UWORD32 cur_ctb_wd_in_min_pu = MIN(((ps_sps->i2_pic_width_in_luma_samples
515*c83a76b0SSuyog Pawar                     - (ps_mv_ctxt->i4_ctb_x << ps_sps->i1_log2_ctb_size)) / MIN_PU_SIZE), ctb_size_in_min_pu);
516*c83a76b0SSuyog Pawar 
517*c83a76b0SSuyog Pawar         index_pic_map = 0 * ctb_size_in_min_pu + 0;
518*c83a76b0SSuyog Pawar         index_nbr_map = (0 + 1) * nbr_pu_idx_strd + (0 + 1);
519*c83a76b0SSuyog Pawar 
520*c83a76b0SSuyog Pawar         for(ctb_row = 0; ctb_row < cur_ctb_ht_in_min_pu; ctb_row++)
521*c83a76b0SSuyog Pawar         {
522*c83a76b0SSuyog Pawar             for(ctb_col = 0; ctb_col < cur_ctb_wd_in_min_pu; ctb_col++)
523*c83a76b0SSuyog Pawar             {
524*c83a76b0SSuyog Pawar                 pu1_pic_pu_map_ctb[index_pic_map + ctb_col] = pu4_nbr_pu_idx[index_nbr_map + ctb_col]
525*c83a76b0SSuyog Pawar                                 - first_pu_of_ctb;
526*c83a76b0SSuyog Pawar             }
527*c83a76b0SSuyog Pawar             index_pic_map += ctb_size_in_min_pu;
528*c83a76b0SSuyog Pawar             index_nbr_map += nbr_pu_idx_strd;
529*c83a76b0SSuyog Pawar         }
530*c83a76b0SSuyog Pawar     }
531*c83a76b0SSuyog Pawar     return num_pu_per_ctb;
532*c83a76b0SSuyog Pawar }
533