1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar *
3*c83a76b0SSuyog Pawar * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*c83a76b0SSuyog Pawar *
5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar *
9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar *
11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar * limitations under the License.
16*c83a76b0SSuyog Pawar *
17*c83a76b0SSuyog Pawar ******************************************************************************/
18*c83a76b0SSuyog Pawar /**
19*c83a76b0SSuyog Pawar *******************************************************************************
20*c83a76b0SSuyog Pawar * @file
21*c83a76b0SSuyog Pawar * ihevc_boundary_strength.c
22*c83a76b0SSuyog Pawar *
23*c83a76b0SSuyog Pawar * @brief
24*c83a76b0SSuyog Pawar * Contains functions for computing boundary strength
25*c83a76b0SSuyog Pawar *
26*c83a76b0SSuyog Pawar * @author
27*c83a76b0SSuyog Pawar * Harish
28*c83a76b0SSuyog Pawar *
29*c83a76b0SSuyog Pawar * @par List of Functions:
30*c83a76b0SSuyog Pawar *
31*c83a76b0SSuyog Pawar * @remarks
32*c83a76b0SSuyog Pawar * None
33*c83a76b0SSuyog Pawar *
34*c83a76b0SSuyog Pawar *******************************************************************************
35*c83a76b0SSuyog Pawar */
36*c83a76b0SSuyog Pawar /*****************************************************************************/
37*c83a76b0SSuyog Pawar /* File Includes */
38*c83a76b0SSuyog Pawar /*****************************************************************************/
39*c83a76b0SSuyog Pawar #include <stdio.h>
40*c83a76b0SSuyog Pawar #include <stddef.h>
41*c83a76b0SSuyog Pawar #include <stdlib.h>
42*c83a76b0SSuyog Pawar #include <string.h>
43*c83a76b0SSuyog Pawar
44*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
45*c83a76b0SSuyog Pawar #include "iv.h"
46*c83a76b0SSuyog Pawar #include "ivd.h"
47*c83a76b0SSuyog Pawar #include "ihevcd_cxa.h"
48*c83a76b0SSuyog Pawar #include "ithread.h"
49*c83a76b0SSuyog Pawar
50*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
51*c83a76b0SSuyog Pawar #include "ihevc_debug.h"
52*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
53*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
54*c83a76b0SSuyog Pawar #include "ihevc_macros.h"
55*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
56*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
57*c83a76b0SSuyog Pawar
58*c83a76b0SSuyog Pawar #include "ihevc_error.h"
59*c83a76b0SSuyog Pawar #include "ihevc_common_tables.h"
60*c83a76b0SSuyog Pawar
61*c83a76b0SSuyog Pawar #include "ihevcd_trace.h"
62*c83a76b0SSuyog Pawar #include "ihevcd_defs.h"
63*c83a76b0SSuyog Pawar #include "ihevcd_function_selector.h"
64*c83a76b0SSuyog Pawar #include "ihevcd_structs.h"
65*c83a76b0SSuyog Pawar #include "ihevcd_error.h"
66*c83a76b0SSuyog Pawar #include "ihevcd_nal.h"
67*c83a76b0SSuyog Pawar #include "ihevcd_bitstream.h"
68*c83a76b0SSuyog Pawar #include "ihevcd_job_queue.h"
69*c83a76b0SSuyog Pawar #include "ihevcd_utils.h"
70*c83a76b0SSuyog Pawar #include "ihevcd_profile.h"
71*c83a76b0SSuyog Pawar
72*c83a76b0SSuyog Pawar /*****************************************************************************/
73*c83a76b0SSuyog Pawar /* Function Prototypes */
74*c83a76b0SSuyog Pawar /*****************************************************************************/
75*c83a76b0SSuyog Pawar
76*c83a76b0SSuyog Pawar
/*
 * Neighbour availability helpers.
 *
 * Availability is kept as a 5-bit mask, one bit per neighbour:
 *   0x10 bottom-left, 0x08 left, 0x04 top-left, 0x02 top, 0x01 top-right.
 *
 * The macro argument is parenthesised in every expansion so that the
 * assignment always applies to the whole argument expression.
 *
 * Each expansion deliberately keeps its trailing ';' so that existing call
 * sites (with or without their own ';') keep compiling. As a consequence
 * these macros must not be used as the unbraced body of an if/else.
 */
#define SET_NGBHR_ALL_AVAIL(avail)          (avail) = 0x1F;

#define SET_NGBHR_BOTLEFT_NOTAVAIL(avail)   (avail) &= ~0x10;
#define SET_NGBHR_LEFT_NOTAVAIL(avail)      (avail) &= ~0x8;
#define SET_NGBHR_TOPLEFT_NOTAVAIL(avail)   (avail) &= ~0x4;
#define SET_NGBHR_TOP_NOTAVAIL(avail)       (avail) &= ~0x2;
#define SET_NGBHR_TOPRIGHT_NOTAVAIL(avail)  (avail) &= ~0x1;
84*c83a76b0SSuyog Pawar
ihevcd_pu_boundary_strength(pu_t * ps_pu,pu_t * ps_ngbr_pu)85*c83a76b0SSuyog Pawar WORD32 ihevcd_pu_boundary_strength(pu_t *ps_pu,
86*c83a76b0SSuyog Pawar pu_t *ps_ngbr_pu)
87*c83a76b0SSuyog Pawar {
88*c83a76b0SSuyog Pawar WORD32 i4_bs;
89*c83a76b0SSuyog Pawar UWORD32 l0_ref_pic_buf_id, l1_ref_pic_buf_id;
90*c83a76b0SSuyog Pawar UWORD32 ngbr_l0_ref_pic_buf_id, ngbr_l1_ref_pic_buf_id;
91*c83a76b0SSuyog Pawar
92*c83a76b0SSuyog Pawar WORD16 i2_mv_x0, i2_mv_y0, i2_mv_x1, i2_mv_y1;
93*c83a76b0SSuyog Pawar WORD16 i2_ngbr_mv_x0, i2_ngbr_mv_y0, i2_ngbr_mv_x1, i2_ngbr_mv_y1;
94*c83a76b0SSuyog Pawar
95*c83a76b0SSuyog Pawar WORD32 num_mv, ngbr_num_mv;
96*c83a76b0SSuyog Pawar
97*c83a76b0SSuyog Pawar num_mv = (PRED_BI == ps_pu->b2_pred_mode) ? 2 : 1;
98*c83a76b0SSuyog Pawar ngbr_num_mv = (PRED_BI == ps_ngbr_pu->b2_pred_mode) ? 2 : 1;
99*c83a76b0SSuyog Pawar
100*c83a76b0SSuyog Pawar l0_ref_pic_buf_id = ps_pu->mv.i1_l0_ref_pic_buf_id;
101*c83a76b0SSuyog Pawar l1_ref_pic_buf_id = ps_pu->mv.i1_l1_ref_pic_buf_id;
102*c83a76b0SSuyog Pawar ngbr_l0_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l0_ref_pic_buf_id;
103*c83a76b0SSuyog Pawar ngbr_l1_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l1_ref_pic_buf_id;
104*c83a76b0SSuyog Pawar
105*c83a76b0SSuyog Pawar
106*c83a76b0SSuyog Pawar i2_mv_x0 = ps_pu->mv.s_l0_mv.i2_mvx;
107*c83a76b0SSuyog Pawar i2_mv_y0 = ps_pu->mv.s_l0_mv.i2_mvy;
108*c83a76b0SSuyog Pawar i2_mv_x1 = ps_pu->mv.s_l1_mv.i2_mvx;
109*c83a76b0SSuyog Pawar i2_mv_y1 = ps_pu->mv.s_l1_mv.i2_mvy;
110*c83a76b0SSuyog Pawar
111*c83a76b0SSuyog Pawar i2_ngbr_mv_x0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvx;
112*c83a76b0SSuyog Pawar i2_ngbr_mv_y0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvy;
113*c83a76b0SSuyog Pawar i2_ngbr_mv_x1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvx;
114*c83a76b0SSuyog Pawar i2_ngbr_mv_y1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvy;
115*c83a76b0SSuyog Pawar
116*c83a76b0SSuyog Pawar
117*c83a76b0SSuyog Pawar /* If two motion vectors are used */
118*c83a76b0SSuyog Pawar if((2 == num_mv) &&
119*c83a76b0SSuyog Pawar (2 == ngbr_num_mv))
120*c83a76b0SSuyog Pawar {
121*c83a76b0SSuyog Pawar if((l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id) ||
122*c83a76b0SSuyog Pawar (l0_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id))
123*c83a76b0SSuyog Pawar {
124*c83a76b0SSuyog Pawar if(l0_ref_pic_buf_id != l1_ref_pic_buf_id) /* Different L0 and L1 */
125*c83a76b0SSuyog Pawar {
126*c83a76b0SSuyog Pawar if(l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id)
127*c83a76b0SSuyog Pawar {
128*c83a76b0SSuyog Pawar i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x0) < 4) &&
129*c83a76b0SSuyog Pawar (ABS(i2_mv_y0 - i2_ngbr_mv_y0) < 4) &&
130*c83a76b0SSuyog Pawar (ABS(i2_mv_x1 - i2_ngbr_mv_x1) < 4) &&
131*c83a76b0SSuyog Pawar (ABS(i2_mv_y1 - i2_ngbr_mv_y1) < 4) ? 0 : 1;
132*c83a76b0SSuyog Pawar }
133*c83a76b0SSuyog Pawar else
134*c83a76b0SSuyog Pawar {
135*c83a76b0SSuyog Pawar i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x1) < 4) &&
136*c83a76b0SSuyog Pawar (ABS(i2_mv_y0 - i2_ngbr_mv_y1) < 4) &&
137*c83a76b0SSuyog Pawar (ABS(i2_mv_x1 - i2_ngbr_mv_x0) < 4) &&
138*c83a76b0SSuyog Pawar (ABS(i2_mv_y1 - i2_ngbr_mv_y0) < 4) ? 0 : 1;
139*c83a76b0SSuyog Pawar }
140*c83a76b0SSuyog Pawar }
141*c83a76b0SSuyog Pawar else /* Same L0 and L1 */
142*c83a76b0SSuyog Pawar {
143*c83a76b0SSuyog Pawar i4_bs = ((ABS(i2_mv_x0 - i2_ngbr_mv_x0) >= 4) ||
144*c83a76b0SSuyog Pawar (ABS(i2_mv_y0 - i2_ngbr_mv_y0) >= 4) ||
145*c83a76b0SSuyog Pawar (ABS(i2_mv_x1 - i2_ngbr_mv_x1) >= 4) ||
146*c83a76b0SSuyog Pawar (ABS(i2_mv_y1 - i2_ngbr_mv_y1) >= 4)) &&
147*c83a76b0SSuyog Pawar ((ABS(i2_mv_x0 - i2_ngbr_mv_x1) >= 4) ||
148*c83a76b0SSuyog Pawar (ABS(i2_mv_y0 - i2_ngbr_mv_y1) >= 4) ||
149*c83a76b0SSuyog Pawar (ABS(i2_mv_x1 - i2_ngbr_mv_x0) >= 4) ||
150*c83a76b0SSuyog Pawar (ABS(i2_mv_y1 - i2_ngbr_mv_y0) >= 4)) ? 1 : 0;
151*c83a76b0SSuyog Pawar }
152*c83a76b0SSuyog Pawar }
153*c83a76b0SSuyog Pawar else /* If the reference pictures used are different */
154*c83a76b0SSuyog Pawar {
155*c83a76b0SSuyog Pawar i4_bs = 1;
156*c83a76b0SSuyog Pawar }
157*c83a76b0SSuyog Pawar }
158*c83a76b0SSuyog Pawar
159*c83a76b0SSuyog Pawar /* If one motion vector is used in both PUs */
160*c83a76b0SSuyog Pawar else if((1 == num_mv) &&
161*c83a76b0SSuyog Pawar (1 == ngbr_num_mv))
162*c83a76b0SSuyog Pawar {
163*c83a76b0SSuyog Pawar WORD16 i2_mv_x, i2_mv_y;
164*c83a76b0SSuyog Pawar WORD16 i2_ngbr_mv_x, i2_ngbr_mv_y;
165*c83a76b0SSuyog Pawar UWORD32 ref_pic_buf_id, ngbr_ref_pic_buf_id;
166*c83a76b0SSuyog Pawar
167*c83a76b0SSuyog Pawar if(PRED_L0 == ps_pu->b2_pred_mode)
168*c83a76b0SSuyog Pawar {
169*c83a76b0SSuyog Pawar i2_mv_x = i2_mv_x0;
170*c83a76b0SSuyog Pawar i2_mv_y = i2_mv_y0;
171*c83a76b0SSuyog Pawar ref_pic_buf_id = l0_ref_pic_buf_id;
172*c83a76b0SSuyog Pawar }
173*c83a76b0SSuyog Pawar else
174*c83a76b0SSuyog Pawar {
175*c83a76b0SSuyog Pawar i2_mv_x = i2_mv_x1;
176*c83a76b0SSuyog Pawar i2_mv_y = i2_mv_y1;
177*c83a76b0SSuyog Pawar ref_pic_buf_id = l1_ref_pic_buf_id;
178*c83a76b0SSuyog Pawar }
179*c83a76b0SSuyog Pawar
180*c83a76b0SSuyog Pawar if(PRED_L0 == ps_ngbr_pu->b2_pred_mode)
181*c83a76b0SSuyog Pawar {
182*c83a76b0SSuyog Pawar i2_ngbr_mv_x = i2_ngbr_mv_x0;
183*c83a76b0SSuyog Pawar i2_ngbr_mv_y = i2_ngbr_mv_y0;
184*c83a76b0SSuyog Pawar ngbr_ref_pic_buf_id = ngbr_l0_ref_pic_buf_id;
185*c83a76b0SSuyog Pawar }
186*c83a76b0SSuyog Pawar else
187*c83a76b0SSuyog Pawar {
188*c83a76b0SSuyog Pawar i2_ngbr_mv_x = i2_ngbr_mv_x1;
189*c83a76b0SSuyog Pawar i2_ngbr_mv_y = i2_ngbr_mv_y1;
190*c83a76b0SSuyog Pawar ngbr_ref_pic_buf_id = ngbr_l1_ref_pic_buf_id;
191*c83a76b0SSuyog Pawar }
192*c83a76b0SSuyog Pawar
193*c83a76b0SSuyog Pawar i4_bs = (ref_pic_buf_id == ngbr_ref_pic_buf_id) &&
194*c83a76b0SSuyog Pawar (ABS(i2_mv_x - i2_ngbr_mv_x) < 4) &&
195*c83a76b0SSuyog Pawar (ABS(i2_mv_y - i2_ngbr_mv_y) < 4) ? 0 : 1;
196*c83a76b0SSuyog Pawar }
197*c83a76b0SSuyog Pawar
198*c83a76b0SSuyog Pawar /* If the no. of motion vectors is not the same */
199*c83a76b0SSuyog Pawar else
200*c83a76b0SSuyog Pawar {
201*c83a76b0SSuyog Pawar i4_bs = 1;
202*c83a76b0SSuyog Pawar }
203*c83a76b0SSuyog Pawar
204*c83a76b0SSuyog Pawar
205*c83a76b0SSuyog Pawar return i4_bs;
206*c83a76b0SSuyog Pawar }
207*c83a76b0SSuyog Pawar
208*c83a76b0SSuyog Pawar /* QP is also populated in the same function */
ihevcd_ctb_boundary_strength_islice(bs_ctxt_t * ps_bs_ctxt)209*c83a76b0SSuyog Pawar WORD32 ihevcd_ctb_boundary_strength_islice(bs_ctxt_t *ps_bs_ctxt)
210*c83a76b0SSuyog Pawar {
211*c83a76b0SSuyog Pawar pps_t *ps_pps;
212*c83a76b0SSuyog Pawar sps_t *ps_sps;
213*c83a76b0SSuyog Pawar tu_t *ps_tu;
214*c83a76b0SSuyog Pawar UWORD32 *pu4_vert_bs;
215*c83a76b0SSuyog Pawar UWORD32 *pu4_horz_bs;
216*c83a76b0SSuyog Pawar WORD32 bs_strd;
217*c83a76b0SSuyog Pawar WORD32 vert_bs0_tmp;
218*c83a76b0SSuyog Pawar WORD32 horz_bs0_tmp;
219*c83a76b0SSuyog Pawar UWORD8 *pu1_qp;
220*c83a76b0SSuyog Pawar WORD32 qp_strd;
221*c83a76b0SSuyog Pawar UWORD32 u4_qp_const_in_ctb;
222*c83a76b0SSuyog Pawar WORD32 ctb_indx;
223*c83a76b0SSuyog Pawar WORD32 i4_tu_cnt;
224*c83a76b0SSuyog Pawar WORD32 log2_ctb_size;
225*c83a76b0SSuyog Pawar WORD32 ctb_size;
226*c83a76b0SSuyog Pawar
227*c83a76b0SSuyog Pawar WORD8 i1_loop_filter_across_tiles_enabled_flag;
228*c83a76b0SSuyog Pawar WORD8 i1_loop_filter_across_slices_enabled_flag;
229*c83a76b0SSuyog Pawar
230*c83a76b0SSuyog Pawar WORD32 i;
231*c83a76b0SSuyog Pawar
232*c83a76b0SSuyog Pawar PROFILE_DISABLE_BOUNDARY_STRENGTH();
233*c83a76b0SSuyog Pawar
234*c83a76b0SSuyog Pawar ps_pps = ps_bs_ctxt->ps_pps;
235*c83a76b0SSuyog Pawar ps_sps = ps_bs_ctxt->ps_sps;
236*c83a76b0SSuyog Pawar i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag;
237*c83a76b0SSuyog Pawar i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag;
238*c83a76b0SSuyog Pawar i4_tu_cnt = ps_bs_ctxt->i4_ctb_tu_cnt;
239*c83a76b0SSuyog Pawar
240*c83a76b0SSuyog Pawar log2_ctb_size = ps_sps->i1_log2_ctb_size;
241*c83a76b0SSuyog Pawar ctb_size = (1 << log2_ctb_size);
242*c83a76b0SSuyog Pawar
243*c83a76b0SSuyog Pawar /* strides are in units of number of bytes */
244*c83a76b0SSuyog Pawar /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
245*c83a76b0SSuyog Pawar bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
246*c83a76b0SSuyog Pawar
247*c83a76b0SSuyog Pawar pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs +
248*c83a76b0SSuyog Pawar (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
249*c83a76b0SSuyog Pawar ps_bs_ctxt->i4_ctb_y * bs_strd);
250*c83a76b0SSuyog Pawar pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs +
251*c83a76b0SSuyog Pawar (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
252*c83a76b0SSuyog Pawar ps_bs_ctxt->i4_ctb_y * bs_strd);
253*c83a76b0SSuyog Pawar
254*c83a76b0SSuyog Pawar /* ctb_size/8 elements per CTB */
255*c83a76b0SSuyog Pawar qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
256*c83a76b0SSuyog Pawar pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
257*c83a76b0SSuyog Pawar
258*c83a76b0SSuyog Pawar ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y;
259*c83a76b0SSuyog Pawar u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7));
260*c83a76b0SSuyog Pawar
261*c83a76b0SSuyog Pawar vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
262*c83a76b0SSuyog Pawar horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
263*c83a76b0SSuyog Pawar
264*c83a76b0SSuyog Pawar /* ctb_size/8 is the number of edges per CTB
265*c83a76b0SSuyog Pawar * ctb_size/4 is the number of BS values needed per edge
266*c83a76b0SSuyog Pawar * divided by 8 for the number of bytes
267*c83a76b0SSuyog Pawar * 2 is the number of bits needed for each BS value */
268*c83a76b0SSuyog Pawar /*
269*c83a76b0SSuyog Pawar memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) / 8 * 2 );
270*c83a76b0SSuyog Pawar memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) / 8 * 2 );
271*c83a76b0SSuyog Pawar */
272*c83a76b0SSuyog Pawar memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + ((ctb_size >> 5) << 1));
273*c83a76b0SSuyog Pawar memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7)));
274*c83a76b0SSuyog Pawar
275*c83a76b0SSuyog Pawar /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */
276*c83a76b0SSuyog Pawar if(0 != ps_bs_ctxt->i4_ctb_x)
277*c83a76b0SSuyog Pawar {
278*c83a76b0SSuyog Pawar pu4_vert_bs[0] |= vert_bs0_tmp;
279*c83a76b0SSuyog Pawar }
280*c83a76b0SSuyog Pawar
281*c83a76b0SSuyog Pawar /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */
282*c83a76b0SSuyog Pawar if(0 != ps_bs_ctxt->i4_ctb_y)
283*c83a76b0SSuyog Pawar {
284*c83a76b0SSuyog Pawar pu4_horz_bs[0] |= horz_bs0_tmp;
285*c83a76b0SSuyog Pawar }
286*c83a76b0SSuyog Pawar
287*c83a76b0SSuyog Pawar ps_tu = ps_bs_ctxt->ps_tu;
288*c83a76b0SSuyog Pawar
289*c83a76b0SSuyog Pawar /* Populating the QP array - if const_qp_in_ctb flag is one, set only the first element */
290*c83a76b0SSuyog Pawar if(u4_qp_const_in_ctb)
291*c83a76b0SSuyog Pawar pu1_qp[0] = ps_tu->b7_qp;
292*c83a76b0SSuyog Pawar
293*c83a76b0SSuyog Pawar for(i = 0; i < i4_tu_cnt; i++)
294*c83a76b0SSuyog Pawar {
295*c83a76b0SSuyog Pawar WORD32 start_pos_x;
296*c83a76b0SSuyog Pawar WORD32 start_pos_y;
297*c83a76b0SSuyog Pawar WORD32 tu_size;
298*c83a76b0SSuyog Pawar
299*c83a76b0SSuyog Pawar
300*c83a76b0SSuyog Pawar UWORD32 u4_bs;
301*c83a76b0SSuyog Pawar ps_tu = ps_bs_ctxt->ps_tu + i;
302*c83a76b0SSuyog Pawar
303*c83a76b0SSuyog Pawar /* start_pos_x and start_pos_y are in units of min TU size (4x4) */
304*c83a76b0SSuyog Pawar start_pos_x = ps_tu->b4_pos_x;
305*c83a76b0SSuyog Pawar start_pos_y = ps_tu->b4_pos_y;
306*c83a76b0SSuyog Pawar
307*c83a76b0SSuyog Pawar tu_size = 1 << (ps_tu->b3_size + 2);
308*c83a76b0SSuyog Pawar tu_size >>= 2; /* TU size divided by 4 */
309*c83a76b0SSuyog Pawar
310*c83a76b0SSuyog Pawar u4_bs = DUP_LSB_10(tu_size);
311*c83a76b0SSuyog Pawar
312*c83a76b0SSuyog Pawar /* Only if the current edge falls on 8 pixel grid set BS */
313*c83a76b0SSuyog Pawar if(0 == (start_pos_x & 1))
314*c83a76b0SSuyog Pawar {
315*c83a76b0SSuyog Pawar WORD32 shift;
316*c83a76b0SSuyog Pawar shift = start_pos_y * 2;
317*c83a76b0SSuyog Pawar /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
318*c83a76b0SSuyog Pawar * will reduce to the following assuming ctb size is one of 16, 32 and 64
319*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
320*c83a76b0SSuyog Pawar */
321*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
322*c83a76b0SSuyog Pawar shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
323*c83a76b0SSuyog Pawar pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
324*c83a76b0SSuyog Pawar }
325*c83a76b0SSuyog Pawar /* Only if the current edge falls on 8 pixel grid set BS */
326*c83a76b0SSuyog Pawar if(0 == (start_pos_y & 1))
327*c83a76b0SSuyog Pawar {
328*c83a76b0SSuyog Pawar WORD32 shift;
329*c83a76b0SSuyog Pawar shift = start_pos_x * 2;
330*c83a76b0SSuyog Pawar /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
331*c83a76b0SSuyog Pawar * will reduce to the following assuming ctb size is one of 16, 32 and 64
332*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
333*c83a76b0SSuyog Pawar */
334*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
335*c83a76b0SSuyog Pawar shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
336*c83a76b0SSuyog Pawar pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
337*c83a76b0SSuyog Pawar }
338*c83a76b0SSuyog Pawar
339*c83a76b0SSuyog Pawar /* Populating the QP array */
340*c83a76b0SSuyog Pawar if(0 == u4_qp_const_in_ctb)
341*c83a76b0SSuyog Pawar {
342*c83a76b0SSuyog Pawar if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1))
343*c83a76b0SSuyog Pawar {
344*c83a76b0SSuyog Pawar WORD32 row, col;
345*c83a76b0SSuyog Pawar for(row = start_pos_y; row < start_pos_y + tu_size; row += 2)
346*c83a76b0SSuyog Pawar {
347*c83a76b0SSuyog Pawar for(col = start_pos_x; col < start_pos_x + tu_size; col += 2)
348*c83a76b0SSuyog Pawar {
349*c83a76b0SSuyog Pawar pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp;
350*c83a76b0SSuyog Pawar }
351*c83a76b0SSuyog Pawar }
352*c83a76b0SSuyog Pawar }
353*c83a76b0SSuyog Pawar }
354*c83a76b0SSuyog Pawar
355*c83a76b0SSuyog Pawar }
356*c83a76b0SSuyog Pawar {
357*c83a76b0SSuyog Pawar /*Determine if the slice is dependent, and is its left neighbor belongs to the same slice, in a different tile*/
358*c83a76b0SSuyog Pawar UWORD32 ctb_addr;
359*c83a76b0SSuyog Pawar WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1;
360*c83a76b0SSuyog Pawar /* If left neighbor is not available, then set BS for entire first column to zero */
361*c83a76b0SSuyog Pawar if(!ps_pps->i1_tiles_enabled_flag)
362*c83a76b0SSuyog Pawar {
363*c83a76b0SSuyog Pawar if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
364*c83a76b0SSuyog Pawar (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
365*c83a76b0SSuyog Pawar (0 == ps_bs_ctxt->i4_ctb_x))
366*c83a76b0SSuyog Pawar {
367*c83a76b0SSuyog Pawar pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
368*c83a76b0SSuyog Pawar }
369*c83a76b0SSuyog Pawar }
370*c83a76b0SSuyog Pawar else
371*c83a76b0SSuyog Pawar {
372*c83a76b0SSuyog Pawar //If across-tiles is disabled
373*c83a76b0SSuyog Pawar if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x))
374*c83a76b0SSuyog Pawar {
375*c83a76b0SSuyog Pawar pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
376*c83a76b0SSuyog Pawar }
377*c83a76b0SSuyog Pawar else
378*c83a76b0SSuyog Pawar {
379*c83a76b0SSuyog Pawar ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
380*c83a76b0SSuyog Pawar slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
381*c83a76b0SSuyog Pawar if(ps_bs_ctxt->i4_ctb_x)
382*c83a76b0SSuyog Pawar {
383*c83a76b0SSuyog Pawar ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
384*c83a76b0SSuyog Pawar left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
385*c83a76b0SSuyog Pawar }
386*c83a76b0SSuyog Pawar /*If the 1st slice in a new tile is a dependent slice*/
387*c83a76b0SSuyog Pawar if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx)))
388*c83a76b0SSuyog Pawar {
389*c83a76b0SSuyog Pawar /* Removed reduntant checks */
390*c83a76b0SSuyog Pawar if((0 == i1_loop_filter_across_slices_enabled_flag && (
391*c83a76b0SSuyog Pawar ((slice_idx != left_slice_idx) && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
392*c83a76b0SSuyog Pawar ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) ||
393*c83a76b0SSuyog Pawar (0 == ps_bs_ctxt->i4_ctb_x))
394*c83a76b0SSuyog Pawar {
395*c83a76b0SSuyog Pawar pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
396*c83a76b0SSuyog Pawar }
397*c83a76b0SSuyog Pawar }
398*c83a76b0SSuyog Pawar }
399*c83a76b0SSuyog Pawar }
400*c83a76b0SSuyog Pawar
401*c83a76b0SSuyog Pawar ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
402*c83a76b0SSuyog Pawar slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
403*c83a76b0SSuyog Pawar if(ps_bs_ctxt->i4_ctb_y)
404*c83a76b0SSuyog Pawar {
405*c83a76b0SSuyog Pawar ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
406*c83a76b0SSuyog Pawar top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
407*c83a76b0SSuyog Pawar }
408*c83a76b0SSuyog Pawar
409*c83a76b0SSuyog Pawar /* If top neighbor is not available, then set BS for entire first row to zero */
410*c83a76b0SSuyog Pawar /* Removed reduntant checks */
411*c83a76b0SSuyog Pawar if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y)
412*c83a76b0SSuyog Pawar || (0 == i1_loop_filter_across_slices_enabled_flag && ((slice_idx != top_slice_idx)))
413*c83a76b0SSuyog Pawar || (0 == ps_bs_ctxt->i4_ctb_y))
414*c83a76b0SSuyog Pawar {
415*c83a76b0SSuyog Pawar pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
416*c83a76b0SSuyog Pawar }
417*c83a76b0SSuyog Pawar }
418*c83a76b0SSuyog Pawar
419*c83a76b0SSuyog Pawar /**
420*c83a76b0SSuyog Pawar * Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB
421*c83a76b0SSuyog Pawar * (They might have been set to non zero values because of CBF of the current CTB)
422*c83a76b0SSuyog Pawar * This block might not be needed for I slices*/
423*c83a76b0SSuyog Pawar {
424*c83a76b0SSuyog Pawar WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3;
425*c83a76b0SSuyog Pawar WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3;
426*c83a76b0SSuyog Pawar if(num_rows_remaining < (ctb_size >> 3))
427*c83a76b0SSuyog Pawar {
428*c83a76b0SSuyog Pawar /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
429*c83a76b0SSuyog Pawar * will reduce to the following assuming ctb size is one of 16, 32 and 64
430*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
431*c83a76b0SSuyog Pawar */
432*c83a76b0SSuyog Pawar WORD32 offset;
433*c83a76b0SSuyog Pawar offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2;
434*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
435*c83a76b0SSuyog Pawar offset += (num_rows_remaining & 1) << (log2_ctb_size - 4);
436*c83a76b0SSuyog Pawar
437*c83a76b0SSuyog Pawar memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4));
438*c83a76b0SSuyog Pawar }
439*c83a76b0SSuyog Pawar
440*c83a76b0SSuyog Pawar if(num_cols_remaining < (ctb_size >> 3))
441*c83a76b0SSuyog Pawar {
442*c83a76b0SSuyog Pawar /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
443*c83a76b0SSuyog Pawar * will reduce to the following assuming ctb size is one of 16, 32 and 64
444*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
445*c83a76b0SSuyog Pawar */
446*c83a76b0SSuyog Pawar
447*c83a76b0SSuyog Pawar WORD32 offset;
448*c83a76b0SSuyog Pawar offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2;
449*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
450*c83a76b0SSuyog Pawar offset += (num_cols_remaining & 1) << (log2_ctb_size - 4);
451*c83a76b0SSuyog Pawar
452*c83a76b0SSuyog Pawar memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4));
453*c83a76b0SSuyog Pawar }
454*c83a76b0SSuyog Pawar }
455*c83a76b0SSuyog Pawar
456*c83a76b0SSuyog Pawar return 0;
457*c83a76b0SSuyog Pawar }
ihevcd_ctb_boundary_strength_pbslice(bs_ctxt_t * ps_bs_ctxt)458*c83a76b0SSuyog Pawar WORD32 ihevcd_ctb_boundary_strength_pbslice(bs_ctxt_t *ps_bs_ctxt)
459*c83a76b0SSuyog Pawar {
460*c83a76b0SSuyog Pawar sps_t *ps_sps;
461*c83a76b0SSuyog Pawar pps_t *ps_pps;
462*c83a76b0SSuyog Pawar WORD32 cur_ctb_idx, next_ctb_idx = 0;
463*c83a76b0SSuyog Pawar WORD32 i4_tu_cnt;
464*c83a76b0SSuyog Pawar WORD32 i4_pu_cnt;
465*c83a76b0SSuyog Pawar tu_t *ps_tu;
466*c83a76b0SSuyog Pawar
467*c83a76b0SSuyog Pawar UWORD32 *pu4_vert_bs;
468*c83a76b0SSuyog Pawar UWORD32 *pu4_horz_bs;
469*c83a76b0SSuyog Pawar WORD32 bs_strd;
470*c83a76b0SSuyog Pawar WORD32 vert_bs0_tmp;
471*c83a76b0SSuyog Pawar WORD32 horz_bs0_tmp;
472*c83a76b0SSuyog Pawar UWORD8 *pu1_qp;
473*c83a76b0SSuyog Pawar WORD32 qp_strd;
474*c83a76b0SSuyog Pawar UWORD32 u4_qp_const_in_ctb;
475*c83a76b0SSuyog Pawar WORD32 ctb_indx;
476*c83a76b0SSuyog Pawar WORD32 log2_ctb_size;
477*c83a76b0SSuyog Pawar WORD32 ctb_size;
478*c83a76b0SSuyog Pawar
479*c83a76b0SSuyog Pawar WORD32 i;
480*c83a76b0SSuyog Pawar WORD8 i1_loop_filter_across_tiles_enabled_flag;
481*c83a76b0SSuyog Pawar WORD8 i1_loop_filter_across_slices_enabled_flag;
482*c83a76b0SSuyog Pawar
483*c83a76b0SSuyog Pawar PROFILE_DISABLE_BOUNDARY_STRENGTH();
484*c83a76b0SSuyog Pawar
485*c83a76b0SSuyog Pawar ps_sps = ps_bs_ctxt->ps_sps;
486*c83a76b0SSuyog Pawar ps_pps = ps_bs_ctxt->ps_pps;
487*c83a76b0SSuyog Pawar
488*c83a76b0SSuyog Pawar log2_ctb_size = ps_sps->i1_log2_ctb_size;
489*c83a76b0SSuyog Pawar ctb_size = (1 << log2_ctb_size);
490*c83a76b0SSuyog Pawar
491*c83a76b0SSuyog Pawar /* strides are in units of number of bytes */
492*c83a76b0SSuyog Pawar /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
493*c83a76b0SSuyog Pawar bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
494*c83a76b0SSuyog Pawar
495*c83a76b0SSuyog Pawar pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs +
496*c83a76b0SSuyog Pawar (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
497*c83a76b0SSuyog Pawar ps_bs_ctxt->i4_ctb_y * bs_strd);
498*c83a76b0SSuyog Pawar pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs +
499*c83a76b0SSuyog Pawar (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
500*c83a76b0SSuyog Pawar ps_bs_ctxt->i4_ctb_y * bs_strd);
501*c83a76b0SSuyog Pawar
502*c83a76b0SSuyog Pawar vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
503*c83a76b0SSuyog Pawar horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
504*c83a76b0SSuyog Pawar
505*c83a76b0SSuyog Pawar ps_tu = ps_bs_ctxt->ps_tu;
506*c83a76b0SSuyog Pawar
507*c83a76b0SSuyog Pawar /* ctb_size/8 elements per CTB */
508*c83a76b0SSuyog Pawar qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
509*c83a76b0SSuyog Pawar pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
510*c83a76b0SSuyog Pawar
511*c83a76b0SSuyog Pawar ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y;
512*c83a76b0SSuyog Pawar u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7));
513*c83a76b0SSuyog Pawar
514*c83a76b0SSuyog Pawar i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag;
515*c83a76b0SSuyog Pawar i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag;
516*c83a76b0SSuyog Pawar
517*c83a76b0SSuyog Pawar /* ctb_size/8 is the number of edges per CTB
518*c83a76b0SSuyog Pawar * ctb_size/4 is the number of BS values needed per edge
519*c83a76b0SSuyog Pawar * divided by 8 for the number of bytes
520*c83a76b0SSuyog Pawar * 2 is the number of bits needed for each BS value */
521*c83a76b0SSuyog Pawar /*
522*c83a76b0SSuyog Pawar memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) * 2 / 8 );
523*c83a76b0SSuyog Pawar memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) * 2 / 8 );
524*c83a76b0SSuyog Pawar */
525*c83a76b0SSuyog Pawar memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + (ctb_size >> 4));
526*c83a76b0SSuyog Pawar memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7)));
527*c83a76b0SSuyog Pawar
528*c83a76b0SSuyog Pawar /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */
529*c83a76b0SSuyog Pawar if(0 != ps_bs_ctxt->i4_ctb_x)
530*c83a76b0SSuyog Pawar {
531*c83a76b0SSuyog Pawar pu4_vert_bs[0] |= vert_bs0_tmp;
532*c83a76b0SSuyog Pawar }
533*c83a76b0SSuyog Pawar
534*c83a76b0SSuyog Pawar /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */
535*c83a76b0SSuyog Pawar if(0 != ps_bs_ctxt->i4_ctb_y)
536*c83a76b0SSuyog Pawar {
537*c83a76b0SSuyog Pawar pu4_horz_bs[0] |= horz_bs0_tmp;
538*c83a76b0SSuyog Pawar }
539*c83a76b0SSuyog Pawar /* pu4_horz_bs[bs_strd / 4] corresponds to pu4_horz_bs[0] of the bottom CTB */
540*c83a76b0SSuyog Pawar *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) = 0;
541*c83a76b0SSuyog Pawar
542*c83a76b0SSuyog Pawar cur_ctb_idx = ps_bs_ctxt->i4_ctb_x
543*c83a76b0SSuyog Pawar + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
544*c83a76b0SSuyog Pawar next_ctb_idx = ps_bs_ctxt->i4_next_tu_ctb_cnt;
545*c83a76b0SSuyog Pawar if(1 == ps_bs_ctxt->ps_codec->i4_num_cores)
546*c83a76b0SSuyog Pawar {
547*c83a76b0SSuyog Pawar i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx % RESET_TU_BUF_NCTB];
548*c83a76b0SSuyog Pawar }
549*c83a76b0SSuyog Pawar else
550*c83a76b0SSuyog Pawar {
551*c83a76b0SSuyog Pawar i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx];
552*c83a76b0SSuyog Pawar }
553*c83a76b0SSuyog Pawar
554*c83a76b0SSuyog Pawar ps_tu = ps_bs_ctxt->ps_tu;
555*c83a76b0SSuyog Pawar if(u4_qp_const_in_ctb)
556*c83a76b0SSuyog Pawar pu1_qp[0] = ps_tu->b7_qp;
557*c83a76b0SSuyog Pawar
558*c83a76b0SSuyog Pawar /* For all TUs in the CTB For left and top edges, check if there are coded coefficients on either sides of the edge */
559*c83a76b0SSuyog Pawar for(i = 0; i < i4_tu_cnt; i++)
560*c83a76b0SSuyog Pawar {
561*c83a76b0SSuyog Pawar WORD32 start_pos_x;
562*c83a76b0SSuyog Pawar WORD32 start_pos_y;
563*c83a76b0SSuyog Pawar WORD32 end_pos_x;
564*c83a76b0SSuyog Pawar WORD32 end_pos_y;
565*c83a76b0SSuyog Pawar WORD32 tu_size;
566*c83a76b0SSuyog Pawar UWORD32 u4_bs;
567*c83a76b0SSuyog Pawar WORD32 intra_flag;
568*c83a76b0SSuyog Pawar UWORD8 *pu1_pic_intra_flag;
569*c83a76b0SSuyog Pawar
570*c83a76b0SSuyog Pawar ps_tu = ps_bs_ctxt->ps_tu + i;
571*c83a76b0SSuyog Pawar
572*c83a76b0SSuyog Pawar start_pos_x = ps_tu->b4_pos_x;
573*c83a76b0SSuyog Pawar start_pos_y = ps_tu->b4_pos_y;
574*c83a76b0SSuyog Pawar
575*c83a76b0SSuyog Pawar tu_size = 1 << (ps_tu->b3_size + 2);
576*c83a76b0SSuyog Pawar tu_size >>= 2;
577*c83a76b0SSuyog Pawar
578*c83a76b0SSuyog Pawar end_pos_x = start_pos_x + tu_size;
579*c83a76b0SSuyog Pawar end_pos_y = start_pos_y + tu_size;
580*c83a76b0SSuyog Pawar
581*c83a76b0SSuyog Pawar {
582*c83a76b0SSuyog Pawar WORD32 tu_abs_x = (ps_bs_ctxt->i4_ctb_x << log2_ctb_size) + (start_pos_x << 2);
583*c83a76b0SSuyog Pawar WORD32 tu_abs_y = (ps_bs_ctxt->i4_ctb_y << log2_ctb_size) + (start_pos_y << 2);
584*c83a76b0SSuyog Pawar
585*c83a76b0SSuyog Pawar WORD32 numbytes_row = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
586*c83a76b0SSuyog Pawar
587*c83a76b0SSuyog Pawar pu1_pic_intra_flag = ps_bs_ctxt->ps_codec->pu1_pic_intra_flag;
588*c83a76b0SSuyog Pawar pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row;
589*c83a76b0SSuyog Pawar pu1_pic_intra_flag += (tu_abs_x >> 6);
590*c83a76b0SSuyog Pawar
591*c83a76b0SSuyog Pawar intra_flag = *pu1_pic_intra_flag;
592*c83a76b0SSuyog Pawar intra_flag &= (1 << ((tu_abs_x >> 3) % 8));
593*c83a76b0SSuyog Pawar }
594*c83a76b0SSuyog Pawar if(intra_flag)
595*c83a76b0SSuyog Pawar {
596*c83a76b0SSuyog Pawar u4_bs = DUP_LSB_10(tu_size);
597*c83a76b0SSuyog Pawar
598*c83a76b0SSuyog Pawar /* Only if the current edge falls on 8 pixel grid set BS */
599*c83a76b0SSuyog Pawar if(0 == (start_pos_x & 1))
600*c83a76b0SSuyog Pawar {
601*c83a76b0SSuyog Pawar WORD32 shift;
602*c83a76b0SSuyog Pawar shift = start_pos_y * 2;
603*c83a76b0SSuyog Pawar /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
604*c83a76b0SSuyog Pawar * will reduce to the following assuming ctb size is one of 16, 32 and 64
605*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
606*c83a76b0SSuyog Pawar */
607*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
608*c83a76b0SSuyog Pawar shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
609*c83a76b0SSuyog Pawar pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
610*c83a76b0SSuyog Pawar }
611*c83a76b0SSuyog Pawar /* Only if the current edge falls on 8 pixel grid set BS */
612*c83a76b0SSuyog Pawar if(0 == (start_pos_y & 1))
613*c83a76b0SSuyog Pawar {
614*c83a76b0SSuyog Pawar WORD32 shift;
615*c83a76b0SSuyog Pawar shift = start_pos_x * 2;
616*c83a76b0SSuyog Pawar /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
617*c83a76b0SSuyog Pawar * will reduce to the following assuming ctb size is one of 16, 32 and 64
618*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
619*c83a76b0SSuyog Pawar */
620*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
621*c83a76b0SSuyog Pawar shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
622*c83a76b0SSuyog Pawar pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
623*c83a76b0SSuyog Pawar }
624*c83a76b0SSuyog Pawar }
625*c83a76b0SSuyog Pawar
626*c83a76b0SSuyog Pawar
627*c83a76b0SSuyog Pawar /* If the current TU is coded then set both top edge and left edge BS to 1 and go to next TU */
628*c83a76b0SSuyog Pawar if(ps_tu->b1_y_cbf)
629*c83a76b0SSuyog Pawar {
630*c83a76b0SSuyog Pawar u4_bs = DUP_LSB_01(tu_size);
631*c83a76b0SSuyog Pawar
632*c83a76b0SSuyog Pawar /* Only if the current edge falls on 8 pixel grid set BS */
633*c83a76b0SSuyog Pawar if(0 == (start_pos_x & 1))
634*c83a76b0SSuyog Pawar {
635*c83a76b0SSuyog Pawar WORD32 shift;
636*c83a76b0SSuyog Pawar shift = start_pos_y * 2;
637*c83a76b0SSuyog Pawar /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
638*c83a76b0SSuyog Pawar * will reduce to the following assuming ctb size is one of 16, 32 and 64
639*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
640*c83a76b0SSuyog Pawar */
641*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
642*c83a76b0SSuyog Pawar shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
643*c83a76b0SSuyog Pawar pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
644*c83a76b0SSuyog Pawar }
645*c83a76b0SSuyog Pawar /* Only if the current edge falls on 8 pixel grid set BS */
646*c83a76b0SSuyog Pawar if(0 == (start_pos_y & 1))
647*c83a76b0SSuyog Pawar {
648*c83a76b0SSuyog Pawar WORD32 shift;
649*c83a76b0SSuyog Pawar shift = start_pos_x * 2;
650*c83a76b0SSuyog Pawar /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
651*c83a76b0SSuyog Pawar * will reduce to the following assuming ctb size is one of 16, 32 and 64
652*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
653*c83a76b0SSuyog Pawar */
654*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
655*c83a76b0SSuyog Pawar shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
656*c83a76b0SSuyog Pawar pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
657*c83a76b0SSuyog Pawar }
658*c83a76b0SSuyog Pawar /* Only if the current edge falls on 8 pixel grid set BS */
659*c83a76b0SSuyog Pawar if(0 == (end_pos_x & 1))
660*c83a76b0SSuyog Pawar {
661*c83a76b0SSuyog Pawar if(!(ctb_size / 8 == (end_pos_x >> 1) && ps_bs_ctxt->i4_ctb_x == ps_sps->i2_pic_wd_in_ctb - 1))
662*c83a76b0SSuyog Pawar {
663*c83a76b0SSuyog Pawar WORD32 shift;
664*c83a76b0SSuyog Pawar shift = start_pos_y * 2;
665*c83a76b0SSuyog Pawar shift += (((end_pos_x >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1));
666*c83a76b0SSuyog Pawar pu4_vert_bs[end_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
667*c83a76b0SSuyog Pawar }
668*c83a76b0SSuyog Pawar }
669*c83a76b0SSuyog Pawar /* Only if the current edge falls on 8 pixel grid set BS */
670*c83a76b0SSuyog Pawar if(0 == (end_pos_y & 1))
671*c83a76b0SSuyog Pawar {
672*c83a76b0SSuyog Pawar /* If end_pos_y corresponds to the bottom of the CTB, write to pu4_horz_bs[0] of the bottom CTB */
673*c83a76b0SSuyog Pawar if(ctb_size / 8 == (end_pos_y >> 1))
674*c83a76b0SSuyog Pawar {
675*c83a76b0SSuyog Pawar *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) |= (u4_bs << (start_pos_x * 2));
676*c83a76b0SSuyog Pawar }
677*c83a76b0SSuyog Pawar else
678*c83a76b0SSuyog Pawar {
679*c83a76b0SSuyog Pawar WORD32 shift;
680*c83a76b0SSuyog Pawar shift = start_pos_x * 2;
681*c83a76b0SSuyog Pawar shift += (((end_pos_y >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1));
682*c83a76b0SSuyog Pawar pu4_horz_bs[end_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
683*c83a76b0SSuyog Pawar }
684*c83a76b0SSuyog Pawar }
685*c83a76b0SSuyog Pawar }
686*c83a76b0SSuyog Pawar
687*c83a76b0SSuyog Pawar if(0 == u4_qp_const_in_ctb)
688*c83a76b0SSuyog Pawar {
689*c83a76b0SSuyog Pawar if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1))
690*c83a76b0SSuyog Pawar {
691*c83a76b0SSuyog Pawar WORD32 row, col;
692*c83a76b0SSuyog Pawar for(row = start_pos_y; row < start_pos_y + tu_size; row += 2)
693*c83a76b0SSuyog Pawar {
694*c83a76b0SSuyog Pawar for(col = start_pos_x; col < start_pos_x + tu_size; col += 2)
695*c83a76b0SSuyog Pawar {
696*c83a76b0SSuyog Pawar pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp;
697*c83a76b0SSuyog Pawar }
698*c83a76b0SSuyog Pawar }
699*c83a76b0SSuyog Pawar }
700*c83a76b0SSuyog Pawar }
701*c83a76b0SSuyog Pawar }
702*c83a76b0SSuyog Pawar
703*c83a76b0SSuyog Pawar /* For all PUs in the CTB,
704*c83a76b0SSuyog Pawar For left and top edges, compute BS */
705*c83a76b0SSuyog Pawar
706*c83a76b0SSuyog Pawar cur_ctb_idx = ps_bs_ctxt->i4_ctb_x
707*c83a76b0SSuyog Pawar + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
708*c83a76b0SSuyog Pawar
709*c83a76b0SSuyog Pawar {
710*c83a76b0SSuyog Pawar WORD32 next_ctb_idx;
711*c83a76b0SSuyog Pawar next_ctb_idx = ps_bs_ctxt->i4_next_pu_ctb_cnt;
712*c83a76b0SSuyog Pawar i4_pu_cnt = ps_bs_ctxt->pu4_pic_pu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_pu_idx[cur_ctb_idx];
713*c83a76b0SSuyog Pawar }
714*c83a76b0SSuyog Pawar
715*c83a76b0SSuyog Pawar for(i = 0; i < i4_pu_cnt; i++)
716*c83a76b0SSuyog Pawar {
717*c83a76b0SSuyog Pawar WORD32 start_pos_x;
718*c83a76b0SSuyog Pawar WORD32 start_pos_y;
719*c83a76b0SSuyog Pawar WORD32 end_pos_x;
720*c83a76b0SSuyog Pawar WORD32 end_pos_y;
721*c83a76b0SSuyog Pawar WORD32 pu_wd, pu_ht;
722*c83a76b0SSuyog Pawar UWORD32 u4_bs;
723*c83a76b0SSuyog Pawar pu_t *ps_pu = ps_bs_ctxt->ps_pu + i;
724*c83a76b0SSuyog Pawar pu_t *ps_ngbr_pu;
725*c83a76b0SSuyog Pawar UWORD32 u4_ngbr_pu_indx;
726*c83a76b0SSuyog Pawar
727*c83a76b0SSuyog Pawar start_pos_x = ps_pu->b4_pos_x;
728*c83a76b0SSuyog Pawar start_pos_y = ps_pu->b4_pos_y;
729*c83a76b0SSuyog Pawar
730*c83a76b0SSuyog Pawar pu_wd = (ps_pu->b4_wd + 1);
731*c83a76b0SSuyog Pawar pu_ht = (ps_pu->b4_ht + 1);
732*c83a76b0SSuyog Pawar
733*c83a76b0SSuyog Pawar end_pos_x = start_pos_x + pu_wd;
734*c83a76b0SSuyog Pawar end_pos_y = start_pos_y + pu_ht;
735*c83a76b0SSuyog Pawar
736*c83a76b0SSuyog Pawar /* If the current PU is intra, set Boundary strength as 2 for both top and left edge */
737*c83a76b0SSuyog Pawar /* Need not mask the BS to zero even if it was set to 1 already since BS 2 and 3 are assumed to be the same in leaf level functions */
738*c83a76b0SSuyog Pawar if(ps_pu->b1_intra_flag)
739*c83a76b0SSuyog Pawar {
740*c83a76b0SSuyog Pawar u4_bs = DUP_LSB_10(pu_ht);
741*c83a76b0SSuyog Pawar
742*c83a76b0SSuyog Pawar /* Only if the current edge falls on 8 pixel grid set BS */
743*c83a76b0SSuyog Pawar if(0 == (start_pos_x & 1))
744*c83a76b0SSuyog Pawar {
745*c83a76b0SSuyog Pawar WORD32 shift;
746*c83a76b0SSuyog Pawar shift = start_pos_y * 2;
747*c83a76b0SSuyog Pawar /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
748*c83a76b0SSuyog Pawar * will reduce to the following assuming ctb size is one of 16, 32 and 64
749*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
750*c83a76b0SSuyog Pawar */
751*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
752*c83a76b0SSuyog Pawar shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
753*c83a76b0SSuyog Pawar pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
754*c83a76b0SSuyog Pawar }
755*c83a76b0SSuyog Pawar
756*c83a76b0SSuyog Pawar u4_bs = DUP_LSB_10(pu_wd);
757*c83a76b0SSuyog Pawar
758*c83a76b0SSuyog Pawar /* Only if the current edge falls on 8 pixel grid set BS */
759*c83a76b0SSuyog Pawar if(0 == (start_pos_y & 1))
760*c83a76b0SSuyog Pawar {
761*c83a76b0SSuyog Pawar WORD32 shift;
762*c83a76b0SSuyog Pawar shift = start_pos_x * 2;
763*c83a76b0SSuyog Pawar /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
764*c83a76b0SSuyog Pawar * will reduce to the following assuming ctb size is one of 16, 32 and 64
765*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
766*c83a76b0SSuyog Pawar */
767*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
768*c83a76b0SSuyog Pawar shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
769*c83a76b0SSuyog Pawar pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
770*c83a76b0SSuyog Pawar }
771*c83a76b0SSuyog Pawar }
772*c83a76b0SSuyog Pawar
773*c83a76b0SSuyog Pawar else
774*c83a76b0SSuyog Pawar {
775*c83a76b0SSuyog Pawar /* Vertical edge */
776*c83a76b0SSuyog Pawar /* Process only if the edge is not a frame edge */
777*c83a76b0SSuyog Pawar if(0 != ps_bs_ctxt->i4_ctb_x + start_pos_x)
778*c83a76b0SSuyog Pawar {
779*c83a76b0SSuyog Pawar do
780*c83a76b0SSuyog Pawar {
781*c83a76b0SSuyog Pawar WORD32 pu_ngbr_ht;
782*c83a76b0SSuyog Pawar WORD32 min_pu_ht;
783*c83a76b0SSuyog Pawar WORD32 ngbr_end_pos_y;
784*c83a76b0SSuyog Pawar UWORD32 ngbr_pu_idx_strd;
785*c83a76b0SSuyog Pawar ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
786*c83a76b0SSuyog Pawar u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y + 1) * ngbr_pu_idx_strd + (start_pos_x)];
787*c83a76b0SSuyog Pawar ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx;
788*c83a76b0SSuyog Pawar
789*c83a76b0SSuyog Pawar pu_ngbr_ht = ps_ngbr_pu->b4_ht + 1;
790*c83a76b0SSuyog Pawar ngbr_end_pos_y = ps_ngbr_pu->b4_pos_y + pu_ngbr_ht;
791*c83a76b0SSuyog Pawar
792*c83a76b0SSuyog Pawar min_pu_ht = MIN(ngbr_end_pos_y, end_pos_y) - start_pos_y;
793*c83a76b0SSuyog Pawar
794*c83a76b0SSuyog Pawar if(ps_ngbr_pu->b1_intra_flag)
795*c83a76b0SSuyog Pawar {
796*c83a76b0SSuyog Pawar u4_bs = DUP_LSB_10(min_pu_ht);
797*c83a76b0SSuyog Pawar
798*c83a76b0SSuyog Pawar /* Only if the current edge falls on 8 pixel grid set BS */
799*c83a76b0SSuyog Pawar if(0 == (start_pos_x & 1))
800*c83a76b0SSuyog Pawar {
801*c83a76b0SSuyog Pawar WORD32 shift;
802*c83a76b0SSuyog Pawar shift = start_pos_y * 2;
803*c83a76b0SSuyog Pawar /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
804*c83a76b0SSuyog Pawar * will reduce to the following assuming ctb size is one of 16, 32 and 64
805*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
806*c83a76b0SSuyog Pawar */
807*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
808*c83a76b0SSuyog Pawar shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
809*c83a76b0SSuyog Pawar pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
810*c83a76b0SSuyog Pawar }
811*c83a76b0SSuyog Pawar }
812*c83a76b0SSuyog Pawar else
813*c83a76b0SSuyog Pawar {
814*c83a76b0SSuyog Pawar u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu);
815*c83a76b0SSuyog Pawar if(u4_bs)
816*c83a76b0SSuyog Pawar {
817*c83a76b0SSuyog Pawar u4_bs = DUP_LSB_01(min_pu_ht);
818*c83a76b0SSuyog Pawar if(0 == (start_pos_x & 1))
819*c83a76b0SSuyog Pawar {
820*c83a76b0SSuyog Pawar WORD32 shift;
821*c83a76b0SSuyog Pawar shift = start_pos_y * 2;
822*c83a76b0SSuyog Pawar /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
823*c83a76b0SSuyog Pawar * will reduce to the following assuming ctb size is one of 16, 32 and 64
824*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
825*c83a76b0SSuyog Pawar */
826*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
827*c83a76b0SSuyog Pawar shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
828*c83a76b0SSuyog Pawar pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
829*c83a76b0SSuyog Pawar }
830*c83a76b0SSuyog Pawar }
831*c83a76b0SSuyog Pawar }
832*c83a76b0SSuyog Pawar
833*c83a76b0SSuyog Pawar pu_ht -= min_pu_ht;
834*c83a76b0SSuyog Pawar start_pos_y += min_pu_ht;
835*c83a76b0SSuyog Pawar }while(pu_ht > 0);
836*c83a76b0SSuyog Pawar
837*c83a76b0SSuyog Pawar /* Reinitialising since the values are updated in the previous loop */
838*c83a76b0SSuyog Pawar pu_ht = ps_pu->b4_ht + 1;
839*c83a76b0SSuyog Pawar start_pos_y = ps_pu->b4_pos_y;
840*c83a76b0SSuyog Pawar }
841*c83a76b0SSuyog Pawar
842*c83a76b0SSuyog Pawar /* Horizontal edge */
843*c83a76b0SSuyog Pawar /* Process only if the edge is not a frame edge */
844*c83a76b0SSuyog Pawar if(0 != ps_bs_ctxt->i4_ctb_y + start_pos_y)
845*c83a76b0SSuyog Pawar {
846*c83a76b0SSuyog Pawar do
847*c83a76b0SSuyog Pawar {
848*c83a76b0SSuyog Pawar WORD32 pu_ngbr_wd;
849*c83a76b0SSuyog Pawar WORD32 min_pu_wd;
850*c83a76b0SSuyog Pawar WORD32 ngbr_end_pos_x;
851*c83a76b0SSuyog Pawar UWORD32 ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
852*c83a76b0SSuyog Pawar u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y)*ngbr_pu_idx_strd + (start_pos_x + 1)];
853*c83a76b0SSuyog Pawar ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx;
854*c83a76b0SSuyog Pawar
855*c83a76b0SSuyog Pawar pu_ngbr_wd = ps_ngbr_pu->b4_wd + 1;
856*c83a76b0SSuyog Pawar ngbr_end_pos_x = ps_ngbr_pu->b4_pos_x + pu_ngbr_wd;
857*c83a76b0SSuyog Pawar
858*c83a76b0SSuyog Pawar min_pu_wd = MIN(ngbr_end_pos_x, end_pos_x) - start_pos_x;
859*c83a76b0SSuyog Pawar
860*c83a76b0SSuyog Pawar if(ps_ngbr_pu->b1_intra_flag)
861*c83a76b0SSuyog Pawar {
862*c83a76b0SSuyog Pawar u4_bs = DUP_LSB_10(min_pu_wd);
863*c83a76b0SSuyog Pawar
864*c83a76b0SSuyog Pawar /* Only if the current edge falls on 8 pixel grid set BS */
865*c83a76b0SSuyog Pawar if(0 == (start_pos_y & 1))
866*c83a76b0SSuyog Pawar {
867*c83a76b0SSuyog Pawar WORD32 shift;
868*c83a76b0SSuyog Pawar shift = start_pos_x * 2;
869*c83a76b0SSuyog Pawar /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
870*c83a76b0SSuyog Pawar * will reduce to the following assuming ctb size is one of 16, 32 and 64
871*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
872*c83a76b0SSuyog Pawar */
873*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
874*c83a76b0SSuyog Pawar shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
875*c83a76b0SSuyog Pawar pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
876*c83a76b0SSuyog Pawar }
877*c83a76b0SSuyog Pawar }
878*c83a76b0SSuyog Pawar else
879*c83a76b0SSuyog Pawar {
880*c83a76b0SSuyog Pawar u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu);
881*c83a76b0SSuyog Pawar if(u4_bs)
882*c83a76b0SSuyog Pawar {
883*c83a76b0SSuyog Pawar u4_bs = DUP_LSB_01(min_pu_wd);
884*c83a76b0SSuyog Pawar
885*c83a76b0SSuyog Pawar /* Only if the current edge falls on 8 pixel grid set BS */
886*c83a76b0SSuyog Pawar if(0 == (start_pos_y & 1))
887*c83a76b0SSuyog Pawar {
888*c83a76b0SSuyog Pawar WORD32 shift;
889*c83a76b0SSuyog Pawar shift = start_pos_x * 2;
890*c83a76b0SSuyog Pawar /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
891*c83a76b0SSuyog Pawar * will reduce to the following assuming ctb size is one of 16, 32 and 64
892*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
893*c83a76b0SSuyog Pawar */
894*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
895*c83a76b0SSuyog Pawar shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
896*c83a76b0SSuyog Pawar pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
897*c83a76b0SSuyog Pawar }
898*c83a76b0SSuyog Pawar }
899*c83a76b0SSuyog Pawar }
900*c83a76b0SSuyog Pawar
901*c83a76b0SSuyog Pawar pu_wd -= min_pu_wd;
902*c83a76b0SSuyog Pawar start_pos_x += min_pu_wd;
903*c83a76b0SSuyog Pawar }while(pu_wd > 0);
904*c83a76b0SSuyog Pawar
905*c83a76b0SSuyog Pawar /* Reinitialising since the values are updated in the previous loop */
906*c83a76b0SSuyog Pawar pu_wd = ps_pu->b4_wd + 1;
907*c83a76b0SSuyog Pawar start_pos_x = ps_pu->b4_pos_x;
908*c83a76b0SSuyog Pawar }
909*c83a76b0SSuyog Pawar }
910*c83a76b0SSuyog Pawar }
911*c83a76b0SSuyog Pawar
912*c83a76b0SSuyog Pawar {
913*c83a76b0SSuyog Pawar /* If left neighbor is not available, then set BS for entire first column to zero */
914*c83a76b0SSuyog Pawar UWORD32 ctb_addr;
915*c83a76b0SSuyog Pawar WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1;
916*c83a76b0SSuyog Pawar
917*c83a76b0SSuyog Pawar if(!ps_pps->i1_tiles_enabled_flag)
918*c83a76b0SSuyog Pawar {
919*c83a76b0SSuyog Pawar if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
920*c83a76b0SSuyog Pawar (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
921*c83a76b0SSuyog Pawar (0 == ps_bs_ctxt->i4_ctb_x))
922*c83a76b0SSuyog Pawar {
923*c83a76b0SSuyog Pawar pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
924*c83a76b0SSuyog Pawar }
925*c83a76b0SSuyog Pawar }
926*c83a76b0SSuyog Pawar else
927*c83a76b0SSuyog Pawar {
928*c83a76b0SSuyog Pawar if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x))
929*c83a76b0SSuyog Pawar {
930*c83a76b0SSuyog Pawar pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
931*c83a76b0SSuyog Pawar }
932*c83a76b0SSuyog Pawar else
933*c83a76b0SSuyog Pawar {
934*c83a76b0SSuyog Pawar
935*c83a76b0SSuyog Pawar ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
936*c83a76b0SSuyog Pawar slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
937*c83a76b0SSuyog Pawar
938*c83a76b0SSuyog Pawar if(ps_bs_ctxt->i4_ctb_x)
939*c83a76b0SSuyog Pawar {
940*c83a76b0SSuyog Pawar ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
941*c83a76b0SSuyog Pawar left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
942*c83a76b0SSuyog Pawar }
943*c83a76b0SSuyog Pawar
944*c83a76b0SSuyog Pawar if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx)))
945*c83a76b0SSuyog Pawar {
946*c83a76b0SSuyog Pawar /* Removed redundant checks */
947*c83a76b0SSuyog Pawar if((0 == i1_loop_filter_across_slices_enabled_flag && (
948*c83a76b0SSuyog Pawar (0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
949*c83a76b0SSuyog Pawar ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) || (0 == ps_bs_ctxt->i4_ctb_x))
950*c83a76b0SSuyog Pawar {
951*c83a76b0SSuyog Pawar pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
952*c83a76b0SSuyog Pawar }
953*c83a76b0SSuyog Pawar }
954*c83a76b0SSuyog Pawar }
955*c83a76b0SSuyog Pawar }
956*c83a76b0SSuyog Pawar
957*c83a76b0SSuyog Pawar ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
958*c83a76b0SSuyog Pawar slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
959*c83a76b0SSuyog Pawar if(ps_bs_ctxt->i4_ctb_y)
960*c83a76b0SSuyog Pawar {
961*c83a76b0SSuyog Pawar ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
962*c83a76b0SSuyog Pawar top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
963*c83a76b0SSuyog Pawar }
964*c83a76b0SSuyog Pawar /* If top neighbor is not available, then set BS for entire first row to zero */
965*c83a76b0SSuyog Pawar /* Removed redundant checks */
966*c83a76b0SSuyog Pawar if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y)
967*c83a76b0SSuyog Pawar || (0 == i1_loop_filter_across_slices_enabled_flag && ((slice_idx != top_slice_idx)))
968*c83a76b0SSuyog Pawar || (0 == ps_bs_ctxt->i4_ctb_y))
969*c83a76b0SSuyog Pawar {
970*c83a76b0SSuyog Pawar pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
971*c83a76b0SSuyog Pawar }
972*c83a76b0SSuyog Pawar }
973*c83a76b0SSuyog Pawar
974*c83a76b0SSuyog Pawar /**
975*c83a76b0SSuyog Pawar * Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB
976*c83a76b0SSuyog Pawar * (They might have been set to non-zero values because of CBF of the current CTB)*/
977*c83a76b0SSuyog Pawar {
978*c83a76b0SSuyog Pawar WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3;
979*c83a76b0SSuyog Pawar WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3;
980*c83a76b0SSuyog Pawar if(num_rows_remaining < (ctb_size >> 3))
981*c83a76b0SSuyog Pawar {
982*c83a76b0SSuyog Pawar /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
983*c83a76b0SSuyog Pawar * will reduce to the following assuming ctb size is one of 16, 32 and 64
984*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
985*c83a76b0SSuyog Pawar */
986*c83a76b0SSuyog Pawar WORD32 offset;
987*c83a76b0SSuyog Pawar offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2;
988*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
989*c83a76b0SSuyog Pawar offset += (num_rows_remaining & 1) << (log2_ctb_size - 4);
990*c83a76b0SSuyog Pawar
991*c83a76b0SSuyog Pawar memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4));
992*c83a76b0SSuyog Pawar }
993*c83a76b0SSuyog Pawar
994*c83a76b0SSuyog Pawar if(num_cols_remaining < (ctb_size >> 3))
995*c83a76b0SSuyog Pawar {
996*c83a76b0SSuyog Pawar /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
997*c83a76b0SSuyog Pawar * will reduce to the following assuming ctb size is one of 16, 32 and 64
998*c83a76b0SSuyog Pawar * and deblocking is done on 8x8 grid
999*c83a76b0SSuyog Pawar */
1000*c83a76b0SSuyog Pawar
1001*c83a76b0SSuyog Pawar WORD32 offset;
1002*c83a76b0SSuyog Pawar offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2;
1003*c83a76b0SSuyog Pawar if(6 != log2_ctb_size)
1004*c83a76b0SSuyog Pawar offset += (num_cols_remaining & 1) << (log2_ctb_size - 4);
1005*c83a76b0SSuyog Pawar
1006*c83a76b0SSuyog Pawar memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4));
1007*c83a76b0SSuyog Pawar }
1008*c83a76b0SSuyog Pawar }
1009*c83a76b0SSuyog Pawar return 0;
1010*c83a76b0SSuyog Pawar }
1011