1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar *
3*c83a76b0SSuyog Pawar * Copyright (C) 2018 The Android Open Source Project
4*c83a76b0SSuyog Pawar *
5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar *
9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar *
11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar * limitations under the License.
16*c83a76b0SSuyog Pawar *
17*c83a76b0SSuyog Pawar *****************************************************************************
18*c83a76b0SSuyog Pawar * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*c83a76b0SSuyog Pawar */
20*c83a76b0SSuyog Pawar
21*c83a76b0SSuyog Pawar /**
22*c83a76b0SSuyog Pawar *******************************************************************************
23*c83a76b0SSuyog Pawar * @file
24*c83a76b0SSuyog Pawar * ihevce_inter_pred.c
25*c83a76b0SSuyog Pawar *
26*c83a76b0SSuyog Pawar * @brief
*  Contains functions for giving out prediction samples for a given pu
28*c83a76b0SSuyog Pawar *
29*c83a76b0SSuyog Pawar * @author
30*c83a76b0SSuyog Pawar * Ittiam
31*c83a76b0SSuyog Pawar *
32*c83a76b0SSuyog Pawar * @par List of Functions:
33*c83a76b0SSuyog Pawar * - ihevc_inter_pred()
34*c83a76b0SSuyog Pawar *
35*c83a76b0SSuyog Pawar *
36*c83a76b0SSuyog Pawar *******************************************************************************
37*c83a76b0SSuyog Pawar */
38*c83a76b0SSuyog Pawar /* System include files */
39*c83a76b0SSuyog Pawar #include <stdio.h>
40*c83a76b0SSuyog Pawar #include <string.h>
41*c83a76b0SSuyog Pawar #include <stdlib.h>
42*c83a76b0SSuyog Pawar #include <assert.h>
43*c83a76b0SSuyog Pawar #include <stdarg.h>
44*c83a76b0SSuyog Pawar #include <math.h>
45*c83a76b0SSuyog Pawar
46*c83a76b0SSuyog Pawar /* User include files */
47*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
48*c83a76b0SSuyog Pawar #include "itt_video_api.h"
49*c83a76b0SSuyog Pawar #include "ihevce_api.h"
50*c83a76b0SSuyog Pawar
51*c83a76b0SSuyog Pawar #include "rc_cntrl_param.h"
52*c83a76b0SSuyog Pawar #include "rc_frame_info_collector.h"
53*c83a76b0SSuyog Pawar #include "rc_look_ahead_params.h"
54*c83a76b0SSuyog Pawar
55*c83a76b0SSuyog Pawar #include "ihevc_debug.h"
56*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
57*c83a76b0SSuyog Pawar #include "ihevc_structs.h"
58*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
59*c83a76b0SSuyog Pawar #include "ihevc_deblk.h"
60*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
61*c83a76b0SSuyog Pawar #include "ihevc_chroma_itrans_recon.h"
62*c83a76b0SSuyog Pawar #include "ihevc_chroma_intra_pred.h"
63*c83a76b0SSuyog Pawar #include "ihevc_intra_pred.h"
64*c83a76b0SSuyog Pawar #include "ihevc_inter_pred.h"
65*c83a76b0SSuyog Pawar #include "ihevc_mem_fns.h"
66*c83a76b0SSuyog Pawar #include "ihevc_padding.h"
67*c83a76b0SSuyog Pawar #include "ihevc_weighted_pred.h"
68*c83a76b0SSuyog Pawar #include "ihevc_sao.h"
69*c83a76b0SSuyog Pawar #include "ihevc_resi_trans.h"
70*c83a76b0SSuyog Pawar #include "ihevc_quant_iquant_ssd.h"
71*c83a76b0SSuyog Pawar #include "ihevc_cabac_tables.h"
72*c83a76b0SSuyog Pawar
73*c83a76b0SSuyog Pawar #include "ihevce_defs.h"
74*c83a76b0SSuyog Pawar #include "ihevce_lap_enc_structs.h"
75*c83a76b0SSuyog Pawar #include "ihevce_multi_thrd_structs.h"
76*c83a76b0SSuyog Pawar #include "ihevce_me_common_defs.h"
77*c83a76b0SSuyog Pawar #include "ihevce_had_satd.h"
78*c83a76b0SSuyog Pawar #include "ihevce_error_codes.h"
79*c83a76b0SSuyog Pawar #include "ihevce_bitstream.h"
80*c83a76b0SSuyog Pawar #include "ihevce_cabac.h"
81*c83a76b0SSuyog Pawar #include "ihevce_rdoq_macros.h"
82*c83a76b0SSuyog Pawar #include "ihevce_function_selector.h"
83*c83a76b0SSuyog Pawar #include "ihevce_enc_structs.h"
84*c83a76b0SSuyog Pawar #include "ihevce_entropy_structs.h"
85*c83a76b0SSuyog Pawar #include "ihevce_cmn_utils_instr_set_router.h"
86*c83a76b0SSuyog Pawar #include "ihevce_enc_loop_structs.h"
87*c83a76b0SSuyog Pawar #include "ihevce_inter_pred.h"
88*c83a76b0SSuyog Pawar #include "ihevc_weighted_pred.h"
89*c83a76b0SSuyog Pawar
90*c83a76b0SSuyog Pawar /*****************************************************************************/
91*c83a76b0SSuyog Pawar /* Global tables */
92*c83a76b0SSuyog Pawar /*****************************************************************************/
93*c83a76b0SSuyog Pawar
94*c83a76b0SSuyog Pawar /**
95*c83a76b0SSuyog Pawar ******************************************************************************
96*c83a76b0SSuyog Pawar * @brief Table of filter tap coefficients for HEVC luma inter prediction
97*c83a76b0SSuyog Pawar * input : sub pel mv position (dx/dy = 0:3)
98*c83a76b0SSuyog Pawar * output : filter coeffs to be used for that position
99*c83a76b0SSuyog Pawar *
100*c83a76b0SSuyog Pawar * @remarks See section 8.5.2.2.2.1 Luma sample interpolation process of HEVC
101*c83a76b0SSuyog Pawar ******************************************************************************
102*c83a76b0SSuyog Pawar */
103*c83a76b0SSuyog Pawar WORD8 gai1_hevc_luma_filter_taps[4][NTAPS_LUMA] = { { 0, 0, 0, 64, 0, 0, 0, 0 },
104*c83a76b0SSuyog Pawar { -1, 4, -10, 58, 17, -5, 1, 0 },
105*c83a76b0SSuyog Pawar { -1, 4, -11, 40, 40, -11, 4, -1 },
106*c83a76b0SSuyog Pawar { 0, 1, -5, 17, 58, -10, 4, -1 } };
107*c83a76b0SSuyog Pawar
108*c83a76b0SSuyog Pawar /**
109*c83a76b0SSuyog Pawar ******************************************************************************
110*c83a76b0SSuyog Pawar * @brief Table of filter tap coefficients for HEVC chroma inter prediction
111*c83a76b0SSuyog Pawar * input : chroma sub pel mv position (dx/dy = 0:7)
112*c83a76b0SSuyog Pawar * output : filter coeffs to be used for that position
113*c83a76b0SSuyog Pawar *
114*c83a76b0SSuyog Pawar * @remarks See section 8.5.2.2.2.2 Chroma sample interpolation process of HEVC
115*c83a76b0SSuyog Pawar The filter uses only the first four elements in each array
116*c83a76b0SSuyog Pawar ******************************************************************************
117*c83a76b0SSuyog Pawar */
118*c83a76b0SSuyog Pawar WORD8 gai1_hevc_chroma_filter_taps[8][NTAPS_CHROMA] = { { 0, 64, 0, 0 }, { -2, 58, 10, -2 },
119*c83a76b0SSuyog Pawar { -4, 54, 16, -2 }, { -6, 46, 28, -4 },
120*c83a76b0SSuyog Pawar { -4, 36, 36, -4 }, { -4, 28, 46, -6 },
121*c83a76b0SSuyog Pawar { -2, 16, 54, -4 }, { -2, 10, 58, -2 } };
122*c83a76b0SSuyog Pawar
123*c83a76b0SSuyog Pawar /*****************************************************************************/
124*c83a76b0SSuyog Pawar /* Function Definitions */
125*c83a76b0SSuyog Pawar /*****************************************************************************/
126*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*
* @brief
*  Performs luma inter prediction based on the sub pel position (dx, dy) and
*  stores the result in a 16 bit destination buffer
*
* @param[in] pu1_src
*  pointer to the source corresponding to the integer pel position of a mv
*  (left and top justified integer position)
*
* @param[out] pi2_dst
*  WORD16 pointer to the destination
*
* @param[in] src_strd
*  source buffer stride
*
* @param[in] dst_strd
*  destination buffer stride
*
* @param[in] pi2_hdst_scratch
*  scratch buffer for intermediate storage of horizontal filter output; used as
*  input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
*
*  Max scratch buffer required is w * (h + 7) * sizeof(WORD16)
*
* @param[in] ht
*  height of the prediction unit
*
* @param[in] wd
*  width of the prediction unit
*
* @param[in] dx
*  qpel position[0:3] of mv in x direction
*
* @param[in] dy
*  qpel position[0:3] of mv in y direction
*
* @param[in] ps_func_selector
*  function selector providing the interpolation function pointers that are
*  called
*
* @returns
*  none
*
* @remarks
*  In the argument list dy is passed before dx
*
*******************************************************************************
*/
void ihevce_luma_interpolate_16bit_dxdy(
    UWORD8 *pu1_src,
    WORD16 *pi2_dst,
    WORD32 src_strd,
    WORD32 dst_strd,
    WORD16 *pi2_hdst_scratch,
    WORD32 ht,
    WORD32 wd,
    WORD32 dy,
    WORD32 dx,
    func_selector_t *ps_func_selector)
{
    /* bit 0 : dx is non zero, bit 1 : dy is non zero */
    WORD32 i4_subpel_sel = ((0 != dy) << 1) | (0 != dx);

    switch(i4_subpel_sel)
    {
    case 0:
    {
        /* dx == 0 and dy == 0 : copy variant with 16 bit output is used */
        WORD8 *pi1_taps = &gai1_hevc_luma_filter_taps[0][0];

        ps_func_selector->ihevc_inter_pred_luma_copy_w16out_fptr(
            pu1_src, pi2_dst, src_strd, dst_strd, pi1_taps, ht, wd);
        break;
    }

    case 1:
    {
        /* only dx is non zero : single horizontal pass into the destination */
        WORD8 *pi1_taps_x = &gai1_hevc_luma_filter_taps[dx][0];

        ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
            pu1_src, pi2_dst, src_strd, dst_strd, pi1_taps_x, ht, wd);
        break;
    }

    case 2:
    {
        /* only dy is non zero : single vertical pass into the destination */
        WORD8 *pi1_taps_y = &gai1_hevc_luma_filter_taps[dy][0];

        ps_func_selector->ihevc_inter_pred_luma_vert_w16out_fptr(
            pu1_src, pi2_dst, src_strd, dst_strd, pi1_taps_y, ht, wd);
        break;
    }

    default:
    {
        /* dx and dy both non zero : horizontal pass into the scratch buffer */
        /* followed by a vertical pass from the scratch to the destination   */
        WORD8 *pi1_taps_x = &gai1_hevc_luma_filter_taps[dx][0];
        WORD8 *pi1_taps_y = &gai1_hevc_luma_filter_taps[dy][0];

        /* scratch rows are packed with a stride equal to the pu width */
        WORD32 i4_scratch_strd = wd;

        /* horizontal pass covers (ht + NTAPS_LUMA - 1) rows, beginning */
        /* 3 rows above the block in the source                         */
        WORD32 i4_horz_rows = ht + NTAPS_LUMA - 1;
        UWORD8 *pu1_src_top = pu1_src - (3 * src_strd);

        /* vertical pass is handed the scratch pointer advanced by 3 rows */
        WORD16 *pi2_scratch_mid = pi2_hdst_scratch + (3 * i4_scratch_strd);

        ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
            pu1_src_top,
            pi2_hdst_scratch,
            src_strd,
            i4_scratch_strd,
            pi1_taps_x,
            i4_horz_rows,
            wd);

        ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_w16out_fptr(
            pi2_scratch_mid, pi2_dst, i4_scratch_strd, dst_strd, pi1_taps_y, ht, wd);
        break;
    }
    }
}
234*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*
* @brief
*  Performs luma inter prediction based on the sub pel position (dx, dy) and
*  stores the result in an 8 bit destination buffer
*
* @param[in] pu1_src
*  pointer to the source corresponding to the integer pel position of a mv
*  (left and top justified integer position)
*
* @param[out] pu1_dst
*  UWORD8 pointer to the destination
*
* @param[in] src_strd
*  source buffer stride
*
* @param[in] dst_strd
*  destination buffer stride
*
* @param[in] pi2_hdst_scratch
*  scratch buffer for intermediate storage of horizontal filter output; used as
*  input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
*
*  Max scratch buffer required is w * (h + 7) * sizeof(WORD16)
*
* @param[in] ht
*  height of the prediction unit
*
* @param[in] wd
*  width of the prediction unit
*
* @param[in] dx
*  qpel position[0:3] of mv in x direction
*
* @param[in] dy
*  qpel position[0:3] of mv in y direction
*
* @param[in] ps_func_selector
*  function selector providing the interpolation function pointers that are
*  called
*
* @returns
*  none
*
* @remarks
*  In the argument list dy is passed before dx
*
*******************************************************************************
*/
void ihevce_luma_interpolate_8bit_dxdy(
    UWORD8 *pu1_src,
    UWORD8 *pu1_dst,
    WORD32 src_strd,
    WORD32 dst_strd,
    WORD16 *pi2_hdst_scratch,
    WORD32 ht,
    WORD32 wd,
    WORD32 dy,
    WORD32 dx,
    func_selector_t *ps_func_selector)
{
    /* bit 0 : dx is non zero, bit 1 : dy is non zero */
    WORD32 i4_subpel_sel = ((0 != dy) << 1) | (0 != dx);

    switch(i4_subpel_sel)
    {
    case 0:
    {
        /* dx == 0 and dy == 0 : plain copy of the input */
        WORD8 *pi1_taps = &gai1_hevc_luma_filter_taps[0][0];

        ps_func_selector->ihevc_inter_pred_luma_copy_fptr(
            pu1_src, pu1_dst, src_strd, dst_strd, pi1_taps, ht, wd);
        break;
    }

    case 1:
    {
        /* only dx is non zero : single horizontal pass into the destination */
        WORD8 *pi1_taps_x = &gai1_hevc_luma_filter_taps[dx][0];

        ps_func_selector->ihevc_inter_pred_luma_horz_fptr(
            pu1_src, pu1_dst, src_strd, dst_strd, pi1_taps_x, ht, wd);
        break;
    }

    case 2:
    {
        /* only dy is non zero : single vertical pass into the destination */
        WORD8 *pi1_taps_y = &gai1_hevc_luma_filter_taps[dy][0];

        ps_func_selector->ihevc_inter_pred_luma_vert_fptr(
            pu1_src, pu1_dst, src_strd, dst_strd, pi1_taps_y, ht, wd);
        break;
    }

    default:
    {
        /* dx and dy both non zero : horizontal pass into the 16 bit scratch */
        /* buffer followed by a vertical pass from the scratch to the 8 bit  */
        /* destination                                                       */
        WORD8 *pi1_taps_x = &gai1_hevc_luma_filter_taps[dx][0];
        WORD8 *pi1_taps_y = &gai1_hevc_luma_filter_taps[dy][0];

        /* scratch rows are packed with a stride equal to the pu width */
        WORD32 i4_scratch_strd = wd;

        /* horizontal pass covers (ht + NTAPS_LUMA - 1) rows, beginning */
        /* 3 rows above the block in the source                         */
        WORD32 i4_horz_rows = ht + NTAPS_LUMA - 1;
        UWORD8 *pu1_src_top = pu1_src - (3 * src_strd);

        /* vertical pass is handed the scratch pointer advanced by 3 rows */
        WORD16 *pi2_scratch_mid = pi2_hdst_scratch + (3 * i4_scratch_strd);

        ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
            pu1_src_top,
            pi2_hdst_scratch,
            src_strd,
            i4_scratch_strd,
            pi1_taps_x,
            i4_horz_rows,
            wd);

        ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr(
            pi2_scratch_mid, pu1_dst, i4_scratch_strd, dst_strd, pi1_taps_y, ht, wd);
        break;
    }
    }
}
342*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*
* @brief
*  Performs Luma prediction for an inter prediction unit (PU)
*
* @par Description:
*  For a given PU, Inter prediction followed by weighted prediction (if
*  required)
*
* @param[in] pv_inter_pred_ctxt
*  context for inter prediction (cast to inter_pred_ctxt_t); contains ref list,
*  weight offsets, ctb offsets
*
* @param[in] ps_pu
*  pointer to PU structure whose inter prediction needs to be done
*
* @param[out] pv_dst_buf
*  pointer to destination buffer where the inter prediction is done
*
* @param[in] dst_stride
*  pitch of the destination buffer
*
* @param[in] i4_flag_inter_pred_source
*  when 1, the reference luma buffer is taken from s_yuv_buf_desc_src of the
*  reference picture; otherwise it is taken from s_yuv_buf_desc
*
* @returns
*  IV_FAIL for mvs going outside ref frame padded limits
*  IV_SUCCESS after completing mc for given inter pu
*
* @remarks
*
*******************************************************************************
*/
ihevce_luma_inter_pred_pu(void * pv_inter_pred_ctxt,pu_t * ps_pu,void * pv_dst_buf,WORD32 dst_stride,WORD32 i4_flag_inter_pred_source)373*c83a76b0SSuyog Pawar IV_API_CALL_STATUS_T ihevce_luma_inter_pred_pu(
374*c83a76b0SSuyog Pawar void *pv_inter_pred_ctxt,
375*c83a76b0SSuyog Pawar pu_t *ps_pu,
376*c83a76b0SSuyog Pawar void *pv_dst_buf,
377*c83a76b0SSuyog Pawar WORD32 dst_stride,
378*c83a76b0SSuyog Pawar WORD32 i4_flag_inter_pred_source)
379*c83a76b0SSuyog Pawar {
380*c83a76b0SSuyog Pawar inter_pred_ctxt_t *ps_inter_pred_ctxt = (inter_pred_ctxt_t *)pv_inter_pred_ctxt;
381*c83a76b0SSuyog Pawar func_selector_t *ps_func_selector = ps_inter_pred_ctxt->ps_func_selector;
382*c83a76b0SSuyog Pawar
383*c83a76b0SSuyog Pawar WORD32 inter_pred_idc = ps_pu->b2_pred_mode;
384*c83a76b0SSuyog Pawar UWORD8 *pu1_dst_buf = (UWORD8 *)pv_dst_buf;
385*c83a76b0SSuyog Pawar WORD32 pu_wd = (ps_pu->b4_wd + 1) << 2;
386*c83a76b0SSuyog Pawar WORD32 pu_ht = (ps_pu->b4_ht + 1) << 2;
387*c83a76b0SSuyog Pawar
388*c83a76b0SSuyog Pawar WORD32 wp_flag = ps_inter_pred_ctxt->i1_weighted_pred_flag ||
389*c83a76b0SSuyog Pawar ps_inter_pred_ctxt->i1_weighted_bipred_flag;
390*c83a76b0SSuyog Pawar
391*c83a76b0SSuyog Pawar /* 16bit dest required for interpolate if weighted pred is on or bipred */
392*c83a76b0SSuyog Pawar WORD32 store_16bit_output;
393*c83a76b0SSuyog Pawar
394*c83a76b0SSuyog Pawar recon_pic_buf_t *ps_ref_pic_l0, *ps_ref_pic_l1;
395*c83a76b0SSuyog Pawar UWORD8 *pu1_ref_pic, *pu1_ref_int_pel;
396*c83a76b0SSuyog Pawar WORD32 ref_pic_stride;
397*c83a76b0SSuyog Pawar
398*c83a76b0SSuyog Pawar /* offset of reference block in integer pel units */
399*c83a76b0SSuyog Pawar WORD32 frm_x_ofst, frm_y_ofst;
400*c83a76b0SSuyog Pawar WORD32 frm_x_pu, frm_y_pu;
401*c83a76b0SSuyog Pawar
402*c83a76b0SSuyog Pawar /* scratch 16 bit buffers for interpolation in l0 and l1 direction */
403*c83a76b0SSuyog Pawar WORD16 *pi2_scr_buf_l0 = &ps_inter_pred_ctxt->ai2_scratch_buf_l0[0];
404*c83a76b0SSuyog Pawar WORD16 *pi2_scr_buf_l1 = &ps_inter_pred_ctxt->ai2_scratch_buf_l1[0];
405*c83a76b0SSuyog Pawar
406*c83a76b0SSuyog Pawar /* scratch buffer for horizontal interpolation destination */
407*c83a76b0SSuyog Pawar WORD16 *pi2_horz_scratch = &ps_inter_pred_ctxt->ai2_horz_scratch[0];
408*c83a76b0SSuyog Pawar
409*c83a76b0SSuyog Pawar WORD32 wgt0, wgt1, off0, off1, shift, lvl_shift0, lvl_shift1;
410*c83a76b0SSuyog Pawar
411*c83a76b0SSuyog Pawar /* get PU's frm x and frm y offset */
412*c83a76b0SSuyog Pawar frm_x_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_x + (ps_pu->b4_pos_x << 2);
413*c83a76b0SSuyog Pawar frm_y_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_y + (ps_pu->b4_pos_y << 2);
414*c83a76b0SSuyog Pawar
415*c83a76b0SSuyog Pawar /* sanity checks */
416*c83a76b0SSuyog Pawar ASSERT((wp_flag == 0) || (wp_flag == 1));
417*c83a76b0SSuyog Pawar ASSERT(dst_stride >= pu_wd);
418*c83a76b0SSuyog Pawar ASSERT(ps_pu->b1_intra_flag == 0);
419*c83a76b0SSuyog Pawar
420*c83a76b0SSuyog Pawar lvl_shift0 = 0;
421*c83a76b0SSuyog Pawar lvl_shift1 = 0;
422*c83a76b0SSuyog Pawar
423*c83a76b0SSuyog Pawar if(wp_flag)
424*c83a76b0SSuyog Pawar {
425*c83a76b0SSuyog Pawar UWORD8 u1_is_wgt_pred_L0, u1_is_wgt_pred_L1;
426*c83a76b0SSuyog Pawar
427*c83a76b0SSuyog Pawar if(inter_pred_idc != PRED_L1)
428*c83a76b0SSuyog Pawar {
429*c83a76b0SSuyog Pawar ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx];
430*c83a76b0SSuyog Pawar u1_is_wgt_pred_L0 = ps_ref_pic_l0->s_weight_offset.u1_luma_weight_enable_flag;
431*c83a76b0SSuyog Pawar }
432*c83a76b0SSuyog Pawar if(inter_pred_idc != PRED_L0)
433*c83a76b0SSuyog Pawar {
434*c83a76b0SSuyog Pawar ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx];
435*c83a76b0SSuyog Pawar u1_is_wgt_pred_L1 = ps_ref_pic_l1->s_weight_offset.u1_luma_weight_enable_flag;
436*c83a76b0SSuyog Pawar }
437*c83a76b0SSuyog Pawar if(inter_pred_idc == PRED_BI)
438*c83a76b0SSuyog Pawar {
439*c83a76b0SSuyog Pawar wp_flag = (u1_is_wgt_pred_L0 || u1_is_wgt_pred_L1);
440*c83a76b0SSuyog Pawar }
441*c83a76b0SSuyog Pawar else if(inter_pred_idc == PRED_L0)
442*c83a76b0SSuyog Pawar {
443*c83a76b0SSuyog Pawar wp_flag = u1_is_wgt_pred_L0;
444*c83a76b0SSuyog Pawar }
445*c83a76b0SSuyog Pawar else if(inter_pred_idc == PRED_L1)
446*c83a76b0SSuyog Pawar {
447*c83a76b0SSuyog Pawar wp_flag = u1_is_wgt_pred_L1;
448*c83a76b0SSuyog Pawar }
449*c83a76b0SSuyog Pawar else
450*c83a76b0SSuyog Pawar {
451*c83a76b0SSuyog Pawar /*other values are not allowed*/
452*c83a76b0SSuyog Pawar assert(0);
453*c83a76b0SSuyog Pawar }
454*c83a76b0SSuyog Pawar }
455*c83a76b0SSuyog Pawar store_16bit_output = (inter_pred_idc == PRED_BI) || (wp_flag);
456*c83a76b0SSuyog Pawar
457*c83a76b0SSuyog Pawar if(inter_pred_idc != PRED_L1)
458*c83a76b0SSuyog Pawar {
459*c83a76b0SSuyog Pawar /*****************************************************/
460*c83a76b0SSuyog Pawar /* L0 inter prediction */
461*c83a76b0SSuyog Pawar /*****************************************************/
462*c83a76b0SSuyog Pawar
463*c83a76b0SSuyog Pawar /* motion vecs in qpel precision */
464*c83a76b0SSuyog Pawar WORD32 mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
465*c83a76b0SSuyog Pawar WORD32 mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
466*c83a76b0SSuyog Pawar
467*c83a76b0SSuyog Pawar /* sub pel offsets in x and y direction w.r.t integer pel */
468*c83a76b0SSuyog Pawar WORD32 dx = mv_x & 0x3;
469*c83a76b0SSuyog Pawar WORD32 dy = mv_y & 0x3;
470*c83a76b0SSuyog Pawar
471*c83a76b0SSuyog Pawar /* ref idx is currently stored in the lower 4bits */
472*c83a76b0SSuyog Pawar WORD32 ref_idx = (ps_pu->mv.i1_l0_ref_idx);
473*c83a76b0SSuyog Pawar
474*c83a76b0SSuyog Pawar /* x and y integer offsets w.r.t frame start */
475*c83a76b0SSuyog Pawar frm_x_ofst = (frm_x_pu + (mv_x >> 2));
476*c83a76b0SSuyog Pawar frm_y_ofst = (frm_y_pu + (mv_y >> 2));
477*c83a76b0SSuyog Pawar
478*c83a76b0SSuyog Pawar ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ref_idx];
479*c83a76b0SSuyog Pawar
480*c83a76b0SSuyog Pawar /* picture buffer start and stride */
481*c83a76b0SSuyog Pawar if(i4_flag_inter_pred_source == 1)
482*c83a76b0SSuyog Pawar {
483*c83a76b0SSuyog Pawar pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc_src.pv_y_buf;
484*c83a76b0SSuyog Pawar }
485*c83a76b0SSuyog Pawar else
486*c83a76b0SSuyog Pawar {
487*c83a76b0SSuyog Pawar pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc.pv_y_buf;
488*c83a76b0SSuyog Pawar }
489*c83a76b0SSuyog Pawar ref_pic_stride = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_strd;
490*c83a76b0SSuyog Pawar
491*c83a76b0SSuyog Pawar /* Error check for mvs going out of ref frame padded limits */
492*c83a76b0SSuyog Pawar {
493*c83a76b0SSuyog Pawar WORD32 min_x, max_x = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_wd;
494*c83a76b0SSuyog Pawar WORD32 min_y, max_y = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_ht;
495*c83a76b0SSuyog Pawar
496*c83a76b0SSuyog Pawar min_x =
497*c83a76b0SSuyog Pawar -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT]
498*c83a76b0SSuyog Pawar ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT] - 4)
499*c83a76b0SSuyog Pawar : (PAD_HORZ - 4));
500*c83a76b0SSuyog Pawar
501*c83a76b0SSuyog Pawar max_x += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT]
502*c83a76b0SSuyog Pawar ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT] - 4)
503*c83a76b0SSuyog Pawar : (PAD_HORZ - 4);
504*c83a76b0SSuyog Pawar
505*c83a76b0SSuyog Pawar min_y =
506*c83a76b0SSuyog Pawar -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP]
507*c83a76b0SSuyog Pawar ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP] - 4)
508*c83a76b0SSuyog Pawar : (PAD_VERT - 4));
509*c83a76b0SSuyog Pawar
510*c83a76b0SSuyog Pawar max_y += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT]
511*c83a76b0SSuyog Pawar ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT] - 4)
512*c83a76b0SSuyog Pawar : (PAD_VERT - 4);
513*c83a76b0SSuyog Pawar
514*c83a76b0SSuyog Pawar if((frm_x_ofst < min_x) || (frm_x_ofst + pu_wd) > max_x)
515*c83a76b0SSuyog Pawar //ASSERT(0);
516*c83a76b0SSuyog Pawar return (IV_FAIL);
517*c83a76b0SSuyog Pawar
518*c83a76b0SSuyog Pawar if((frm_y_ofst < min_y) || (frm_y_ofst + pu_ht) > max_y)
519*c83a76b0SSuyog Pawar //ASSERT(0);
520*c83a76b0SSuyog Pawar return (IV_FAIL);
521*c83a76b0SSuyog Pawar }
522*c83a76b0SSuyog Pawar
523*c83a76b0SSuyog Pawar /* point to reference start location in ref frame */
524*c83a76b0SSuyog Pawar /* Assuming clipping of mv is not required here as ME would */
525*c83a76b0SSuyog Pawar /* take care of mv access not going beyond padded data */
526*c83a76b0SSuyog Pawar pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
527*c83a76b0SSuyog Pawar
528*c83a76b0SSuyog Pawar /* level shifted for subpel with both x and y componenet being non 0 */
529*c83a76b0SSuyog Pawar /* this is because the interpolate function subtract this to contain */
530*c83a76b0SSuyog Pawar /* the resulting data in 16 bits */
531*c83a76b0SSuyog Pawar lvl_shift0 = (dx != 0) && (dy != 0) ? OFFSET14 : 0;
532*c83a76b0SSuyog Pawar
533*c83a76b0SSuyog Pawar if(store_16bit_output)
534*c83a76b0SSuyog Pawar {
535*c83a76b0SSuyog Pawar /* do interpolation in 16bit L0 scratch buffer */
536*c83a76b0SSuyog Pawar ihevce_luma_interpolate_16bit_dxdy(
537*c83a76b0SSuyog Pawar pu1_ref_int_pel,
538*c83a76b0SSuyog Pawar pi2_scr_buf_l0,
539*c83a76b0SSuyog Pawar ref_pic_stride,
540*c83a76b0SSuyog Pawar pu_wd,
541*c83a76b0SSuyog Pawar pi2_horz_scratch,
542*c83a76b0SSuyog Pawar pu_ht,
543*c83a76b0SSuyog Pawar pu_wd,
544*c83a76b0SSuyog Pawar dy,
545*c83a76b0SSuyog Pawar dx,
546*c83a76b0SSuyog Pawar ps_func_selector);
547*c83a76b0SSuyog Pawar }
548*c83a76b0SSuyog Pawar else
549*c83a76b0SSuyog Pawar {
550*c83a76b0SSuyog Pawar /* do interpolation in 8bit destination buffer and return */
551*c83a76b0SSuyog Pawar ihevce_luma_interpolate_8bit_dxdy(
552*c83a76b0SSuyog Pawar pu1_ref_int_pel,
553*c83a76b0SSuyog Pawar pu1_dst_buf,
554*c83a76b0SSuyog Pawar ref_pic_stride,
555*c83a76b0SSuyog Pawar dst_stride,
556*c83a76b0SSuyog Pawar pi2_horz_scratch,
557*c83a76b0SSuyog Pawar pu_ht,
558*c83a76b0SSuyog Pawar pu_wd,
559*c83a76b0SSuyog Pawar dy,
560*c83a76b0SSuyog Pawar dx,
561*c83a76b0SSuyog Pawar ps_func_selector);
562*c83a76b0SSuyog Pawar
563*c83a76b0SSuyog Pawar return (IV_SUCCESS);
564*c83a76b0SSuyog Pawar }
565*c83a76b0SSuyog Pawar }
566*c83a76b0SSuyog Pawar
567*c83a76b0SSuyog Pawar if(inter_pred_idc != PRED_L0)
568*c83a76b0SSuyog Pawar {
569*c83a76b0SSuyog Pawar /*****************************************************/
570*c83a76b0SSuyog Pawar /* L1 inter prediction */
571*c83a76b0SSuyog Pawar /*****************************************************/
572*c83a76b0SSuyog Pawar
573*c83a76b0SSuyog Pawar /* motion vecs in qpel precision */
574*c83a76b0SSuyog Pawar WORD32 mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
575*c83a76b0SSuyog Pawar WORD32 mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
576*c83a76b0SSuyog Pawar
577*c83a76b0SSuyog Pawar /* sub pel offsets in x and y direction w.r.t integer pel */
578*c83a76b0SSuyog Pawar WORD32 dx = mv_x & 0x3;
579*c83a76b0SSuyog Pawar WORD32 dy = mv_y & 0x3;
580*c83a76b0SSuyog Pawar
581*c83a76b0SSuyog Pawar /* ref idx is currently stored in the lower 4bits */
582*c83a76b0SSuyog Pawar WORD32 ref_idx = (ps_pu->mv.i1_l1_ref_idx);
583*c83a76b0SSuyog Pawar
584*c83a76b0SSuyog Pawar /* x and y integer offsets w.r.t frame start */
585*c83a76b0SSuyog Pawar frm_x_ofst = (frm_x_pu + (mv_x >> 2));
586*c83a76b0SSuyog Pawar frm_y_ofst = (frm_y_pu + (mv_y >> 2));
587*c83a76b0SSuyog Pawar
588*c83a76b0SSuyog Pawar ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ref_idx];
589*c83a76b0SSuyog Pawar
590*c83a76b0SSuyog Pawar /* picture buffer start and stride */
591*c83a76b0SSuyog Pawar
592*c83a76b0SSuyog Pawar if(i4_flag_inter_pred_source == 1)
593*c83a76b0SSuyog Pawar {
594*c83a76b0SSuyog Pawar pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc_src.pv_y_buf;
595*c83a76b0SSuyog Pawar }
596*c83a76b0SSuyog Pawar else
597*c83a76b0SSuyog Pawar {
598*c83a76b0SSuyog Pawar pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc.pv_y_buf;
599*c83a76b0SSuyog Pawar }
600*c83a76b0SSuyog Pawar ref_pic_stride = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_strd;
601*c83a76b0SSuyog Pawar
602*c83a76b0SSuyog Pawar /* Error check for mvs going out of ref frame padded limits */
603*c83a76b0SSuyog Pawar {
604*c83a76b0SSuyog Pawar WORD32 min_x, max_x = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_wd;
605*c83a76b0SSuyog Pawar WORD32 min_y, max_y = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_ht;
606*c83a76b0SSuyog Pawar
607*c83a76b0SSuyog Pawar min_x =
608*c83a76b0SSuyog Pawar -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT]
609*c83a76b0SSuyog Pawar ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT] - 4)
610*c83a76b0SSuyog Pawar : (PAD_HORZ - 4));
611*c83a76b0SSuyog Pawar
612*c83a76b0SSuyog Pawar max_x += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT]
613*c83a76b0SSuyog Pawar ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT] - 4)
614*c83a76b0SSuyog Pawar : (PAD_HORZ - 4);
615*c83a76b0SSuyog Pawar
616*c83a76b0SSuyog Pawar min_y =
617*c83a76b0SSuyog Pawar -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP]
618*c83a76b0SSuyog Pawar ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP] - 4)
619*c83a76b0SSuyog Pawar : (PAD_VERT - 4));
620*c83a76b0SSuyog Pawar
621*c83a76b0SSuyog Pawar max_y += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT]
622*c83a76b0SSuyog Pawar ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT] - 4)
623*c83a76b0SSuyog Pawar : (PAD_VERT - 4);
624*c83a76b0SSuyog Pawar
625*c83a76b0SSuyog Pawar if((frm_x_ofst < min_x) || (frm_x_ofst + pu_wd) > max_x)
626*c83a76b0SSuyog Pawar //ASSERT(0);
627*c83a76b0SSuyog Pawar return (IV_FAIL);
628*c83a76b0SSuyog Pawar
629*c83a76b0SSuyog Pawar if((frm_y_ofst < min_y) || (frm_y_ofst + pu_ht) > max_y)
630*c83a76b0SSuyog Pawar //ASSERT(0);
631*c83a76b0SSuyog Pawar return (IV_FAIL);
632*c83a76b0SSuyog Pawar }
633*c83a76b0SSuyog Pawar
634*c83a76b0SSuyog Pawar /* point to reference start location in ref frame */
635*c83a76b0SSuyog Pawar /* Assuming clipping of mv is not required here as ME would */
636*c83a76b0SSuyog Pawar /* take care of mv access not going beyond padded data */
637*c83a76b0SSuyog Pawar pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
638*c83a76b0SSuyog Pawar
639*c83a76b0SSuyog Pawar /* level shift is applied for subpel with both x and y components being */
640*c83a76b0SSuyog Pawar /* non 0; this is because the interpolate function subtracts it to contain */
641*c83a76b0SSuyog Pawar /* the resulting data in 16 bits */
642*c83a76b0SSuyog Pawar lvl_shift1 = (dx != 0) && (dy != 0) ? OFFSET14 : 0;
643*c83a76b0SSuyog Pawar
644*c83a76b0SSuyog Pawar if(store_16bit_output)
645*c83a76b0SSuyog Pawar {
646*c83a76b0SSuyog Pawar /* do interpolation in 16bit L1 scratch buffer */
647*c83a76b0SSuyog Pawar ihevce_luma_interpolate_16bit_dxdy(
648*c83a76b0SSuyog Pawar pu1_ref_int_pel,
649*c83a76b0SSuyog Pawar pi2_scr_buf_l1,
650*c83a76b0SSuyog Pawar ref_pic_stride,
651*c83a76b0SSuyog Pawar pu_wd,
652*c83a76b0SSuyog Pawar pi2_horz_scratch,
653*c83a76b0SSuyog Pawar pu_ht,
654*c83a76b0SSuyog Pawar pu_wd,
655*c83a76b0SSuyog Pawar dy,
656*c83a76b0SSuyog Pawar dx,
657*c83a76b0SSuyog Pawar ps_func_selector);
658*c83a76b0SSuyog Pawar }
659*c83a76b0SSuyog Pawar else
660*c83a76b0SSuyog Pawar {
661*c83a76b0SSuyog Pawar /* do interpolation in 8bit destination buffer and return */
662*c83a76b0SSuyog Pawar ihevce_luma_interpolate_8bit_dxdy(
663*c83a76b0SSuyog Pawar pu1_ref_int_pel,
664*c83a76b0SSuyog Pawar pu1_dst_buf,
665*c83a76b0SSuyog Pawar ref_pic_stride,
666*c83a76b0SSuyog Pawar dst_stride,
667*c83a76b0SSuyog Pawar pi2_horz_scratch,
668*c83a76b0SSuyog Pawar pu_ht,
669*c83a76b0SSuyog Pawar pu_wd,
670*c83a76b0SSuyog Pawar dy,
671*c83a76b0SSuyog Pawar dx,
672*c83a76b0SSuyog Pawar ps_func_selector);
673*c83a76b0SSuyog Pawar
674*c83a76b0SSuyog Pawar return (IV_SUCCESS);
675*c83a76b0SSuyog Pawar }
676*c83a76b0SSuyog Pawar }
677*c83a76b0SSuyog Pawar
678*c83a76b0SSuyog Pawar if((inter_pred_idc != PRED_BI) && wp_flag)
679*c83a76b0SSuyog Pawar {
680*c83a76b0SSuyog Pawar /*****************************************************/
681*c83a76b0SSuyog Pawar /* unidirection weighted prediction */
682*c83a76b0SSuyog Pawar /*****************************************************/
683*c83a76b0SSuyog Pawar ihevce_wght_offst_t *ps_weight_offset;
684*c83a76b0SSuyog Pawar WORD16 *pi2_src;
685*c83a76b0SSuyog Pawar WORD32 lvl_shift;
686*c83a76b0SSuyog Pawar
687*c83a76b0SSuyog Pawar /* initialize the weight, offsets and ref based on l0/l1 mode */
688*c83a76b0SSuyog Pawar if(inter_pred_idc == PRED_L0)
689*c83a76b0SSuyog Pawar {
690*c83a76b0SSuyog Pawar pi2_src = pi2_scr_buf_l0;
691*c83a76b0SSuyog Pawar ps_weight_offset = &ps_ref_pic_l0->s_weight_offset;
692*c83a76b0SSuyog Pawar lvl_shift = lvl_shift0;
693*c83a76b0SSuyog Pawar }
694*c83a76b0SSuyog Pawar else
695*c83a76b0SSuyog Pawar {
696*c83a76b0SSuyog Pawar pi2_src = pi2_scr_buf_l1;
697*c83a76b0SSuyog Pawar ps_weight_offset = &ps_ref_pic_l1->s_weight_offset;
698*c83a76b0SSuyog Pawar lvl_shift = lvl_shift1;
699*c83a76b0SSuyog Pawar }
700*c83a76b0SSuyog Pawar
701*c83a76b0SSuyog Pawar wgt0 = ps_weight_offset->i2_luma_weight;
702*c83a76b0SSuyog Pawar off0 = ps_weight_offset->i2_luma_offset;
703*c83a76b0SSuyog Pawar shift = ps_inter_pred_ctxt->i4_log2_luma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH;
704*c83a76b0SSuyog Pawar
705*c83a76b0SSuyog Pawar /* do the uni directional weighted prediction */
706*c83a76b0SSuyog Pawar ps_func_selector->ihevc_weighted_pred_uni_fptr(
707*c83a76b0SSuyog Pawar pi2_src, pu1_dst_buf, pu_wd, dst_stride, wgt0, off0, shift, lvl_shift, pu_ht, pu_wd);
708*c83a76b0SSuyog Pawar }
709*c83a76b0SSuyog Pawar else
710*c83a76b0SSuyog Pawar {
711*c83a76b0SSuyog Pawar /*****************************************************/
712*c83a76b0SSuyog Pawar /* Bipred prediction */
713*c83a76b0SSuyog Pawar /*****************************************************/
714*c83a76b0SSuyog Pawar
715*c83a76b0SSuyog Pawar if(wp_flag)
716*c83a76b0SSuyog Pawar {
717*c83a76b0SSuyog Pawar /*****************************************************/
718*c83a76b0SSuyog Pawar /* Bi pred weighted prediction */
719*c83a76b0SSuyog Pawar /*****************************************************/
720*c83a76b0SSuyog Pawar wgt0 = ps_ref_pic_l0->s_weight_offset.i2_luma_weight;
721*c83a76b0SSuyog Pawar off0 = ps_ref_pic_l0->s_weight_offset.i2_luma_offset;
722*c83a76b0SSuyog Pawar
723*c83a76b0SSuyog Pawar wgt1 = ps_ref_pic_l1->s_weight_offset.i2_luma_weight;
724*c83a76b0SSuyog Pawar off1 = ps_ref_pic_l1->s_weight_offset.i2_luma_offset;
725*c83a76b0SSuyog Pawar
726*c83a76b0SSuyog Pawar shift = ps_inter_pred_ctxt->i4_log2_luma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH + 1;
727*c83a76b0SSuyog Pawar
728*c83a76b0SSuyog Pawar ps_func_selector->ihevc_weighted_pred_bi_fptr(
729*c83a76b0SSuyog Pawar pi2_scr_buf_l0,
730*c83a76b0SSuyog Pawar pi2_scr_buf_l1,
731*c83a76b0SSuyog Pawar pu1_dst_buf,
732*c83a76b0SSuyog Pawar pu_wd,
733*c83a76b0SSuyog Pawar pu_wd,
734*c83a76b0SSuyog Pawar dst_stride,
735*c83a76b0SSuyog Pawar wgt0,
736*c83a76b0SSuyog Pawar off0,
737*c83a76b0SSuyog Pawar wgt1,
738*c83a76b0SSuyog Pawar off1,
739*c83a76b0SSuyog Pawar shift,
740*c83a76b0SSuyog Pawar lvl_shift0,
741*c83a76b0SSuyog Pawar lvl_shift1,
742*c83a76b0SSuyog Pawar pu_ht,
743*c83a76b0SSuyog Pawar pu_wd);
744*c83a76b0SSuyog Pawar }
745*c83a76b0SSuyog Pawar else
746*c83a76b0SSuyog Pawar {
747*c83a76b0SSuyog Pawar /*****************************************************/
748*c83a76b0SSuyog Pawar /* Default Bi pred prediction */
749*c83a76b0SSuyog Pawar /*****************************************************/
750*c83a76b0SSuyog Pawar ps_func_selector->ihevc_weighted_pred_bi_default_fptr(
751*c83a76b0SSuyog Pawar pi2_scr_buf_l0,
752*c83a76b0SSuyog Pawar pi2_scr_buf_l1,
753*c83a76b0SSuyog Pawar pu1_dst_buf,
754*c83a76b0SSuyog Pawar pu_wd,
755*c83a76b0SSuyog Pawar pu_wd,
756*c83a76b0SSuyog Pawar dst_stride,
757*c83a76b0SSuyog Pawar lvl_shift0,
758*c83a76b0SSuyog Pawar lvl_shift1,
759*c83a76b0SSuyog Pawar pu_ht,
760*c83a76b0SSuyog Pawar pu_wd);
761*c83a76b0SSuyog Pawar }
762*c83a76b0SSuyog Pawar }
763*c83a76b0SSuyog Pawar
764*c83a76b0SSuyog Pawar return (IV_SUCCESS);
765*c83a76b0SSuyog Pawar }
766*c83a76b0SSuyog Pawar
767*c83a76b0SSuyog Pawar /**
768*c83a76b0SSuyog Pawar *******************************************************************************
769*c83a76b0SSuyog Pawar *
770*c83a76b0SSuyog Pawar * @brief
771*c83a76b0SSuyog Pawar * Performs Chroma inter pred based on sub pel position dxdy and store the
772*c83a76b0SSuyog Pawar * result in a 16 bit destination buffer
773*c83a76b0SSuyog Pawar *
774*c83a76b0SSuyog Pawar * @param[in] pu1_src
775*c83a76b0SSuyog Pawar * pointer to the source correspoding to integer pel position of a mv (left and
776*c83a76b0SSuyog Pawar * top justified integer position)
777*c83a76b0SSuyog Pawar *
778*c83a76b0SSuyog Pawar * @param[out] pi2_dst
779*c83a76b0SSuyog Pawar * WORD16 pointer to the destination
780*c83a76b0SSuyog Pawar *
781*c83a76b0SSuyog Pawar * @param[in] src_strd
782*c83a76b0SSuyog Pawar * source buffer stride
783*c83a76b0SSuyog Pawar *
784*c83a76b0SSuyog Pawar * @param[in] dst_strd
785*c83a76b0SSuyog Pawar * destination buffer stride
786*c83a76b0SSuyog Pawar *
787*c83a76b0SSuyog Pawar * @param[in] pi2_hdst_scratch
788*c83a76b0SSuyog Pawar * scratch buffer for intermediate storage of horizontal filter output; used as
789*c83a76b0SSuyog Pawar * input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
790*c83a76b0SSuyog Pawar *
791*c83a76b0SSuyog Pawar * Max scratch buffer required is w * (h + 3) * sizeof(WORD16)
792*c83a76b0SSuyog Pawar *
793*c83a76b0SSuyog Pawar * @param[in] ht
794*c83a76b0SSuyog Pawar * width of the prediction unit
795*c83a76b0SSuyog Pawar *
796*c83a76b0SSuyog Pawar * @param[in] wd
797*c83a76b0SSuyog Pawar * width of the prediction unit
798*c83a76b0SSuyog Pawar *
799*c83a76b0SSuyog Pawar * @param[in] dx
800*c83a76b0SSuyog Pawar * 1/8th pel position[0:7] of mv in x direction
801*c83a76b0SSuyog Pawar *
802*c83a76b0SSuyog Pawar * @param[in] dy
803*c83a76b0SSuyog Pawar * 1/8th pel position[0:7] of mv in y direction
804*c83a76b0SSuyog Pawar *
805*c83a76b0SSuyog Pawar * @returns
806*c83a76b0SSuyog Pawar * none
807*c83a76b0SSuyog Pawar *
808*c83a76b0SSuyog Pawar * @remarks
809*c83a76b0SSuyog Pawar *
810*c83a76b0SSuyog Pawar *******************************************************************************
811*c83a76b0SSuyog Pawar */
ihevce_chroma_interpolate_16bit_dxdy(UWORD8 * pu1_src,WORD16 * pi2_dst,WORD32 src_strd,WORD32 dst_strd,WORD16 * pi2_hdst_scratch,WORD32 ht,WORD32 wd,WORD32 dy,WORD32 dx,func_selector_t * ps_func_selector)812*c83a76b0SSuyog Pawar void ihevce_chroma_interpolate_16bit_dxdy(
813*c83a76b0SSuyog Pawar UWORD8 *pu1_src,
814*c83a76b0SSuyog Pawar WORD16 *pi2_dst,
815*c83a76b0SSuyog Pawar WORD32 src_strd,
816*c83a76b0SSuyog Pawar WORD32 dst_strd,
817*c83a76b0SSuyog Pawar WORD16 *pi2_hdst_scratch,
818*c83a76b0SSuyog Pawar WORD32 ht,
819*c83a76b0SSuyog Pawar WORD32 wd,
820*c83a76b0SSuyog Pawar WORD32 dy,
821*c83a76b0SSuyog Pawar WORD32 dx,
822*c83a76b0SSuyog Pawar func_selector_t *ps_func_selector)
823*c83a76b0SSuyog Pawar {
824*c83a76b0SSuyog Pawar if((0 == dx) && (0 == dy))
825*c83a76b0SSuyog Pawar {
826*c83a76b0SSuyog Pawar /*--------- full pel position : copy input by upscaling-------*/
827*c83a76b0SSuyog Pawar
828*c83a76b0SSuyog Pawar ps_func_selector->ihevc_inter_pred_chroma_copy_w16out_fptr(
829*c83a76b0SSuyog Pawar pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[0][0], ht, wd);
830*c83a76b0SSuyog Pawar }
831*c83a76b0SSuyog Pawar else if((0 != dx) && (0 != dy))
832*c83a76b0SSuyog Pawar {
833*c83a76b0SSuyog Pawar /*----------sub pel in both x and y direction---------*/
834*c83a76b0SSuyog Pawar
835*c83a76b0SSuyog Pawar UWORD8 *pu1_horz_src = pu1_src - src_strd;
836*c83a76b0SSuyog Pawar WORD32 hdst_buf_stride = (wd << 1); /* uv interleave */
837*c83a76b0SSuyog Pawar WORD16 *pi2_vert_src = pi2_hdst_scratch + hdst_buf_stride;
838*c83a76b0SSuyog Pawar
839*c83a76b0SSuyog Pawar /* horizontal filtering of source done in a scratch buffer first */
840*c83a76b0SSuyog Pawar ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
841*c83a76b0SSuyog Pawar pu1_horz_src,
842*c83a76b0SSuyog Pawar pi2_hdst_scratch,
843*c83a76b0SSuyog Pawar src_strd,
844*c83a76b0SSuyog Pawar hdst_buf_stride,
845*c83a76b0SSuyog Pawar &gai1_hevc_chroma_filter_taps[dx][0],
846*c83a76b0SSuyog Pawar (ht + NTAPS_CHROMA - 1),
847*c83a76b0SSuyog Pawar wd);
848*c83a76b0SSuyog Pawar
849*c83a76b0SSuyog Pawar /* vertical filtering on scratch buffer and stored in desitnation */
850*c83a76b0SSuyog Pawar ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr(
851*c83a76b0SSuyog Pawar pi2_vert_src,
852*c83a76b0SSuyog Pawar pi2_dst,
853*c83a76b0SSuyog Pawar hdst_buf_stride,
854*c83a76b0SSuyog Pawar dst_strd,
855*c83a76b0SSuyog Pawar &gai1_hevc_chroma_filter_taps[dy][0],
856*c83a76b0SSuyog Pawar ht,
857*c83a76b0SSuyog Pawar wd);
858*c83a76b0SSuyog Pawar }
859*c83a76b0SSuyog Pawar else if(0 == dy)
860*c83a76b0SSuyog Pawar {
861*c83a76b0SSuyog Pawar /*----------sub pel in x direction only ---------*/
862*c83a76b0SSuyog Pawar
863*c83a76b0SSuyog Pawar ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
864*c83a76b0SSuyog Pawar pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dx][0], ht, wd);
865*c83a76b0SSuyog Pawar }
866*c83a76b0SSuyog Pawar else /* if (0 == dx) */
867*c83a76b0SSuyog Pawar {
868*c83a76b0SSuyog Pawar /*----------sub pel in y direction only ---------*/
869*c83a76b0SSuyog Pawar
870*c83a76b0SSuyog Pawar ps_func_selector->ihevc_inter_pred_chroma_vert_w16out_fptr(
871*c83a76b0SSuyog Pawar pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dy][0], ht, wd);
872*c83a76b0SSuyog Pawar }
873*c83a76b0SSuyog Pawar }
874*c83a76b0SSuyog Pawar
875*c83a76b0SSuyog Pawar /**
876*c83a76b0SSuyog Pawar *******************************************************************************
877*c83a76b0SSuyog Pawar *
878*c83a76b0SSuyog Pawar * @brief
879*c83a76b0SSuyog Pawar * Performs Chroma inter pred based on sub pel position dxdy and store the
880*c83a76b0SSuyog Pawar * result in a 8 bit destination buffer
881*c83a76b0SSuyog Pawar *
882*c83a76b0SSuyog Pawar * @param[in] pu1_src
883*c83a76b0SSuyog Pawar * pointer to the source correspoding to integer pel position of a mv (left and
884*c83a76b0SSuyog Pawar * top justified integer position)
885*c83a76b0SSuyog Pawar *
886*c83a76b0SSuyog Pawar * @param[out] pu1_dst
887*c83a76b0SSuyog Pawar * UWORD8 pointer to the destination
888*c83a76b0SSuyog Pawar *
889*c83a76b0SSuyog Pawar * @param[in] src_strd
890*c83a76b0SSuyog Pawar * source buffer stride
891*c83a76b0SSuyog Pawar *
892*c83a76b0SSuyog Pawar * @param[in] dst_strd
893*c83a76b0SSuyog Pawar * destination buffer stride
894*c83a76b0SSuyog Pawar *
895*c83a76b0SSuyog Pawar * @param[in] pi2_hdst_scratch
896*c83a76b0SSuyog Pawar * scratch buffer for intermediate storage of horizontal filter output; used as
897*c83a76b0SSuyog Pawar * input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
898*c83a76b0SSuyog Pawar *
899*c83a76b0SSuyog Pawar * Max scratch buffer required is w * (h + 3) * sizeof(WORD16)
900*c83a76b0SSuyog Pawar *
901*c83a76b0SSuyog Pawar * @param[in] ht
902*c83a76b0SSuyog Pawar * width of the prediction unit
903*c83a76b0SSuyog Pawar *
904*c83a76b0SSuyog Pawar * @param[in] wd
905*c83a76b0SSuyog Pawar * width of the prediction unit
906*c83a76b0SSuyog Pawar *
907*c83a76b0SSuyog Pawar * @param[in] dx
908*c83a76b0SSuyog Pawar * 1/8th pel position[0:7] of mv in x direction
909*c83a76b0SSuyog Pawar *
910*c83a76b0SSuyog Pawar * @param[in] dy
911*c83a76b0SSuyog Pawar * 1/8th pel position[0:7] of mv in y direction
912*c83a76b0SSuyog Pawar *
913*c83a76b0SSuyog Pawar * @returns
914*c83a76b0SSuyog Pawar * none
915*c83a76b0SSuyog Pawar *
916*c83a76b0SSuyog Pawar * @remarks
917*c83a76b0SSuyog Pawar *
918*c83a76b0SSuyog Pawar *******************************************************************************
919*c83a76b0SSuyog Pawar */
ihevce_chroma_interpolate_8bit_dxdy(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD16 * pi2_hdst_scratch,WORD32 ht,WORD32 wd,WORD32 dy,WORD32 dx,func_selector_t * ps_func_selector)920*c83a76b0SSuyog Pawar void ihevce_chroma_interpolate_8bit_dxdy(
921*c83a76b0SSuyog Pawar UWORD8 *pu1_src,
922*c83a76b0SSuyog Pawar UWORD8 *pu1_dst,
923*c83a76b0SSuyog Pawar WORD32 src_strd,
924*c83a76b0SSuyog Pawar WORD32 dst_strd,
925*c83a76b0SSuyog Pawar WORD16 *pi2_hdst_scratch,
926*c83a76b0SSuyog Pawar WORD32 ht,
927*c83a76b0SSuyog Pawar WORD32 wd,
928*c83a76b0SSuyog Pawar WORD32 dy,
929*c83a76b0SSuyog Pawar WORD32 dx,
930*c83a76b0SSuyog Pawar func_selector_t *ps_func_selector)
931*c83a76b0SSuyog Pawar {
932*c83a76b0SSuyog Pawar if((0 == dx) && (0 == dy))
933*c83a76b0SSuyog Pawar {
934*c83a76b0SSuyog Pawar /*--------- full pel position : copy input as is -------*/
935*c83a76b0SSuyog Pawar ps_func_selector->ihevc_inter_pred_chroma_copy_fptr(
936*c83a76b0SSuyog Pawar pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[0][0], ht, wd);
937*c83a76b0SSuyog Pawar }
938*c83a76b0SSuyog Pawar else if((0 != dx) && (0 != dy))
939*c83a76b0SSuyog Pawar {
940*c83a76b0SSuyog Pawar /*----------sub pel in both x and y direction---------*/
941*c83a76b0SSuyog Pawar UWORD8 *pu1_horz_src = pu1_src - src_strd;
942*c83a76b0SSuyog Pawar WORD32 hdst_buf_stride = (wd << 1); /* uv interleave */
943*c83a76b0SSuyog Pawar WORD16 *pi2_vert_src = pi2_hdst_scratch + hdst_buf_stride;
944*c83a76b0SSuyog Pawar
945*c83a76b0SSuyog Pawar /* horizontal filtering of source done in a scratch buffer first */
946*c83a76b0SSuyog Pawar ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
947*c83a76b0SSuyog Pawar pu1_horz_src,
948*c83a76b0SSuyog Pawar pi2_hdst_scratch,
949*c83a76b0SSuyog Pawar src_strd,
950*c83a76b0SSuyog Pawar hdst_buf_stride,
951*c83a76b0SSuyog Pawar &gai1_hevc_chroma_filter_taps[dx][0],
952*c83a76b0SSuyog Pawar (ht + NTAPS_CHROMA - 1),
953*c83a76b0SSuyog Pawar wd);
954*c83a76b0SSuyog Pawar
955*c83a76b0SSuyog Pawar /* vertical filtering on scratch buffer and stored in desitnation */
956*c83a76b0SSuyog Pawar ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_fptr(
957*c83a76b0SSuyog Pawar pi2_vert_src,
958*c83a76b0SSuyog Pawar pu1_dst,
959*c83a76b0SSuyog Pawar hdst_buf_stride,
960*c83a76b0SSuyog Pawar dst_strd,
961*c83a76b0SSuyog Pawar &gai1_hevc_chroma_filter_taps[dy][0],
962*c83a76b0SSuyog Pawar ht,
963*c83a76b0SSuyog Pawar wd);
964*c83a76b0SSuyog Pawar }
965*c83a76b0SSuyog Pawar else if(0 == dy)
966*c83a76b0SSuyog Pawar {
967*c83a76b0SSuyog Pawar /*----------sub pel in x direction only ---------*/
968*c83a76b0SSuyog Pawar ps_func_selector->ihevc_inter_pred_chroma_horz_fptr(
969*c83a76b0SSuyog Pawar pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dx][0], ht, wd);
970*c83a76b0SSuyog Pawar }
971*c83a76b0SSuyog Pawar else /* if (0 == dx) */
972*c83a76b0SSuyog Pawar {
973*c83a76b0SSuyog Pawar /*----------sub pel in y direction only ---------*/
974*c83a76b0SSuyog Pawar ps_func_selector->ihevc_inter_pred_chroma_vert_fptr(
975*c83a76b0SSuyog Pawar pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dy][0], ht, wd);
976*c83a76b0SSuyog Pawar }
977*c83a76b0SSuyog Pawar }
978*c83a76b0SSuyog Pawar
979*c83a76b0SSuyog Pawar /**
980*c83a76b0SSuyog Pawar *******************************************************************************
981*c83a76b0SSuyog Pawar *
982*c83a76b0SSuyog Pawar * @brief
983*c83a76b0SSuyog Pawar * Performs Chroma prediction for an inter prediction unit (PU)
984*c83a76b0SSuyog Pawar *
985*c83a76b0SSuyog Pawar * @par Description:
986*c83a76b0SSuyog Pawar * For a given PU, Inter prediction followed by weighted prediction (if
987*c83a76b0SSuyog Pawar * required). The reference and destination buffers are uv interleaved
988*c83a76b0SSuyog Pawar *
989*c83a76b0SSuyog Pawar * @param[in] pv_inter_pred_ctxt
990*c83a76b0SSuyog Pawar * context for inter prediction; contains ref list, weight offsets, ctb offsets
991*c83a76b0SSuyog Pawar *
992*c83a76b0SSuyog Pawar * @param[in] ps_pu
993*c83a76b0SSuyog Pawar * pointer to PU structure whose inter prediction needs to be done
994*c83a76b0SSuyog Pawar *
995*c83a76b0SSuyog Pawar * @param[in] pu1_dst_buf
996*c83a76b0SSuyog Pawar * pointer to destination buffer where the inter prediction is done
997*c83a76b0SSuyog Pawar *
998*c83a76b0SSuyog Pawar * @param[in] dst_stride
999*c83a76b0SSuyog Pawar * pitch of the destination buffer
1000*c83a76b0SSuyog Pawar *
1001*c83a76b0SSuyog Pawar * @returns
1002*c83a76b0SSuyog Pawar * none
1003*c83a76b0SSuyog Pawar *
1004*c83a76b0SSuyog Pawar * @remarks
1005*c83a76b0SSuyog Pawar *
1006*c83a76b0SSuyog Pawar *******************************************************************************
1007*c83a76b0SSuyog Pawar */
ihevce_chroma_inter_pred_pu(void * pv_inter_pred_ctxt,pu_t * ps_pu,UWORD8 * pu1_dst_buf,WORD32 dst_stride)1008*c83a76b0SSuyog Pawar void ihevce_chroma_inter_pred_pu(
1009*c83a76b0SSuyog Pawar void *pv_inter_pred_ctxt, pu_t *ps_pu, UWORD8 *pu1_dst_buf, WORD32 dst_stride)
1010*c83a76b0SSuyog Pawar {
1011*c83a76b0SSuyog Pawar inter_pred_ctxt_t *ps_inter_pred_ctxt = (inter_pred_ctxt_t *)pv_inter_pred_ctxt;
1012*c83a76b0SSuyog Pawar func_selector_t *ps_func_selector = ps_inter_pred_ctxt->ps_func_selector;
1013*c83a76b0SSuyog Pawar
1014*c83a76b0SSuyog Pawar WORD32 inter_pred_idc = ps_pu->b2_pred_mode;
1015*c83a76b0SSuyog Pawar UWORD8 u1_is_422 = (ps_inter_pred_ctxt->u1_chroma_array_type == 2);
1016*c83a76b0SSuyog Pawar /* chroma width and height are half of luma width and height */
1017*c83a76b0SSuyog Pawar WORD32 pu_wd_chroma = (ps_pu->b4_wd + 1) << 1;
1018*c83a76b0SSuyog Pawar WORD32 pu_ht_chroma = (ps_pu->b4_ht + 1) << (u1_is_422 + 1);
1019*c83a76b0SSuyog Pawar
1020*c83a76b0SSuyog Pawar WORD32 wp_flag = ps_inter_pred_ctxt->i1_weighted_pred_flag ||
1021*c83a76b0SSuyog Pawar ps_inter_pred_ctxt->i1_weighted_bipred_flag;
1022*c83a76b0SSuyog Pawar
1023*c83a76b0SSuyog Pawar /* 16bit dest required for interpolate if weighted pred is on or bipred */
1024*c83a76b0SSuyog Pawar WORD32 store_16bit_output;
1025*c83a76b0SSuyog Pawar
1026*c83a76b0SSuyog Pawar recon_pic_buf_t *ps_ref_pic_l0, *ps_ref_pic_l1;
1027*c83a76b0SSuyog Pawar UWORD8 *pu1_ref_pic, *pu1_ref_int_pel;
1028*c83a76b0SSuyog Pawar WORD32 ref_pic_stride;
1029*c83a76b0SSuyog Pawar
1030*c83a76b0SSuyog Pawar /* offset of reference block in integer pel units */
1031*c83a76b0SSuyog Pawar WORD32 frm_x_ofst, frm_y_ofst;
1032*c83a76b0SSuyog Pawar WORD32 frm_x_pu, frm_y_pu;
1033*c83a76b0SSuyog Pawar
1034*c83a76b0SSuyog Pawar /* scratch 16 bit buffers for interpolation in l0 and l1 direction */
1035*c83a76b0SSuyog Pawar WORD16 *pi2_scr_buf_l0 = &ps_inter_pred_ctxt->ai2_scratch_buf_l0[0];
1036*c83a76b0SSuyog Pawar WORD16 *pi2_scr_buf_l1 = &ps_inter_pred_ctxt->ai2_scratch_buf_l1[0];
1037*c83a76b0SSuyog Pawar
1038*c83a76b0SSuyog Pawar /* scratch buffer for horizontal interpolation destination */
1039*c83a76b0SSuyog Pawar WORD16 *pi2_horz_scratch = &ps_inter_pred_ctxt->ai2_horz_scratch[0];
1040*c83a76b0SSuyog Pawar
1041*c83a76b0SSuyog Pawar /* get PU's frm x and frm y offset : Note uv is interleaved */
1042*c83a76b0SSuyog Pawar frm_x_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_x + (ps_pu->b4_pos_x << 2);
1043*c83a76b0SSuyog Pawar frm_y_pu = (ps_inter_pred_ctxt->i4_ctb_frm_pos_y >> (u1_is_422 == 0)) +
1044*c83a76b0SSuyog Pawar (ps_pu->b4_pos_y << (u1_is_422 + 1));
1045*c83a76b0SSuyog Pawar
1046*c83a76b0SSuyog Pawar /* sanity checks */
1047*c83a76b0SSuyog Pawar ASSERT((wp_flag == 0) || (wp_flag == 1));
1048*c83a76b0SSuyog Pawar ASSERT(dst_stride >= (pu_wd_chroma << 1)); /* uv interleaved */
1049*c83a76b0SSuyog Pawar ASSERT(ps_pu->b1_intra_flag == 0);
1050*c83a76b0SSuyog Pawar
1051*c83a76b0SSuyog Pawar if(wp_flag)
1052*c83a76b0SSuyog Pawar {
1053*c83a76b0SSuyog Pawar UWORD8 u1_is_wgt_pred_L0, u1_is_wgt_pred_L1;
1054*c83a76b0SSuyog Pawar
1055*c83a76b0SSuyog Pawar if(inter_pred_idc != PRED_L1)
1056*c83a76b0SSuyog Pawar {
1057*c83a76b0SSuyog Pawar ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx];
1058*c83a76b0SSuyog Pawar u1_is_wgt_pred_L0 = ps_ref_pic_l0->s_weight_offset.u1_chroma_weight_enable_flag;
1059*c83a76b0SSuyog Pawar }
1060*c83a76b0SSuyog Pawar if(inter_pred_idc != PRED_L0)
1061*c83a76b0SSuyog Pawar {
1062*c83a76b0SSuyog Pawar ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx];
1063*c83a76b0SSuyog Pawar u1_is_wgt_pred_L1 = ps_ref_pic_l1->s_weight_offset.u1_chroma_weight_enable_flag;
1064*c83a76b0SSuyog Pawar }
1065*c83a76b0SSuyog Pawar if(inter_pred_idc == PRED_BI)
1066*c83a76b0SSuyog Pawar {
1067*c83a76b0SSuyog Pawar wp_flag = (u1_is_wgt_pred_L0 || u1_is_wgt_pred_L1);
1068*c83a76b0SSuyog Pawar }
1069*c83a76b0SSuyog Pawar else if(inter_pred_idc == PRED_L0)
1070*c83a76b0SSuyog Pawar {
1071*c83a76b0SSuyog Pawar wp_flag = u1_is_wgt_pred_L0;
1072*c83a76b0SSuyog Pawar }
1073*c83a76b0SSuyog Pawar else if(inter_pred_idc == PRED_L1)
1074*c83a76b0SSuyog Pawar {
1075*c83a76b0SSuyog Pawar wp_flag = u1_is_wgt_pred_L1;
1076*c83a76b0SSuyog Pawar }
1077*c83a76b0SSuyog Pawar else
1078*c83a76b0SSuyog Pawar {
1079*c83a76b0SSuyog Pawar /*other values are not allowed*/
1080*c83a76b0SSuyog Pawar assert(0);
1081*c83a76b0SSuyog Pawar }
1082*c83a76b0SSuyog Pawar }
1083*c83a76b0SSuyog Pawar store_16bit_output = (inter_pred_idc == PRED_BI) || (wp_flag);
1084*c83a76b0SSuyog Pawar
1085*c83a76b0SSuyog Pawar if(inter_pred_idc != PRED_L1)
1086*c83a76b0SSuyog Pawar {
1087*c83a76b0SSuyog Pawar /*****************************************************/
1088*c83a76b0SSuyog Pawar /* L0 inter prediction(Chroma ) */
1089*c83a76b0SSuyog Pawar /*****************************************************/
1090*c83a76b0SSuyog Pawar
1091*c83a76b0SSuyog Pawar /* motion vecs in qpel precision */
1092*c83a76b0SSuyog Pawar WORD32 mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
1093*c83a76b0SSuyog Pawar WORD32 mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
1094*c83a76b0SSuyog Pawar
1095*c83a76b0SSuyog Pawar /* sub pel offsets in x and y direction w.r.t integer pel */
1096*c83a76b0SSuyog Pawar WORD32 dx = mv_x & 0x7;
1097*c83a76b0SSuyog Pawar WORD32 dy = (mv_y & ((1 << (!u1_is_422 + 2)) - 1)) << u1_is_422;
1098*c83a76b0SSuyog Pawar
1099*c83a76b0SSuyog Pawar /* ref idx is currently stored in the lower 4bits */
1100*c83a76b0SSuyog Pawar WORD32 ref_idx = (ps_pu->mv.i1_l0_ref_idx);
1101*c83a76b0SSuyog Pawar
1102*c83a76b0SSuyog Pawar /* x and y integer offsets w.r.t frame start */
1103*c83a76b0SSuyog Pawar
1104*c83a76b0SSuyog Pawar frm_x_ofst = (frm_x_pu + ((mv_x >> 3) << 1)); /* uv interleaved */
1105*c83a76b0SSuyog Pawar frm_y_ofst = (frm_y_pu + ((mv_y >> (3 - u1_is_422))));
1106*c83a76b0SSuyog Pawar
1107*c83a76b0SSuyog Pawar ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ref_idx];
1108*c83a76b0SSuyog Pawar
1109*c83a76b0SSuyog Pawar /* picture buffer start and stride */
1110*c83a76b0SSuyog Pawar pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc.pv_u_buf;
1111*c83a76b0SSuyog Pawar ref_pic_stride = ps_ref_pic_l0->s_yuv_buf_desc.i4_uv_strd;
1112*c83a76b0SSuyog Pawar
1113*c83a76b0SSuyog Pawar /* point to reference start location in ref frame */
1114*c83a76b0SSuyog Pawar /* Assuming clipping of mv is not required here as ME would */
1115*c83a76b0SSuyog Pawar /* take care of mv access not going beyond padded data */
1116*c83a76b0SSuyog Pawar pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
1117*c83a76b0SSuyog Pawar
1118*c83a76b0SSuyog Pawar if(store_16bit_output)
1119*c83a76b0SSuyog Pawar {
1120*c83a76b0SSuyog Pawar /* do interpolation in 16bit L0 scratch buffer */
1121*c83a76b0SSuyog Pawar ihevce_chroma_interpolate_16bit_dxdy(
1122*c83a76b0SSuyog Pawar pu1_ref_int_pel,
1123*c83a76b0SSuyog Pawar pi2_scr_buf_l0,
1124*c83a76b0SSuyog Pawar ref_pic_stride,
1125*c83a76b0SSuyog Pawar (pu_wd_chroma << 1),
1126*c83a76b0SSuyog Pawar pi2_horz_scratch,
1127*c83a76b0SSuyog Pawar pu_ht_chroma,
1128*c83a76b0SSuyog Pawar pu_wd_chroma,
1129*c83a76b0SSuyog Pawar dy,
1130*c83a76b0SSuyog Pawar dx,
1131*c83a76b0SSuyog Pawar ps_func_selector);
1132*c83a76b0SSuyog Pawar }
1133*c83a76b0SSuyog Pawar else
1134*c83a76b0SSuyog Pawar {
1135*c83a76b0SSuyog Pawar /* do interpolation in 8bit destination buffer and return */
1136*c83a76b0SSuyog Pawar ihevce_chroma_interpolate_8bit_dxdy(
1137*c83a76b0SSuyog Pawar pu1_ref_int_pel,
1138*c83a76b0SSuyog Pawar pu1_dst_buf,
1139*c83a76b0SSuyog Pawar ref_pic_stride,
1140*c83a76b0SSuyog Pawar dst_stride,
1141*c83a76b0SSuyog Pawar pi2_horz_scratch,
1142*c83a76b0SSuyog Pawar pu_ht_chroma,
1143*c83a76b0SSuyog Pawar pu_wd_chroma,
1144*c83a76b0SSuyog Pawar dy,
1145*c83a76b0SSuyog Pawar dx,
1146*c83a76b0SSuyog Pawar ps_func_selector);
1147*c83a76b0SSuyog Pawar
1148*c83a76b0SSuyog Pawar return;
1149*c83a76b0SSuyog Pawar }
1150*c83a76b0SSuyog Pawar }
1151*c83a76b0SSuyog Pawar
1152*c83a76b0SSuyog Pawar if(inter_pred_idc != PRED_L0)
1153*c83a76b0SSuyog Pawar {
1154*c83a76b0SSuyog Pawar /*****************************************************/
1155*c83a76b0SSuyog Pawar /* L1 inter prediction(Chroma) */
1156*c83a76b0SSuyog Pawar /*****************************************************/
1157*c83a76b0SSuyog Pawar
1158*c83a76b0SSuyog Pawar /* motion vecs in qpel precision */
1159*c83a76b0SSuyog Pawar WORD32 mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
1160*c83a76b0SSuyog Pawar WORD32 mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
1161*c83a76b0SSuyog Pawar
1162*c83a76b0SSuyog Pawar /* sub pel offsets in x and y direction w.r.t integer pel */
1163*c83a76b0SSuyog Pawar WORD32 dx = mv_x & 0x7;
1164*c83a76b0SSuyog Pawar WORD32 dy = (mv_y & ((1 << (!u1_is_422 + 2)) - 1)) << u1_is_422;
1165*c83a76b0SSuyog Pawar
1166*c83a76b0SSuyog Pawar /* ref idx is currently stored in the lower 4bits */
1167*c83a76b0SSuyog Pawar WORD32 ref_idx = (ps_pu->mv.i1_l1_ref_idx);
1168*c83a76b0SSuyog Pawar
1169*c83a76b0SSuyog Pawar /* x and y integer offsets w.r.t frame start */
1170*c83a76b0SSuyog Pawar frm_x_ofst = (frm_x_pu + ((mv_x >> 3) << 1)); /* uv interleaved */
1171*c83a76b0SSuyog Pawar frm_y_ofst = (frm_y_pu + ((mv_y >> (3 - u1_is_422))));
1172*c83a76b0SSuyog Pawar
1173*c83a76b0SSuyog Pawar ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ref_idx];
1174*c83a76b0SSuyog Pawar
1175*c83a76b0SSuyog Pawar /* picture buffer start and stride */
1176*c83a76b0SSuyog Pawar pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc.pv_u_buf;
1177*c83a76b0SSuyog Pawar ref_pic_stride = ps_ref_pic_l1->s_yuv_buf_desc.i4_uv_strd;
1178*c83a76b0SSuyog Pawar
1179*c83a76b0SSuyog Pawar /* point to reference start location in ref frame */
1180*c83a76b0SSuyog Pawar /* Assuming clipping of mv is not required here as ME would */
1181*c83a76b0SSuyog Pawar /* take care of mv access not going beyond padded data */
1182*c83a76b0SSuyog Pawar pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
1183*c83a76b0SSuyog Pawar
1184*c83a76b0SSuyog Pawar if(store_16bit_output)
1185*c83a76b0SSuyog Pawar {
1186*c83a76b0SSuyog Pawar /* do interpolation in 16bit L1 scratch buffer */
1187*c83a76b0SSuyog Pawar ihevce_chroma_interpolate_16bit_dxdy(
1188*c83a76b0SSuyog Pawar pu1_ref_int_pel,
1189*c83a76b0SSuyog Pawar pi2_scr_buf_l1,
1190*c83a76b0SSuyog Pawar ref_pic_stride,
1191*c83a76b0SSuyog Pawar (pu_wd_chroma << 1),
1192*c83a76b0SSuyog Pawar pi2_horz_scratch,
1193*c83a76b0SSuyog Pawar pu_ht_chroma,
1194*c83a76b0SSuyog Pawar pu_wd_chroma,
1195*c83a76b0SSuyog Pawar dy,
1196*c83a76b0SSuyog Pawar dx,
1197*c83a76b0SSuyog Pawar ps_func_selector);
1198*c83a76b0SSuyog Pawar }
1199*c83a76b0SSuyog Pawar else
1200*c83a76b0SSuyog Pawar {
1201*c83a76b0SSuyog Pawar /* do interpolation in 8bit destination buffer and return */
1202*c83a76b0SSuyog Pawar ihevce_chroma_interpolate_8bit_dxdy(
1203*c83a76b0SSuyog Pawar pu1_ref_int_pel,
1204*c83a76b0SSuyog Pawar pu1_dst_buf,
1205*c83a76b0SSuyog Pawar ref_pic_stride,
1206*c83a76b0SSuyog Pawar dst_stride,
1207*c83a76b0SSuyog Pawar pi2_horz_scratch,
1208*c83a76b0SSuyog Pawar pu_ht_chroma,
1209*c83a76b0SSuyog Pawar pu_wd_chroma,
1210*c83a76b0SSuyog Pawar dy,
1211*c83a76b0SSuyog Pawar dx,
1212*c83a76b0SSuyog Pawar ps_func_selector);
1213*c83a76b0SSuyog Pawar
1214*c83a76b0SSuyog Pawar return;
1215*c83a76b0SSuyog Pawar }
1216*c83a76b0SSuyog Pawar }
1217*c83a76b0SSuyog Pawar
1218*c83a76b0SSuyog Pawar if((inter_pred_idc != PRED_BI) && wp_flag)
1219*c83a76b0SSuyog Pawar {
1220*c83a76b0SSuyog Pawar /*****************************************************/
1221*c83a76b0SSuyog Pawar /* unidirection weighted prediction(Chroma) */
1222*c83a76b0SSuyog Pawar /*****************************************************/
1223*c83a76b0SSuyog Pawar ihevce_wght_offst_t *ps_weight_offset;
1224*c83a76b0SSuyog Pawar WORD16 *pi2_src;
1225*c83a76b0SSuyog Pawar WORD32 lvl_shift = 0;
1226*c83a76b0SSuyog Pawar WORD32 wgt_cb, wgt_cr, off_cb, off_cr;
1227*c83a76b0SSuyog Pawar WORD32 shift;
1228*c83a76b0SSuyog Pawar
1229*c83a76b0SSuyog Pawar /* initialize the weight, offsets and ref based on l0/l1 mode */
1230*c83a76b0SSuyog Pawar if(inter_pred_idc == PRED_L0)
1231*c83a76b0SSuyog Pawar {
1232*c83a76b0SSuyog Pawar pi2_src = pi2_scr_buf_l0;
1233*c83a76b0SSuyog Pawar ps_weight_offset = &ps_ref_pic_l0->s_weight_offset;
1234*c83a76b0SSuyog Pawar }
1235*c83a76b0SSuyog Pawar else
1236*c83a76b0SSuyog Pawar {
1237*c83a76b0SSuyog Pawar pi2_src = pi2_scr_buf_l1;
1238*c83a76b0SSuyog Pawar ps_weight_offset = &ps_ref_pic_l1->s_weight_offset;
1239*c83a76b0SSuyog Pawar }
1240*c83a76b0SSuyog Pawar
1241*c83a76b0SSuyog Pawar wgt_cb = ps_weight_offset->i2_cb_weight;
1242*c83a76b0SSuyog Pawar off_cb = ps_weight_offset->i2_cb_offset;
1243*c83a76b0SSuyog Pawar wgt_cr = ps_weight_offset->i2_cr_weight;
1244*c83a76b0SSuyog Pawar off_cr = ps_weight_offset->i2_cr_offset;
1245*c83a76b0SSuyog Pawar
1246*c83a76b0SSuyog Pawar shift = ps_inter_pred_ctxt->i4_log2_chroma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH;
1247*c83a76b0SSuyog Pawar
1248*c83a76b0SSuyog Pawar /* do the uni directional weighted prediction */
1249*c83a76b0SSuyog Pawar ps_func_selector->ihevc_weighted_pred_chroma_uni_fptr(
1250*c83a76b0SSuyog Pawar pi2_src,
1251*c83a76b0SSuyog Pawar pu1_dst_buf,
1252*c83a76b0SSuyog Pawar (pu_wd_chroma << 1),
1253*c83a76b0SSuyog Pawar dst_stride,
1254*c83a76b0SSuyog Pawar wgt_cb,
1255*c83a76b0SSuyog Pawar wgt_cr,
1256*c83a76b0SSuyog Pawar off_cb,
1257*c83a76b0SSuyog Pawar off_cr,
1258*c83a76b0SSuyog Pawar shift,
1259*c83a76b0SSuyog Pawar lvl_shift,
1260*c83a76b0SSuyog Pawar pu_ht_chroma,
1261*c83a76b0SSuyog Pawar pu_wd_chroma);
1262*c83a76b0SSuyog Pawar }
1263*c83a76b0SSuyog Pawar else
1264*c83a76b0SSuyog Pawar {
1265*c83a76b0SSuyog Pawar /*****************************************************/
1266*c83a76b0SSuyog Pawar /* Bipred prediction(Chroma) */
1267*c83a76b0SSuyog Pawar /*****************************************************/
1268*c83a76b0SSuyog Pawar if(wp_flag)
1269*c83a76b0SSuyog Pawar {
1270*c83a76b0SSuyog Pawar WORD32 wgt0_cb, wgt1_cb, wgt0_cr, wgt1_cr;
1271*c83a76b0SSuyog Pawar WORD32 off0_cb, off1_cb, off0_cr, off1_cr;
1272*c83a76b0SSuyog Pawar WORD32 shift;
1273*c83a76b0SSuyog Pawar
1274*c83a76b0SSuyog Pawar /*****************************************************/
1275*c83a76b0SSuyog Pawar /* Bi pred weighted prediction (Chroma) */
1276*c83a76b0SSuyog Pawar /*****************************************************/
1277*c83a76b0SSuyog Pawar wgt0_cb = ps_ref_pic_l0->s_weight_offset.i2_cb_weight;
1278*c83a76b0SSuyog Pawar off0_cb = ps_ref_pic_l0->s_weight_offset.i2_cb_offset;
1279*c83a76b0SSuyog Pawar
1280*c83a76b0SSuyog Pawar wgt0_cr = ps_ref_pic_l0->s_weight_offset.i2_cr_weight;
1281*c83a76b0SSuyog Pawar off0_cr = ps_ref_pic_l0->s_weight_offset.i2_cr_offset;
1282*c83a76b0SSuyog Pawar
1283*c83a76b0SSuyog Pawar wgt1_cb = ps_ref_pic_l1->s_weight_offset.i2_cb_weight;
1284*c83a76b0SSuyog Pawar off1_cb = ps_ref_pic_l1->s_weight_offset.i2_cb_offset;
1285*c83a76b0SSuyog Pawar
1286*c83a76b0SSuyog Pawar wgt1_cr = ps_ref_pic_l1->s_weight_offset.i2_cr_weight;
1287*c83a76b0SSuyog Pawar off1_cr = ps_ref_pic_l1->s_weight_offset.i2_cr_offset;
1288*c83a76b0SSuyog Pawar
1289*c83a76b0SSuyog Pawar shift = ps_inter_pred_ctxt->i4_log2_chroma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH + 1;
1290*c83a76b0SSuyog Pawar
1291*c83a76b0SSuyog Pawar ps_func_selector->ihevc_weighted_pred_chroma_bi_fptr(
1292*c83a76b0SSuyog Pawar pi2_scr_buf_l0,
1293*c83a76b0SSuyog Pawar pi2_scr_buf_l1,
1294*c83a76b0SSuyog Pawar pu1_dst_buf,
1295*c83a76b0SSuyog Pawar (pu_wd_chroma << 1),
1296*c83a76b0SSuyog Pawar (pu_wd_chroma << 1),
1297*c83a76b0SSuyog Pawar dst_stride,
1298*c83a76b0SSuyog Pawar wgt0_cb,
1299*c83a76b0SSuyog Pawar wgt0_cr,
1300*c83a76b0SSuyog Pawar off0_cb,
1301*c83a76b0SSuyog Pawar off0_cr,
1302*c83a76b0SSuyog Pawar wgt1_cb,
1303*c83a76b0SSuyog Pawar wgt1_cr,
1304*c83a76b0SSuyog Pawar off1_cb,
1305*c83a76b0SSuyog Pawar off1_cr,
1306*c83a76b0SSuyog Pawar shift,
1307*c83a76b0SSuyog Pawar 0,
1308*c83a76b0SSuyog Pawar 0,
1309*c83a76b0SSuyog Pawar pu_ht_chroma,
1310*c83a76b0SSuyog Pawar pu_wd_chroma);
1311*c83a76b0SSuyog Pawar }
1312*c83a76b0SSuyog Pawar else
1313*c83a76b0SSuyog Pawar {
1314*c83a76b0SSuyog Pawar /*****************************************************/
1315*c83a76b0SSuyog Pawar /* Default Bi pred prediction (Chroma) */
1316*c83a76b0SSuyog Pawar /*****************************************************/
1317*c83a76b0SSuyog Pawar ps_func_selector->ihevc_weighted_pred_chroma_bi_default_fptr(
1318*c83a76b0SSuyog Pawar pi2_scr_buf_l0,
1319*c83a76b0SSuyog Pawar pi2_scr_buf_l1,
1320*c83a76b0SSuyog Pawar pu1_dst_buf,
1321*c83a76b0SSuyog Pawar (pu_wd_chroma << 1),
1322*c83a76b0SSuyog Pawar (pu_wd_chroma << 1),
1323*c83a76b0SSuyog Pawar dst_stride,
1324*c83a76b0SSuyog Pawar 0,
1325*c83a76b0SSuyog Pawar 0,
1326*c83a76b0SSuyog Pawar pu_ht_chroma,
1327*c83a76b0SSuyog Pawar pu_wd_chroma);
1328*c83a76b0SSuyog Pawar }
1329*c83a76b0SSuyog Pawar }
1330*c83a76b0SSuyog Pawar }
1331