/******************************************************************************
*
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
/**
 *******************************************************************************
 * @file
 *  ihevc_iquant_itrans_recon.c
 *
 * @brief
 *  Contains function definitions for inverse quantization, inverse
 *  transform and reconstruction
 *
 * @author
 *  100470
 *
 * @par List of Functions:
 *  - ihevc_iquant_itrans_recon_4x4_ttype1()
 *  - ihevc_iquant_itrans_recon_4x4()
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
39*c83a76b0SSuyog Pawar #include <stdio.h>
40*c83a76b0SSuyog Pawar #include <string.h>
41*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
42*c83a76b0SSuyog Pawar #include "ihevc_macros.h"
43*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
44*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
45*c83a76b0SSuyog Pawar #include "ihevc_trans_tables.h"
46*c83a76b0SSuyog Pawar #include "ihevc_iquant_itrans_recon.h"
47*c83a76b0SSuyog Pawar #include "ihevc_func_selector.h"
48*c83a76b0SSuyog Pawar #include "ihevc_trans_macros.h"

/* All the functions here are replicated from ihevc_itrans.c and modified to */
/* include reconstruction */

/**
 *******************************************************************************
 *
 * @brief
 *  This function performs inverse quantization, inverse transform
 *  type 1 (DST) and reconstruction for a 4x4 input block
 *
 * @par Description:
 *  Performs inverse quantization, inverse transform type 1, adds the
 *  prediction data and clips the output to 8 bit
 *
 * @param[in] pi2_src
 *  Input 4x4 coefficients
 *
 * @param[in] pi2_tmp
 *  Temporary 4x4 buffer for storing inverse
 *  transform 1st stage output
 *
 * @param[in] pu1_pred
 *  Prediction 4x4 block
 *
 * @param[in] pi2_dequant_coeff
 *  Dequant Coeffs
 *
 * @param[out] pu1_dst
 *  Output 4x4 block
 *
 * @param[in] qp_div
 *  Quantization parameter / 6
 *
 * @param[in] qp_rem
 *  Quantization parameter % 6
 *
 * @param[in] src_strd
 *  Input stride
 *
 * @param[in] pred_strd
 *  Prediction stride
 *
 * @param[in] dst_strd
 *  Output Stride
 *
 * @param[in] zero_cols
 *  Zero columns in pi2_src
 *
 * @param[in] zero_rows
 *  Zero Rows in pi2_src
 *
 * @returns  Void
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */

void ihevc_iquant_itrans_recon_4x4_ttype1(WORD16 *pi2_src,
                                          WORD16 *pi2_tmp,
                                          UWORD8 *pu1_pred,
                                          WORD16 *pi2_dequant_coeff,
                                          UWORD8 *pu1_dst,
                                          WORD32 qp_div, /* qpscaled / 6 */
                                          WORD32 qp_rem, /* qpscaled % 6 */
                                          WORD32 src_strd,
                                          WORD32 pred_strd,
                                          WORD32 dst_strd,
                                          WORD32 zero_cols,
                                          WORD32 zero_rows)
{
    /*
     * Implementation outline:
     *   Stage 1 - for each source column: inverse quantise its four
     *             coefficients, apply the 1-D type-1 inverse transform and
     *             store the result as one row of pi2_tmp (i.e. transposed).
     *   Stage 2 - for each column of pi2_tmp: apply the same 1-D transform,
     *             add the prediction row and clip to 8 bit into pu1_dst.
     *
     * pi2_src is walked with src_strd between rows, whereas
     * pi2_dequant_coeff and pi2_tmp are walked with a fixed row pitch of
     * trans_size (4) elements.
     */
    WORD32 i, c[4];
    WORD32 add;
    WORD32 shift;
    WORD16 *pi2_tmp_orig;
    WORD32 shift_iq;
    WORD32 trans_size;
    WORD32 log2_trans_size, bit_depth;

    /* zero_rows is part of the common prototype but is not used here */
    UNUSED(zero_rows);

    /* Inverse Quantization constants */
    log2_trans_size = 2;
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_trans_size - 5;

    trans_size = TRANS_SIZE_4;
    pi2_tmp_orig = pi2_tmp;

    /* Inverse Transform 1st stage */
    shift = IT_SHIFT_STAGE_1;
    add = 1 << (shift - 1); /* rounding offset for the right shift */

    for(i = 0; i < trans_size; i++)
    {
        /* Bit 0 of zero_cols flags the current source column as all-zero; */
        /* the transform of a zero column is zero, so just clear the row   */
        if((zero_cols & 1) == 1)
        {
            memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
        }
        else
        {
            WORD32 iq_tmp_0, iq_tmp_1, iq_tmp_2, iq_tmp_3;

            /* De-quantise each of the four coefficients of this column   */
            /* exactly once. All reads of pi2_src happen before any write */
            /* to pi2_tmp for this column.                                */
            /* NOTE(review): assumes IQUANT_4x4 has no side effects other */
            /* than writing its first argument - confirm against the      */
            /* macro definition.                                          */
            IQUANT_4x4(iq_tmp_0,
                       pi2_src[0 * src_strd],
                       pi2_dequant_coeff[0 * trans_size] * g_ihevc_iquant_scales[qp_rem],
                       shift_iq, qp_div);
            IQUANT_4x4(iq_tmp_1,
                       pi2_src[1 * src_strd],
                       pi2_dequant_coeff[1 * trans_size] * g_ihevc_iquant_scales[qp_rem],
                       shift_iq, qp_div);
            IQUANT_4x4(iq_tmp_2,
                       pi2_src[2 * src_strd],
                       pi2_dequant_coeff[2 * trans_size] * g_ihevc_iquant_scales[qp_rem],
                       shift_iq, qp_div);
            IQUANT_4x4(iq_tmp_3,
                       pi2_src[3 * src_strd],
                       pi2_dequant_coeff[3 * trans_size] * g_ihevc_iquant_scales[qp_rem],
                       shift_iq, qp_div);

            /* Intermediate terms shared between the four outputs */
            c[0] = iq_tmp_0 + iq_tmp_2;
            c[1] = iq_tmp_2 + iq_tmp_3;
            c[2] = iq_tmp_0 - iq_tmp_3;
            c[3] = 74 * iq_tmp_1;

            pi2_tmp[0] =
                CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> shift);
            pi2_tmp[1] =
                CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> shift);
            pi2_tmp[2] =
                CLIP_S16((74 * (iq_tmp_0 - iq_tmp_2 + iq_tmp_3) + add) >> shift);
            pi2_tmp[3] =
                CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> shift);
        }

        /* Next source column -> next row of the temporary buffer */
        pi2_src++;
        pi2_dequant_coeff++;
        pi2_tmp += trans_size;
        zero_cols = zero_cols >> 1;
    }

    pi2_tmp = pi2_tmp_orig;

    /* Inverse Transform 2nd stage */
    shift = IT_SHIFT_STAGE_2;
    add = 1 << (shift - 1);

    for(i = 0; i < trans_size; i++)
    {
        WORD32 itrans_out;

        /* Intermediate terms, taken down one column of pi2_tmp */
        c[0] = pi2_tmp[0] + pi2_tmp[2 * trans_size];
        c[1] = pi2_tmp[2 * trans_size] + pi2_tmp[3 * trans_size];
        c[2] = pi2_tmp[0] - pi2_tmp[3 * trans_size];
        c[3] = 74 * pi2_tmp[trans_size];

        /* Residual + prediction, clipped to 8 bit, for one output row */
        itrans_out =
            CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> shift);
        pu1_dst[0] = CLIP_U8((itrans_out + pu1_pred[0]));

        itrans_out =
            CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> shift);
        pu1_dst[1] = CLIP_U8((itrans_out + pu1_pred[1]));

        itrans_out =
            CLIP_S16((74 * (pi2_tmp[0] - pi2_tmp[2 * trans_size] + pi2_tmp[3 * trans_size]) + add) >> shift);
        pu1_dst[2] = CLIP_U8((itrans_out + pu1_pred[2]));

        itrans_out =
            CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> shift);
        pu1_dst[3] = CLIP_U8((itrans_out + pu1_pred[3]));

        pi2_tmp++;
        pu1_pred += pred_strd;
        pu1_dst += dst_strd;
    }
}

/**
 *******************************************************************************
 *
 * @brief
 *  This function performs inverse quantization, inverse transform and
 *  reconstruction for a 4x4 input block
 *
 * @par Description:
 *  Performs inverse quantization, inverse transform, adds the
 *  prediction data and clips the output to 8 bit
 *
 * @param[in] pi2_src
 *  Input 4x4 coefficients
 *
 * @param[in] pi2_tmp
 *  Temporary 4x4 buffer for storing inverse
 *  transform 1st stage output
 *
 * @param[in] pu1_pred
 *  Prediction 4x4 block
 *
 * @param[in] pi2_dequant_coeff
 *  Dequant Coeffs
 *
 * @param[out] pu1_dst
 *  Output 4x4 block
 *
 * @param[in] qp_div
 *  Quantization parameter / 6
 *
 * @param[in] qp_rem
 *  Quantization parameter % 6
 *
 * @param[in] src_strd
 *  Input stride
 *
 * @param[in] pred_strd
 *  Prediction stride
 *
 * @param[in] dst_strd
 *  Output Stride
 *
 * @param[in] zero_cols
 *  Zero columns in pi2_src
 *
 * @param[in] zero_rows
 *  Zero Rows in pi2_src
 *
 * @returns  Void
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */

void ihevc_iquant_itrans_recon_4x4(WORD16 *pi2_src,
                                   WORD16 *pi2_tmp,
                                   UWORD8 *pu1_pred,
                                   WORD16 *pi2_dequant_coeff,
                                   UWORD8 *pu1_dst,
                                   WORD32 qp_div, /* qpscaled / 6 */
                                   WORD32 qp_rem, /* qpscaled % 6 */
                                   WORD32 src_strd,
                                   WORD32 pred_strd,
                                   WORD32 dst_strd,
                                   WORD32 zero_cols,
                                   WORD32 zero_rows)
{
    /*
     * Implementation outline:
     *   Stage 1 - for each source column: inverse quantise its four
     *             coefficients, apply the 1-D inverse transform using the
     *             g_ai2_ihevc_trans_4 table and store the result as one row
     *             of pi2_tmp (i.e. transposed).
     *   Stage 2 - for each column of pi2_tmp: apply the same 1-D transform,
     *             add the prediction row and clip to 8 bit into pu1_dst.
     *
     * Each 1-D transform is split into an even part e[] (inputs 0 and 2)
     * and an odd part o[] (inputs 1 and 3); the four outputs are then
     * e[0]+o[0], e[1]+o[1], e[1]-o[1], e[0]-o[0].
     *
     * pi2_src is walked with src_strd between rows, whereas
     * pi2_dequant_coeff and pi2_tmp are walked with a fixed row pitch of
     * trans_size (4) elements.
     */
    WORD32 j;
    WORD32 e[2], o[2];
    WORD32 add;
    WORD32 shift;
    WORD16 *pi2_tmp_orig;
    WORD32 shift_iq;
    WORD32 trans_size;
    WORD32 log2_trans_size, bit_depth;

    /* zero_rows is part of the common prototype but is not used here */
    UNUSED(zero_rows);

    /* Inverse Quantization constants */
    log2_trans_size = 2;
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_trans_size - 5;

    trans_size = TRANS_SIZE_4;
    pi2_tmp_orig = pi2_tmp;

    /* Inverse Transform 1st stage */
    shift = IT_SHIFT_STAGE_1;
    add = 1 << (shift - 1); /* rounding offset for the right shift */

    for(j = 0; j < trans_size; j++)
    {
        /* Bit 0 of zero_cols flags the current source column as all-zero; */
        /* the transform of a zero column is zero, so just clear the row   */
        if((zero_cols & 1) == 1)
        {
            memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
        }
        else
        {
            WORD32 iq_tmp_1, iq_tmp_2;

            /* Odd part: de-quantised coefficients 1 and 3 */
            IQUANT_4x4(iq_tmp_1,
                       pi2_src[1 * src_strd],
                       pi2_dequant_coeff[1 * trans_size] * g_ihevc_iquant_scales[qp_rem],
                       shift_iq, qp_div);
            IQUANT_4x4(iq_tmp_2,
                       pi2_src[3 * src_strd],
                       pi2_dequant_coeff[3 * trans_size] * g_ihevc_iquant_scales[qp_rem],
                       shift_iq, qp_div);

            o[0] = g_ai2_ihevc_trans_4[1][0] * iq_tmp_1
                            + g_ai2_ihevc_trans_4[3][0] * iq_tmp_2;
            o[1] = g_ai2_ihevc_trans_4[1][1] * iq_tmp_1
                            + g_ai2_ihevc_trans_4[3][1] * iq_tmp_2;

            /* Even part: de-quantised coefficients 0 and 2 */
            IQUANT_4x4(iq_tmp_1,
                       pi2_src[0 * src_strd],
                       pi2_dequant_coeff[0 * trans_size] * g_ihevc_iquant_scales[qp_rem],
                       shift_iq, qp_div);
            IQUANT_4x4(iq_tmp_2,
                       pi2_src[2 * src_strd],
                       pi2_dequant_coeff[2 * trans_size] * g_ihevc_iquant_scales[qp_rem],
                       shift_iq, qp_div);

            e[0] = g_ai2_ihevc_trans_4[0][0] * iq_tmp_1
                            + g_ai2_ihevc_trans_4[2][0] * iq_tmp_2;
            e[1] = g_ai2_ihevc_trans_4[0][1] * iq_tmp_1
                            + g_ai2_ihevc_trans_4[2][1] * iq_tmp_2;

            /* Combine even/odd parts, round, shift and saturate to 16 bit */
            pi2_tmp[0] =
                CLIP_S16(((e[0] + o[0] + add) >> shift));
            pi2_tmp[1] =
                CLIP_S16(((e[1] + o[1] + add) >> shift));
            pi2_tmp[2] =
                CLIP_S16(((e[1] - o[1] + add) >> shift));
            pi2_tmp[3] =
                CLIP_S16(((e[0] - o[0] + add) >> shift));
        }

        /* Next source column -> next row of the temporary buffer */
        pi2_src++;
        pi2_dequant_coeff++;
        pi2_tmp += trans_size;
        zero_cols = zero_cols >> 1;
    }

    pi2_tmp = pi2_tmp_orig;

    /* Inverse Transform 2nd stage */
    shift = IT_SHIFT_STAGE_2;
    add = 1 << (shift - 1);

    for(j = 0; j < trans_size; j++)
    {
        WORD32 itrans_out;

        /* Odd and even parts, taken down one column of pi2_tmp */
        o[0] = g_ai2_ihevc_trans_4[1][0] * pi2_tmp[trans_size]
                        + g_ai2_ihevc_trans_4[3][0] * pi2_tmp[3 * trans_size];
        o[1] = g_ai2_ihevc_trans_4[1][1] * pi2_tmp[trans_size]
                        + g_ai2_ihevc_trans_4[3][1] * pi2_tmp[3 * trans_size];
        e[0] = g_ai2_ihevc_trans_4[0][0] * pi2_tmp[0]
                        + g_ai2_ihevc_trans_4[2][0] * pi2_tmp[2 * trans_size];
        e[1] = g_ai2_ihevc_trans_4[0][1] * pi2_tmp[0]
                        + g_ai2_ihevc_trans_4[2][1] * pi2_tmp[2 * trans_size];

        /* Residual + prediction, clipped to 8 bit, for one output row */
        itrans_out =
            CLIP_S16(((e[0] + o[0] + add) >> shift));
        pu1_dst[0] = CLIP_U8((itrans_out + pu1_pred[0]));

        itrans_out =
            CLIP_S16(((e[1] + o[1] + add) >> shift));
        pu1_dst[1] = CLIP_U8((itrans_out + pu1_pred[1]));

        itrans_out =
            CLIP_S16(((e[1] - o[1] + add) >> shift));
        pu1_dst[2] = CLIP_U8((itrans_out + pu1_pred[2]));

        itrans_out =
            CLIP_S16(((e[0] - o[0] + add) >> shift));
        pu1_dst[3] = CLIP_U8((itrans_out + pu1_pred[3]));

        pi2_tmp++;
        pu1_pred += pred_strd;
        pu1_dst += dst_strd;
    }
}
457