1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar *
3*c83a76b0SSuyog Pawar * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*c83a76b0SSuyog Pawar *
5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar *
9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar *
11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar * limitations under the License.
16*c83a76b0SSuyog Pawar *
17*c83a76b0SSuyog Pawar ******************************************************************************/
18*c83a76b0SSuyog Pawar /**
19*c83a76b0SSuyog Pawar *******************************************************************************
20*c83a76b0SSuyog Pawar * @file
21*c83a76b0SSuyog Pawar * ihevc_itrans_recon_8x8.c
22*c83a76b0SSuyog Pawar *
23*c83a76b0SSuyog Pawar * @brief
24*c83a76b0SSuyog Pawar * Contains function definitions for inverse transform and reconstruction 8x8
25*c83a76b0SSuyog Pawar *
26*c83a76b0SSuyog Pawar *
27*c83a76b0SSuyog Pawar * @author
28*c83a76b0SSuyog Pawar * 100470
29*c83a76b0SSuyog Pawar *
30*c83a76b0SSuyog Pawar * @par List of Functions:
31*c83a76b0SSuyog Pawar * - ihevc_itrans_recon_8x8()
32*c83a76b0SSuyog Pawar *
33*c83a76b0SSuyog Pawar * @remarks
34*c83a76b0SSuyog Pawar * None
35*c83a76b0SSuyog Pawar *
36*c83a76b0SSuyog Pawar *******************************************************************************
37*c83a76b0SSuyog Pawar */
38*c83a76b0SSuyog Pawar #include <stdio.h>
39*c83a76b0SSuyog Pawar #include <string.h>
40*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
41*c83a76b0SSuyog Pawar #include "ihevc_macros.h"
42*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
43*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
44*c83a76b0SSuyog Pawar #include "ihevc_trans_tables.h"
45*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
46*c83a76b0SSuyog Pawar #include "ihevc_func_selector.h"
47*c83a76b0SSuyog Pawar #include "ihevc_trans_macros.h"
48*c83a76b0SSuyog Pawar
49*c83a76b0SSuyog Pawar /**
50*c83a76b0SSuyog Pawar *******************************************************************************
51*c83a76b0SSuyog Pawar *
52*c83a76b0SSuyog Pawar * @brief
53*c83a76b0SSuyog Pawar * This function performs Inverse transform and reconstruction for 8x8
54*c83a76b0SSuyog Pawar * input block
55*c83a76b0SSuyog Pawar *
56*c83a76b0SSuyog Pawar * @par Description:
57*c83a76b0SSuyog Pawar * Performs inverse transform and adds the prediction data and clips output
58*c83a76b0SSuyog Pawar * to 8 bit
59*c83a76b0SSuyog Pawar *
60*c83a76b0SSuyog Pawar * @param[in] pi2_src
61*c83a76b0SSuyog Pawar * Input 8x8 coefficients
62*c83a76b0SSuyog Pawar *
63*c83a76b0SSuyog Pawar * @param[in] pi2_tmp
64*c83a76b0SSuyog Pawar * Temporary 8x8 buffer for storing inverse
65*c83a76b0SSuyog Pawar *
66*c83a76b0SSuyog Pawar * transform
67*c83a76b0SSuyog Pawar * 1st stage output
68*c83a76b0SSuyog Pawar *
69*c83a76b0SSuyog Pawar * @param[in] pu1_pred
70*c83a76b0SSuyog Pawar * Prediction 8x8 block
71*c83a76b0SSuyog Pawar *
72*c83a76b0SSuyog Pawar * @param[out] pu1_dst
73*c83a76b0SSuyog Pawar * Output 8x8 block
74*c83a76b0SSuyog Pawar *
75*c83a76b0SSuyog Pawar * @param[in] src_strd
76*c83a76b0SSuyog Pawar * Input stride
77*c83a76b0SSuyog Pawar *
78*c83a76b0SSuyog Pawar * @param[in] pred_strd
79*c83a76b0SSuyog Pawar * Prediction stride
80*c83a76b0SSuyog Pawar *
81*c83a76b0SSuyog Pawar * @param[in] dst_strd
82*c83a76b0SSuyog Pawar * Output Stride
83*c83a76b0SSuyog Pawar *
84*c83a76b0SSuyog Pawar * @param[in] shift
85*c83a76b0SSuyog Pawar * Output shift
86*c83a76b0SSuyog Pawar *
87*c83a76b0SSuyog Pawar * @param[in] zero_cols
88*c83a76b0SSuyog Pawar * Zero columns in pi2_src
89*c83a76b0SSuyog Pawar *
90*c83a76b0SSuyog Pawar * @returns Void
91*c83a76b0SSuyog Pawar *
92*c83a76b0SSuyog Pawar * @remarks
93*c83a76b0SSuyog Pawar * None
94*c83a76b0SSuyog Pawar *
95*c83a76b0SSuyog Pawar *******************************************************************************
96*c83a76b0SSuyog Pawar */
97*c83a76b0SSuyog Pawar
ihevc_itrans_recon_8x8(WORD16 * pi2_src,WORD16 * pi2_tmp,UWORD8 * pu1_pred,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,WORD32 zero_cols,WORD32 zero_rows)98*c83a76b0SSuyog Pawar void ihevc_itrans_recon_8x8(WORD16 *pi2_src,
99*c83a76b0SSuyog Pawar WORD16 *pi2_tmp,
100*c83a76b0SSuyog Pawar UWORD8 *pu1_pred,
101*c83a76b0SSuyog Pawar UWORD8 *pu1_dst,
102*c83a76b0SSuyog Pawar WORD32 src_strd,
103*c83a76b0SSuyog Pawar WORD32 pred_strd,
104*c83a76b0SSuyog Pawar WORD32 dst_strd,
105*c83a76b0SSuyog Pawar WORD32 zero_cols,
106*c83a76b0SSuyog Pawar WORD32 zero_rows)
107*c83a76b0SSuyog Pawar {
108*c83a76b0SSuyog Pawar WORD32 j, k;
109*c83a76b0SSuyog Pawar WORD32 e[4], o[4];
110*c83a76b0SSuyog Pawar WORD32 ee[2], eo[2];
111*c83a76b0SSuyog Pawar WORD32 add;
112*c83a76b0SSuyog Pawar WORD32 shift;
113*c83a76b0SSuyog Pawar WORD16 *pi2_tmp_orig;
114*c83a76b0SSuyog Pawar WORD32 trans_size;
115*c83a76b0SSuyog Pawar WORD32 zero_rows_2nd_stage = zero_cols;
116*c83a76b0SSuyog Pawar WORD32 row_limit_2nd_stage;
117*c83a76b0SSuyog Pawar
118*c83a76b0SSuyog Pawar trans_size = TRANS_SIZE_8;
119*c83a76b0SSuyog Pawar
120*c83a76b0SSuyog Pawar pi2_tmp_orig = pi2_tmp;
121*c83a76b0SSuyog Pawar
122*c83a76b0SSuyog Pawar if((zero_cols & 0xF0) == 0xF0)
123*c83a76b0SSuyog Pawar row_limit_2nd_stage = 4;
124*c83a76b0SSuyog Pawar else
125*c83a76b0SSuyog Pawar row_limit_2nd_stage = TRANS_SIZE_8;
126*c83a76b0SSuyog Pawar
127*c83a76b0SSuyog Pawar
128*c83a76b0SSuyog Pawar if((zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */
129*c83a76b0SSuyog Pawar {
130*c83a76b0SSuyog Pawar /************************************************************************************************/
131*c83a76b0SSuyog Pawar /**********************************START - IT_RECON_8x8******************************************/
132*c83a76b0SSuyog Pawar /************************************************************************************************/
133*c83a76b0SSuyog Pawar
134*c83a76b0SSuyog Pawar /* Inverse Transform 1st stage */
135*c83a76b0SSuyog Pawar shift = IT_SHIFT_STAGE_1;
136*c83a76b0SSuyog Pawar add = 1 << (shift - 1);
137*c83a76b0SSuyog Pawar
138*c83a76b0SSuyog Pawar for(j = 0; j < row_limit_2nd_stage; j++)
139*c83a76b0SSuyog Pawar {
140*c83a76b0SSuyog Pawar /* Checking for Zero Cols */
141*c83a76b0SSuyog Pawar if((zero_cols & 1) == 1)
142*c83a76b0SSuyog Pawar {
143*c83a76b0SSuyog Pawar memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
144*c83a76b0SSuyog Pawar }
145*c83a76b0SSuyog Pawar else
146*c83a76b0SSuyog Pawar {
147*c83a76b0SSuyog Pawar /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
148*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
149*c83a76b0SSuyog Pawar {
150*c83a76b0SSuyog Pawar o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
151*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[3][k]
152*c83a76b0SSuyog Pawar * pi2_src[3 * src_strd];
153*c83a76b0SSuyog Pawar }
154*c83a76b0SSuyog Pawar eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd];
155*c83a76b0SSuyog Pawar eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd];
156*c83a76b0SSuyog Pawar ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0];
157*c83a76b0SSuyog Pawar ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0];
158*c83a76b0SSuyog Pawar
159*c83a76b0SSuyog Pawar /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
160*c83a76b0SSuyog Pawar e[0] = ee[0] + eo[0];
161*c83a76b0SSuyog Pawar e[3] = ee[0] - eo[0];
162*c83a76b0SSuyog Pawar e[1] = ee[1] + eo[1];
163*c83a76b0SSuyog Pawar e[2] = ee[1] - eo[1];
164*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
165*c83a76b0SSuyog Pawar {
166*c83a76b0SSuyog Pawar pi2_tmp[k] =
167*c83a76b0SSuyog Pawar CLIP_S16(((e[k] + o[k] + add) >> shift));
168*c83a76b0SSuyog Pawar pi2_tmp[k + 4] =
169*c83a76b0SSuyog Pawar CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
170*c83a76b0SSuyog Pawar }
171*c83a76b0SSuyog Pawar }
172*c83a76b0SSuyog Pawar pi2_src++;
173*c83a76b0SSuyog Pawar pi2_tmp += trans_size;
174*c83a76b0SSuyog Pawar zero_cols = zero_cols >> 1;
175*c83a76b0SSuyog Pawar }
176*c83a76b0SSuyog Pawar
177*c83a76b0SSuyog Pawar pi2_tmp = pi2_tmp_orig;
178*c83a76b0SSuyog Pawar
179*c83a76b0SSuyog Pawar /* Inverse Transform 2nd stage */
180*c83a76b0SSuyog Pawar shift = IT_SHIFT_STAGE_2;
181*c83a76b0SSuyog Pawar add = 1 << (shift - 1);
182*c83a76b0SSuyog Pawar if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
183*c83a76b0SSuyog Pawar {
184*c83a76b0SSuyog Pawar for(j = 0; j < trans_size; j++)
185*c83a76b0SSuyog Pawar {
186*c83a76b0SSuyog Pawar /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
187*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
188*c83a76b0SSuyog Pawar {
189*c83a76b0SSuyog Pawar o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
190*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size];
191*c83a76b0SSuyog Pawar }
192*c83a76b0SSuyog Pawar eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
193*c83a76b0SSuyog Pawar eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
194*c83a76b0SSuyog Pawar ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
195*c83a76b0SSuyog Pawar ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
196*c83a76b0SSuyog Pawar
197*c83a76b0SSuyog Pawar /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
198*c83a76b0SSuyog Pawar e[0] = ee[0] + eo[0];
199*c83a76b0SSuyog Pawar e[3] = ee[0] - eo[0];
200*c83a76b0SSuyog Pawar e[1] = ee[1] + eo[1];
201*c83a76b0SSuyog Pawar e[2] = ee[1] - eo[1];
202*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
203*c83a76b0SSuyog Pawar {
204*c83a76b0SSuyog Pawar WORD32 itrans_out;
205*c83a76b0SSuyog Pawar itrans_out =
206*c83a76b0SSuyog Pawar CLIP_S16(((e[k] + o[k] + add) >> shift));
207*c83a76b0SSuyog Pawar pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
208*c83a76b0SSuyog Pawar itrans_out =
209*c83a76b0SSuyog Pawar CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
210*c83a76b0SSuyog Pawar pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
211*c83a76b0SSuyog Pawar }
212*c83a76b0SSuyog Pawar pi2_tmp++;
213*c83a76b0SSuyog Pawar pu1_pred += pred_strd;
214*c83a76b0SSuyog Pawar pu1_dst += dst_strd;
215*c83a76b0SSuyog Pawar }
216*c83a76b0SSuyog Pawar }
217*c83a76b0SSuyog Pawar else /* All rows of output of 1st stage are non-zero */
218*c83a76b0SSuyog Pawar {
219*c83a76b0SSuyog Pawar for(j = 0; j < trans_size; j++)
220*c83a76b0SSuyog Pawar {
221*c83a76b0SSuyog Pawar /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
222*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
223*c83a76b0SSuyog Pawar {
224*c83a76b0SSuyog Pawar o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
225*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[3][k]
226*c83a76b0SSuyog Pawar * pi2_tmp[3 * trans_size]
227*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[5][k]
228*c83a76b0SSuyog Pawar * pi2_tmp[5 * trans_size]
229*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[7][k]
230*c83a76b0SSuyog Pawar * pi2_tmp[7 * trans_size];
231*c83a76b0SSuyog Pawar }
232*c83a76b0SSuyog Pawar
233*c83a76b0SSuyog Pawar eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
234*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
235*c83a76b0SSuyog Pawar eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
236*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
237*c83a76b0SSuyog Pawar ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
238*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
239*c83a76b0SSuyog Pawar ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
240*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
241*c83a76b0SSuyog Pawar
242*c83a76b0SSuyog Pawar /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
243*c83a76b0SSuyog Pawar e[0] = ee[0] + eo[0];
244*c83a76b0SSuyog Pawar e[3] = ee[0] - eo[0];
245*c83a76b0SSuyog Pawar e[1] = ee[1] + eo[1];
246*c83a76b0SSuyog Pawar e[2] = ee[1] - eo[1];
247*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
248*c83a76b0SSuyog Pawar {
249*c83a76b0SSuyog Pawar WORD32 itrans_out;
250*c83a76b0SSuyog Pawar itrans_out =
251*c83a76b0SSuyog Pawar CLIP_S16(((e[k] + o[k] + add) >> shift));
252*c83a76b0SSuyog Pawar pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
253*c83a76b0SSuyog Pawar itrans_out =
254*c83a76b0SSuyog Pawar CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
255*c83a76b0SSuyog Pawar pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
256*c83a76b0SSuyog Pawar }
257*c83a76b0SSuyog Pawar pi2_tmp++;
258*c83a76b0SSuyog Pawar pu1_pred += pred_strd;
259*c83a76b0SSuyog Pawar pu1_dst += dst_strd;
260*c83a76b0SSuyog Pawar }
261*c83a76b0SSuyog Pawar }
262*c83a76b0SSuyog Pawar /************************************************************************************************/
263*c83a76b0SSuyog Pawar /************************************END - IT_RECON_8x8******************************************/
264*c83a76b0SSuyog Pawar /************************************************************************************************/
265*c83a76b0SSuyog Pawar }
266*c83a76b0SSuyog Pawar else /* All rows of input are non-zero */
267*c83a76b0SSuyog Pawar {
268*c83a76b0SSuyog Pawar /************************************************************************************************/
269*c83a76b0SSuyog Pawar /**********************************START - IT_RECON_8x8******************************************/
270*c83a76b0SSuyog Pawar /************************************************************************************************/
271*c83a76b0SSuyog Pawar
272*c83a76b0SSuyog Pawar /* Inverse Transform 1st stage */
273*c83a76b0SSuyog Pawar shift = IT_SHIFT_STAGE_1;
274*c83a76b0SSuyog Pawar add = 1 << (shift - 1);
275*c83a76b0SSuyog Pawar
276*c83a76b0SSuyog Pawar for(j = 0; j < row_limit_2nd_stage; j++)
277*c83a76b0SSuyog Pawar {
278*c83a76b0SSuyog Pawar /* Checking for Zero Cols */
279*c83a76b0SSuyog Pawar if((zero_cols & 1) == 1)
280*c83a76b0SSuyog Pawar {
281*c83a76b0SSuyog Pawar memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
282*c83a76b0SSuyog Pawar }
283*c83a76b0SSuyog Pawar else
284*c83a76b0SSuyog Pawar {
285*c83a76b0SSuyog Pawar /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
286*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
287*c83a76b0SSuyog Pawar {
288*c83a76b0SSuyog Pawar o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
289*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[3][k]
290*c83a76b0SSuyog Pawar * pi2_src[3 * src_strd]
291*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[5][k]
292*c83a76b0SSuyog Pawar * pi2_src[5 * src_strd]
293*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[7][k]
294*c83a76b0SSuyog Pawar * pi2_src[7 * src_strd];
295*c83a76b0SSuyog Pawar }
296*c83a76b0SSuyog Pawar
297*c83a76b0SSuyog Pawar eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd]
298*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[6][0] * pi2_src[6 * src_strd];
299*c83a76b0SSuyog Pawar eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd]
300*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[6][1] * pi2_src[6 * src_strd];
301*c83a76b0SSuyog Pawar ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0]
302*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[4][0] * pi2_src[4 * src_strd];
303*c83a76b0SSuyog Pawar ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0]
304*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[4][1] * pi2_src[4 * src_strd];
305*c83a76b0SSuyog Pawar
306*c83a76b0SSuyog Pawar /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
307*c83a76b0SSuyog Pawar e[0] = ee[0] + eo[0];
308*c83a76b0SSuyog Pawar e[3] = ee[0] - eo[0];
309*c83a76b0SSuyog Pawar e[1] = ee[1] + eo[1];
310*c83a76b0SSuyog Pawar e[2] = ee[1] - eo[1];
311*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
312*c83a76b0SSuyog Pawar {
313*c83a76b0SSuyog Pawar pi2_tmp[k] =
314*c83a76b0SSuyog Pawar CLIP_S16(((e[k] + o[k] + add) >> shift));
315*c83a76b0SSuyog Pawar pi2_tmp[k + 4] =
316*c83a76b0SSuyog Pawar CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
317*c83a76b0SSuyog Pawar }
318*c83a76b0SSuyog Pawar }
319*c83a76b0SSuyog Pawar pi2_src++;
320*c83a76b0SSuyog Pawar pi2_tmp += trans_size;
321*c83a76b0SSuyog Pawar zero_cols = zero_cols >> 1;
322*c83a76b0SSuyog Pawar }
323*c83a76b0SSuyog Pawar
324*c83a76b0SSuyog Pawar pi2_tmp = pi2_tmp_orig;
325*c83a76b0SSuyog Pawar
326*c83a76b0SSuyog Pawar /* Inverse Transform 2nd stage */
327*c83a76b0SSuyog Pawar shift = IT_SHIFT_STAGE_2;
328*c83a76b0SSuyog Pawar add = 1 << (shift - 1);
329*c83a76b0SSuyog Pawar if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
330*c83a76b0SSuyog Pawar {
331*c83a76b0SSuyog Pawar for(j = 0; j < trans_size; j++)
332*c83a76b0SSuyog Pawar {
333*c83a76b0SSuyog Pawar /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
334*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
335*c83a76b0SSuyog Pawar {
336*c83a76b0SSuyog Pawar o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
337*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size];
338*c83a76b0SSuyog Pawar }
339*c83a76b0SSuyog Pawar eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
340*c83a76b0SSuyog Pawar eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
341*c83a76b0SSuyog Pawar ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
342*c83a76b0SSuyog Pawar ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
343*c83a76b0SSuyog Pawar
344*c83a76b0SSuyog Pawar /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
345*c83a76b0SSuyog Pawar e[0] = ee[0] + eo[0];
346*c83a76b0SSuyog Pawar e[3] = ee[0] - eo[0];
347*c83a76b0SSuyog Pawar e[1] = ee[1] + eo[1];
348*c83a76b0SSuyog Pawar e[2] = ee[1] - eo[1];
349*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
350*c83a76b0SSuyog Pawar {
351*c83a76b0SSuyog Pawar WORD32 itrans_out;
352*c83a76b0SSuyog Pawar itrans_out =
353*c83a76b0SSuyog Pawar CLIP_S16(((e[k] + o[k] + add) >> shift));
354*c83a76b0SSuyog Pawar pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
355*c83a76b0SSuyog Pawar itrans_out =
356*c83a76b0SSuyog Pawar CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
357*c83a76b0SSuyog Pawar pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
358*c83a76b0SSuyog Pawar }
359*c83a76b0SSuyog Pawar pi2_tmp++;
360*c83a76b0SSuyog Pawar pu1_pred += pred_strd;
361*c83a76b0SSuyog Pawar pu1_dst += dst_strd;
362*c83a76b0SSuyog Pawar }
363*c83a76b0SSuyog Pawar }
364*c83a76b0SSuyog Pawar else /* All rows of output of 1st stage are non-zero */
365*c83a76b0SSuyog Pawar {
366*c83a76b0SSuyog Pawar for(j = 0; j < trans_size; j++)
367*c83a76b0SSuyog Pawar {
368*c83a76b0SSuyog Pawar /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
369*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
370*c83a76b0SSuyog Pawar {
371*c83a76b0SSuyog Pawar o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
372*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[3][k]
373*c83a76b0SSuyog Pawar * pi2_tmp[3 * trans_size]
374*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[5][k]
375*c83a76b0SSuyog Pawar * pi2_tmp[5 * trans_size]
376*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[7][k]
377*c83a76b0SSuyog Pawar * pi2_tmp[7 * trans_size];
378*c83a76b0SSuyog Pawar }
379*c83a76b0SSuyog Pawar
380*c83a76b0SSuyog Pawar eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
381*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
382*c83a76b0SSuyog Pawar eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
383*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
384*c83a76b0SSuyog Pawar ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
385*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
386*c83a76b0SSuyog Pawar ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
387*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
388*c83a76b0SSuyog Pawar
389*c83a76b0SSuyog Pawar /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
390*c83a76b0SSuyog Pawar e[0] = ee[0] + eo[0];
391*c83a76b0SSuyog Pawar e[3] = ee[0] - eo[0];
392*c83a76b0SSuyog Pawar e[1] = ee[1] + eo[1];
393*c83a76b0SSuyog Pawar e[2] = ee[1] - eo[1];
394*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
395*c83a76b0SSuyog Pawar {
396*c83a76b0SSuyog Pawar WORD32 itrans_out;
397*c83a76b0SSuyog Pawar itrans_out =
398*c83a76b0SSuyog Pawar CLIP_S16(((e[k] + o[k] + add) >> shift));
399*c83a76b0SSuyog Pawar pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
400*c83a76b0SSuyog Pawar itrans_out =
401*c83a76b0SSuyog Pawar CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
402*c83a76b0SSuyog Pawar pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
403*c83a76b0SSuyog Pawar }
404*c83a76b0SSuyog Pawar pi2_tmp++;
405*c83a76b0SSuyog Pawar pu1_pred += pred_strd;
406*c83a76b0SSuyog Pawar pu1_dst += dst_strd;
407*c83a76b0SSuyog Pawar }
408*c83a76b0SSuyog Pawar }
409*c83a76b0SSuyog Pawar /************************************************************************************************/
410*c83a76b0SSuyog Pawar /************************************END - IT_RECON_8x8******************************************/
411*c83a76b0SSuyog Pawar /************************************************************************************************/
412*c83a76b0SSuyog Pawar }
413*c83a76b0SSuyog Pawar }
414*c83a76b0SSuyog Pawar
415