1*a97c2a1fSXin Li /******************************************************************************
2*a97c2a1fSXin Li *
3*a97c2a1fSXin Li * Copyright (C) 2015 The Android Open Source Project
4*a97c2a1fSXin Li *
5*a97c2a1fSXin Li * Licensed under the Apache License, Version 2.0 (the "License");
6*a97c2a1fSXin Li * you may not use this file except in compliance with the License.
7*a97c2a1fSXin Li * You may obtain a copy of the License at:
8*a97c2a1fSXin Li *
9*a97c2a1fSXin Li * http://www.apache.org/licenses/LICENSE-2.0
10*a97c2a1fSXin Li *
11*a97c2a1fSXin Li * Unless required by applicable law or agreed to in writing, software
12*a97c2a1fSXin Li * distributed under the License is distributed on an "AS IS" BASIS,
13*a97c2a1fSXin Li * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*a97c2a1fSXin Li * See the License for the specific language governing permissions and
15*a97c2a1fSXin Li * limitations under the License.
16*a97c2a1fSXin Li *
17*a97c2a1fSXin Li *****************************************************************************
18*a97c2a1fSXin Li * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*a97c2a1fSXin Li */
/*****************************************************************************/
/*                                                                           */
/*  File Name         : impeg2_idct.c                                        */
/*                                                                           */
/*  Description       : Contains 2d idct and inverse quantization functions  */
/*                                                                           */
/*  List of Functions : impeg2_idct_recon_dc()                               */
/*                      impeg2_idct_recon_dc_mismatch()                      */
/*                      impeg2_idct_recon()                                  */
/*                                                                           */
/*  Issues / Problems : None                                                 */
/*                                                                           */
/*  Revision History  :                                                      */
/*                                                                           */
/*         DD MM YYYY   Author(s)       Changes                              */
/*         10 09 2005   Hairsh M        First Version                        */
/*                                                                           */
/*****************************************************************************/
38*a97c2a1fSXin Li /*
39*a97c2a1fSXin Li IEEE - 1180 results for this IDCT
40*a97c2a1fSXin Li L 256 256 5 5 300 300 384 384 Thresholds
41*a97c2a1fSXin Li H 255 255 5 5 300 300 383 383
42*a97c2a1fSXin Li sign 1 -1 1 -1 1 -1 1 -1
43*a97c2a1fSXin Li Peak Error 1 1 1 1 1 1 1 1 1
44*a97c2a1fSXin Li Peak Mean Square Error 0.0191 0.0188 0.0108 0.0111 0.0176 0.0188 0.0165 0.0177 0.06
45*a97c2a1fSXin Li Overall Mean Square Error 0.01566406 0.01597656 0.0091875 0.00908906 0.01499063 0.01533281 0.01432344 0.01412344 0.02
46*a97c2a1fSXin Li Peak Mean Error 0.0027 0.0026 0.0028 0.002 0.0017 0.0033 0.0031 0.0025 0.015
47*a97c2a1fSXin Li Overall Mean Error 0.00002656 -0.00031406 0.00016875 0.00005469 -0.00003125 0.00011406 0.00009219 0.00004219 0.0015
48*a97c2a1fSXin Li */
49*a97c2a1fSXin Li #include <stdio.h>
50*a97c2a1fSXin Li #include <string.h>
51*a97c2a1fSXin Li
52*a97c2a1fSXin Li #include "iv_datatypedef.h"
53*a97c2a1fSXin Li #include "iv.h"
54*a97c2a1fSXin Li #include "impeg2_defs.h"
55*a97c2a1fSXin Li #include "impeg2_platform_macros.h"
56*a97c2a1fSXin Li
57*a97c2a1fSXin Li #include "impeg2_macros.h"
58*a97c2a1fSXin Li #include "impeg2_globals.h"
59*a97c2a1fSXin Li #include "impeg2_idct.h"
60*a97c2a1fSXin Li
61*a97c2a1fSXin Li
/**
*******************************************************************************
*
* @brief
*  DC-only inverse transform and reconstruction of an 8x8 block
*
* @par Description:
*  Only pi2_src[0] is read. It is scaled by the first coefficient of the
*  stage-1 table, rounded and shifted, scaled by the first coefficient of the
*  stage-2 table, rounded and shifted again. The resulting single value is
*  added to every prediction sample and the sum is passed through CLIP_U8
*  before being stored.
*
* @param[in] pi2_src
*  Input coefficients; only element 0 is used
*
* @param[in] pi2_tmp
*  Unused
*
* @param[in] pu1_pred
*  Prediction 8x8 block
*
* @param[out] pu1_dst
*  Output 8x8 block
*
* @param[in] i4_src_strd
*  Unused
*
* @param[in] i4_pred_strd
*  Prediction stride
*
* @param[in] i4_dst_strd
*  Output stride
*
* @param[in] i4_zero_cols
*  Unused
*
* @param[in] i4_zero_rows
*  Unused
*
* @returns  Void
*
*******************************************************************************
*/
void impeg2_idct_recon_dc(WORD16 *pi2_src,
                          WORD16 *pi2_tmp,
                          UWORD8 *pu1_pred,
                          UWORD8 *pu1_dst,
                          WORD32 i4_src_strd,
                          WORD32 i4_pred_strd,
                          WORD32 i4_dst_strd,
                          WORD32 i4_zero_cols,
                          WORD32 i4_zero_rows)
{
    /* Stage 1: scale the DC coefficient, then round and shift */
    WORD32 i4_stg1_prod = pi2_src[0] * gai2_impeg2_idct_q15[0];
    WORD32 i4_stg1_out = ((i4_stg1_prod + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT);

    /* Stage 2: same again on the stage-1 result */
    WORD32 i4_stg2_prod = i4_stg1_out * gai2_impeg2_idct_q11[0];
    WORD32 i4_dc = ((i4_stg2_prod + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT);

    WORD32 i4_rows_left;

    UNUSED(i4_zero_rows);
    UNUSED(i4_zero_cols);
    UNUSED(i4_src_strd);
    UNUSED(pi2_tmp);

    /* The same residual value is added to every sample of the block */
    for(i4_rows_left = TRANS_SIZE_8; i4_rows_left > 0; i4_rows_left--)
    {
        WORD32 i4_col = 0;

        while(i4_col < TRANS_SIZE_8)
        {
            pu1_dst[i4_col] = CLIP_U8(i4_dc + pu1_pred[i4_col]);
            i4_col++;
        }

        pu1_pred += i4_pred_strd;
        pu1_dst += i4_dst_strd;
    }
}
/**
*******************************************************************************
*
* @brief
*  DC-only inverse transform and reconstruction of an 8x8 block with a
*  per-sample additive term applied before the stage-2 rounding
*
* @par Description:
*  Only pi2_src[0] is read. After the stage-1 scale/round/shift and the
*  stage-2 scale, each of the 64 output positions adds its own entry of
*  gai2_impeg2_mismatch_stg2_additive (indexed in raster order) before the
*  stage-2 round and shift. The result is added to the prediction sample and
*  passed through CLIP_U8.
*  NOTE(review): the additive table presumably carries the contribution of
*  the mismatch-control adjustment - confirm against the table definition.
*
* @param[in] pi2_src
*  Input coefficients; only element 0 is used
*
* @param[in] pi2_tmp
*  Unused
*
* @param[in] pu1_pred
*  Prediction 8x8 block
*
* @param[out] pu1_dst
*  Output 8x8 block
*
* @param[in] i4_src_strd
*  Unused
*
* @param[in] i4_pred_strd
*  Prediction stride
*
* @param[in] i4_dst_strd
*  Output stride
*
* @param[in] i4_zero_cols
*  Unused
*
* @param[in] i4_zero_rows
*  Unused
*
* @returns  Void
*
*******************************************************************************
*/
void impeg2_idct_recon_dc_mismatch(WORD16 *pi2_src,
                                   WORD16 *pi2_tmp,
                                   UWORD8 *pu1_pred,
                                   UWORD8 *pu1_dst,
                                   WORD32 i4_src_strd,
                                   WORD32 i4_pred_strd,
                                   WORD32 i4_dst_strd,
                                   WORD32 i4_zero_cols,
                                   WORD32 i4_zero_rows)
{
    /* Stage 1: scale the DC coefficient, then round and shift */
    WORD32 i4_stg1_prod = pi2_src[0] * gai2_impeg2_idct_q15[0];
    WORD32 i4_stg1_out = ((i4_stg1_prod + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT);

    /* Stage 2 scale only; rounding happens per sample after the additive */
    WORD32 i4_stg2_prod = i4_stg1_out * gai2_impeg2_idct_q11[0];

    WORD32 i4_row, i4_col;

    UNUSED(i4_zero_rows);
    UNUSED(i4_zero_cols);
    UNUSED(i4_src_strd);
    UNUSED(pi2_tmp);

    for(i4_row = 0; i4_row < TRANS_SIZE_8; i4_row++)
    {
        for(i4_col = 0; i4_col < TRANS_SIZE_8; i4_col++)
        {
            /* Raster-order position of this sample inside the 8x8 block */
            WORD32 i4_pos = i4_row * TRANS_SIZE_8 + i4_col;
            WORD32 i4_pix;

            i4_pix = i4_stg2_prod + gai2_impeg2_mismatch_stg2_additive[i4_pos];
            i4_pix = ((i4_pix + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT);
            i4_pix = i4_pix + pu1_pred[i4_col];

            pu1_dst[i4_col] = CLIP_U8(i4_pix);
        }

        pu1_pred += i4_pred_strd;
        pu1_dst += i4_dst_strd;
    }
}
135*a97c2a1fSXin Li /**
136*a97c2a1fSXin Li *******************************************************************************
137*a97c2a1fSXin Li *
138*a97c2a1fSXin Li * @brief
139*a97c2a1fSXin Li * This function performs Inverse transform and reconstruction for 8x8
140*a97c2a1fSXin Li * input block
141*a97c2a1fSXin Li *
142*a97c2a1fSXin Li * @par Description:
143*a97c2a1fSXin Li * Performs inverse transform and adds the prediction data and clips output
144*a97c2a1fSXin Li * to 8 bit
145*a97c2a1fSXin Li *
146*a97c2a1fSXin Li * @param[in] pi2_src
147*a97c2a1fSXin Li * Input 8x8 coefficients
148*a97c2a1fSXin Li *
149*a97c2a1fSXin Li * @param[in] pi2_tmp
150*a97c2a1fSXin Li * Temporary 8x8 buffer for storing inverse
151*a97c2a1fSXin Li *
152*a97c2a1fSXin Li * transform
153*a97c2a1fSXin Li * 1st stage output
154*a97c2a1fSXin Li *
155*a97c2a1fSXin Li * @param[in] pu1_pred
156*a97c2a1fSXin Li * Prediction 8x8 block
157*a97c2a1fSXin Li *
158*a97c2a1fSXin Li * @param[out] pu1_dst
159*a97c2a1fSXin Li * Output 8x8 block
160*a97c2a1fSXin Li *
161*a97c2a1fSXin Li * @param[in] src_strd
162*a97c2a1fSXin Li * Input stride
163*a97c2a1fSXin Li *
164*a97c2a1fSXin Li * @param[in] pred_strd
165*a97c2a1fSXin Li * Prediction stride
166*a97c2a1fSXin Li *
167*a97c2a1fSXin Li * @param[in] dst_strd
168*a97c2a1fSXin Li * Output Stride
169*a97c2a1fSXin Li *
170*a97c2a1fSXin Li * @param[in] shift
171*a97c2a1fSXin Li * Output shift
172*a97c2a1fSXin Li *
173*a97c2a1fSXin Li * @param[in] zero_cols
174*a97c2a1fSXin Li * Zero columns in pi2_src
175*a97c2a1fSXin Li *
176*a97c2a1fSXin Li * @returns Void
177*a97c2a1fSXin Li *
178*a97c2a1fSXin Li * @remarks
179*a97c2a1fSXin Li * None
180*a97c2a1fSXin Li *
181*a97c2a1fSXin Li *******************************************************************************
182*a97c2a1fSXin Li */
183*a97c2a1fSXin Li
impeg2_idct_recon(WORD16 * pi2_src,WORD16 * pi2_tmp,UWORD8 * pu1_pred,UWORD8 * pu1_dst,WORD32 i4_src_strd,WORD32 i4_pred_strd,WORD32 i4_dst_strd,WORD32 i4_zero_cols,WORD32 i4_zero_rows)184*a97c2a1fSXin Li void impeg2_idct_recon(WORD16 *pi2_src,
185*a97c2a1fSXin Li WORD16 *pi2_tmp,
186*a97c2a1fSXin Li UWORD8 *pu1_pred,
187*a97c2a1fSXin Li UWORD8 *pu1_dst,
188*a97c2a1fSXin Li WORD32 i4_src_strd,
189*a97c2a1fSXin Li WORD32 i4_pred_strd,
190*a97c2a1fSXin Li WORD32 i4_dst_strd,
191*a97c2a1fSXin Li WORD32 i4_zero_cols,
192*a97c2a1fSXin Li WORD32 i4_zero_rows)
193*a97c2a1fSXin Li {
194*a97c2a1fSXin Li WORD32 j, k;
195*a97c2a1fSXin Li WORD32 ai4_e[4], ai4_o[4];
196*a97c2a1fSXin Li WORD32 ai4_ee[2], ai4_eo[2];
197*a97c2a1fSXin Li WORD32 i4_add;
198*a97c2a1fSXin Li WORD32 i4_shift;
199*a97c2a1fSXin Li WORD16 *pi2_tmp_orig;
200*a97c2a1fSXin Li WORD32 i4_trans_size;
201*a97c2a1fSXin Li WORD32 i4_zero_rows_2nd_stage = i4_zero_cols;
202*a97c2a1fSXin Li WORD32 i4_row_limit_2nd_stage;
203*a97c2a1fSXin Li
204*a97c2a1fSXin Li i4_trans_size = TRANS_SIZE_8;
205*a97c2a1fSXin Li
206*a97c2a1fSXin Li pi2_tmp_orig = pi2_tmp;
207*a97c2a1fSXin Li
208*a97c2a1fSXin Li if((i4_zero_cols & 0xF0) == 0xF0)
209*a97c2a1fSXin Li i4_row_limit_2nd_stage = 4;
210*a97c2a1fSXin Li else
211*a97c2a1fSXin Li i4_row_limit_2nd_stage = TRANS_SIZE_8;
212*a97c2a1fSXin Li
213*a97c2a1fSXin Li
214*a97c2a1fSXin Li if((i4_zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */
215*a97c2a1fSXin Li {
216*a97c2a1fSXin Li /************************************************************************************************/
217*a97c2a1fSXin Li /**********************************START - IT_RECON_8x8******************************************/
218*a97c2a1fSXin Li /************************************************************************************************/
219*a97c2a1fSXin Li
220*a97c2a1fSXin Li /* Inverse Transform 1st stage */
221*a97c2a1fSXin Li i4_shift = IDCT_STG1_SHIFT;
222*a97c2a1fSXin Li i4_add = 1 << (i4_shift - 1);
223*a97c2a1fSXin Li
224*a97c2a1fSXin Li for(j = 0; j < i4_row_limit_2nd_stage; j++)
225*a97c2a1fSXin Li {
226*a97c2a1fSXin Li /* Checking for Zero Cols */
227*a97c2a1fSXin Li if((i4_zero_cols & 1) == 1)
228*a97c2a1fSXin Li {
229*a97c2a1fSXin Li memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16));
230*a97c2a1fSXin Li }
231*a97c2a1fSXin Li else
232*a97c2a1fSXin Li {
233*a97c2a1fSXin Li /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
234*a97c2a1fSXin Li for(k = 0; k < 4; k++)
235*a97c2a1fSXin Li {
236*a97c2a1fSXin Li ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
237*a97c2a1fSXin Li + gai2_impeg2_idct_q15[3 * 8 + k]
238*a97c2a1fSXin Li * pi2_src[3 * i4_src_strd];
239*a97c2a1fSXin Li }
240*a97c2a1fSXin Li ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd];
241*a97c2a1fSXin Li ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd];
242*a97c2a1fSXin Li ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0];
243*a97c2a1fSXin Li ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0];
244*a97c2a1fSXin Li
245*a97c2a1fSXin Li /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
246*a97c2a1fSXin Li ai4_e[0] = ai4_ee[0] + ai4_eo[0];
247*a97c2a1fSXin Li ai4_e[3] = ai4_ee[0] - ai4_eo[0];
248*a97c2a1fSXin Li ai4_e[1] = ai4_ee[1] + ai4_eo[1];
249*a97c2a1fSXin Li ai4_e[2] = ai4_ee[1] - ai4_eo[1];
250*a97c2a1fSXin Li for(k = 0; k < 4; k++)
251*a97c2a1fSXin Li {
252*a97c2a1fSXin Li pi2_tmp[k] =
253*a97c2a1fSXin Li CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
254*a97c2a1fSXin Li pi2_tmp[k + 4] =
255*a97c2a1fSXin Li CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
256*a97c2a1fSXin Li }
257*a97c2a1fSXin Li }
258*a97c2a1fSXin Li pi2_src++;
259*a97c2a1fSXin Li pi2_tmp += i4_trans_size;
260*a97c2a1fSXin Li i4_zero_cols = i4_zero_cols >> 1;
261*a97c2a1fSXin Li }
262*a97c2a1fSXin Li
263*a97c2a1fSXin Li pi2_tmp = pi2_tmp_orig;
264*a97c2a1fSXin Li
265*a97c2a1fSXin Li /* Inverse Transform 2nd stage */
266*a97c2a1fSXin Li i4_shift = IDCT_STG2_SHIFT;
267*a97c2a1fSXin Li i4_add = 1 << (i4_shift - 1);
268*a97c2a1fSXin Li if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
269*a97c2a1fSXin Li {
270*a97c2a1fSXin Li for(j = 0; j < i4_trans_size; j++)
271*a97c2a1fSXin Li {
272*a97c2a1fSXin Li /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
273*a97c2a1fSXin Li for(k = 0; k < 4; k++)
274*a97c2a1fSXin Li {
275*a97c2a1fSXin Li ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
276*a97c2a1fSXin Li + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
277*a97c2a1fSXin Li }
278*a97c2a1fSXin Li ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
279*a97c2a1fSXin Li ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
280*a97c2a1fSXin Li ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
281*a97c2a1fSXin Li ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
282*a97c2a1fSXin Li
283*a97c2a1fSXin Li /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
284*a97c2a1fSXin Li ai4_e[0] = ai4_ee[0] + ai4_eo[0];
285*a97c2a1fSXin Li ai4_e[3] = ai4_ee[0] - ai4_eo[0];
286*a97c2a1fSXin Li ai4_e[1] = ai4_ee[1] + ai4_eo[1];
287*a97c2a1fSXin Li ai4_e[2] = ai4_ee[1] - ai4_eo[1];
288*a97c2a1fSXin Li for(k = 0; k < 4; k++)
289*a97c2a1fSXin Li {
290*a97c2a1fSXin Li WORD32 itrans_out;
291*a97c2a1fSXin Li itrans_out =
292*a97c2a1fSXin Li CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
293*a97c2a1fSXin Li pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
294*a97c2a1fSXin Li itrans_out =
295*a97c2a1fSXin Li CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
296*a97c2a1fSXin Li pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
297*a97c2a1fSXin Li }
298*a97c2a1fSXin Li pi2_tmp++;
299*a97c2a1fSXin Li pu1_pred += i4_pred_strd;
300*a97c2a1fSXin Li pu1_dst += i4_dst_strd;
301*a97c2a1fSXin Li }
302*a97c2a1fSXin Li }
303*a97c2a1fSXin Li else /* All rows of output of 1st stage are non-zero */
304*a97c2a1fSXin Li {
305*a97c2a1fSXin Li for(j = 0; j < i4_trans_size; j++)
306*a97c2a1fSXin Li {
307*a97c2a1fSXin Li /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
308*a97c2a1fSXin Li for(k = 0; k < 4; k++)
309*a97c2a1fSXin Li {
310*a97c2a1fSXin Li ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
311*a97c2a1fSXin Li + gai2_impeg2_idct_q11[3 * 8 + k]
312*a97c2a1fSXin Li * pi2_tmp[3 * i4_trans_size]
313*a97c2a1fSXin Li + gai2_impeg2_idct_q11[5 * 8 + k]
314*a97c2a1fSXin Li * pi2_tmp[5 * i4_trans_size]
315*a97c2a1fSXin Li + gai2_impeg2_idct_q11[7 * 8 + k]
316*a97c2a1fSXin Li * pi2_tmp[7 * i4_trans_size];
317*a97c2a1fSXin Li }
318*a97c2a1fSXin Li
319*a97c2a1fSXin Li ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
320*a97c2a1fSXin Li + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
321*a97c2a1fSXin Li ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
322*a97c2a1fSXin Li + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
323*a97c2a1fSXin Li ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
324*a97c2a1fSXin Li + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
325*a97c2a1fSXin Li ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
326*a97c2a1fSXin Li + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
327*a97c2a1fSXin Li
328*a97c2a1fSXin Li /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
329*a97c2a1fSXin Li ai4_e[0] = ai4_ee[0] + ai4_eo[0];
330*a97c2a1fSXin Li ai4_e[3] = ai4_ee[0] - ai4_eo[0];
331*a97c2a1fSXin Li ai4_e[1] = ai4_ee[1] + ai4_eo[1];
332*a97c2a1fSXin Li ai4_e[2] = ai4_ee[1] - ai4_eo[1];
333*a97c2a1fSXin Li for(k = 0; k < 4; k++)
334*a97c2a1fSXin Li {
335*a97c2a1fSXin Li WORD32 itrans_out;
336*a97c2a1fSXin Li itrans_out =
337*a97c2a1fSXin Li CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
338*a97c2a1fSXin Li pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
339*a97c2a1fSXin Li itrans_out =
340*a97c2a1fSXin Li CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
341*a97c2a1fSXin Li pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
342*a97c2a1fSXin Li }
343*a97c2a1fSXin Li pi2_tmp++;
344*a97c2a1fSXin Li pu1_pred += i4_pred_strd;
345*a97c2a1fSXin Li pu1_dst += i4_dst_strd;
346*a97c2a1fSXin Li }
347*a97c2a1fSXin Li }
348*a97c2a1fSXin Li /************************************************************************************************/
349*a97c2a1fSXin Li /************************************END - IT_RECON_8x8******************************************/
350*a97c2a1fSXin Li /************************************************************************************************/
351*a97c2a1fSXin Li }
352*a97c2a1fSXin Li else /* All rows of input are non-zero */
353*a97c2a1fSXin Li {
354*a97c2a1fSXin Li /************************************************************************************************/
355*a97c2a1fSXin Li /**********************************START - IT_RECON_8x8******************************************/
356*a97c2a1fSXin Li /************************************************************************************************/
357*a97c2a1fSXin Li
358*a97c2a1fSXin Li /* Inverse Transform 1st stage */
359*a97c2a1fSXin Li i4_shift = IDCT_STG1_SHIFT;
360*a97c2a1fSXin Li i4_add = 1 << (i4_shift - 1);
361*a97c2a1fSXin Li
362*a97c2a1fSXin Li for(j = 0; j < i4_row_limit_2nd_stage; j++)
363*a97c2a1fSXin Li {
364*a97c2a1fSXin Li /* Checking for Zero Cols */
365*a97c2a1fSXin Li if((i4_zero_cols & 1) == 1)
366*a97c2a1fSXin Li {
367*a97c2a1fSXin Li memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16));
368*a97c2a1fSXin Li }
369*a97c2a1fSXin Li else
370*a97c2a1fSXin Li {
371*a97c2a1fSXin Li /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
372*a97c2a1fSXin Li for(k = 0; k < 4; k++)
373*a97c2a1fSXin Li {
374*a97c2a1fSXin Li ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
375*a97c2a1fSXin Li + gai2_impeg2_idct_q15[3 * 8 + k]
376*a97c2a1fSXin Li * pi2_src[3 * i4_src_strd]
377*a97c2a1fSXin Li + gai2_impeg2_idct_q15[5 * 8 + k]
378*a97c2a1fSXin Li * pi2_src[5 * i4_src_strd]
379*a97c2a1fSXin Li + gai2_impeg2_idct_q15[7 * 8 + k]
380*a97c2a1fSXin Li * pi2_src[7 * i4_src_strd];
381*a97c2a1fSXin Li }
382*a97c2a1fSXin Li
383*a97c2a1fSXin Li ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd]
384*a97c2a1fSXin Li + gai2_impeg2_idct_q15[6 * 8 + 0] * pi2_src[6 * i4_src_strd];
385*a97c2a1fSXin Li ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd]
386*a97c2a1fSXin Li + gai2_impeg2_idct_q15[6 * 8 + 1] * pi2_src[6 * i4_src_strd];
387*a97c2a1fSXin Li ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0]
388*a97c2a1fSXin Li + gai2_impeg2_idct_q15[4 * 8 + 0] * pi2_src[4 * i4_src_strd];
389*a97c2a1fSXin Li ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0]
390*a97c2a1fSXin Li + gai2_impeg2_idct_q15[4 * 8 + 1] * pi2_src[4 * i4_src_strd];
391*a97c2a1fSXin Li
392*a97c2a1fSXin Li /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
393*a97c2a1fSXin Li ai4_e[0] = ai4_ee[0] + ai4_eo[0];
394*a97c2a1fSXin Li ai4_e[3] = ai4_ee[0] - ai4_eo[0];
395*a97c2a1fSXin Li ai4_e[1] = ai4_ee[1] + ai4_eo[1];
396*a97c2a1fSXin Li ai4_e[2] = ai4_ee[1] - ai4_eo[1];
397*a97c2a1fSXin Li for(k = 0; k < 4; k++)
398*a97c2a1fSXin Li {
399*a97c2a1fSXin Li pi2_tmp[k] =
400*a97c2a1fSXin Li CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
401*a97c2a1fSXin Li pi2_tmp[k + 4] =
402*a97c2a1fSXin Li CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
403*a97c2a1fSXin Li }
404*a97c2a1fSXin Li }
405*a97c2a1fSXin Li pi2_src++;
406*a97c2a1fSXin Li pi2_tmp += i4_trans_size;
407*a97c2a1fSXin Li i4_zero_cols = i4_zero_cols >> 1;
408*a97c2a1fSXin Li }
409*a97c2a1fSXin Li
410*a97c2a1fSXin Li pi2_tmp = pi2_tmp_orig;
411*a97c2a1fSXin Li
412*a97c2a1fSXin Li /* Inverse Transform 2nd stage */
413*a97c2a1fSXin Li i4_shift = IDCT_STG2_SHIFT;
414*a97c2a1fSXin Li i4_add = 1 << (i4_shift - 1);
415*a97c2a1fSXin Li if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
416*a97c2a1fSXin Li {
417*a97c2a1fSXin Li for(j = 0; j < i4_trans_size; j++)
418*a97c2a1fSXin Li {
419*a97c2a1fSXin Li /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
420*a97c2a1fSXin Li for(k = 0; k < 4; k++)
421*a97c2a1fSXin Li {
422*a97c2a1fSXin Li ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
423*a97c2a1fSXin Li + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
424*a97c2a1fSXin Li }
425*a97c2a1fSXin Li ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
426*a97c2a1fSXin Li ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
427*a97c2a1fSXin Li ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
428*a97c2a1fSXin Li ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
429*a97c2a1fSXin Li
430*a97c2a1fSXin Li /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
431*a97c2a1fSXin Li ai4_e[0] = ai4_ee[0] + ai4_eo[0];
432*a97c2a1fSXin Li ai4_e[3] = ai4_ee[0] - ai4_eo[0];
433*a97c2a1fSXin Li ai4_e[1] = ai4_ee[1] + ai4_eo[1];
434*a97c2a1fSXin Li ai4_e[2] = ai4_ee[1] - ai4_eo[1];
435*a97c2a1fSXin Li for(k = 0; k < 4; k++)
436*a97c2a1fSXin Li {
437*a97c2a1fSXin Li WORD32 itrans_out;
438*a97c2a1fSXin Li itrans_out =
439*a97c2a1fSXin Li CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
440*a97c2a1fSXin Li pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
441*a97c2a1fSXin Li itrans_out =
442*a97c2a1fSXin Li CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
443*a97c2a1fSXin Li pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
444*a97c2a1fSXin Li }
445*a97c2a1fSXin Li pi2_tmp++;
446*a97c2a1fSXin Li pu1_pred += i4_pred_strd;
447*a97c2a1fSXin Li pu1_dst += i4_dst_strd;
448*a97c2a1fSXin Li }
449*a97c2a1fSXin Li }
450*a97c2a1fSXin Li else /* All rows of output of 1st stage are non-zero */
451*a97c2a1fSXin Li {
452*a97c2a1fSXin Li for(j = 0; j < i4_trans_size; j++)
453*a97c2a1fSXin Li {
454*a97c2a1fSXin Li /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
455*a97c2a1fSXin Li for(k = 0; k < 4; k++)
456*a97c2a1fSXin Li {
457*a97c2a1fSXin Li ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
458*a97c2a1fSXin Li + gai2_impeg2_idct_q11[3 * 8 + k]
459*a97c2a1fSXin Li * pi2_tmp[3 * i4_trans_size]
460*a97c2a1fSXin Li + gai2_impeg2_idct_q11[5 * 8 + k]
461*a97c2a1fSXin Li * pi2_tmp[5 * i4_trans_size]
462*a97c2a1fSXin Li + gai2_impeg2_idct_q11[7 * 8 + k]
463*a97c2a1fSXin Li * pi2_tmp[7 * i4_trans_size];
464*a97c2a1fSXin Li }
465*a97c2a1fSXin Li
466*a97c2a1fSXin Li ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
467*a97c2a1fSXin Li + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
468*a97c2a1fSXin Li ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
469*a97c2a1fSXin Li + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
470*a97c2a1fSXin Li ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
471*a97c2a1fSXin Li + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
472*a97c2a1fSXin Li ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
473*a97c2a1fSXin Li + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
474*a97c2a1fSXin Li
475*a97c2a1fSXin Li /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
476*a97c2a1fSXin Li ai4_e[0] = ai4_ee[0] + ai4_eo[0];
477*a97c2a1fSXin Li ai4_e[3] = ai4_ee[0] - ai4_eo[0];
478*a97c2a1fSXin Li ai4_e[1] = ai4_ee[1] + ai4_eo[1];
479*a97c2a1fSXin Li ai4_e[2] = ai4_ee[1] - ai4_eo[1];
480*a97c2a1fSXin Li for(k = 0; k < 4; k++)
481*a97c2a1fSXin Li {
482*a97c2a1fSXin Li WORD32 itrans_out;
483*a97c2a1fSXin Li itrans_out =
484*a97c2a1fSXin Li CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
485*a97c2a1fSXin Li pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
486*a97c2a1fSXin Li itrans_out =
487*a97c2a1fSXin Li CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
488*a97c2a1fSXin Li pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
489*a97c2a1fSXin Li }
490*a97c2a1fSXin Li pi2_tmp++;
491*a97c2a1fSXin Li pu1_pred += i4_pred_strd;
492*a97c2a1fSXin Li pu1_dst += i4_dst_strd;
493*a97c2a1fSXin Li }
494*a97c2a1fSXin Li }
495*a97c2a1fSXin Li /************************************************************************************************/
496*a97c2a1fSXin Li /************************************END - IT_RECON_8x8******************************************/
497*a97c2a1fSXin Li /************************************************************************************************/
498*a97c2a1fSXin Li }
499*a97c2a1fSXin Li }
500*a97c2a1fSXin Li
501