xref: /aosp_15_r20/external/libmpeg2/common/impeg2_idct.c (revision a97c2a1f0a796dc32bed80d3353c69c5fc07c750)
1*a97c2a1fSXin Li /******************************************************************************
2*a97c2a1fSXin Li  *
3*a97c2a1fSXin Li  * Copyright (C) 2015 The Android Open Source Project
4*a97c2a1fSXin Li  *
5*a97c2a1fSXin Li  * Licensed under the Apache License, Version 2.0 (the "License");
6*a97c2a1fSXin Li  * you may not use this file except in compliance with the License.
7*a97c2a1fSXin Li  * You may obtain a copy of the License at:
8*a97c2a1fSXin Li  *
9*a97c2a1fSXin Li  * http://www.apache.org/licenses/LICENSE-2.0
10*a97c2a1fSXin Li  *
11*a97c2a1fSXin Li  * Unless required by applicable law or agreed to in writing, software
12*a97c2a1fSXin Li  * distributed under the License is distributed on an "AS IS" BASIS,
13*a97c2a1fSXin Li  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*a97c2a1fSXin Li  * See the License for the specific language governing permissions and
15*a97c2a1fSXin Li  * limitations under the License.
16*a97c2a1fSXin Li  *
17*a97c2a1fSXin Li  *****************************************************************************
18*a97c2a1fSXin Li  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*a97c2a1fSXin Li */
20*a97c2a1fSXin Li /*****************************************************************************/
21*a97c2a1fSXin Li /*                                                                           */
22*a97c2a1fSXin Li /*  File Name         : impeg2_idct.c                                        */
23*a97c2a1fSXin Li /*                                                                           */
/*  Description       : Contains 2d idct and inverse quantization functions  */
25*a97c2a1fSXin Li /*                                                                           */
26*a97c2a1fSXin Li /*  List of Functions : impeg2_idct_recon_dc()                               */
27*a97c2a1fSXin Li /*                      impeg2_idct_recon_dc_mismatch()                      */
28*a97c2a1fSXin Li /*                      impeg2_idct_recon()                                  */
29*a97c2a1fSXin Li /*                                                                           */
30*a97c2a1fSXin Li /*  Issues / Problems : None                                                 */
31*a97c2a1fSXin Li /*                                                                           */
32*a97c2a1fSXin Li /*  Revision History  :                                                      */
33*a97c2a1fSXin Li /*                                                                           */
34*a97c2a1fSXin Li /*         DD MM YYYY   Author(s)       Changes                              */
35*a97c2a1fSXin Li /*         10 09 2005   Hairsh M        First Version                        */
36*a97c2a1fSXin Li /*                                                                           */
37*a97c2a1fSXin Li /*****************************************************************************/
38*a97c2a1fSXin Li /*
39*a97c2a1fSXin Li   IEEE - 1180 results for this IDCT
40*a97c2a1fSXin Li   L                           256         256         5           5           300         300         384         384         Thresholds
41*a97c2a1fSXin Li   H                           255         255         5           5           300         300         383         383
42*a97c2a1fSXin Li   sign                        1           -1          1           -1          1           -1          1           -1
43*a97c2a1fSXin Li   Peak Error                  1           1           1           1           1           1           1           1           1
44*a97c2a1fSXin Li   Peak Mean Square Error      0.0191      0.0188      0.0108      0.0111      0.0176      0.0188      0.0165      0.0177      0.06
45*a97c2a1fSXin Li   Overall Mean Square Error   0.01566406  0.01597656  0.0091875   0.00908906  0.01499063  0.01533281  0.01432344  0.01412344  0.02
46*a97c2a1fSXin Li   Peak Mean Error             0.0027      0.0026      0.0028      0.002       0.0017      0.0033      0.0031      0.0025      0.015
47*a97c2a1fSXin Li   Overall Mean Error          0.00002656  -0.00031406 0.00016875  0.00005469  -0.00003125 0.00011406  0.00009219  0.00004219  0.0015
48*a97c2a1fSXin Li   */
49*a97c2a1fSXin Li #include <stdio.h>
50*a97c2a1fSXin Li #include <string.h>
51*a97c2a1fSXin Li 
52*a97c2a1fSXin Li #include "iv_datatypedef.h"
53*a97c2a1fSXin Li #include "iv.h"
54*a97c2a1fSXin Li #include "impeg2_defs.h"
55*a97c2a1fSXin Li #include "impeg2_platform_macros.h"
56*a97c2a1fSXin Li 
57*a97c2a1fSXin Li #include "impeg2_macros.h"
58*a97c2a1fSXin Li #include "impeg2_globals.h"
59*a97c2a1fSXin Li #include "impeg2_idct.h"
60*a97c2a1fSXin Li 
61*a97c2a1fSXin Li 
/**
 *******************************************************************************
 *
 * @brief
 *  Inverse transform and reconstruction of an 8x8 block using only the first
 * coefficient of pi2_src
 *
 * @par Description:
 *  Scales pi2_src[0] through both IDCT stages (q15 table entry 0, then q11
 * table entry 0, each followed by rounding and a right shift), adds the
 * resulting single value to every prediction sample and clips to 8 bit
 *
 * @param[in] pi2_src
 *  Input coefficients; only pi2_src[0] is read
 *
 * @param[in] pi2_tmp
 *  Unused
 *
 * @param[in] pu1_pred
 *  Prediction 8x8 block
 *
 * @param[out] pu1_dst
 *  Output 8x8 block
 *
 * @param[in] i4_src_strd
 *  Unused
 *
 * @param[in] i4_pred_strd
 *  Prediction stride
 *
 * @param[in] i4_dst_strd
 *  Output stride
 *
 * @param[in] i4_zero_cols
 *  Unused
 *
 * @param[in] i4_zero_rows
 *  Unused
 *
 * @returns  Void
 *
 *******************************************************************************
 */
void impeg2_idct_recon_dc(WORD16 *pi2_src,
                            WORD16 *pi2_tmp,
                            UWORD8 *pu1_pred,
                            UWORD8 *pu1_dst,
                            WORD32 i4_src_strd,
                            WORD32 i4_pred_strd,
                            WORD32 i4_dst_strd,
                            WORD32 i4_zero_cols,
                            WORD32 i4_zero_rows)
{
    WORD32 i4_dc;
    WORD32 i4_row, i4_col;
    UWORD8 *pu1_pred_row = pu1_pred;
    UWORD8 *pu1_dst_row = pu1_dst;

    UNUSED(pi2_tmp);
    UNUSED(i4_src_strd);
    UNUSED(i4_zero_cols);
    UNUSED(i4_zero_rows);

    /* Stage 1 scaling of the first coefficient */
    i4_dc = pi2_src[0] * gai2_impeg2_idct_q15[0];
    i4_dc = (i4_dc + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT;

    /* Stage 2 scaling */
    i4_dc = i4_dc * gai2_impeg2_idct_q11[0];
    i4_dc = (i4_dc + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT;

    /* The same residual value is added to every sample of the block */
    for(i4_row = 0; i4_row < TRANS_SIZE_8; i4_row++)
    {
        for(i4_col = 0; i4_col < TRANS_SIZE_8; i4_col++)
        {
            pu1_dst_row[i4_col] = CLIP_U8(i4_dc + pu1_pred_row[i4_col]);
        }
        pu1_dst_row  += i4_dst_strd;
        pu1_pred_row += i4_pred_strd;
    }
}
/**
 *******************************************************************************
 *
 * @brief
 *  Inverse transform and reconstruction of an 8x8 block using only the first
 * coefficient of pi2_src, with a per-sample additive term applied before the
 * second stage rounding
 *
 * @par Description:
 *  Scales pi2_src[0] through stage 1 and multiplies by the stage 2 q11 table
 * entry 0. For each of the 64 output samples (row-major order) the matching
 * entry of gai2_impeg2_mismatch_stg2_additive is added before the stage 2
 * rounding and shift; the result is added to the prediction and clipped to
 * 8 bit
 *
 * @param[in] pi2_src
 *  Input coefficients; only pi2_src[0] is read
 *
 * @param[in] pi2_tmp
 *  Unused
 *
 * @param[in] pu1_pred
 *  Prediction 8x8 block
 *
 * @param[out] pu1_dst
 *  Output 8x8 block
 *
 * @param[in] i4_src_strd
 *  Unused
 *
 * @param[in] i4_pred_strd
 *  Prediction stride
 *
 * @param[in] i4_dst_strd
 *  Output stride
 *
 * @param[in] i4_zero_cols
 *  Unused
 *
 * @param[in] i4_zero_rows
 *  Unused
 *
 * @returns  Void
 *
 *******************************************************************************
 */
void impeg2_idct_recon_dc_mismatch(WORD16 *pi2_src,
                            WORD16 *pi2_tmp,
                            UWORD8 *pu1_pred,
                            UWORD8 *pu1_dst,
                            WORD32 i4_src_strd,
                            WORD32 i4_pred_strd,
                            WORD32 i4_dst_strd,
                            WORD32 i4_zero_cols,
                            WORD32 i4_zero_rows)

{
    WORD32 i4_scaled_dc;
    WORD32 i4_row, i4_col;
    WORD32 i4_pix;
    UWORD8 *pu1_pred_row = pu1_pred;
    UWORD8 *pu1_dst_row = pu1_dst;

    UNUSED(pi2_tmp);
    UNUSED(i4_src_strd);
    UNUSED(i4_zero_cols);
    UNUSED(i4_zero_rows);

    /* Stage 1 scaling of the first coefficient */
    i4_scaled_dc = pi2_src[0] * gai2_impeg2_idct_q15[0];
    i4_scaled_dc = (i4_scaled_dc + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT;

    /* Stage 2 multiply; rounding is deferred until the additive term is in */
    i4_scaled_dc = i4_scaled_dc * gai2_impeg2_idct_q11[0];

    for(i4_row = 0; i4_row < TRANS_SIZE_8; i4_row++)
    {
        for(i4_col = 0; i4_col < TRANS_SIZE_8; i4_col++)
        {
            /* Additive table is walked in row-major order over the block */
            i4_pix = i4_scaled_dc
                            + gai2_impeg2_mismatch_stg2_additive[i4_row * TRANS_SIZE_8 + i4_col];
            i4_pix = ((i4_pix + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT);
            i4_pix = i4_pix + pu1_pred_row[i4_col];
            pu1_dst_row[i4_col] = CLIP_U8(i4_pix);
        }

        pu1_dst_row  += i4_dst_strd;
        pu1_pred_row += i4_pred_strd;
    }
}
135*a97c2a1fSXin Li /**
136*a97c2a1fSXin Li  *******************************************************************************
137*a97c2a1fSXin Li  *
138*a97c2a1fSXin Li  * @brief
139*a97c2a1fSXin Li  *  This function performs Inverse transform  and reconstruction for 8x8
140*a97c2a1fSXin Li  * input block
141*a97c2a1fSXin Li  *
142*a97c2a1fSXin Li  * @par Description:
143*a97c2a1fSXin Li  *  Performs inverse transform and adds the prediction  data and clips output
144*a97c2a1fSXin Li  * to 8 bit
145*a97c2a1fSXin Li  *
146*a97c2a1fSXin Li  * @param[in] pi2_src
147*a97c2a1fSXin Li  *  Input 8x8 coefficients
148*a97c2a1fSXin Li  *
149*a97c2a1fSXin Li  * @param[in] pi2_tmp
150*a97c2a1fSXin Li  *  Temporary 8x8 buffer for storing inverse
151*a97c2a1fSXin Li  *
152*a97c2a1fSXin Li  *  transform
153*a97c2a1fSXin Li  *  1st stage output
154*a97c2a1fSXin Li  *
155*a97c2a1fSXin Li  * @param[in] pu1_pred
156*a97c2a1fSXin Li  *  Prediction 8x8 block
157*a97c2a1fSXin Li  *
158*a97c2a1fSXin Li  * @param[out] pu1_dst
159*a97c2a1fSXin Li  *  Output 8x8 block
160*a97c2a1fSXin Li  *
161*a97c2a1fSXin Li  * @param[in] src_strd
162*a97c2a1fSXin Li  *  Input stride
163*a97c2a1fSXin Li  *
164*a97c2a1fSXin Li  * @param[in] pred_strd
165*a97c2a1fSXin Li  *  Prediction stride
166*a97c2a1fSXin Li  *
167*a97c2a1fSXin Li  * @param[in] dst_strd
168*a97c2a1fSXin Li  *  Output Stride
169*a97c2a1fSXin Li  *
170*a97c2a1fSXin Li  * @param[in] shift
171*a97c2a1fSXin Li  *  Output shift
172*a97c2a1fSXin Li  *
173*a97c2a1fSXin Li  * @param[in] zero_cols
174*a97c2a1fSXin Li  *  Zero columns in pi2_src
175*a97c2a1fSXin Li  *
176*a97c2a1fSXin Li  * @returns  Void
177*a97c2a1fSXin Li  *
178*a97c2a1fSXin Li  * @remarks
179*a97c2a1fSXin Li  *  None
180*a97c2a1fSXin Li  *
181*a97c2a1fSXin Li  *******************************************************************************
182*a97c2a1fSXin Li  */
183*a97c2a1fSXin Li 
impeg2_idct_recon(WORD16 * pi2_src,WORD16 * pi2_tmp,UWORD8 * pu1_pred,UWORD8 * pu1_dst,WORD32 i4_src_strd,WORD32 i4_pred_strd,WORD32 i4_dst_strd,WORD32 i4_zero_cols,WORD32 i4_zero_rows)184*a97c2a1fSXin Li void impeg2_idct_recon(WORD16 *pi2_src,
185*a97c2a1fSXin Li                         WORD16 *pi2_tmp,
186*a97c2a1fSXin Li                         UWORD8 *pu1_pred,
187*a97c2a1fSXin Li                         UWORD8 *pu1_dst,
188*a97c2a1fSXin Li                         WORD32 i4_src_strd,
189*a97c2a1fSXin Li                         WORD32 i4_pred_strd,
190*a97c2a1fSXin Li                         WORD32 i4_dst_strd,
191*a97c2a1fSXin Li                         WORD32 i4_zero_cols,
192*a97c2a1fSXin Li                         WORD32 i4_zero_rows)
193*a97c2a1fSXin Li {
194*a97c2a1fSXin Li     WORD32 j, k;
195*a97c2a1fSXin Li     WORD32 ai4_e[4], ai4_o[4];
196*a97c2a1fSXin Li     WORD32 ai4_ee[2], ai4_eo[2];
197*a97c2a1fSXin Li     WORD32 i4_add;
198*a97c2a1fSXin Li     WORD32 i4_shift;
199*a97c2a1fSXin Li     WORD16 *pi2_tmp_orig;
200*a97c2a1fSXin Li     WORD32 i4_trans_size;
201*a97c2a1fSXin Li     WORD32 i4_zero_rows_2nd_stage = i4_zero_cols;
202*a97c2a1fSXin Li     WORD32 i4_row_limit_2nd_stage;
203*a97c2a1fSXin Li 
204*a97c2a1fSXin Li     i4_trans_size = TRANS_SIZE_8;
205*a97c2a1fSXin Li 
206*a97c2a1fSXin Li     pi2_tmp_orig = pi2_tmp;
207*a97c2a1fSXin Li 
208*a97c2a1fSXin Li     if((i4_zero_cols & 0xF0) == 0xF0)
209*a97c2a1fSXin Li         i4_row_limit_2nd_stage = 4;
210*a97c2a1fSXin Li     else
211*a97c2a1fSXin Li         i4_row_limit_2nd_stage = TRANS_SIZE_8;
212*a97c2a1fSXin Li 
213*a97c2a1fSXin Li 
214*a97c2a1fSXin Li     if((i4_zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */
215*a97c2a1fSXin Li     {
216*a97c2a1fSXin Li         /************************************************************************************************/
217*a97c2a1fSXin Li         /**********************************START - IT_RECON_8x8******************************************/
218*a97c2a1fSXin Li         /************************************************************************************************/
219*a97c2a1fSXin Li 
220*a97c2a1fSXin Li         /* Inverse Transform 1st stage */
221*a97c2a1fSXin Li         i4_shift = IDCT_STG1_SHIFT;
222*a97c2a1fSXin Li         i4_add = 1 << (i4_shift - 1);
223*a97c2a1fSXin Li 
224*a97c2a1fSXin Li         for(j = 0; j < i4_row_limit_2nd_stage; j++)
225*a97c2a1fSXin Li         {
226*a97c2a1fSXin Li             /* Checking for Zero Cols */
227*a97c2a1fSXin Li             if((i4_zero_cols & 1) == 1)
228*a97c2a1fSXin Li             {
229*a97c2a1fSXin Li                 memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16));
230*a97c2a1fSXin Li             }
231*a97c2a1fSXin Li             else
232*a97c2a1fSXin Li             {
233*a97c2a1fSXin Li                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
234*a97c2a1fSXin Li                 for(k = 0; k < 4; k++)
235*a97c2a1fSXin Li                 {
236*a97c2a1fSXin Li                     ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
237*a97c2a1fSXin Li                                     + gai2_impeg2_idct_q15[3 * 8 + k]
238*a97c2a1fSXin Li                                                     * pi2_src[3 * i4_src_strd];
239*a97c2a1fSXin Li                 }
240*a97c2a1fSXin Li                 ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd];
241*a97c2a1fSXin Li                 ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd];
242*a97c2a1fSXin Li                 ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0];
243*a97c2a1fSXin Li                 ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0];
244*a97c2a1fSXin Li 
245*a97c2a1fSXin Li                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
246*a97c2a1fSXin Li                 ai4_e[0] = ai4_ee[0] + ai4_eo[0];
247*a97c2a1fSXin Li                 ai4_e[3] = ai4_ee[0] - ai4_eo[0];
248*a97c2a1fSXin Li                 ai4_e[1] = ai4_ee[1] + ai4_eo[1];
249*a97c2a1fSXin Li                 ai4_e[2] = ai4_ee[1] - ai4_eo[1];
250*a97c2a1fSXin Li                 for(k = 0; k < 4; k++)
251*a97c2a1fSXin Li                 {
252*a97c2a1fSXin Li                     pi2_tmp[k] =
253*a97c2a1fSXin Li                                     CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
254*a97c2a1fSXin Li                     pi2_tmp[k + 4] =
255*a97c2a1fSXin Li                                     CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
256*a97c2a1fSXin Li                 }
257*a97c2a1fSXin Li             }
258*a97c2a1fSXin Li             pi2_src++;
259*a97c2a1fSXin Li             pi2_tmp += i4_trans_size;
260*a97c2a1fSXin Li             i4_zero_cols = i4_zero_cols >> 1;
261*a97c2a1fSXin Li         }
262*a97c2a1fSXin Li 
263*a97c2a1fSXin Li         pi2_tmp = pi2_tmp_orig;
264*a97c2a1fSXin Li 
265*a97c2a1fSXin Li         /* Inverse Transform 2nd stage */
266*a97c2a1fSXin Li         i4_shift = IDCT_STG2_SHIFT;
267*a97c2a1fSXin Li         i4_add = 1 << (i4_shift - 1);
268*a97c2a1fSXin Li         if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
269*a97c2a1fSXin Li         {
270*a97c2a1fSXin Li             for(j = 0; j < i4_trans_size; j++)
271*a97c2a1fSXin Li             {
272*a97c2a1fSXin Li                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
273*a97c2a1fSXin Li                 for(k = 0; k < 4; k++)
274*a97c2a1fSXin Li                 {
275*a97c2a1fSXin Li                     ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
276*a97c2a1fSXin Li                                     + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
277*a97c2a1fSXin Li                 }
278*a97c2a1fSXin Li                 ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
279*a97c2a1fSXin Li                 ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
280*a97c2a1fSXin Li                 ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
281*a97c2a1fSXin Li                 ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
282*a97c2a1fSXin Li 
283*a97c2a1fSXin Li                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
284*a97c2a1fSXin Li                 ai4_e[0] = ai4_ee[0] + ai4_eo[0];
285*a97c2a1fSXin Li                 ai4_e[3] = ai4_ee[0] - ai4_eo[0];
286*a97c2a1fSXin Li                 ai4_e[1] = ai4_ee[1] + ai4_eo[1];
287*a97c2a1fSXin Li                 ai4_e[2] = ai4_ee[1] - ai4_eo[1];
288*a97c2a1fSXin Li                 for(k = 0; k < 4; k++)
289*a97c2a1fSXin Li                 {
290*a97c2a1fSXin Li                     WORD32 itrans_out;
291*a97c2a1fSXin Li                     itrans_out =
292*a97c2a1fSXin Li                                     CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
293*a97c2a1fSXin Li                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
294*a97c2a1fSXin Li                     itrans_out =
295*a97c2a1fSXin Li                                     CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
296*a97c2a1fSXin Li                     pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
297*a97c2a1fSXin Li                 }
298*a97c2a1fSXin Li                 pi2_tmp++;
299*a97c2a1fSXin Li                 pu1_pred += i4_pred_strd;
300*a97c2a1fSXin Li                 pu1_dst += i4_dst_strd;
301*a97c2a1fSXin Li             }
302*a97c2a1fSXin Li         }
303*a97c2a1fSXin Li         else /* All rows of output of 1st stage are non-zero */
304*a97c2a1fSXin Li         {
305*a97c2a1fSXin Li             for(j = 0; j < i4_trans_size; j++)
306*a97c2a1fSXin Li             {
307*a97c2a1fSXin Li                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
308*a97c2a1fSXin Li                 for(k = 0; k < 4; k++)
309*a97c2a1fSXin Li                 {
310*a97c2a1fSXin Li                     ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
311*a97c2a1fSXin Li                                     + gai2_impeg2_idct_q11[3 * 8 + k]
312*a97c2a1fSXin Li                                                     * pi2_tmp[3 * i4_trans_size]
313*a97c2a1fSXin Li                                     + gai2_impeg2_idct_q11[5 * 8 + k]
314*a97c2a1fSXin Li                                                     * pi2_tmp[5 * i4_trans_size]
315*a97c2a1fSXin Li                                     + gai2_impeg2_idct_q11[7 * 8 + k]
316*a97c2a1fSXin Li                                                     * pi2_tmp[7 * i4_trans_size];
317*a97c2a1fSXin Li                 }
318*a97c2a1fSXin Li 
319*a97c2a1fSXin Li                 ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
320*a97c2a1fSXin Li                                 + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
321*a97c2a1fSXin Li                 ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
322*a97c2a1fSXin Li                                 + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
323*a97c2a1fSXin Li                 ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
324*a97c2a1fSXin Li                                 + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
325*a97c2a1fSXin Li                 ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
326*a97c2a1fSXin Li                                 + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
327*a97c2a1fSXin Li 
328*a97c2a1fSXin Li                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
329*a97c2a1fSXin Li                 ai4_e[0] = ai4_ee[0] + ai4_eo[0];
330*a97c2a1fSXin Li                 ai4_e[3] = ai4_ee[0] - ai4_eo[0];
331*a97c2a1fSXin Li                 ai4_e[1] = ai4_ee[1] + ai4_eo[1];
332*a97c2a1fSXin Li                 ai4_e[2] = ai4_ee[1] - ai4_eo[1];
333*a97c2a1fSXin Li                 for(k = 0; k < 4; k++)
334*a97c2a1fSXin Li                 {
335*a97c2a1fSXin Li                     WORD32 itrans_out;
336*a97c2a1fSXin Li                     itrans_out =
337*a97c2a1fSXin Li                                     CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
338*a97c2a1fSXin Li                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
339*a97c2a1fSXin Li                     itrans_out =
340*a97c2a1fSXin Li                                     CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
341*a97c2a1fSXin Li                     pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
342*a97c2a1fSXin Li                 }
343*a97c2a1fSXin Li                 pi2_tmp++;
344*a97c2a1fSXin Li                 pu1_pred += i4_pred_strd;
345*a97c2a1fSXin Li                 pu1_dst += i4_dst_strd;
346*a97c2a1fSXin Li             }
347*a97c2a1fSXin Li         }
348*a97c2a1fSXin Li         /************************************************************************************************/
349*a97c2a1fSXin Li         /************************************END - IT_RECON_8x8******************************************/
350*a97c2a1fSXin Li         /************************************************************************************************/
351*a97c2a1fSXin Li     }
352*a97c2a1fSXin Li     else /* All rows of input are non-zero */
353*a97c2a1fSXin Li     {
354*a97c2a1fSXin Li         /************************************************************************************************/
355*a97c2a1fSXin Li         /**********************************START - IT_RECON_8x8******************************************/
356*a97c2a1fSXin Li         /************************************************************************************************/
357*a97c2a1fSXin Li 
358*a97c2a1fSXin Li         /* Inverse Transform 1st stage */
359*a97c2a1fSXin Li         i4_shift = IDCT_STG1_SHIFT;
360*a97c2a1fSXin Li         i4_add = 1 << (i4_shift - 1);
361*a97c2a1fSXin Li 
362*a97c2a1fSXin Li         for(j = 0; j < i4_row_limit_2nd_stage; j++)
363*a97c2a1fSXin Li         {
364*a97c2a1fSXin Li             /* Checking for Zero Cols */
365*a97c2a1fSXin Li             if((i4_zero_cols & 1) == 1)
366*a97c2a1fSXin Li             {
367*a97c2a1fSXin Li                 memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16));
368*a97c2a1fSXin Li             }
369*a97c2a1fSXin Li             else
370*a97c2a1fSXin Li             {
371*a97c2a1fSXin Li                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
372*a97c2a1fSXin Li                 for(k = 0; k < 4; k++)
373*a97c2a1fSXin Li                 {
374*a97c2a1fSXin Li                     ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
375*a97c2a1fSXin Li                                     + gai2_impeg2_idct_q15[3 * 8 + k]
376*a97c2a1fSXin Li                                                     * pi2_src[3 * i4_src_strd]
377*a97c2a1fSXin Li                                     + gai2_impeg2_idct_q15[5 * 8 + k]
378*a97c2a1fSXin Li                                                     * pi2_src[5 * i4_src_strd]
379*a97c2a1fSXin Li                                     + gai2_impeg2_idct_q15[7 * 8 + k]
380*a97c2a1fSXin Li                                                     * pi2_src[7 * i4_src_strd];
381*a97c2a1fSXin Li                 }
382*a97c2a1fSXin Li 
383*a97c2a1fSXin Li                 ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd]
384*a97c2a1fSXin Li                                 + gai2_impeg2_idct_q15[6 * 8 + 0] * pi2_src[6 * i4_src_strd];
385*a97c2a1fSXin Li                 ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd]
386*a97c2a1fSXin Li                                 + gai2_impeg2_idct_q15[6 * 8 + 1] * pi2_src[6 * i4_src_strd];
387*a97c2a1fSXin Li                 ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0]
388*a97c2a1fSXin Li                                 + gai2_impeg2_idct_q15[4 * 8 + 0] * pi2_src[4 * i4_src_strd];
389*a97c2a1fSXin Li                 ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0]
390*a97c2a1fSXin Li                                 + gai2_impeg2_idct_q15[4 * 8 + 1] * pi2_src[4 * i4_src_strd];
391*a97c2a1fSXin Li 
392*a97c2a1fSXin Li                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
393*a97c2a1fSXin Li                 ai4_e[0] = ai4_ee[0] + ai4_eo[0];
394*a97c2a1fSXin Li                 ai4_e[3] = ai4_ee[0] - ai4_eo[0];
395*a97c2a1fSXin Li                 ai4_e[1] = ai4_ee[1] + ai4_eo[1];
396*a97c2a1fSXin Li                 ai4_e[2] = ai4_ee[1] - ai4_eo[1];
397*a97c2a1fSXin Li                 for(k = 0; k < 4; k++)
398*a97c2a1fSXin Li                 {
399*a97c2a1fSXin Li                     pi2_tmp[k] =
400*a97c2a1fSXin Li                                     CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
401*a97c2a1fSXin Li                     pi2_tmp[k + 4] =
402*a97c2a1fSXin Li                                     CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
403*a97c2a1fSXin Li                 }
404*a97c2a1fSXin Li             }
405*a97c2a1fSXin Li             pi2_src++;
406*a97c2a1fSXin Li             pi2_tmp += i4_trans_size;
407*a97c2a1fSXin Li             i4_zero_cols = i4_zero_cols >> 1;
408*a97c2a1fSXin Li         }
409*a97c2a1fSXin Li 
410*a97c2a1fSXin Li         pi2_tmp = pi2_tmp_orig;
411*a97c2a1fSXin Li 
412*a97c2a1fSXin Li         /* Inverse Transform 2nd stage */
413*a97c2a1fSXin Li         i4_shift = IDCT_STG2_SHIFT;
414*a97c2a1fSXin Li         i4_add = 1 << (i4_shift - 1);
415*a97c2a1fSXin Li         if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
416*a97c2a1fSXin Li         {
417*a97c2a1fSXin Li             for(j = 0; j < i4_trans_size; j++)
418*a97c2a1fSXin Li             {
419*a97c2a1fSXin Li                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
420*a97c2a1fSXin Li                 for(k = 0; k < 4; k++)
421*a97c2a1fSXin Li                 {
422*a97c2a1fSXin Li                     ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
423*a97c2a1fSXin Li                                     + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
424*a97c2a1fSXin Li                 }
425*a97c2a1fSXin Li                 ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
426*a97c2a1fSXin Li                 ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
427*a97c2a1fSXin Li                 ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
428*a97c2a1fSXin Li                 ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
429*a97c2a1fSXin Li 
430*a97c2a1fSXin Li                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
431*a97c2a1fSXin Li                 ai4_e[0] = ai4_ee[0] + ai4_eo[0];
432*a97c2a1fSXin Li                 ai4_e[3] = ai4_ee[0] - ai4_eo[0];
433*a97c2a1fSXin Li                 ai4_e[1] = ai4_ee[1] + ai4_eo[1];
434*a97c2a1fSXin Li                 ai4_e[2] = ai4_ee[1] - ai4_eo[1];
435*a97c2a1fSXin Li                 for(k = 0; k < 4; k++)
436*a97c2a1fSXin Li                 {
437*a97c2a1fSXin Li                     WORD32 itrans_out;
438*a97c2a1fSXin Li                     itrans_out =
439*a97c2a1fSXin Li                                     CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
440*a97c2a1fSXin Li                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
441*a97c2a1fSXin Li                     itrans_out =
442*a97c2a1fSXin Li                                     CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
443*a97c2a1fSXin Li                     pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
444*a97c2a1fSXin Li                 }
445*a97c2a1fSXin Li                 pi2_tmp++;
446*a97c2a1fSXin Li                 pu1_pred += i4_pred_strd;
447*a97c2a1fSXin Li                 pu1_dst += i4_dst_strd;
448*a97c2a1fSXin Li             }
449*a97c2a1fSXin Li         }
450*a97c2a1fSXin Li         else /* All rows of output of 1st stage are non-zero */
451*a97c2a1fSXin Li         {
452*a97c2a1fSXin Li             for(j = 0; j < i4_trans_size; j++)
453*a97c2a1fSXin Li             {
454*a97c2a1fSXin Li                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
455*a97c2a1fSXin Li                 for(k = 0; k < 4; k++)
456*a97c2a1fSXin Li                 {
457*a97c2a1fSXin Li                     ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
458*a97c2a1fSXin Li                                     + gai2_impeg2_idct_q11[3 * 8 + k]
459*a97c2a1fSXin Li                                                     * pi2_tmp[3 * i4_trans_size]
460*a97c2a1fSXin Li                                     + gai2_impeg2_idct_q11[5 * 8 + k]
461*a97c2a1fSXin Li                                                     * pi2_tmp[5 * i4_trans_size]
462*a97c2a1fSXin Li                                     + gai2_impeg2_idct_q11[7 * 8 + k]
463*a97c2a1fSXin Li                                                     * pi2_tmp[7 * i4_trans_size];
464*a97c2a1fSXin Li                 }
465*a97c2a1fSXin Li 
466*a97c2a1fSXin Li                 ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
467*a97c2a1fSXin Li                                 + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
468*a97c2a1fSXin Li                 ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
469*a97c2a1fSXin Li                                 + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
470*a97c2a1fSXin Li                 ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
471*a97c2a1fSXin Li                                 + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
472*a97c2a1fSXin Li                 ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
473*a97c2a1fSXin Li                                 + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
474*a97c2a1fSXin Li 
475*a97c2a1fSXin Li                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
476*a97c2a1fSXin Li                 ai4_e[0] = ai4_ee[0] + ai4_eo[0];
477*a97c2a1fSXin Li                 ai4_e[3] = ai4_ee[0] - ai4_eo[0];
478*a97c2a1fSXin Li                 ai4_e[1] = ai4_ee[1] + ai4_eo[1];
479*a97c2a1fSXin Li                 ai4_e[2] = ai4_ee[1] - ai4_eo[1];
480*a97c2a1fSXin Li                 for(k = 0; k < 4; k++)
481*a97c2a1fSXin Li                 {
482*a97c2a1fSXin Li                     WORD32 itrans_out;
483*a97c2a1fSXin Li                     itrans_out =
484*a97c2a1fSXin Li                                     CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
485*a97c2a1fSXin Li                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
486*a97c2a1fSXin Li                     itrans_out =
487*a97c2a1fSXin Li                                     CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
488*a97c2a1fSXin Li                     pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
489*a97c2a1fSXin Li                 }
490*a97c2a1fSXin Li                 pi2_tmp++;
491*a97c2a1fSXin Li                 pu1_pred += i4_pred_strd;
492*a97c2a1fSXin Li                 pu1_dst += i4_dst_strd;
493*a97c2a1fSXin Li             }
494*a97c2a1fSXin Li         }
495*a97c2a1fSXin Li         /************************************************************************************************/
496*a97c2a1fSXin Li         /************************************END - IT_RECON_8x8******************************************/
497*a97c2a1fSXin Li         /************************************************************************************************/
498*a97c2a1fSXin Li     }
499*a97c2a1fSXin Li }
500*a97c2a1fSXin Li 
501