xref: /aosp_15_r20/external/libhevc/common/ihevc_itrans_recon_8x8.c (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1*c83a76b0SSuyog Pawar /******************************************************************************
2*c83a76b0SSuyog Pawar *
3*c83a76b0SSuyog Pawar * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*c83a76b0SSuyog Pawar *
5*c83a76b0SSuyog Pawar * Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar * you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar * You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar *
9*c83a76b0SSuyog Pawar * http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar *
11*c83a76b0SSuyog Pawar * Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar * distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar * See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar * limitations under the License.
16*c83a76b0SSuyog Pawar *
17*c83a76b0SSuyog Pawar ******************************************************************************/
18*c83a76b0SSuyog Pawar /**
19*c83a76b0SSuyog Pawar  *******************************************************************************
20*c83a76b0SSuyog Pawar  * @file
21*c83a76b0SSuyog Pawar  *  ihevc_itrans_recon_8x8.c
22*c83a76b0SSuyog Pawar  *
23*c83a76b0SSuyog Pawar  * @brief
24*c83a76b0SSuyog Pawar  *  Contains function definitions for inverse transform  and reconstruction 8x8
25*c83a76b0SSuyog Pawar  *
26*c83a76b0SSuyog Pawar  *
27*c83a76b0SSuyog Pawar  * @author
28*c83a76b0SSuyog Pawar  *  100470
29*c83a76b0SSuyog Pawar  *
30*c83a76b0SSuyog Pawar  * @par List of Functions:
31*c83a76b0SSuyog Pawar  *  - ihevc_itrans_recon_8x8()
32*c83a76b0SSuyog Pawar  *
33*c83a76b0SSuyog Pawar  * @remarks
34*c83a76b0SSuyog Pawar  *  None
35*c83a76b0SSuyog Pawar  *
36*c83a76b0SSuyog Pawar  *******************************************************************************
37*c83a76b0SSuyog Pawar  */
38*c83a76b0SSuyog Pawar #include <stdio.h>
39*c83a76b0SSuyog Pawar #include <string.h>
40*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
41*c83a76b0SSuyog Pawar #include "ihevc_macros.h"
42*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
43*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
44*c83a76b0SSuyog Pawar #include "ihevc_trans_tables.h"
45*c83a76b0SSuyog Pawar #include "ihevc_itrans_recon.h"
46*c83a76b0SSuyog Pawar #include "ihevc_func_selector.h"
47*c83a76b0SSuyog Pawar #include "ihevc_trans_macros.h"
48*c83a76b0SSuyog Pawar 
49*c83a76b0SSuyog Pawar /**
50*c83a76b0SSuyog Pawar  *******************************************************************************
51*c83a76b0SSuyog Pawar  *
52*c83a76b0SSuyog Pawar  * @brief
53*c83a76b0SSuyog Pawar  *  This function performs Inverse transform  and reconstruction for 8x8
54*c83a76b0SSuyog Pawar  * input block
55*c83a76b0SSuyog Pawar  *
56*c83a76b0SSuyog Pawar  * @par Description:
57*c83a76b0SSuyog Pawar  *  Performs inverse transform and adds the prediction  data and clips output
58*c83a76b0SSuyog Pawar  * to 8 bit
59*c83a76b0SSuyog Pawar  *
60*c83a76b0SSuyog Pawar  * @param[in] pi2_src
61*c83a76b0SSuyog Pawar  *  Input 8x8 coefficients
62*c83a76b0SSuyog Pawar  *
63*c83a76b0SSuyog Pawar  * @param[in] pi2_tmp
64*c83a76b0SSuyog Pawar  *  Temporary 8x8 buffer for storing inverse
65*c83a76b0SSuyog Pawar  *
66*c83a76b0SSuyog Pawar  *  transform
67*c83a76b0SSuyog Pawar  *  1st stage output
68*c83a76b0SSuyog Pawar  *
69*c83a76b0SSuyog Pawar  * @param[in] pu1_pred
70*c83a76b0SSuyog Pawar  *  Prediction 8x8 block
71*c83a76b0SSuyog Pawar  *
72*c83a76b0SSuyog Pawar  * @param[out] pu1_dst
73*c83a76b0SSuyog Pawar  *  Output 8x8 block
74*c83a76b0SSuyog Pawar  *
75*c83a76b0SSuyog Pawar  * @param[in] src_strd
76*c83a76b0SSuyog Pawar  *  Input stride
77*c83a76b0SSuyog Pawar  *
78*c83a76b0SSuyog Pawar  * @param[in] pred_strd
79*c83a76b0SSuyog Pawar  *  Prediction stride
80*c83a76b0SSuyog Pawar  *
81*c83a76b0SSuyog Pawar  * @param[in] dst_strd
82*c83a76b0SSuyog Pawar  *  Output Stride
83*c83a76b0SSuyog Pawar  *
84*c83a76b0SSuyog Pawar  * @param[in] shift
85*c83a76b0SSuyog Pawar  *  Output shift
86*c83a76b0SSuyog Pawar  *
87*c83a76b0SSuyog Pawar  * @param[in] zero_cols
88*c83a76b0SSuyog Pawar  *  Zero columns in pi2_src
89*c83a76b0SSuyog Pawar  *
90*c83a76b0SSuyog Pawar  * @returns  Void
91*c83a76b0SSuyog Pawar  *
92*c83a76b0SSuyog Pawar  * @remarks
93*c83a76b0SSuyog Pawar  *  None
94*c83a76b0SSuyog Pawar  *
95*c83a76b0SSuyog Pawar  *******************************************************************************
96*c83a76b0SSuyog Pawar  */
97*c83a76b0SSuyog Pawar 
ihevc_itrans_recon_8x8(WORD16 * pi2_src,WORD16 * pi2_tmp,UWORD8 * pu1_pred,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,WORD32 zero_cols,WORD32 zero_rows)98*c83a76b0SSuyog Pawar void ihevc_itrans_recon_8x8(WORD16 *pi2_src,
99*c83a76b0SSuyog Pawar                             WORD16 *pi2_tmp,
100*c83a76b0SSuyog Pawar                             UWORD8 *pu1_pred,
101*c83a76b0SSuyog Pawar                             UWORD8 *pu1_dst,
102*c83a76b0SSuyog Pawar                             WORD32 src_strd,
103*c83a76b0SSuyog Pawar                             WORD32 pred_strd,
104*c83a76b0SSuyog Pawar                             WORD32 dst_strd,
105*c83a76b0SSuyog Pawar                             WORD32 zero_cols,
106*c83a76b0SSuyog Pawar                             WORD32 zero_rows)
107*c83a76b0SSuyog Pawar {
108*c83a76b0SSuyog Pawar     WORD32 j, k;
109*c83a76b0SSuyog Pawar     WORD32 e[4], o[4];
110*c83a76b0SSuyog Pawar     WORD32 ee[2], eo[2];
111*c83a76b0SSuyog Pawar     WORD32 add;
112*c83a76b0SSuyog Pawar     WORD32 shift;
113*c83a76b0SSuyog Pawar     WORD16 *pi2_tmp_orig;
114*c83a76b0SSuyog Pawar     WORD32 trans_size;
115*c83a76b0SSuyog Pawar     WORD32 zero_rows_2nd_stage = zero_cols;
116*c83a76b0SSuyog Pawar     WORD32 row_limit_2nd_stage;
117*c83a76b0SSuyog Pawar 
118*c83a76b0SSuyog Pawar     trans_size = TRANS_SIZE_8;
119*c83a76b0SSuyog Pawar 
120*c83a76b0SSuyog Pawar     pi2_tmp_orig = pi2_tmp;
121*c83a76b0SSuyog Pawar 
122*c83a76b0SSuyog Pawar     if((zero_cols & 0xF0) == 0xF0)
123*c83a76b0SSuyog Pawar         row_limit_2nd_stage = 4;
124*c83a76b0SSuyog Pawar     else
125*c83a76b0SSuyog Pawar         row_limit_2nd_stage = TRANS_SIZE_8;
126*c83a76b0SSuyog Pawar 
127*c83a76b0SSuyog Pawar 
128*c83a76b0SSuyog Pawar     if((zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */
129*c83a76b0SSuyog Pawar     {
130*c83a76b0SSuyog Pawar         /************************************************************************************************/
131*c83a76b0SSuyog Pawar         /**********************************START - IT_RECON_8x8******************************************/
132*c83a76b0SSuyog Pawar         /************************************************************************************************/
133*c83a76b0SSuyog Pawar 
134*c83a76b0SSuyog Pawar         /* Inverse Transform 1st stage */
135*c83a76b0SSuyog Pawar         shift = IT_SHIFT_STAGE_1;
136*c83a76b0SSuyog Pawar         add = 1 << (shift - 1);
137*c83a76b0SSuyog Pawar 
138*c83a76b0SSuyog Pawar         for(j = 0; j < row_limit_2nd_stage; j++)
139*c83a76b0SSuyog Pawar         {
140*c83a76b0SSuyog Pawar             /* Checking for Zero Cols */
141*c83a76b0SSuyog Pawar             if((zero_cols & 1) == 1)
142*c83a76b0SSuyog Pawar             {
143*c83a76b0SSuyog Pawar                 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
144*c83a76b0SSuyog Pawar             }
145*c83a76b0SSuyog Pawar             else
146*c83a76b0SSuyog Pawar             {
147*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
148*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
149*c83a76b0SSuyog Pawar                 {
150*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
151*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_8[3][k]
152*c83a76b0SSuyog Pawar                                                     * pi2_src[3 * src_strd];
153*c83a76b0SSuyog Pawar                 }
154*c83a76b0SSuyog Pawar                 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd];
155*c83a76b0SSuyog Pawar                 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd];
156*c83a76b0SSuyog Pawar                 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0];
157*c83a76b0SSuyog Pawar                 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0];
158*c83a76b0SSuyog Pawar 
159*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
160*c83a76b0SSuyog Pawar                 e[0] = ee[0] + eo[0];
161*c83a76b0SSuyog Pawar                 e[3] = ee[0] - eo[0];
162*c83a76b0SSuyog Pawar                 e[1] = ee[1] + eo[1];
163*c83a76b0SSuyog Pawar                 e[2] = ee[1] - eo[1];
164*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
165*c83a76b0SSuyog Pawar                 {
166*c83a76b0SSuyog Pawar                     pi2_tmp[k] =
167*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
168*c83a76b0SSuyog Pawar                     pi2_tmp[k + 4] =
169*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
170*c83a76b0SSuyog Pawar                 }
171*c83a76b0SSuyog Pawar             }
172*c83a76b0SSuyog Pawar             pi2_src++;
173*c83a76b0SSuyog Pawar             pi2_tmp += trans_size;
174*c83a76b0SSuyog Pawar             zero_cols = zero_cols >> 1;
175*c83a76b0SSuyog Pawar         }
176*c83a76b0SSuyog Pawar 
177*c83a76b0SSuyog Pawar         pi2_tmp = pi2_tmp_orig;
178*c83a76b0SSuyog Pawar 
179*c83a76b0SSuyog Pawar         /* Inverse Transform 2nd stage */
180*c83a76b0SSuyog Pawar         shift = IT_SHIFT_STAGE_2;
181*c83a76b0SSuyog Pawar         add = 1 << (shift - 1);
182*c83a76b0SSuyog Pawar         if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
183*c83a76b0SSuyog Pawar         {
184*c83a76b0SSuyog Pawar             for(j = 0; j < trans_size; j++)
185*c83a76b0SSuyog Pawar             {
186*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
187*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
188*c83a76b0SSuyog Pawar                 {
189*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
190*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size];
191*c83a76b0SSuyog Pawar                 }
192*c83a76b0SSuyog Pawar                 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
193*c83a76b0SSuyog Pawar                 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
194*c83a76b0SSuyog Pawar                 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
195*c83a76b0SSuyog Pawar                 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
196*c83a76b0SSuyog Pawar 
197*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
198*c83a76b0SSuyog Pawar                 e[0] = ee[0] + eo[0];
199*c83a76b0SSuyog Pawar                 e[3] = ee[0] - eo[0];
200*c83a76b0SSuyog Pawar                 e[1] = ee[1] + eo[1];
201*c83a76b0SSuyog Pawar                 e[2] = ee[1] - eo[1];
202*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
203*c83a76b0SSuyog Pawar                 {
204*c83a76b0SSuyog Pawar                     WORD32 itrans_out;
205*c83a76b0SSuyog Pawar                     itrans_out =
206*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
207*c83a76b0SSuyog Pawar                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
208*c83a76b0SSuyog Pawar                     itrans_out =
209*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
210*c83a76b0SSuyog Pawar                     pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
211*c83a76b0SSuyog Pawar                 }
212*c83a76b0SSuyog Pawar                 pi2_tmp++;
213*c83a76b0SSuyog Pawar                 pu1_pred += pred_strd;
214*c83a76b0SSuyog Pawar                 pu1_dst += dst_strd;
215*c83a76b0SSuyog Pawar             }
216*c83a76b0SSuyog Pawar         }
217*c83a76b0SSuyog Pawar         else /* All rows of output of 1st stage are non-zero */
218*c83a76b0SSuyog Pawar         {
219*c83a76b0SSuyog Pawar             for(j = 0; j < trans_size; j++)
220*c83a76b0SSuyog Pawar             {
221*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
222*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
223*c83a76b0SSuyog Pawar                 {
224*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
225*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_8[3][k]
226*c83a76b0SSuyog Pawar                                                     * pi2_tmp[3 * trans_size]
227*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_8[5][k]
228*c83a76b0SSuyog Pawar                                                     * pi2_tmp[5 * trans_size]
229*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_8[7][k]
230*c83a76b0SSuyog Pawar                                                     * pi2_tmp[7 * trans_size];
231*c83a76b0SSuyog Pawar                 }
232*c83a76b0SSuyog Pawar 
233*c83a76b0SSuyog Pawar                 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
234*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
235*c83a76b0SSuyog Pawar                 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
236*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
237*c83a76b0SSuyog Pawar                 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
238*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
239*c83a76b0SSuyog Pawar                 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
240*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
241*c83a76b0SSuyog Pawar 
242*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
243*c83a76b0SSuyog Pawar                 e[0] = ee[0] + eo[0];
244*c83a76b0SSuyog Pawar                 e[3] = ee[0] - eo[0];
245*c83a76b0SSuyog Pawar                 e[1] = ee[1] + eo[1];
246*c83a76b0SSuyog Pawar                 e[2] = ee[1] - eo[1];
247*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
248*c83a76b0SSuyog Pawar                 {
249*c83a76b0SSuyog Pawar                     WORD32 itrans_out;
250*c83a76b0SSuyog Pawar                     itrans_out =
251*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
252*c83a76b0SSuyog Pawar                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
253*c83a76b0SSuyog Pawar                     itrans_out =
254*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
255*c83a76b0SSuyog Pawar                     pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
256*c83a76b0SSuyog Pawar                 }
257*c83a76b0SSuyog Pawar                 pi2_tmp++;
258*c83a76b0SSuyog Pawar                 pu1_pred += pred_strd;
259*c83a76b0SSuyog Pawar                 pu1_dst += dst_strd;
260*c83a76b0SSuyog Pawar             }
261*c83a76b0SSuyog Pawar         }
262*c83a76b0SSuyog Pawar         /************************************************************************************************/
263*c83a76b0SSuyog Pawar         /************************************END - IT_RECON_8x8******************************************/
264*c83a76b0SSuyog Pawar         /************************************************************************************************/
265*c83a76b0SSuyog Pawar     }
266*c83a76b0SSuyog Pawar     else /* All rows of input are non-zero */
267*c83a76b0SSuyog Pawar     {
268*c83a76b0SSuyog Pawar         /************************************************************************************************/
269*c83a76b0SSuyog Pawar         /**********************************START - IT_RECON_8x8******************************************/
270*c83a76b0SSuyog Pawar         /************************************************************************************************/
271*c83a76b0SSuyog Pawar 
272*c83a76b0SSuyog Pawar         /* Inverse Transform 1st stage */
273*c83a76b0SSuyog Pawar         shift = IT_SHIFT_STAGE_1;
274*c83a76b0SSuyog Pawar         add = 1 << (shift - 1);
275*c83a76b0SSuyog Pawar 
276*c83a76b0SSuyog Pawar         for(j = 0; j < row_limit_2nd_stage; j++)
277*c83a76b0SSuyog Pawar         {
278*c83a76b0SSuyog Pawar             /* Checking for Zero Cols */
279*c83a76b0SSuyog Pawar             if((zero_cols & 1) == 1)
280*c83a76b0SSuyog Pawar             {
281*c83a76b0SSuyog Pawar                 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
282*c83a76b0SSuyog Pawar             }
283*c83a76b0SSuyog Pawar             else
284*c83a76b0SSuyog Pawar             {
285*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
286*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
287*c83a76b0SSuyog Pawar                 {
288*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
289*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_8[3][k]
290*c83a76b0SSuyog Pawar                                                     * pi2_src[3 * src_strd]
291*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_8[5][k]
292*c83a76b0SSuyog Pawar                                                     * pi2_src[5 * src_strd]
293*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_8[7][k]
294*c83a76b0SSuyog Pawar                                                     * pi2_src[7 * src_strd];
295*c83a76b0SSuyog Pawar                 }
296*c83a76b0SSuyog Pawar 
297*c83a76b0SSuyog Pawar                 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd]
298*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_8[6][0] * pi2_src[6 * src_strd];
299*c83a76b0SSuyog Pawar                 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd]
300*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_8[6][1] * pi2_src[6 * src_strd];
301*c83a76b0SSuyog Pawar                 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0]
302*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_8[4][0] * pi2_src[4 * src_strd];
303*c83a76b0SSuyog Pawar                 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0]
304*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_8[4][1] * pi2_src[4 * src_strd];
305*c83a76b0SSuyog Pawar 
306*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
307*c83a76b0SSuyog Pawar                 e[0] = ee[0] + eo[0];
308*c83a76b0SSuyog Pawar                 e[3] = ee[0] - eo[0];
309*c83a76b0SSuyog Pawar                 e[1] = ee[1] + eo[1];
310*c83a76b0SSuyog Pawar                 e[2] = ee[1] - eo[1];
311*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
312*c83a76b0SSuyog Pawar                 {
313*c83a76b0SSuyog Pawar                     pi2_tmp[k] =
314*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
315*c83a76b0SSuyog Pawar                     pi2_tmp[k + 4] =
316*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
317*c83a76b0SSuyog Pawar                 }
318*c83a76b0SSuyog Pawar             }
319*c83a76b0SSuyog Pawar             pi2_src++;
320*c83a76b0SSuyog Pawar             pi2_tmp += trans_size;
321*c83a76b0SSuyog Pawar             zero_cols = zero_cols >> 1;
322*c83a76b0SSuyog Pawar         }
323*c83a76b0SSuyog Pawar 
324*c83a76b0SSuyog Pawar         pi2_tmp = pi2_tmp_orig;
325*c83a76b0SSuyog Pawar 
326*c83a76b0SSuyog Pawar         /* Inverse Transform 2nd stage */
327*c83a76b0SSuyog Pawar         shift = IT_SHIFT_STAGE_2;
328*c83a76b0SSuyog Pawar         add = 1 << (shift - 1);
329*c83a76b0SSuyog Pawar         if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
330*c83a76b0SSuyog Pawar         {
331*c83a76b0SSuyog Pawar             for(j = 0; j < trans_size; j++)
332*c83a76b0SSuyog Pawar             {
333*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
334*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
335*c83a76b0SSuyog Pawar                 {
336*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
337*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size];
338*c83a76b0SSuyog Pawar                 }
339*c83a76b0SSuyog Pawar                 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
340*c83a76b0SSuyog Pawar                 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
341*c83a76b0SSuyog Pawar                 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
342*c83a76b0SSuyog Pawar                 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
343*c83a76b0SSuyog Pawar 
344*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
345*c83a76b0SSuyog Pawar                 e[0] = ee[0] + eo[0];
346*c83a76b0SSuyog Pawar                 e[3] = ee[0] - eo[0];
347*c83a76b0SSuyog Pawar                 e[1] = ee[1] + eo[1];
348*c83a76b0SSuyog Pawar                 e[2] = ee[1] - eo[1];
349*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
350*c83a76b0SSuyog Pawar                 {
351*c83a76b0SSuyog Pawar                     WORD32 itrans_out;
352*c83a76b0SSuyog Pawar                     itrans_out =
353*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
354*c83a76b0SSuyog Pawar                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
355*c83a76b0SSuyog Pawar                     itrans_out =
356*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
357*c83a76b0SSuyog Pawar                     pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
358*c83a76b0SSuyog Pawar                 }
359*c83a76b0SSuyog Pawar                 pi2_tmp++;
360*c83a76b0SSuyog Pawar                 pu1_pred += pred_strd;
361*c83a76b0SSuyog Pawar                 pu1_dst += dst_strd;
362*c83a76b0SSuyog Pawar             }
363*c83a76b0SSuyog Pawar         }
364*c83a76b0SSuyog Pawar         else /* All rows of output of 1st stage are non-zero */
365*c83a76b0SSuyog Pawar         {
366*c83a76b0SSuyog Pawar             for(j = 0; j < trans_size; j++)
367*c83a76b0SSuyog Pawar             {
368*c83a76b0SSuyog Pawar                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
369*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
370*c83a76b0SSuyog Pawar                 {
371*c83a76b0SSuyog Pawar                     o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
372*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_8[3][k]
373*c83a76b0SSuyog Pawar                                                     * pi2_tmp[3 * trans_size]
374*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_8[5][k]
375*c83a76b0SSuyog Pawar                                                     * pi2_tmp[5 * trans_size]
376*c83a76b0SSuyog Pawar                                     + g_ai2_ihevc_trans_8[7][k]
377*c83a76b0SSuyog Pawar                                                     * pi2_tmp[7 * trans_size];
378*c83a76b0SSuyog Pawar                 }
379*c83a76b0SSuyog Pawar 
380*c83a76b0SSuyog Pawar                 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
381*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
382*c83a76b0SSuyog Pawar                 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
383*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
384*c83a76b0SSuyog Pawar                 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
385*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
386*c83a76b0SSuyog Pawar                 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
387*c83a76b0SSuyog Pawar                                 + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
388*c83a76b0SSuyog Pawar 
389*c83a76b0SSuyog Pawar                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
390*c83a76b0SSuyog Pawar                 e[0] = ee[0] + eo[0];
391*c83a76b0SSuyog Pawar                 e[3] = ee[0] - eo[0];
392*c83a76b0SSuyog Pawar                 e[1] = ee[1] + eo[1];
393*c83a76b0SSuyog Pawar                 e[2] = ee[1] - eo[1];
394*c83a76b0SSuyog Pawar                 for(k = 0; k < 4; k++)
395*c83a76b0SSuyog Pawar                 {
396*c83a76b0SSuyog Pawar                     WORD32 itrans_out;
397*c83a76b0SSuyog Pawar                     itrans_out =
398*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
399*c83a76b0SSuyog Pawar                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
400*c83a76b0SSuyog Pawar                     itrans_out =
401*c83a76b0SSuyog Pawar                                     CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
402*c83a76b0SSuyog Pawar                     pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
403*c83a76b0SSuyog Pawar                 }
404*c83a76b0SSuyog Pawar                 pi2_tmp++;
405*c83a76b0SSuyog Pawar                 pu1_pred += pred_strd;
406*c83a76b0SSuyog Pawar                 pu1_dst += dst_strd;
407*c83a76b0SSuyog Pawar             }
408*c83a76b0SSuyog Pawar         }
409*c83a76b0SSuyog Pawar         /************************************************************************************************/
410*c83a76b0SSuyog Pawar         /************************************END - IT_RECON_8x8******************************************/
411*c83a76b0SSuyog Pawar         /************************************************************************************************/
412*c83a76b0SSuyog Pawar     }
413*c83a76b0SSuyog Pawar }
414*c83a76b0SSuyog Pawar 
415