/******************************************************************************
*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
*******************************************************************************
* @file
*  ihevc_resi_trans.c
*
* @brief
*  Contains function definitions for residual and forward transform
*
* @author
*  100470
*
* @par List of Functions:
*  - ihevc_resi_trans_4x4_ttype1()
*  - ihevc_resi_trans_4x4()
*  - ihevc_resi_trans_4x4_16bit()
*  - ihevc_resi_trans_8x8()
*  - ihevc_resi_trans_8x8_16bit()
*  - ihevc_resi_trans_16x16()
*  - ihevc_resi_trans_32x32()
*
* @remarks
*  None
*
*******************************************************************************
*/
43*c83a76b0SSuyog Pawar #include <stdio.h>
44*c83a76b0SSuyog Pawar #include <string.h>
45*c83a76b0SSuyog Pawar #include <stdlib.h>
46*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
47*c83a76b0SSuyog Pawar #include "ihevc_macros.h"
48*c83a76b0SSuyog Pawar #include "ihevc_platform_macros.h"
49*c83a76b0SSuyog Pawar #include "ihevc_defs.h"
50*c83a76b0SSuyog Pawar #include "ihevc_trans_tables.h"
51*c83a76b0SSuyog Pawar #include "ihevc_resi_trans.h"
52*c83a76b0SSuyog Pawar #include "ihevc_func_selector.h"
53*c83a76b0SSuyog Pawar #include "ihevc_trans_macros.h"
54*c83a76b0SSuyog Pawar
55*c83a76b0SSuyog Pawar /**
56*c83a76b0SSuyog Pawar *******************************************************************************
57*c83a76b0SSuyog Pawar *
58*c83a76b0SSuyog Pawar * @brief
59*c83a76b0SSuyog Pawar * This function performs residue calculation and forward transform type 1
60*c83a76b0SSuyog Pawar * on input pixels
61*c83a76b0SSuyog Pawar *
62*c83a76b0SSuyog Pawar * @par Description:
63*c83a76b0SSuyog Pawar * Performs residue calculation by subtracting source and prediction and
64*c83a76b0SSuyog Pawar * followed by forward transform
65*c83a76b0SSuyog Pawar *
66*c83a76b0SSuyog Pawar * @param[in] pu1_src
67*c83a76b0SSuyog Pawar * Input 4x4 pixels
68*c83a76b0SSuyog Pawar *
69*c83a76b0SSuyog Pawar * @param[in] pu1_pred
70*c83a76b0SSuyog Pawar * Prediction data
71*c83a76b0SSuyog Pawar *
72*c83a76b0SSuyog Pawar * @param[in] pi2_tmp
73*c83a76b0SSuyog Pawar * Temporary buffer of size 4x4
74*c83a76b0SSuyog Pawar *
75*c83a76b0SSuyog Pawar * @param[out] pi2_dst
76*c83a76b0SSuyog Pawar * Output 4x4 coefficients
77*c83a76b0SSuyog Pawar *
78*c83a76b0SSuyog Pawar * @param[in] src_strd
79*c83a76b0SSuyog Pawar * Input stride
80*c83a76b0SSuyog Pawar *
81*c83a76b0SSuyog Pawar * @param[in] pred_strd
82*c83a76b0SSuyog Pawar * Prediction Stride
83*c83a76b0SSuyog Pawar *
84*c83a76b0SSuyog Pawar * @param[in] dst_strd
85*c83a76b0SSuyog Pawar * Output Stride
86*c83a76b0SSuyog Pawar *
87*c83a76b0SSuyog Pawar * @param[in] e_chroma_plane
88*c83a76b0SSuyog Pawar * Enum singalling chroma plane
89*c83a76b0SSuyog Pawar *
90*c83a76b0SSuyog Pawar *
91*c83a76b0SSuyog Pawar * @returns Void
92*c83a76b0SSuyog Pawar *
93*c83a76b0SSuyog Pawar * @remarks
94*c83a76b0SSuyog Pawar * None
95*c83a76b0SSuyog Pawar *
96*c83a76b0SSuyog Pawar *******************************************************************************
97*c83a76b0SSuyog Pawar */
98*c83a76b0SSuyog Pawar
ihevc_resi_trans_4x4_ttype1(UWORD8 * pu1_src,UWORD8 * pu1_pred,WORD32 * pi4_temp,WORD16 * pi2_dst,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,CHROMA_PLANE_ID_T e_chroma_plane)99*c83a76b0SSuyog Pawar UWORD32 ihevc_resi_trans_4x4_ttype1(UWORD8 *pu1_src,
100*c83a76b0SSuyog Pawar UWORD8 *pu1_pred,
101*c83a76b0SSuyog Pawar WORD32 *pi4_temp,
102*c83a76b0SSuyog Pawar WORD16 *pi2_dst,
103*c83a76b0SSuyog Pawar WORD32 src_strd,
104*c83a76b0SSuyog Pawar WORD32 pred_strd,
105*c83a76b0SSuyog Pawar WORD32 dst_strd,
106*c83a76b0SSuyog Pawar CHROMA_PLANE_ID_T e_chroma_plane)
107*c83a76b0SSuyog Pawar {
108*c83a76b0SSuyog Pawar WORD32 i, c[4];
109*c83a76b0SSuyog Pawar WORD32 add, shift;
110*c83a76b0SSuyog Pawar WORD32 trans_size;
111*c83a76b0SSuyog Pawar WORD32 *pi4_tmp_orig;
112*c83a76b0SSuyog Pawar WORD16 *pi2_dst_orig;
113*c83a76b0SSuyog Pawar UWORD32 u4_blk_sad = 0;
114*c83a76b0SSuyog Pawar UNUSED(e_chroma_plane);
115*c83a76b0SSuyog Pawar
116*c83a76b0SSuyog Pawar pi2_dst_orig = pi2_dst;
117*c83a76b0SSuyog Pawar pi4_tmp_orig = pi4_temp;
118*c83a76b0SSuyog Pawar trans_size = TRANS_SIZE_4;
119*c83a76b0SSuyog Pawar
120*c83a76b0SSuyog Pawar /* Residue + Forward Transform 1st stage */
121*c83a76b0SSuyog Pawar shift = 1; // log2(iWidth) - 1 + g_uiBitIncrement
122*c83a76b0SSuyog Pawar add = 1 << (shift - 1);
123*c83a76b0SSuyog Pawar
124*c83a76b0SSuyog Pawar for(i = 0; i < trans_size; i++)
125*c83a76b0SSuyog Pawar {
126*c83a76b0SSuyog Pawar WORD32 resi_tmp_1, resi_tmp_2, resi_tmp_3;
127*c83a76b0SSuyog Pawar
128*c83a76b0SSuyog Pawar // Intermediate Variables
129*c83a76b0SSuyog Pawar resi_tmp_1 = pu1_src[0] - pu1_pred[0];
130*c83a76b0SSuyog Pawar resi_tmp_2 = pu1_src[3] - pu1_pred[3];
131*c83a76b0SSuyog Pawar c[0] = resi_tmp_1 + resi_tmp_2;
132*c83a76b0SSuyog Pawar u4_blk_sad += abs(resi_tmp_1) + abs(resi_tmp_2);
133*c83a76b0SSuyog Pawar
134*c83a76b0SSuyog Pawar resi_tmp_1 = pu1_src[1] - pu1_pred[1];
135*c83a76b0SSuyog Pawar resi_tmp_2 = pu1_src[3] - pu1_pred[3];
136*c83a76b0SSuyog Pawar c[1] = resi_tmp_1 + resi_tmp_2;
137*c83a76b0SSuyog Pawar u4_blk_sad += abs(resi_tmp_1);
138*c83a76b0SSuyog Pawar
139*c83a76b0SSuyog Pawar resi_tmp_1 = pu1_src[0] - pu1_pred[0];
140*c83a76b0SSuyog Pawar resi_tmp_2 = pu1_src[1] - pu1_pred[1];
141*c83a76b0SSuyog Pawar c[2] = resi_tmp_1 - resi_tmp_2;
142*c83a76b0SSuyog Pawar
143*c83a76b0SSuyog Pawar resi_tmp_1 = pu1_src[2] - pu1_pred[2];
144*c83a76b0SSuyog Pawar c[3] = 74 * resi_tmp_1;
145*c83a76b0SSuyog Pawar u4_blk_sad += abs(resi_tmp_1);
146*c83a76b0SSuyog Pawar
147*c83a76b0SSuyog Pawar pi4_temp[0] = (29 * c[0] + 55 * c[1] + c[3] + add) >> shift;
148*c83a76b0SSuyog Pawar
149*c83a76b0SSuyog Pawar resi_tmp_1 = pu1_src[0] - pu1_pred[0];
150*c83a76b0SSuyog Pawar resi_tmp_2 = pu1_src[1] - pu1_pred[1];
151*c83a76b0SSuyog Pawar resi_tmp_3 = pu1_src[3] - pu1_pred[3];
152*c83a76b0SSuyog Pawar pi4_temp[trans_size] =
153*c83a76b0SSuyog Pawar (74 * (resi_tmp_1 + resi_tmp_2 - resi_tmp_3) + add)
154*c83a76b0SSuyog Pawar >> shift;
155*c83a76b0SSuyog Pawar pi4_temp[2 * trans_size] = (29 * c[2] + 55 * c[0] - c[3] + add) >> shift;
156*c83a76b0SSuyog Pawar pi4_temp[3 * trans_size] = (55 * c[2] - 29 * c[1] + c[3] + add) >> shift;
157*c83a76b0SSuyog Pawar
158*c83a76b0SSuyog Pawar pu1_src += src_strd;
159*c83a76b0SSuyog Pawar pu1_pred += pred_strd;
160*c83a76b0SSuyog Pawar pi4_temp++;
161*c83a76b0SSuyog Pawar }
162*c83a76b0SSuyog Pawar
163*c83a76b0SSuyog Pawar pi4_temp = pi4_tmp_orig;
164*c83a76b0SSuyog Pawar
165*c83a76b0SSuyog Pawar /* Forward transform 2nd stage */
166*c83a76b0SSuyog Pawar shift = 8; // log2(iHeight) + 6
167*c83a76b0SSuyog Pawar add = 1 << (shift - 1);
168*c83a76b0SSuyog Pawar
169*c83a76b0SSuyog Pawar for(i = 0; i < TRANS_SIZE_4; i++)
170*c83a76b0SSuyog Pawar {
171*c83a76b0SSuyog Pawar // Intermediate Variables
172*c83a76b0SSuyog Pawar c[0] = pi4_temp[0] + pi4_temp[3];
173*c83a76b0SSuyog Pawar c[1] = pi4_temp[1] + pi4_temp[3];
174*c83a76b0SSuyog Pawar c[2] = pi4_temp[0] - pi4_temp[1];
175*c83a76b0SSuyog Pawar c[3] = 74 * pi4_temp[2];
176*c83a76b0SSuyog Pawar
177*c83a76b0SSuyog Pawar pi2_dst[0] = (29 * c[0] + 55 * c[1] + c[3] + add) >> shift;
178*c83a76b0SSuyog Pawar pi2_dst[dst_strd] = (74 * (pi4_temp[0] + pi4_temp[1] - pi4_temp[3]) + add)
179*c83a76b0SSuyog Pawar >> shift;
180*c83a76b0SSuyog Pawar pi2_dst[2 * dst_strd] = (29 * c[2] + 55 * c[0] - c[3] + add) >> shift;
181*c83a76b0SSuyog Pawar pi2_dst[3 * dst_strd] = (55 * c[2] - 29 * c[1] + c[3] + add) >> shift;
182*c83a76b0SSuyog Pawar
183*c83a76b0SSuyog Pawar pi4_temp += trans_size;
184*c83a76b0SSuyog Pawar pi2_dst++;
185*c83a76b0SSuyog Pawar }
186*c83a76b0SSuyog Pawar
187*c83a76b0SSuyog Pawar return u4_blk_sad;
188*c83a76b0SSuyog Pawar }
189*c83a76b0SSuyog Pawar
190*c83a76b0SSuyog Pawar /**
191*c83a76b0SSuyog Pawar *******************************************************************************
192*c83a76b0SSuyog Pawar *
193*c83a76b0SSuyog Pawar * @brief
194*c83a76b0SSuyog Pawar * This function performs residue calculation and forward transform on
195*c83a76b0SSuyog Pawar * input pixels
196*c83a76b0SSuyog Pawar *
197*c83a76b0SSuyog Pawar * @par Description:
198*c83a76b0SSuyog Pawar * Performs residue calculation by subtracting source and prediction and
199*c83a76b0SSuyog Pawar * followed by forward transform
200*c83a76b0SSuyog Pawar *
201*c83a76b0SSuyog Pawar * @param[in] pu1_src
202*c83a76b0SSuyog Pawar * Input 4x4 pixels
203*c83a76b0SSuyog Pawar *
204*c83a76b0SSuyog Pawar * @param[in] pu1_pred
205*c83a76b0SSuyog Pawar * Prediction data
206*c83a76b0SSuyog Pawar *
207*c83a76b0SSuyog Pawar * @param[in] pi2_tmp
208*c83a76b0SSuyog Pawar * Temporary buffer of size 4x4
209*c83a76b0SSuyog Pawar *
210*c83a76b0SSuyog Pawar * @param[out] pi2_dst
211*c83a76b0SSuyog Pawar * Output 4x4 coefficients
212*c83a76b0SSuyog Pawar *
213*c83a76b0SSuyog Pawar * @param[in] src_strd
214*c83a76b0SSuyog Pawar * Input stride
215*c83a76b0SSuyog Pawar *
216*c83a76b0SSuyog Pawar * @param[in] pred_strd
217*c83a76b0SSuyog Pawar * Prediction Stride
218*c83a76b0SSuyog Pawar *
219*c83a76b0SSuyog Pawar * @param[in] dst_strd
220*c83a76b0SSuyog Pawar * Output Stride
221*c83a76b0SSuyog Pawar *
222*c83a76b0SSuyog Pawar * @param[in] e_chroma_plane
223*c83a76b0SSuyog Pawar * Enum singalling chroma plane
224*c83a76b0SSuyog Pawar *
225*c83a76b0SSuyog Pawar * @returns Void
226*c83a76b0SSuyog Pawar *
227*c83a76b0SSuyog Pawar * @remarks
228*c83a76b0SSuyog Pawar * None
229*c83a76b0SSuyog Pawar *
230*c83a76b0SSuyog Pawar *******************************************************************************
231*c83a76b0SSuyog Pawar */
232*c83a76b0SSuyog Pawar
ihevc_resi_trans_4x4(UWORD8 * pu1_src,UWORD8 * pu1_pred,WORD32 * pi4_temp,WORD16 * pi2_dst,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,CHROMA_PLANE_ID_T e_chroma_plane)233*c83a76b0SSuyog Pawar UWORD32 ihevc_resi_trans_4x4(UWORD8 *pu1_src,
234*c83a76b0SSuyog Pawar UWORD8 *pu1_pred,
235*c83a76b0SSuyog Pawar WORD32 *pi4_temp,
236*c83a76b0SSuyog Pawar WORD16 *pi2_dst,
237*c83a76b0SSuyog Pawar WORD32 src_strd,
238*c83a76b0SSuyog Pawar WORD32 pred_strd,
239*c83a76b0SSuyog Pawar WORD32 dst_strd,
240*c83a76b0SSuyog Pawar CHROMA_PLANE_ID_T e_chroma_plane)
241*c83a76b0SSuyog Pawar {
242*c83a76b0SSuyog Pawar WORD32 i;
243*c83a76b0SSuyog Pawar WORD32 e[2], o[2];
244*c83a76b0SSuyog Pawar WORD32 add, shift;
245*c83a76b0SSuyog Pawar WORD32 trans_size;
246*c83a76b0SSuyog Pawar WORD32 *pi4_tmp_orig;
247*c83a76b0SSuyog Pawar WORD16 *pi2_dst_orig;
248*c83a76b0SSuyog Pawar UWORD32 u4_blk_sad=0;
249*c83a76b0SSuyog Pawar WORD32 chroma_flag = 0;
250*c83a76b0SSuyog Pawar
251*c83a76b0SSuyog Pawar if (e_chroma_plane != NULL_PLANE)
252*c83a76b0SSuyog Pawar {
253*c83a76b0SSuyog Pawar chroma_flag = 1;
254*c83a76b0SSuyog Pawar pu1_src += e_chroma_plane;
255*c83a76b0SSuyog Pawar pu1_pred += e_chroma_plane;
256*c83a76b0SSuyog Pawar }
257*c83a76b0SSuyog Pawar
258*c83a76b0SSuyog Pawar pi2_dst_orig = pi2_dst;
259*c83a76b0SSuyog Pawar pi4_tmp_orig = pi4_temp;
260*c83a76b0SSuyog Pawar trans_size = TRANS_SIZE_4;
261*c83a76b0SSuyog Pawar
262*c83a76b0SSuyog Pawar /* Residue + Forward Transform 1st stage */
263*c83a76b0SSuyog Pawar shift = 1; // log2(iWidth) - 1 + g_uiBitIncrement
264*c83a76b0SSuyog Pawar add = 1 << (shift - 1);
265*c83a76b0SSuyog Pawar
266*c83a76b0SSuyog Pawar for(i = 0; i < trans_size; i++)
267*c83a76b0SSuyog Pawar {
268*c83a76b0SSuyog Pawar WORD32 resi_tmp_1, resi_tmp_2;
269*c83a76b0SSuyog Pawar
270*c83a76b0SSuyog Pawar /* e and o */
271*c83a76b0SSuyog Pawar resi_tmp_1 = pu1_src[0 + 0*chroma_flag] - pu1_pred[0 + 0*chroma_flag];
272*c83a76b0SSuyog Pawar resi_tmp_2 = pu1_src[3 + 3*chroma_flag] - pu1_pred[3 + 3*chroma_flag];
273*c83a76b0SSuyog Pawar e[0] = resi_tmp_1 + resi_tmp_2;
274*c83a76b0SSuyog Pawar o[0] = resi_tmp_1 - resi_tmp_2;
275*c83a76b0SSuyog Pawar u4_blk_sad += abs(resi_tmp_1);
276*c83a76b0SSuyog Pawar u4_blk_sad += abs(resi_tmp_2);
277*c83a76b0SSuyog Pawar
278*c83a76b0SSuyog Pawar resi_tmp_1 = pu1_src[1 + 1*chroma_flag] - pu1_pred[1 + 1*chroma_flag];
279*c83a76b0SSuyog Pawar resi_tmp_2 = pu1_src[2 + 2*chroma_flag] - pu1_pred[2 + 2*chroma_flag];
280*c83a76b0SSuyog Pawar e[1] = resi_tmp_1 + resi_tmp_2;
281*c83a76b0SSuyog Pawar o[1] = resi_tmp_1 - resi_tmp_2;
282*c83a76b0SSuyog Pawar u4_blk_sad += abs(resi_tmp_1);
283*c83a76b0SSuyog Pawar u4_blk_sad += abs(resi_tmp_2);
284*c83a76b0SSuyog Pawar
285*c83a76b0SSuyog Pawar pi4_temp[0] = (g_ai2_ihevc_trans_4[0][0] * e[0]
286*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_4[0][1] * e[1]);// + add) >> shift;
287*c83a76b0SSuyog Pawar pi4_temp[2 * trans_size] = (g_ai2_ihevc_trans_4[2][0] * e[0]
288*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_4[2][1] * e[1]);// + add) >> shift;
289*c83a76b0SSuyog Pawar pi4_temp[trans_size] = (g_ai2_ihevc_trans_4[1][0] * o[0]
290*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_4[1][1] * o[1]);// + add) >> shift;
291*c83a76b0SSuyog Pawar pi4_temp[3 * trans_size] = (g_ai2_ihevc_trans_4[3][0] * o[0]
292*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_4[3][1] * o[1]);// + add) >> shift;
293*c83a76b0SSuyog Pawar
294*c83a76b0SSuyog Pawar pu1_src += src_strd;
295*c83a76b0SSuyog Pawar pu1_pred += pred_strd;
296*c83a76b0SSuyog Pawar pi4_temp++;
297*c83a76b0SSuyog Pawar }
298*c83a76b0SSuyog Pawar
299*c83a76b0SSuyog Pawar pi4_temp = pi4_tmp_orig;
300*c83a76b0SSuyog Pawar /* Forward Transform 2nd stage */
301*c83a76b0SSuyog Pawar shift = 9; // log2(iHeight) + 6
302*c83a76b0SSuyog Pawar add = 1 << (shift - 1);
303*c83a76b0SSuyog Pawar
304*c83a76b0SSuyog Pawar for(i = 0; i < trans_size; i++)
305*c83a76b0SSuyog Pawar {
306*c83a76b0SSuyog Pawar
307*c83a76b0SSuyog Pawar /* e and o */
308*c83a76b0SSuyog Pawar e[0] = pi4_temp[0] + pi4_temp[3];
309*c83a76b0SSuyog Pawar o[0] = pi4_temp[0] - pi4_temp[3];
310*c83a76b0SSuyog Pawar e[1] = pi4_temp[1] + pi4_temp[2];
311*c83a76b0SSuyog Pawar o[1] = pi4_temp[1] - pi4_temp[2];
312*c83a76b0SSuyog Pawar
313*c83a76b0SSuyog Pawar pi2_dst[0] = (g_ai2_ihevc_trans_4[0][0] * e[0]
314*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_4[0][1] * e[1] + add) >> shift;
315*c83a76b0SSuyog Pawar pi2_dst[2 * dst_strd] = (g_ai2_ihevc_trans_4[2][0] * e[0]
316*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_4[2][1] * e[1] + add) >> shift;
317*c83a76b0SSuyog Pawar pi2_dst[dst_strd] = (g_ai2_ihevc_trans_4[1][0] * o[0]
318*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_4[1][1] * o[1] + add) >> shift;
319*c83a76b0SSuyog Pawar pi2_dst[3 * dst_strd] = (g_ai2_ihevc_trans_4[3][0] * o[0]
320*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_4[3][1] * o[1] + add) >> shift;
321*c83a76b0SSuyog Pawar
322*c83a76b0SSuyog Pawar pi4_temp += trans_size;
323*c83a76b0SSuyog Pawar pi2_dst++;
324*c83a76b0SSuyog Pawar }
325*c83a76b0SSuyog Pawar
326*c83a76b0SSuyog Pawar return u4_blk_sad;
327*c83a76b0SSuyog Pawar }
328*c83a76b0SSuyog Pawar
/**
*******************************************************************************
*
* @brief
*  Computes the residue of a 4x4 block with 16 bit source samples and applies
*  the forward transform to it
*
* @par Description:
*  The residue is obtained by subtracting the 8 bit prediction from the 16 bit
*  source. The first stage transforms every residue row with
*  g_ai2_ihevc_trans_4, rounds by 1 bit and stores the result transposed in
*  pi2_tmp. The second stage transforms every row of pi2_tmp, rounds by 8 bits
*  and writes the coefficients to pi2_dst
*
* @param[in] pi2_src
*  Input 4x4 samples
*
* @param[in] pu1_pred
*  Prediction data
*
* @param[in] pi2_tmp
*  Temporary buffer of size 4x4 (16 WORD16 entries are written and read back)
*
* @param[out] pi2_dst
*  Output 4x4 coefficients
*
* @param[in] src_strd
*  Input stride, in WORD16 elements
*
* @param[in] pred_strd
*  Prediction Stride
*
* @param[in] dst_strd
*  Output Stride, in WORD16 elements
*
* @returns  Void
*
* @remarks
*  None
*
*******************************************************************************
*/
void ihevc_resi_trans_4x4_16bit(WORD16 *pi2_src,
                                UWORD8 *pu1_pred,
                                WORD16 *pi2_tmp,
                                WORD16 *pi2_dst,
                                WORD32 src_strd,
                                WORD32 pred_strd,
                                WORD32 dst_strd)
{
    WORD32 i;
    WORD32 e[2], o[2];
    WORD32 add, shift;
    WORD32 trans_size;
    WORD16 *pi2_tmp_orig;

    pi2_tmp_orig = pi2_tmp;
    trans_size = TRANS_SIZE_4;

    /* Residue + Forward Transform 1st stage */
    shift = 1; // log2(iWidth) - 1 + g_uiBitIncrement
    add = 1 << (shift - 1);

    for(i = 0; i < trans_size; i++)
    {
        WORD32 resi_tmp_1, resi_tmp_2;

        /* e and o */
        resi_tmp_1 = pi2_src[0] - pu1_pred[0];
        resi_tmp_2 = pi2_src[3] - pu1_pred[3];
        e[0] = resi_tmp_1 + resi_tmp_2;
        o[0] = resi_tmp_1 - resi_tmp_2;

        resi_tmp_1 = pi2_src[1] - pu1_pred[1];
        resi_tmp_2 = pi2_src[2] - pu1_pred[2];
        e[1] = resi_tmp_1 + resi_tmp_2;
        o[1] = resi_tmp_1 - resi_tmp_2;

        /* Output k of this row is written to row k of the temp buffer, */
        /* so the first stage result is stored transposed               */
        pi2_tmp[0] = (g_ai2_ihevc_trans_4[0][0] * e[0]
                        + g_ai2_ihevc_trans_4[0][1] * e[1] + add) >> shift;
        pi2_tmp[2 * trans_size] = (g_ai2_ihevc_trans_4[2][0] * e[0]
                        + g_ai2_ihevc_trans_4[2][1] * e[1] + add) >> shift;
        pi2_tmp[trans_size] = (g_ai2_ihevc_trans_4[1][0] * o[0]
                        + g_ai2_ihevc_trans_4[1][1] * o[1] + add) >> shift;
        pi2_tmp[3 * trans_size] = (g_ai2_ihevc_trans_4[3][0] * o[0]
                        + g_ai2_ihevc_trans_4[3][1] * o[1] + add) >> shift;

        pi2_src += src_strd;
        pu1_pred += pred_strd;
        pi2_tmp++;
    }

    /* Rewind to the start of the temp buffer for the second pass */
    pi2_tmp = pi2_tmp_orig;

    /* Forward Transform 2nd stage */
    shift = 8; // log2(iHeight) + 6
    add = 1 << (shift - 1);

    for(i = 0; i < trans_size; i++)
    {
        /* e and o */
        e[0] = pi2_tmp[0] + pi2_tmp[3];
        o[0] = pi2_tmp[0] - pi2_tmp[3];
        e[1] = pi2_tmp[1] + pi2_tmp[2];
        o[1] = pi2_tmp[1] - pi2_tmp[2];

        pi2_dst[0] = (g_ai2_ihevc_trans_4[0][0] * e[0]
                        + g_ai2_ihevc_trans_4[0][1] * e[1] + add) >> shift;
        pi2_dst[2 * dst_strd] = (g_ai2_ihevc_trans_4[2][0] * e[0]
                        + g_ai2_ihevc_trans_4[2][1] * e[1] + add) >> shift;
        pi2_dst[dst_strd] = (g_ai2_ihevc_trans_4[1][0] * o[0]
                        + g_ai2_ihevc_trans_4[1][1] * o[1] + add) >> shift;
        pi2_dst[3 * dst_strd] = (g_ai2_ihevc_trans_4[3][0] * o[0]
                        + g_ai2_ihevc_trans_4[3][1] * o[1] + add) >> shift;

        pi2_tmp += trans_size;
        pi2_dst++;
    }
}
408*c83a76b0SSuyog Pawar /**
409*c83a76b0SSuyog Pawar *******************************************************************************
410*c83a76b0SSuyog Pawar *
411*c83a76b0SSuyog Pawar * @brief
412*c83a76b0SSuyog Pawar * This function performs residue calculation and forward transform on
413*c83a76b0SSuyog Pawar * input pixels
414*c83a76b0SSuyog Pawar *
415*c83a76b0SSuyog Pawar * @par Description:
416*c83a76b0SSuyog Pawar * Performs residue calculation by subtracting source and prediction and
417*c83a76b0SSuyog Pawar * followed by forward transform
418*c83a76b0SSuyog Pawar *
419*c83a76b0SSuyog Pawar * @param[in] pu1_src
420*c83a76b0SSuyog Pawar * Input 8x8 pixels
421*c83a76b0SSuyog Pawar *
422*c83a76b0SSuyog Pawar * @param[in] pu1_pred
423*c83a76b0SSuyog Pawar * Prediction data
424*c83a76b0SSuyog Pawar *
425*c83a76b0SSuyog Pawar * @param[in] pi2_tmp
426*c83a76b0SSuyog Pawar * Temporary buffer of size 8x8
427*c83a76b0SSuyog Pawar *
428*c83a76b0SSuyog Pawar * @param[out] pi2_dst
429*c83a76b0SSuyog Pawar * Output 8x8 coefficients
430*c83a76b0SSuyog Pawar *
431*c83a76b0SSuyog Pawar * @param[in] src_strd
432*c83a76b0SSuyog Pawar * Input stride
433*c83a76b0SSuyog Pawar *
434*c83a76b0SSuyog Pawar * @param[in] pred_strd
435*c83a76b0SSuyog Pawar * Prediction Stride
436*c83a76b0SSuyog Pawar *
437*c83a76b0SSuyog Pawar * @param[in] dst_strd
438*c83a76b0SSuyog Pawar * Output Stride
439*c83a76b0SSuyog Pawar *
440*c83a76b0SSuyog Pawar * @param[in] e_chroma_plane
441*c83a76b0SSuyog Pawar * Enum singalling chroma plane
442*c83a76b0SSuyog Pawar *
443*c83a76b0SSuyog Pawar * @returns Void
444*c83a76b0SSuyog Pawar *
445*c83a76b0SSuyog Pawar * @remarks
446*c83a76b0SSuyog Pawar * None
447*c83a76b0SSuyog Pawar *
448*c83a76b0SSuyog Pawar *******************************************************************************
449*c83a76b0SSuyog Pawar */
450*c83a76b0SSuyog Pawar
ihevc_resi_trans_8x8(UWORD8 * pu1_src,UWORD8 * pu1_pred,WORD32 * pi4_temp,WORD16 * pi2_dst,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,CHROMA_PLANE_ID_T e_chroma_plane)451*c83a76b0SSuyog Pawar UWORD32 ihevc_resi_trans_8x8(UWORD8 *pu1_src,
452*c83a76b0SSuyog Pawar UWORD8 *pu1_pred,
453*c83a76b0SSuyog Pawar WORD32 *pi4_temp,
454*c83a76b0SSuyog Pawar WORD16 *pi2_dst,
455*c83a76b0SSuyog Pawar WORD32 src_strd,
456*c83a76b0SSuyog Pawar WORD32 pred_strd,
457*c83a76b0SSuyog Pawar WORD32 dst_strd,
458*c83a76b0SSuyog Pawar CHROMA_PLANE_ID_T e_chroma_plane)
459*c83a76b0SSuyog Pawar {
460*c83a76b0SSuyog Pawar WORD32 i, k;
461*c83a76b0SSuyog Pawar WORD32 e[4], o[4];
462*c83a76b0SSuyog Pawar WORD32 ee[2], eo[2];
463*c83a76b0SSuyog Pawar WORD32 add, shift;
464*c83a76b0SSuyog Pawar WORD32 trans_size;
465*c83a76b0SSuyog Pawar WORD32 *pi4_tmp_orig;
466*c83a76b0SSuyog Pawar // WORD16 *pi2_tmp;
467*c83a76b0SSuyog Pawar WORD16 *pi2_dst_orig;
468*c83a76b0SSuyog Pawar UWORD32 u4_blk_sad=0;
469*c83a76b0SSuyog Pawar WORD32 chroma_flag = 0;
470*c83a76b0SSuyog Pawar
471*c83a76b0SSuyog Pawar if (e_chroma_plane != NULL_PLANE)
472*c83a76b0SSuyog Pawar {
473*c83a76b0SSuyog Pawar chroma_flag = 1;
474*c83a76b0SSuyog Pawar pu1_src += e_chroma_plane;
475*c83a76b0SSuyog Pawar pu1_pred += e_chroma_plane;
476*c83a76b0SSuyog Pawar }
477*c83a76b0SSuyog Pawar
478*c83a76b0SSuyog Pawar pi2_dst_orig = pi2_dst;
479*c83a76b0SSuyog Pawar pi4_tmp_orig = pi4_temp;
480*c83a76b0SSuyog Pawar trans_size = TRANS_SIZE_8;
481*c83a76b0SSuyog Pawar /* Residue + Forward Transform 1st stage */
482*c83a76b0SSuyog Pawar shift = 2; // log2(iWidth) - 1 + g_uiBitIncrement
483*c83a76b0SSuyog Pawar add = 1 << (shift - 1);
484*c83a76b0SSuyog Pawar
485*c83a76b0SSuyog Pawar for(i = 0; i < trans_size; i++)
486*c83a76b0SSuyog Pawar {
487*c83a76b0SSuyog Pawar WORD32 resi_tmp_1, resi_tmp_2;
488*c83a76b0SSuyog Pawar
489*c83a76b0SSuyog Pawar /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
490*c83a76b0SSuyog Pawar /* e and o*/
491*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
492*c83a76b0SSuyog Pawar {
493*c83a76b0SSuyog Pawar resi_tmp_1 = pu1_src[k*(1+chroma_flag)] - pu1_pred[k*(1+chroma_flag)];
494*c83a76b0SSuyog Pawar resi_tmp_2 = pu1_src[(7-k)*(1+chroma_flag)] - pu1_pred[(7-k)*(1+chroma_flag)];
495*c83a76b0SSuyog Pawar e[k] = resi_tmp_1 + resi_tmp_2;
496*c83a76b0SSuyog Pawar o[k] = resi_tmp_1 - resi_tmp_2;
497*c83a76b0SSuyog Pawar u4_blk_sad += abs(resi_tmp_1) + abs(resi_tmp_2);
498*c83a76b0SSuyog Pawar }
499*c83a76b0SSuyog Pawar /* ee and eo */
500*c83a76b0SSuyog Pawar ee[0] = e[0] + e[3];
501*c83a76b0SSuyog Pawar eo[0] = e[0] - e[3];
502*c83a76b0SSuyog Pawar ee[1] = e[1] + e[2];
503*c83a76b0SSuyog Pawar eo[1] = e[1] - e[2];
504*c83a76b0SSuyog Pawar
505*c83a76b0SSuyog Pawar pi4_temp[0] = (g_ai2_ihevc_trans_8[0][0] * ee[0]
506*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[0][1] * ee[1]);// + add) >> shift;
507*c83a76b0SSuyog Pawar pi4_temp[4 * trans_size] = (g_ai2_ihevc_trans_8[4][0] * ee[0]
508*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[4][1] * ee[1]);// + add) >> shift;
509*c83a76b0SSuyog Pawar pi4_temp[2 * trans_size] = (g_ai2_ihevc_trans_8[2][0] * eo[0]
510*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[2][1] * eo[1]);// + add) >> shift;
511*c83a76b0SSuyog Pawar pi4_temp[6 * trans_size] = (g_ai2_ihevc_trans_8[6][0] * eo[0]
512*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[6][1] * eo[1]);// + add) >> shift;
513*c83a76b0SSuyog Pawar
514*c83a76b0SSuyog Pawar pi4_temp[trans_size] = (g_ai2_ihevc_trans_8[1][0] * o[0]
515*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[1][1] * o[1]
516*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[1][2] * o[2]
517*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[1][3] * o[3]);// + add) >> shift;
518*c83a76b0SSuyog Pawar pi4_temp[3 * trans_size] = (g_ai2_ihevc_trans_8[3][0] * o[0]
519*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[3][1] * o[1]
520*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[3][2] * o[2]
521*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[3][3] * o[3]);// + add) >> shift;
522*c83a76b0SSuyog Pawar pi4_temp[5 * trans_size] = (g_ai2_ihevc_trans_8[5][0] * o[0]
523*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[5][1] * o[1]
524*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[5][2] * o[2]
525*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[5][3] * o[3]);// + add) >> shift;
526*c83a76b0SSuyog Pawar pi4_temp[7 * trans_size] = (g_ai2_ihevc_trans_8[7][0] * o[0]
527*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[7][1] * o[1]
528*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[7][2] * o[2]
529*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[7][3] * o[3]);// + add) >> shift;
530*c83a76b0SSuyog Pawar
531*c83a76b0SSuyog Pawar pu1_src += src_strd;
532*c83a76b0SSuyog Pawar pu1_pred += pred_strd;
533*c83a76b0SSuyog Pawar pi4_temp++;
534*c83a76b0SSuyog Pawar }
535*c83a76b0SSuyog Pawar
536*c83a76b0SSuyog Pawar pi4_temp = pi4_tmp_orig;
537*c83a76b0SSuyog Pawar /* Forward Transform 2nd stage */
538*c83a76b0SSuyog Pawar shift = 11; // log2(iHeight) + 6
539*c83a76b0SSuyog Pawar add = 1 << (shift - 1);
540*c83a76b0SSuyog Pawar
541*c83a76b0SSuyog Pawar for(i = 0; i < trans_size; i++)
542*c83a76b0SSuyog Pawar {
543*c83a76b0SSuyog Pawar /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
544*c83a76b0SSuyog Pawar /* e and o*/
545*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
546*c83a76b0SSuyog Pawar {
547*c83a76b0SSuyog Pawar e[k] = pi4_temp[k] + pi4_temp[7 - k];
548*c83a76b0SSuyog Pawar o[k] = pi4_temp[k] - pi4_temp[7 - k];
549*c83a76b0SSuyog Pawar }
550*c83a76b0SSuyog Pawar /* ee and eo */
551*c83a76b0SSuyog Pawar ee[0] = e[0] + e[3];
552*c83a76b0SSuyog Pawar eo[0] = e[0] - e[3];
553*c83a76b0SSuyog Pawar ee[1] = e[1] + e[2];
554*c83a76b0SSuyog Pawar eo[1] = e[1] - e[2];
555*c83a76b0SSuyog Pawar
556*c83a76b0SSuyog Pawar pi2_dst[0] = (g_ai2_ihevc_trans_8[0][0] * ee[0]
557*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[0][1] * ee[1] + add) >> shift;
558*c83a76b0SSuyog Pawar pi2_dst[4 * dst_strd] = (g_ai2_ihevc_trans_8[4][0] * ee[0]
559*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[4][1] * ee[1] + add) >> shift;
560*c83a76b0SSuyog Pawar pi2_dst[2 * dst_strd] = (g_ai2_ihevc_trans_8[2][0] * eo[0]
561*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[2][1] * eo[1] + add) >> shift;
562*c83a76b0SSuyog Pawar pi2_dst[6 * dst_strd] = (g_ai2_ihevc_trans_8[6][0] * eo[0]
563*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[6][1] * eo[1] + add) >> shift;
564*c83a76b0SSuyog Pawar
565*c83a76b0SSuyog Pawar pi2_dst[dst_strd] = (g_ai2_ihevc_trans_8[1][0] * o[0]
566*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[1][1] * o[1]
567*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[1][2] * o[2]
568*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[1][3] * o[3] + add) >> shift;
569*c83a76b0SSuyog Pawar pi2_dst[3 * dst_strd] = (g_ai2_ihevc_trans_8[3][0] * o[0]
570*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[3][1] * o[1]
571*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[3][2] * o[2]
572*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[3][3] * o[3] + add) >> shift;
573*c83a76b0SSuyog Pawar pi2_dst[5 * dst_strd] = (g_ai2_ihevc_trans_8[5][0] * o[0]
574*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[5][1] * o[1]
575*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[5][2] * o[2]
576*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[5][3] * o[3] + add) >> shift;
577*c83a76b0SSuyog Pawar pi2_dst[7 * dst_strd] = (g_ai2_ihevc_trans_8[7][0] * o[0]
578*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[7][1] * o[1]
579*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[7][2] * o[2]
580*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_8[7][3] * o[3] + add) >> shift;
581*c83a76b0SSuyog Pawar
582*c83a76b0SSuyog Pawar pi4_temp += trans_size;
583*c83a76b0SSuyog Pawar pi2_dst++;
584*c83a76b0SSuyog Pawar }
585*c83a76b0SSuyog Pawar
586*c83a76b0SSuyog Pawar return u4_blk_sad;
587*c83a76b0SSuyog Pawar }
588*c83a76b0SSuyog Pawar
/**
 *******************************************************************************
 *
 * @brief
 *  Residue calculation and 8x8 forward transform for 16-bit source samples
 *
 * @par Description:
 *  Forms the residue by subtracting the 8-bit prediction from the 16-bit
 *  source and applies a two-stage forward transform built from
 *  g_ai2_ihevc_trans_8. Stage 1 transforms every input row and writes the
 *  result transposed into pi2_tmp; stage 2 transforms every row of pi2_tmp
 *  and writes the result transposed into pi2_dst.
 *
 * @param[in] pi2_src
 *  Input 8x8 samples (16 bit)
 *
 * @param[in] pu1_pred
 *  Prediction data (8 bit)
 *
 * @param[in] pi2_tmp
 *  Temporary buffer of size 8x8 (row pitch TRANS_SIZE_8)
 *
 * @param[out] pi2_dst
 *  Output 8x8 coefficients
 *
 * @param[in] src_strd
 *  Input stride
 *
 * @param[in] pred_strd
 *  Prediction stride
 *
 * @param[in] dst_strd
 *  Output stride
 *
 * @returns Void
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
void ihevc_resi_trans_8x8_16bit(WORD16 *pi2_src,
                                UWORD8 *pu1_pred,
                                WORD16 *pi2_tmp,
                                WORD16 *pi2_dst,
                                WORD32 src_strd,
                                WORD32 pred_strd,
                                WORD32 dst_strd)
{
    WORD32 row, j, k;
    WORD32 e[4], o[4];
    WORD32 ee[2], eo[2];
    WORD32 acc;
    WORD32 shift, add;
    const WORD32 trans_size = TRANS_SIZE_8;

    /* Residue + Forward Transform 1st stage */
    shift = 2; /* log2(8) - 1 */
    add = 1 << (shift - 1);

    for(row = 0; row < trans_size; row++)
    {
        WORD16 *pi2_src_row = pi2_src + row * src_strd;
        UWORD8 *pu1_pred_row = pu1_pred + row * pred_strd;
        /* Input row 'row' becomes column 'row' of the temporary buffer */
        WORD16 *pi2_tmp_col = pi2_tmp + row;

        /* Even (e) and odd (o) parts from mirrored residue pairs */
        for(j = 0; j < 4; j++)
        {
            WORD32 resi_left = pi2_src_row[j] - pu1_pred_row[j];
            WORD32 resi_right = pi2_src_row[7 - j] - pu1_pred_row[7 - j];

            e[j] = resi_left + resi_right;
            o[j] = resi_left - resi_right;
        }

        /* Second level split of the even part */
        ee[0] = e[0] + e[3];
        ee[1] = e[1] + e[2];
        eo[0] = e[0] - e[3];
        eo[1] = e[1] - e[2];

        /* Output rows 0 and 4 depend only on ee */
        for(k = 0; k < 8; k += 4)
        {
            acc = g_ai2_ihevc_trans_8[k][0] * ee[0]
                            + g_ai2_ihevc_trans_8[k][1] * ee[1];
            pi2_tmp_col[k * trans_size] = (WORD16)((acc + add) >> shift);
        }

        /* Output rows 2 and 6 depend only on eo */
        for(k = 2; k < 8; k += 4)
        {
            acc = g_ai2_ihevc_trans_8[k][0] * eo[0]
                            + g_ai2_ihevc_trans_8[k][1] * eo[1];
            pi2_tmp_col[k * trans_size] = (WORD16)((acc + add) >> shift);
        }

        /* Odd output rows depend only on o */
        for(k = 1; k < 8; k += 2)
        {
            acc = 0;
            for(j = 0; j < 4; j++)
            {
                acc += g_ai2_ihevc_trans_8[k][j] * o[j];
            }
            pi2_tmp_col[k * trans_size] = (WORD16)((acc + add) >> shift);
        }
    }

    /* Forward Transform 2nd stage */
    shift = 9; /* log2(8) + 6 */
    add = 1 << (shift - 1);

    for(row = 0; row < trans_size; row++)
    {
        WORD16 *pi2_tmp_row = pi2_tmp + row * trans_size;
        /* Temporary row 'row' becomes column 'row' of the output */
        WORD16 *pi2_dst_col = pi2_dst + row;

        /* Even (e) and odd (o) parts */
        for(j = 0; j < 4; j++)
        {
            e[j] = pi2_tmp_row[j] + pi2_tmp_row[7 - j];
            o[j] = pi2_tmp_row[j] - pi2_tmp_row[7 - j];
        }

        ee[0] = e[0] + e[3];
        ee[1] = e[1] + e[2];
        eo[0] = e[0] - e[3];
        eo[1] = e[1] - e[2];

        for(k = 0; k < 8; k += 4)
        {
            acc = g_ai2_ihevc_trans_8[k][0] * ee[0]
                            + g_ai2_ihevc_trans_8[k][1] * ee[1];
            pi2_dst_col[k * dst_strd] = (WORD16)((acc + add) >> shift);
        }

        for(k = 2; k < 8; k += 4)
        {
            acc = g_ai2_ihevc_trans_8[k][0] * eo[0]
                            + g_ai2_ihevc_trans_8[k][1] * eo[1];
            pi2_dst_col[k * dst_strd] = (WORD16)((acc + add) >> shift);
        }

        for(k = 1; k < 8; k += 2)
        {
            acc = 0;
            for(j = 0; j < 4; j++)
            {
                acc += g_ai2_ihevc_trans_8[k][j] * o[j];
            }
            pi2_dst_col[k * dst_strd] = (WORD16)((acc + add) >> shift);
        }
    }
}
712*c83a76b0SSuyog Pawar /**
713*c83a76b0SSuyog Pawar *******************************************************************************
714*c83a76b0SSuyog Pawar *
715*c83a76b0SSuyog Pawar * @brief
716*c83a76b0SSuyog Pawar * This function performs residue calculation and forward transform on
717*c83a76b0SSuyog Pawar * input pixels
718*c83a76b0SSuyog Pawar *
719*c83a76b0SSuyog Pawar * @par Description:
720*c83a76b0SSuyog Pawar * Performs residue calculation by subtracting source and prediction and
721*c83a76b0SSuyog Pawar * followed by forward transform
722*c83a76b0SSuyog Pawar *
723*c83a76b0SSuyog Pawar * @param[in] pu1_src
724*c83a76b0SSuyog Pawar * Input 16x16 pixels
725*c83a76b0SSuyog Pawar *
726*c83a76b0SSuyog Pawar * @param[in] pu1_pred
727*c83a76b0SSuyog Pawar * Prediction data
728*c83a76b0SSuyog Pawar *
729*c83a76b0SSuyog Pawar * @param[in] pi2_tmp
730*c83a76b0SSuyog Pawar * Temporary buffer of size 16x16
731*c83a76b0SSuyog Pawar *
732*c83a76b0SSuyog Pawar * @param[out] pi2_dst
733*c83a76b0SSuyog Pawar * Output 16x16 coefficients
734*c83a76b0SSuyog Pawar *
735*c83a76b0SSuyog Pawar * @param[in] src_strd
736*c83a76b0SSuyog Pawar * Input stride
737*c83a76b0SSuyog Pawar *
738*c83a76b0SSuyog Pawar * @param[in] pred_strd
739*c83a76b0SSuyog Pawar * Prediction Stride
740*c83a76b0SSuyog Pawar *
741*c83a76b0SSuyog Pawar * @param[in] dst_strd
742*c83a76b0SSuyog Pawar * Output Stride
743*c83a76b0SSuyog Pawar *
744*c83a76b0SSuyog Pawar * @param[in] e_chroma_plane
745*c83a76b0SSuyog Pawar * Enum singalling chroma plane
746*c83a76b0SSuyog Pawar *
747*c83a76b0SSuyog Pawar * @returns Void
748*c83a76b0SSuyog Pawar *
749*c83a76b0SSuyog Pawar * @remarks
750*c83a76b0SSuyog Pawar * None
751*c83a76b0SSuyog Pawar *
752*c83a76b0SSuyog Pawar *******************************************************************************
753*c83a76b0SSuyog Pawar */
754*c83a76b0SSuyog Pawar
ihevc_resi_trans_16x16(UWORD8 * pu1_src,UWORD8 * pu1_pred,WORD32 * pi4_temp,WORD16 * pi2_dst,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,CHROMA_PLANE_ID_T e_chroma_plane)755*c83a76b0SSuyog Pawar UWORD32 ihevc_resi_trans_16x16(UWORD8 *pu1_src,
756*c83a76b0SSuyog Pawar UWORD8 *pu1_pred,
757*c83a76b0SSuyog Pawar WORD32 *pi4_temp,
758*c83a76b0SSuyog Pawar WORD16 *pi2_dst,
759*c83a76b0SSuyog Pawar WORD32 src_strd,
760*c83a76b0SSuyog Pawar WORD32 pred_strd,
761*c83a76b0SSuyog Pawar WORD32 dst_strd,
762*c83a76b0SSuyog Pawar CHROMA_PLANE_ID_T e_chroma_plane)
763*c83a76b0SSuyog Pawar {
764*c83a76b0SSuyog Pawar WORD32 i, k;
765*c83a76b0SSuyog Pawar WORD32 e[8], o[8];
766*c83a76b0SSuyog Pawar WORD32 ee[4], eo[4];
767*c83a76b0SSuyog Pawar WORD32 eee[2], eeo[2];
768*c83a76b0SSuyog Pawar WORD32 add, shift;
769*c83a76b0SSuyog Pawar WORD32 trans_size;
770*c83a76b0SSuyog Pawar WORD32 *pi4_tmp_orig;
771*c83a76b0SSuyog Pawar WORD16 *pi2_dst_orig;
772*c83a76b0SSuyog Pawar UWORD32 u4_blk_sad = 0;
773*c83a76b0SSuyog Pawar WORD32 chroma_flag = 0;
774*c83a76b0SSuyog Pawar
775*c83a76b0SSuyog Pawar if (e_chroma_plane != NULL_PLANE)
776*c83a76b0SSuyog Pawar {
777*c83a76b0SSuyog Pawar chroma_flag = 1;
778*c83a76b0SSuyog Pawar pu1_src += e_chroma_plane;
779*c83a76b0SSuyog Pawar pu1_pred += e_chroma_plane;
780*c83a76b0SSuyog Pawar }
781*c83a76b0SSuyog Pawar
782*c83a76b0SSuyog Pawar pi2_dst_orig = pi2_dst;
783*c83a76b0SSuyog Pawar pi4_tmp_orig = pi4_temp;
784*c83a76b0SSuyog Pawar trans_size = TRANS_SIZE_16;
785*c83a76b0SSuyog Pawar /* Residue + Forward Transform 1st stage */
786*c83a76b0SSuyog Pawar shift = 3; // log2(iWidth) - 1 + g_uiBitIncrement
787*c83a76b0SSuyog Pawar add = 1 << (shift - 1);
788*c83a76b0SSuyog Pawar
789*c83a76b0SSuyog Pawar for(i = 0; i < trans_size; i++)
790*c83a76b0SSuyog Pawar {
791*c83a76b0SSuyog Pawar WORD32 resi_tmp_1, resi_tmp_2;
792*c83a76b0SSuyog Pawar /* e and o*/
793*c83a76b0SSuyog Pawar for(k = 0; k < 8; k++)
794*c83a76b0SSuyog Pawar {
795*c83a76b0SSuyog Pawar resi_tmp_1 = pu1_src[k*(1+chroma_flag)] - pu1_pred[k*(1+chroma_flag)];
796*c83a76b0SSuyog Pawar resi_tmp_2 = pu1_src[(15-k)*(1+chroma_flag)] - pu1_pred[(15-k)*(1+chroma_flag)];
797*c83a76b0SSuyog Pawar e[k] = resi_tmp_1 + resi_tmp_2;
798*c83a76b0SSuyog Pawar o[k] = resi_tmp_1 - resi_tmp_2;
799*c83a76b0SSuyog Pawar u4_blk_sad += abs(resi_tmp_1) + abs(resi_tmp_2);
800*c83a76b0SSuyog Pawar }
801*c83a76b0SSuyog Pawar /* ee and eo */
802*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
803*c83a76b0SSuyog Pawar {
804*c83a76b0SSuyog Pawar ee[k] = e[k] + e[7 - k];
805*c83a76b0SSuyog Pawar eo[k] = e[k] - e[7 - k];
806*c83a76b0SSuyog Pawar }
807*c83a76b0SSuyog Pawar /* eee and eeo */
808*c83a76b0SSuyog Pawar eee[0] = ee[0] + ee[3];
809*c83a76b0SSuyog Pawar eeo[0] = ee[0] - ee[3];
810*c83a76b0SSuyog Pawar eee[1] = ee[1] + ee[2];
811*c83a76b0SSuyog Pawar eeo[1] = ee[1] - ee[2];
812*c83a76b0SSuyog Pawar
813*c83a76b0SSuyog Pawar pi4_temp[0] = (g_ai2_ihevc_trans_16[0][0] * eee[0]
814*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[0][1] * eee[1]);// + add) >> shift;
815*c83a76b0SSuyog Pawar pi4_temp[8 * trans_size] = (g_ai2_ihevc_trans_16[8][0] * eee[0]
816*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[8][1] * eee[1]);// + add) >> shift;
817*c83a76b0SSuyog Pawar pi4_temp[4 * trans_size] = (g_ai2_ihevc_trans_16[4][0] * eeo[0]
818*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[4][1] * eeo[1]);// + add) >> shift;
819*c83a76b0SSuyog Pawar pi4_temp[12 * trans_size] = (g_ai2_ihevc_trans_16[12][0] * eeo[0]
820*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[12][1] * eeo[1]);// + add) >> shift;
821*c83a76b0SSuyog Pawar
822*c83a76b0SSuyog Pawar for(k = 2; k < 16; k += 4)
823*c83a76b0SSuyog Pawar {
824*c83a76b0SSuyog Pawar pi4_temp[k * trans_size] = (g_ai2_ihevc_trans_16[k][0] * eo[0]
825*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][1] * eo[1]
826*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][2] * eo[2]
827*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][3] * eo[3]);// + add)>> shift;
828*c83a76b0SSuyog Pawar
829*c83a76b0SSuyog Pawar }
830*c83a76b0SSuyog Pawar
831*c83a76b0SSuyog Pawar for(k = 1; k < 16; k += 2)
832*c83a76b0SSuyog Pawar {
833*c83a76b0SSuyog Pawar pi4_temp[k * trans_size] = (g_ai2_ihevc_trans_16[k][0] * o[0]
834*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][1] * o[1]
835*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][2] * o[2]
836*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][3] * o[3]
837*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][4] * o[4]
838*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][5] * o[5]
839*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][6] * o[6]
840*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][7] * o[7]);// + add) >> shift;
841*c83a76b0SSuyog Pawar }
842*c83a76b0SSuyog Pawar pu1_src += src_strd;
843*c83a76b0SSuyog Pawar pu1_pred += pred_strd;
844*c83a76b0SSuyog Pawar pi4_temp++;
845*c83a76b0SSuyog Pawar }
846*c83a76b0SSuyog Pawar
847*c83a76b0SSuyog Pawar pi4_temp = pi4_tmp_orig;
848*c83a76b0SSuyog Pawar /* Forward Transform 2nd stage */
849*c83a76b0SSuyog Pawar shift = 13; // log2(iHeight) + 6
850*c83a76b0SSuyog Pawar add = 1 << (shift - 1);
851*c83a76b0SSuyog Pawar
852*c83a76b0SSuyog Pawar for(i = 0; i < TRANS_SIZE_16; i++)
853*c83a76b0SSuyog Pawar {
854*c83a76b0SSuyog Pawar /* e and o*/
855*c83a76b0SSuyog Pawar for(k = 0; k < 8; k++)
856*c83a76b0SSuyog Pawar {
857*c83a76b0SSuyog Pawar e[k] = pi4_temp[k] + pi4_temp[15 - k];
858*c83a76b0SSuyog Pawar o[k] = pi4_temp[k] - pi4_temp[15 - k];
859*c83a76b0SSuyog Pawar }
860*c83a76b0SSuyog Pawar /* ee and eo */
861*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
862*c83a76b0SSuyog Pawar {
863*c83a76b0SSuyog Pawar ee[k] = e[k] + e[7 - k];
864*c83a76b0SSuyog Pawar eo[k] = e[k] - e[7 - k];
865*c83a76b0SSuyog Pawar }
866*c83a76b0SSuyog Pawar /* eee and eeo */
867*c83a76b0SSuyog Pawar eee[0] = ee[0] + ee[3];
868*c83a76b0SSuyog Pawar eeo[0] = ee[0] - ee[3];
869*c83a76b0SSuyog Pawar eee[1] = ee[1] + ee[2];
870*c83a76b0SSuyog Pawar eeo[1] = ee[1] - ee[2];
871*c83a76b0SSuyog Pawar
872*c83a76b0SSuyog Pawar pi2_dst[0] = (g_ai2_ihevc_trans_16[0][0] * eee[0]
873*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[0][1] * eee[1] + add) >> shift;
874*c83a76b0SSuyog Pawar pi2_dst[8 * dst_strd] = (g_ai2_ihevc_trans_16[8][0] * eee[0]
875*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[8][1] * eee[1] + add) >> shift;
876*c83a76b0SSuyog Pawar pi2_dst[4 * dst_strd] = (g_ai2_ihevc_trans_16[4][0] * eeo[0]
877*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[4][1] * eeo[1] + add) >> shift;
878*c83a76b0SSuyog Pawar pi2_dst[12 * dst_strd] = (g_ai2_ihevc_trans_16[12][0] * eeo[0]
879*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[12][1] * eeo[1] + add) >> shift;
880*c83a76b0SSuyog Pawar
881*c83a76b0SSuyog Pawar for(k = 2; k < 16; k += 4)
882*c83a76b0SSuyog Pawar {
883*c83a76b0SSuyog Pawar pi2_dst[k * dst_strd] = (g_ai2_ihevc_trans_16[k][0] * eo[0]
884*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][1] * eo[1]
885*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][2] * eo[2]
886*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][3] * eo[3] + add)
887*c83a76b0SSuyog Pawar >> shift;
888*c83a76b0SSuyog Pawar }
889*c83a76b0SSuyog Pawar
890*c83a76b0SSuyog Pawar for(k = 1; k < 16; k += 2)
891*c83a76b0SSuyog Pawar {
892*c83a76b0SSuyog Pawar pi2_dst[k * dst_strd] = (g_ai2_ihevc_trans_16[k][0] * o[0]
893*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][1] * o[1]
894*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][2] * o[2]
895*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][3] * o[3]
896*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][4] * o[4]
897*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][5] * o[5]
898*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][6] * o[6]
899*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_16[k][7] * o[7] + add) >> shift;
900*c83a76b0SSuyog Pawar }
901*c83a76b0SSuyog Pawar
902*c83a76b0SSuyog Pawar pi4_temp += trans_size;
903*c83a76b0SSuyog Pawar pi2_dst++;
904*c83a76b0SSuyog Pawar }
905*c83a76b0SSuyog Pawar
906*c83a76b0SSuyog Pawar return u4_blk_sad;
907*c83a76b0SSuyog Pawar }
908*c83a76b0SSuyog Pawar
909*c83a76b0SSuyog Pawar
/**
 *******************************************************************************
 *
 * @brief
 *  Residue calculation and 16x16 forward transform for 16-bit source samples
 *
 * @par Description:
 *  Forms the residue by subtracting the 8-bit prediction from the 16-bit
 *  source and applies a two-stage forward transform built from
 *  g_ai2_ihevc_trans_16. Stage 1 transforms every input row and writes the
 *  result transposed into pi2_tmp; stage 2 transforms every row of pi2_tmp
 *  and writes the result transposed into pi2_dst.
 *
 * @param[in] pi2_src
 *  Input 16x16 samples (16 bit)
 *
 * @param[in] pu1_pred
 *  Prediction data (8 bit)
 *
 * @param[in] pi2_tmp
 *  Temporary buffer of size 16x16 (row pitch TRANS_SIZE_16)
 *
 * @param[out] pi2_dst
 *  Output 16x16 coefficients
 *
 * @param[in] src_strd
 *  Input stride
 *
 * @param[in] pred_strd
 *  Prediction stride
 *
 * @param[in] dst_strd
 *  Output stride
 *
 * @returns Void
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
void ihevc_resi_trans_16x16_16bit(WORD16 *pi2_src,
                                  UWORD8 *pu1_pred,
                                  WORD16 *pi2_tmp,
                                  WORD16 *pi2_dst,
                                  WORD32 src_strd,
                                  WORD32 pred_strd,
                                  WORD32 dst_strd)
{
    WORD32 row, j, k;
    WORD32 e[8], o[8];
    WORD32 ee[4], eo[4];
    WORD32 eee[2], eeo[2];
    WORD32 acc;
    WORD32 shift, add;
    const WORD32 trans_size = TRANS_SIZE_16;

    /* Residue + Forward Transform 1st stage */
    shift = 3; /* log2(16) - 1 */
    add = 1 << (shift - 1);

    for(row = 0; row < trans_size; row++)
    {
        WORD16 *pi2_src_row = pi2_src + row * src_strd;
        UWORD8 *pu1_pred_row = pu1_pred + row * pred_strd;
        /* Input row 'row' becomes column 'row' of the temporary buffer */
        WORD16 *pi2_tmp_col = pi2_tmp + row;

        /* e and o */
        for(j = 0; j < 8; j++)
        {
            WORD32 resi_left = pi2_src_row[j] - pu1_pred_row[j];
            WORD32 resi_right = pi2_src_row[15 - j] - pu1_pred_row[15 - j];

            e[j] = resi_left + resi_right;
            o[j] = resi_left - resi_right;
        }

        /* ee and eo */
        for(j = 0; j < 4; j++)
        {
            ee[j] = e[j] + e[7 - j];
            eo[j] = e[j] - e[7 - j];
        }

        /* eee and eeo */
        eee[0] = ee[0] + ee[3];
        eee[1] = ee[1] + ee[2];
        eeo[0] = ee[0] - ee[3];
        eeo[1] = ee[1] - ee[2];

        /* Output rows 0 and 8 depend only on eee */
        for(k = 0; k < 16; k += 8)
        {
            acc = g_ai2_ihevc_trans_16[k][0] * eee[0]
                            + g_ai2_ihevc_trans_16[k][1] * eee[1];
            pi2_tmp_col[k * trans_size] = (WORD16)((acc + add) >> shift);
        }

        /* Output rows 4 and 12 depend only on eeo */
        for(k = 4; k < 16; k += 8)
        {
            acc = g_ai2_ihevc_trans_16[k][0] * eeo[0]
                            + g_ai2_ihevc_trans_16[k][1] * eeo[1];
            pi2_tmp_col[k * trans_size] = (WORD16)((acc + add) >> shift);
        }

        /* Output rows 2, 6, 10 and 14 depend only on eo */
        for(k = 2; k < 16; k += 4)
        {
            acc = 0;
            for(j = 0; j < 4; j++)
            {
                acc += g_ai2_ihevc_trans_16[k][j] * eo[j];
            }
            pi2_tmp_col[k * trans_size] = (WORD16)((acc + add) >> shift);
        }

        /* Odd output rows depend only on o */
        for(k = 1; k < 16; k += 2)
        {
            acc = 0;
            for(j = 0; j < 8; j++)
            {
                acc += g_ai2_ihevc_trans_16[k][j] * o[j];
            }
            pi2_tmp_col[k * trans_size] = (WORD16)((acc + add) >> shift);
        }
    }

    /* Forward Transform 2nd stage */
    shift = 10; /* log2(16) + 6 */
    add = 1 << (shift - 1);

    for(row = 0; row < trans_size; row++)
    {
        WORD16 *pi2_tmp_row = pi2_tmp + row * trans_size;
        /* Temporary row 'row' becomes column 'row' of the output */
        WORD16 *pi2_dst_col = pi2_dst + row;

        /* e and o */
        for(j = 0; j < 8; j++)
        {
            e[j] = pi2_tmp_row[j] + pi2_tmp_row[15 - j];
            o[j] = pi2_tmp_row[j] - pi2_tmp_row[15 - j];
        }

        /* ee and eo */
        for(j = 0; j < 4; j++)
        {
            ee[j] = e[j] + e[7 - j];
            eo[j] = e[j] - e[7 - j];
        }

        /* eee and eeo */
        eee[0] = ee[0] + ee[3];
        eee[1] = ee[1] + ee[2];
        eeo[0] = ee[0] - ee[3];
        eeo[1] = ee[1] - ee[2];

        for(k = 0; k < 16; k += 8)
        {
            acc = g_ai2_ihevc_trans_16[k][0] * eee[0]
                            + g_ai2_ihevc_trans_16[k][1] * eee[1];
            pi2_dst_col[k * dst_strd] = (WORD16)((acc + add) >> shift);
        }

        for(k = 4; k < 16; k += 8)
        {
            acc = g_ai2_ihevc_trans_16[k][0] * eeo[0]
                            + g_ai2_ihevc_trans_16[k][1] * eeo[1];
            pi2_dst_col[k * dst_strd] = (WORD16)((acc + add) >> shift);
        }

        for(k = 2; k < 16; k += 4)
        {
            acc = 0;
            for(j = 0; j < 4; j++)
            {
                acc += g_ai2_ihevc_trans_16[k][j] * eo[j];
            }
            pi2_dst_col[k * dst_strd] = (WORD16)((acc + add) >> shift);
        }

        for(k = 1; k < 16; k += 2)
        {
            acc = 0;
            for(j = 0; j < 8; j++)
            {
                acc += g_ai2_ihevc_trans_16[k][j] * o[j];
            }
            pi2_dst_col[k * dst_strd] = (WORD16)((acc + add) >> shift);
        }
    }
}
1050*c83a76b0SSuyog Pawar
/**
 *******************************************************************************
 *
 * @brief
 *  This function performs residue calculation and forward transform on
 *  input pixels
 *
 * @par Description:
 *  Performs residue calculation by subtracting prediction from source,
 *  followed by forward transform
 *
 * @param[in] pu1_src
 *  Input 32x32 pixels
 *
 * @param[in] pu1_pred
 *  Prediction data
 *
 * @param[in] pi4_temp
 *  Temporary buffer of size 32x32 (32-bit entries)
 *
 * @param[out] pi2_dst
 *  Output 32x32 coefficients
 *
 * @param[in] src_strd
 *  Input stride
 *
 * @param[in] pred_strd
 *  Prediction Stride
 *
 * @param[in] dst_strd
 *  Output Stride
 *
 * @param[in] e_chroma_plane
 *  Enum signalling chroma plane (marked UNUSED by this function)
 *
 * @returns UWORD32; the function accumulates the sum of absolute residues
 *  of the block in u4_blk_sad
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
1093*c83a76b0SSuyog Pawar
ihevc_resi_trans_32x32(UWORD8 * pu1_src,UWORD8 * pu1_pred,WORD32 * pi4_temp,WORD16 * pi2_dst,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,CHROMA_PLANE_ID_T e_chroma_plane)1094*c83a76b0SSuyog Pawar UWORD32 ihevc_resi_trans_32x32(UWORD8 *pu1_src,
1095*c83a76b0SSuyog Pawar UWORD8 *pu1_pred,
1096*c83a76b0SSuyog Pawar WORD32 *pi4_temp,
1097*c83a76b0SSuyog Pawar WORD16 *pi2_dst,
1098*c83a76b0SSuyog Pawar WORD32 src_strd,
1099*c83a76b0SSuyog Pawar WORD32 pred_strd,
1100*c83a76b0SSuyog Pawar WORD32 dst_strd,
1101*c83a76b0SSuyog Pawar CHROMA_PLANE_ID_T e_chroma_plane)
1102*c83a76b0SSuyog Pawar {
1103*c83a76b0SSuyog Pawar WORD32 i, k;
1104*c83a76b0SSuyog Pawar WORD32 e[16], o[16];
1105*c83a76b0SSuyog Pawar WORD32 ee[8], eo[8];
1106*c83a76b0SSuyog Pawar WORD32 eee[4], eeo[4];
1107*c83a76b0SSuyog Pawar WORD32 eeee[2], eeeo[2];
1108*c83a76b0SSuyog Pawar WORD32 add, shift;
1109*c83a76b0SSuyog Pawar WORD32 trans_size;
1110*c83a76b0SSuyog Pawar WORD32 *pi4_tmp_orig;
1111*c83a76b0SSuyog Pawar WORD16 *pi2_dst_orig;
1112*c83a76b0SSuyog Pawar UWORD32 u4_blk_sad = 0 ;
1113*c83a76b0SSuyog Pawar UNUSED(e_chroma_plane);
1114*c83a76b0SSuyog Pawar
1115*c83a76b0SSuyog Pawar pi2_dst_orig = pi2_dst;
1116*c83a76b0SSuyog Pawar pi4_tmp_orig = pi4_temp;
1117*c83a76b0SSuyog Pawar trans_size = TRANS_SIZE_32;
1118*c83a76b0SSuyog Pawar /* Residue + Forward Transform 1st stage */
1119*c83a76b0SSuyog Pawar /* Made to zero to match with intrinsics */
1120*c83a76b0SSuyog Pawar shift = 0; // 4 : log2(iWidth) - 1 + g_uiBitIncrement
1121*c83a76b0SSuyog Pawar add = 0 ; //1 << (shift - 1);
1122*c83a76b0SSuyog Pawar
1123*c83a76b0SSuyog Pawar for(i = 0; i < trans_size; i++)
1124*c83a76b0SSuyog Pawar {
1125*c83a76b0SSuyog Pawar WORD32 resi_tmp_1, resi_tmp_2;
1126*c83a76b0SSuyog Pawar /* e and o*/
1127*c83a76b0SSuyog Pawar for(k = 0; k < 16; k++)
1128*c83a76b0SSuyog Pawar {
1129*c83a76b0SSuyog Pawar resi_tmp_1 = pu1_src[k] - pu1_pred[k];
1130*c83a76b0SSuyog Pawar resi_tmp_2 = pu1_src[31 - k] - pu1_pred[31 - k];
1131*c83a76b0SSuyog Pawar e[k] = resi_tmp_1 + resi_tmp_2;
1132*c83a76b0SSuyog Pawar o[k] = resi_tmp_1 - resi_tmp_2;
1133*c83a76b0SSuyog Pawar u4_blk_sad += abs(resi_tmp_1) + abs(resi_tmp_2);
1134*c83a76b0SSuyog Pawar }
1135*c83a76b0SSuyog Pawar /* ee and eo */
1136*c83a76b0SSuyog Pawar for(k = 0; k < 8; k++)
1137*c83a76b0SSuyog Pawar {
1138*c83a76b0SSuyog Pawar ee[k] = e[k] + e[15 - k];
1139*c83a76b0SSuyog Pawar eo[k] = e[k] - e[15 - k];
1140*c83a76b0SSuyog Pawar }
1141*c83a76b0SSuyog Pawar /* eee and eeo */
1142*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
1143*c83a76b0SSuyog Pawar {
1144*c83a76b0SSuyog Pawar eee[k] = ee[k] + ee[7 - k];
1145*c83a76b0SSuyog Pawar eeo[k] = ee[k] - ee[7 - k];
1146*c83a76b0SSuyog Pawar }
1147*c83a76b0SSuyog Pawar /* eeee and eeeo */
1148*c83a76b0SSuyog Pawar eeee[0] = eee[0] + eee[3];
1149*c83a76b0SSuyog Pawar eeeo[0] = eee[0] - eee[3];
1150*c83a76b0SSuyog Pawar eeee[1] = eee[1] + eee[2];
1151*c83a76b0SSuyog Pawar eeeo[1] = eee[1] - eee[2];
1152*c83a76b0SSuyog Pawar
1153*c83a76b0SSuyog Pawar pi4_temp[0] = (g_ai2_ihevc_trans_32[0][0] * eeee[0]
1154*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[0][1] * eeee[1]);// + add) >> shift;
1155*c83a76b0SSuyog Pawar pi4_temp[16 * trans_size] = (g_ai2_ihevc_trans_32[16][0] * eeee[0]
1156*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[16][1] * eeee[1]);// + add) >> shift;
1157*c83a76b0SSuyog Pawar pi4_temp[8 * trans_size] = (g_ai2_ihevc_trans_32[8][0] * eeeo[0]
1158*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[8][1] * eeeo[1]);// + add) >> shift;
1159*c83a76b0SSuyog Pawar pi4_temp[24 * trans_size] = (g_ai2_ihevc_trans_32[24][0] * eeeo[0]
1160*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[24][1] * eeeo[1]);// + add) >> shift;
1161*c83a76b0SSuyog Pawar for(k = 4; k < 32; k += 8)
1162*c83a76b0SSuyog Pawar {
1163*c83a76b0SSuyog Pawar pi4_temp[k * trans_size] = (g_ai2_ihevc_trans_32[k][0] * eeo[0]
1164*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][1] * eeo[1]
1165*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][2] * eeo[2]
1166*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][3] * eeo[3]);// + add)>> shift;
1167*c83a76b0SSuyog Pawar }
1168*c83a76b0SSuyog Pawar for(k = 2; k < 32; k += 4)
1169*c83a76b0SSuyog Pawar {
1170*c83a76b0SSuyog Pawar pi4_temp[k * trans_size] = (g_ai2_ihevc_trans_32[k][0] * eo[0]
1171*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][1] * eo[1]
1172*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][2] * eo[2]
1173*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][3] * eo[3]
1174*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][4] * eo[4]
1175*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][5] * eo[5]
1176*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][6] * eo[6]
1177*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][7] * eo[7]);// + add)>> shift;
1178*c83a76b0SSuyog Pawar }
1179*c83a76b0SSuyog Pawar for(k = 1; k < 32; k += 2)
1180*c83a76b0SSuyog Pawar {
1181*c83a76b0SSuyog Pawar pi4_temp[k * trans_size] = (g_ai2_ihevc_trans_32[k][0] * o[0]
1182*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][1] * o[1]
1183*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][2] * o[2]
1184*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][3] * o[3]
1185*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][4] * o[4]
1186*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][5] * o[5]
1187*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][6] * o[6]
1188*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][7] * o[7]
1189*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][8] * o[8]
1190*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][9] * o[9]
1191*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][10] * o[10]
1192*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][11] * o[11]
1193*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][12] * o[12]
1194*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][13] * o[13]
1195*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][14] * o[14]
1196*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][15] * o[15]);// + add) >> shift;
1197*c83a76b0SSuyog Pawar }
1198*c83a76b0SSuyog Pawar pu1_src += src_strd;
1199*c83a76b0SSuyog Pawar pu1_pred += pred_strd;
1200*c83a76b0SSuyog Pawar pi4_temp++;
1201*c83a76b0SSuyog Pawar }
1202*c83a76b0SSuyog Pawar
1203*c83a76b0SSuyog Pawar pi4_temp = pi4_tmp_orig;
1204*c83a76b0SSuyog Pawar /* Forward Transform 2nd stage */
1205*c83a76b0SSuyog Pawar shift = 15; // log2(iHeight) + 6
1206*c83a76b0SSuyog Pawar add = 1 << (shift - 1);
1207*c83a76b0SSuyog Pawar
1208*c83a76b0SSuyog Pawar for(i = 0; i < TRANS_SIZE_32; i++)
1209*c83a76b0SSuyog Pawar {
1210*c83a76b0SSuyog Pawar /* e and o*/
1211*c83a76b0SSuyog Pawar for(k = 0; k < 16; k++)
1212*c83a76b0SSuyog Pawar {
1213*c83a76b0SSuyog Pawar e[k] = pi4_temp[k] + pi4_temp[31 - k];
1214*c83a76b0SSuyog Pawar o[k] = pi4_temp[k] - pi4_temp[31 - k];
1215*c83a76b0SSuyog Pawar }
1216*c83a76b0SSuyog Pawar /* ee and eo */
1217*c83a76b0SSuyog Pawar for(k = 0; k < 8; k++)
1218*c83a76b0SSuyog Pawar {
1219*c83a76b0SSuyog Pawar ee[k] = e[k] + e[15 - k];
1220*c83a76b0SSuyog Pawar eo[k] = e[k] - e[15 - k];
1221*c83a76b0SSuyog Pawar }
1222*c83a76b0SSuyog Pawar /* eee and eeo */
1223*c83a76b0SSuyog Pawar for(k = 0; k < 4; k++)
1224*c83a76b0SSuyog Pawar {
1225*c83a76b0SSuyog Pawar eee[k] = ee[k] + ee[7 - k];
1226*c83a76b0SSuyog Pawar eeo[k] = ee[k] - ee[7 - k];
1227*c83a76b0SSuyog Pawar }
1228*c83a76b0SSuyog Pawar /* eeee and eeeo */
1229*c83a76b0SSuyog Pawar eeee[0] = eee[0] + eee[3];
1230*c83a76b0SSuyog Pawar eeeo[0] = eee[0] - eee[3];
1231*c83a76b0SSuyog Pawar eeee[1] = eee[1] + eee[2];
1232*c83a76b0SSuyog Pawar eeeo[1] = eee[1] - eee[2];
1233*c83a76b0SSuyog Pawar
1234*c83a76b0SSuyog Pawar pi2_dst[0] = (g_ai2_ihevc_trans_32[0][0] * eeee[0]
1235*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[0][1] * eeee[1] + add) >> shift;
1236*c83a76b0SSuyog Pawar pi2_dst[16 * dst_strd] = (g_ai2_ihevc_trans_32[16][0] * eeee[0]
1237*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[16][1] * eeee[1] + add) >> shift;
1238*c83a76b0SSuyog Pawar pi2_dst[8 * dst_strd] = (g_ai2_ihevc_trans_32[8][0] * eeeo[0]
1239*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[8][1] * eeeo[1] + add) >> shift;
1240*c83a76b0SSuyog Pawar pi2_dst[24 * dst_strd] = (g_ai2_ihevc_trans_32[24][0] * eeeo[0]
1241*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[24][1] * eeeo[1] + add) >> shift;
1242*c83a76b0SSuyog Pawar for(k = 4; k < 32; k += 8)
1243*c83a76b0SSuyog Pawar {
1244*c83a76b0SSuyog Pawar pi2_dst[k * dst_strd] = (g_ai2_ihevc_trans_32[k][0] * eeo[0]
1245*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][1] * eeo[1]
1246*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][2] * eeo[2]
1247*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][3] * eeo[3] + add)
1248*c83a76b0SSuyog Pawar >> shift;
1249*c83a76b0SSuyog Pawar }
1250*c83a76b0SSuyog Pawar for(k = 2; k < 32; k += 4)
1251*c83a76b0SSuyog Pawar {
1252*c83a76b0SSuyog Pawar pi2_dst[k * dst_strd] = (g_ai2_ihevc_trans_32[k][0] * eo[0]
1253*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][1] * eo[1]
1254*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][2] * eo[2]
1255*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][3] * eo[3]
1256*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][4] * eo[4]
1257*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][5] * eo[5]
1258*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][6] * eo[6]
1259*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][7] * eo[7] + add)
1260*c83a76b0SSuyog Pawar >> shift;
1261*c83a76b0SSuyog Pawar }
1262*c83a76b0SSuyog Pawar for(k = 1; k < 32; k += 2)
1263*c83a76b0SSuyog Pawar {
1264*c83a76b0SSuyog Pawar pi2_dst[k * dst_strd] = (g_ai2_ihevc_trans_32[k][0] * o[0]
1265*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][1] * o[1]
1266*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][2] * o[2]
1267*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][3] * o[3]
1268*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][4] * o[4]
1269*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][5] * o[5]
1270*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][6] * o[6]
1271*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][7] * o[7]
1272*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][8] * o[8]
1273*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][9] * o[9]
1274*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][10] * o[10]
1275*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][11] * o[11]
1276*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][12] * o[12]
1277*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][13] * o[13]
1278*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][14] * o[14]
1279*c83a76b0SSuyog Pawar + g_ai2_ihevc_trans_32[k][15] * o[15] + add)
1280*c83a76b0SSuyog Pawar >> shift;
1281*c83a76b0SSuyog Pawar }
1282*c83a76b0SSuyog Pawar
1283*c83a76b0SSuyog Pawar pi4_temp += trans_size;
1284*c83a76b0SSuyog Pawar pi2_dst++;
1285*c83a76b0SSuyog Pawar }
1286*c83a76b0SSuyog Pawar
1287*c83a76b0SSuyog Pawar return u4_blk_sad;
1288*c83a76b0SSuyog Pawar }
1289*c83a76b0SSuyog Pawar
1290*c83a76b0SSuyog Pawar
1291*c83a76b0SSuyog Pawar
/**
 *******************************************************************************
 *
 * @brief
 *  One 32-point forward-transform pass (partial butterfly) shared by both
 *  stages of ihevc_resi_trans_32x32_16bit()
 *
 * @par Description:
 *  The 32 inputs are folded into even/odd halves four times
 *  (32 -> 16 -> 8 -> 4 -> 2). Each group is multiplied with the leading
 *  entries of the matching rows of g_ai2_ihevc_trans_32, the rounding offset
 *  (1 << (shift - 1)) is added and the sum is right-shifted by shift.
 *  Coefficient k is stored at pi2_out[k * out_strd].
 *
 * @param[in] pi4_in
 *  32 input values; all of them are read before any output is written
 *
 * @param[out] pi2_out
 *  Destination of the 32 coefficients; results are narrowed to WORD16 by the
 *  implicit conversion on store
 *
 * @param[in] out_strd
 *  Distance, in WORD16 elements, between consecutive output coefficients
 *
 * @param[in] shift
 *  Right shift applied after rounding; callers in this file pass 4 and 11
 *
 * @returns  None
 *
 *******************************************************************************
 */
static void ihevc_resi_trans_32x32_16bit_1d(const WORD32 *pi4_in,
                                            WORD16 *pi2_out,
                                            WORD32 out_strd,
                                            WORD32 shift)
{
    WORD32 e[16], o[16];
    WORD32 ee[8], eo[8];
    WORD32 eee[4], eeo[4];
    WORD32 eeee[2], eeeo[2];
    WORD32 add = 1 << (shift - 1);
    WORD32 j, k, sum;

    /* e and o */
    for(k = 0; k < 16; k++)
    {
        e[k] = pi4_in[k] + pi4_in[31 - k];
        o[k] = pi4_in[k] - pi4_in[31 - k];
    }
    /* ee and eo */
    for(k = 0; k < 8; k++)
    {
        ee[k] = e[k] + e[15 - k];
        eo[k] = e[k] - e[15 - k];
    }
    /* eee and eeo */
    for(k = 0; k < 4; k++)
    {
        eee[k] = ee[k] + ee[7 - k];
        eeo[k] = ee[k] - ee[7 - k];
    }
    /* eeee and eeeo */
    eeee[0] = eee[0] + eee[3];
    eeeo[0] = eee[0] - eee[3];
    eeee[1] = eee[1] + eee[2];
    eeeo[1] = eee[1] - eee[2];

    /* Coefficients 0 and 16 come from eeee, 8 and 24 from eeeo (2 taps each) */
    pi2_out[0] = (g_ai2_ihevc_trans_32[0][0] * eeee[0]
                    + g_ai2_ihevc_trans_32[0][1] * eeee[1] + add) >> shift;
    pi2_out[16 * out_strd] = (g_ai2_ihevc_trans_32[16][0] * eeee[0]
                    + g_ai2_ihevc_trans_32[16][1] * eeee[1] + add) >> shift;
    pi2_out[8 * out_strd] = (g_ai2_ihevc_trans_32[8][0] * eeeo[0]
                    + g_ai2_ihevc_trans_32[8][1] * eeeo[1] + add) >> shift;
    pi2_out[24 * out_strd] = (g_ai2_ihevc_trans_32[24][0] * eeeo[0]
                    + g_ai2_ihevc_trans_32[24][1] * eeeo[1] + add) >> shift;

    /* Coefficients 4, 12, 20, 28: 4 taps on eeo */
    for(k = 4; k < 32; k += 8)
    {
        sum = 0;
        for(j = 0; j < 4; j++)
        {
            sum += g_ai2_ihevc_trans_32[k][j] * eeo[j];
        }
        pi2_out[k * out_strd] = (sum + add) >> shift;
    }

    /* Coefficients 2, 6, ..., 30: 8 taps on eo */
    for(k = 2; k < 32; k += 4)
    {
        sum = 0;
        for(j = 0; j < 8; j++)
        {
            sum += g_ai2_ihevc_trans_32[k][j] * eo[j];
        }
        pi2_out[k * out_strd] = (sum + add) >> shift;
    }

    /* Odd coefficients 1, 3, ..., 31: 16 taps on o */
    for(k = 1; k < 32; k += 2)
    {
        sum = 0;
        for(j = 0; j < 16; j++)
        {
            sum += g_ai2_ihevc_trans_32[k][j] * o[j];
        }
        pi2_out[k * out_strd] = (sum + add) >> shift;
    }
}

/**
 *******************************************************************************
 *
 * @brief
 *  Residue computation followed by a two-stage 32x32 forward transform, for
 *  16-bit source samples
 *
 * @par Description:
 *  Stage 1: for each of the TRANS_SIZE_32 rows, the residue
 *  pi2_src[k] - pu1_pred[k] (k = 0..31) is transformed with shift 4 and the
 *  result for row i is written to column i of pi2_tmp (stride TRANS_SIZE_32),
 *  i.e. the intermediate block is stored transposed.
 *  Stage 2: each 32-element row i of pi2_tmp is transformed with shift 11 and
 *  written to column i of pi2_dst (stride dst_strd).
 *
 * @param[in] pi2_src
 *  Source block, 16-bit samples
 *
 * @param[in] pu1_pred
 *  Prediction block, 8-bit samples
 *
 * @param[out] pi2_tmp
 *  Scratch buffer for the stage-1 output; indices 0 .. 32 * TRANS_SIZE_32 - 1
 *  are written. NOTE(review): presumably must not overlap pi2_src / pi2_dst -
 *  confirm against callers
 *
 * @param[out] pi2_dst
 *  Output transform coefficients
 *
 * @param[in] src_strd
 *  Row stride of pi2_src, in WORD16 elements
 *
 * @param[in] pred_strd
 *  Row stride of pu1_pred, in bytes
 *
 * @param[in] dst_strd
 *  Row stride of pi2_dst, in WORD16 elements
 *
 * @returns  None
 *
 * @remarks
 *  Both stage results are narrowed to WORD16 on store without saturation.
 *
 *******************************************************************************
 */
void ihevc_resi_trans_32x32_16bit(WORD16 *pi2_src,
                                  UWORD8 *pu1_pred,
                                  WORD16 *pi2_tmp,
                                  WORD16 *pi2_dst,
                                  WORD32 src_strd,
                                  WORD32 pred_strd,
                                  WORD32 dst_strd)
{
    WORD32 ai4_line[32];
    WORD32 i, k;
    WORD32 trans_size = TRANS_SIZE_32;

    /* Residue + Forward Transform 1st stage */
    /* shift = 4 : log2(iWidth) - 1 + g_uiBitIncrement */
    for(i = 0; i < trans_size; i++)
    {
        for(k = 0; k < 32; k++)
        {
            ai4_line[k] = pi2_src[k] - pu1_pred[k];
        }
        ihevc_resi_trans_32x32_16bit_1d(ai4_line, pi2_tmp + i, trans_size, 4);

        pi2_src += src_strd;
        pu1_pred += pred_strd;
    }

    /* Forward Transform 2nd stage */
    /* shift = 11 : log2(iHeight) + 6 */
    for(i = 0; i < TRANS_SIZE_32; i++)
    {
        for(k = 0; k < 32; k++)
        {
            ai4_line[k] = pi2_tmp[k];
        }
        ihevc_resi_trans_32x32_16bit_1d(ai4_line, pi2_dst + i, dst_strd, 11);

        pi2_tmp += trans_size;
    }
}
1483*c83a76b0SSuyog Pawar
1484