/******************************************************************************
*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/*!
******************************************************************************
* \file ihevce_chroma_had_satd.c
*
* \brief
* This file contains function definitions of chroma HAD SATD functions
*
* \date
* 15/07/2013
*
* \author
* Ittiam
*
* List of Functions
* ihevce_chroma_HAD_4x4_8bit()
* ihevce_chroma_compute_AC_HAD_4x4_8bit()
* ihevce_hbd_chroma_HAD_4x4()
* ihevce_hbd_chroma_compute_AC_HAD_4x4()
* ihevce_chroma_HAD_8x8_8bit()
* ihevce_hbd_chroma_HAD_8x8()
* ihevce_chroma_HAD_16x16_8bit()
* ihevce_hbd_chroma_HAD_16x16()
*
******************************************************************************
*/
45*c83a76b0SSuyog Pawar
46*c83a76b0SSuyog Pawar /*****************************************************************************/
47*c83a76b0SSuyog Pawar /* File Includes */
48*c83a76b0SSuyog Pawar /*****************************************************************************/
49*c83a76b0SSuyog Pawar /* System include files */
50*c83a76b0SSuyog Pawar #include <stdio.h>
51*c83a76b0SSuyog Pawar #include <string.h>
52*c83a76b0SSuyog Pawar #include <stdlib.h>
53*c83a76b0SSuyog Pawar #include <assert.h>
54*c83a76b0SSuyog Pawar #include <stdarg.h>
55*c83a76b0SSuyog Pawar #include <math.h>
56*c83a76b0SSuyog Pawar
57*c83a76b0SSuyog Pawar /* User include files */
58*c83a76b0SSuyog Pawar #include "ihevc_typedefs.h"
59*c83a76b0SSuyog Pawar #include "ihevc_debug.h"
60*c83a76b0SSuyog Pawar #include "itt_video_api.h"
61*c83a76b0SSuyog Pawar
62*c83a76b0SSuyog Pawar #include "ihevce_api.h"
63*c83a76b0SSuyog Pawar #include "ihevce_defs.h"
64*c83a76b0SSuyog Pawar #include "ihevce_had_satd.h"
65*c83a76b0SSuyog Pawar
66*c83a76b0SSuyog Pawar /*****************************************************************************/
67*c83a76b0SSuyog Pawar /* Function Definitions */
68*c83a76b0SSuyog Pawar /*****************************************************************************/
69*c83a76b0SSuyog Pawar
70*c83a76b0SSuyog Pawar /**
71*c83a76b0SSuyog Pawar *******************************************************************************
72*c83a76b0SSuyog Pawar *
73*c83a76b0SSuyog Pawar * @brief
74*c83a76b0SSuyog Pawar * Chroma Hadamard Transform of 4x4 block (8-bit input)
75*c83a76b0SSuyog Pawar *
76*c83a76b0SSuyog Pawar * @par Description:
77*c83a76b0SSuyog Pawar *
78*c83a76b0SSuyog Pawar * @param[in] pu1_origin
79*c83a76b0SSuyog Pawar * UWORD8 pointer to the source block (u or v, interleaved)
80*c83a76b0SSuyog Pawar *
81*c83a76b0SSuyog Pawar * @param[in] src_strd
82*c83a76b0SSuyog Pawar * WORD32 Source stride
83*c83a76b0SSuyog Pawar *
84*c83a76b0SSuyog Pawar * @param[in] pu1_pred_buf
85*c83a76b0SSuyog Pawar * UWORD8 pointer to the prediction block (u or v, interleaved)
86*c83a76b0SSuyog Pawar *
87*c83a76b0SSuyog Pawar * @param[in] pred_strd
88*c83a76b0SSuyog Pawar * WORD32 Pred stride
89*c83a76b0SSuyog Pawar *
90*c83a76b0SSuyog Pawar * @param[in] pi2_dst
91*c83a76b0SSuyog Pawar * WORD16 pointer to the transform block
92*c83a76b0SSuyog Pawar *
93*c83a76b0SSuyog Pawar * @param[in] dst_strd (u or v, interleaved)
94*c83a76b0SSuyog Pawar * WORD32 Destination stride
95*c83a76b0SSuyog Pawar *
96*c83a76b0SSuyog Pawar * @returns
97*c83a76b0SSuyog Pawar * Hadamard SAD
98*c83a76b0SSuyog Pawar *
99*c83a76b0SSuyog Pawar * @remarks
100*c83a76b0SSuyog Pawar * Not updating the transform destination now. Only returning the SATD
101*c83a76b0SSuyog Pawar *
102*c83a76b0SSuyog Pawar *******************************************************************************
103*c83a76b0SSuyog Pawar */
ihevce_chroma_HAD_4x4_8bit(UWORD8 * pu1_origin,WORD32 src_strd,UWORD8 * pu1_pred_buf,WORD32 pred_strd,WORD16 * pi2_dst,WORD32 dst_strd)104*c83a76b0SSuyog Pawar UWORD32 ihevce_chroma_HAD_4x4_8bit(
105*c83a76b0SSuyog Pawar UWORD8 *pu1_origin,
106*c83a76b0SSuyog Pawar WORD32 src_strd,
107*c83a76b0SSuyog Pawar UWORD8 *pu1_pred_buf,
108*c83a76b0SSuyog Pawar WORD32 pred_strd,
109*c83a76b0SSuyog Pawar WORD16 *pi2_dst,
110*c83a76b0SSuyog Pawar WORD32 dst_strd)
111*c83a76b0SSuyog Pawar {
112*c83a76b0SSuyog Pawar WORD32 k;
113*c83a76b0SSuyog Pawar WORD16 diff[16], m[16], d[16];
114*c83a76b0SSuyog Pawar UWORD32 u4_sad = 0;
115*c83a76b0SSuyog Pawar
116*c83a76b0SSuyog Pawar (void)pi2_dst;
117*c83a76b0SSuyog Pawar (void)dst_strd;
118*c83a76b0SSuyog Pawar for(k = 0; k < 16; k += 4)
119*c83a76b0SSuyog Pawar {
120*c83a76b0SSuyog Pawar /* u or v, interleaved */
121*c83a76b0SSuyog Pawar diff[k + 0] = pu1_origin[2 * 0] - pu1_pred_buf[2 * 0];
122*c83a76b0SSuyog Pawar diff[k + 1] = pu1_origin[2 * 1] - pu1_pred_buf[2 * 1];
123*c83a76b0SSuyog Pawar diff[k + 2] = pu1_origin[2 * 2] - pu1_pred_buf[2 * 2];
124*c83a76b0SSuyog Pawar diff[k + 3] = pu1_origin[2 * 3] - pu1_pred_buf[2 * 3];
125*c83a76b0SSuyog Pawar
126*c83a76b0SSuyog Pawar pu1_pred_buf += pred_strd;
127*c83a76b0SSuyog Pawar pu1_origin += src_strd;
128*c83a76b0SSuyog Pawar }
129*c83a76b0SSuyog Pawar
130*c83a76b0SSuyog Pawar /*===== hadamard transform =====*/
131*c83a76b0SSuyog Pawar m[0] = diff[0] + diff[12];
132*c83a76b0SSuyog Pawar m[1] = diff[1] + diff[13];
133*c83a76b0SSuyog Pawar m[2] = diff[2] + diff[14];
134*c83a76b0SSuyog Pawar m[3] = diff[3] + diff[15];
135*c83a76b0SSuyog Pawar m[4] = diff[4] + diff[8];
136*c83a76b0SSuyog Pawar m[5] = diff[5] + diff[9];
137*c83a76b0SSuyog Pawar m[6] = diff[6] + diff[10];
138*c83a76b0SSuyog Pawar m[7] = diff[7] + diff[11];
139*c83a76b0SSuyog Pawar m[8] = diff[4] - diff[8];
140*c83a76b0SSuyog Pawar m[9] = diff[5] - diff[9];
141*c83a76b0SSuyog Pawar m[10] = diff[6] - diff[10];
142*c83a76b0SSuyog Pawar m[11] = diff[7] - diff[11];
143*c83a76b0SSuyog Pawar m[12] = diff[0] - diff[12];
144*c83a76b0SSuyog Pawar m[13] = diff[1] - diff[13];
145*c83a76b0SSuyog Pawar m[14] = diff[2] - diff[14];
146*c83a76b0SSuyog Pawar m[15] = diff[3] - diff[15];
147*c83a76b0SSuyog Pawar
148*c83a76b0SSuyog Pawar d[0] = m[0] + m[4];
149*c83a76b0SSuyog Pawar d[1] = m[1] + m[5];
150*c83a76b0SSuyog Pawar d[2] = m[2] + m[6];
151*c83a76b0SSuyog Pawar d[3] = m[3] + m[7];
152*c83a76b0SSuyog Pawar d[4] = m[8] + m[12];
153*c83a76b0SSuyog Pawar d[5] = m[9] + m[13];
154*c83a76b0SSuyog Pawar d[6] = m[10] + m[14];
155*c83a76b0SSuyog Pawar d[7] = m[11] + m[15];
156*c83a76b0SSuyog Pawar d[8] = m[0] - m[4];
157*c83a76b0SSuyog Pawar d[9] = m[1] - m[5];
158*c83a76b0SSuyog Pawar d[10] = m[2] - m[6];
159*c83a76b0SSuyog Pawar d[11] = m[3] - m[7];
160*c83a76b0SSuyog Pawar d[12] = m[12] - m[8];
161*c83a76b0SSuyog Pawar d[13] = m[13] - m[9];
162*c83a76b0SSuyog Pawar d[14] = m[14] - m[10];
163*c83a76b0SSuyog Pawar d[15] = m[15] - m[11];
164*c83a76b0SSuyog Pawar
165*c83a76b0SSuyog Pawar m[0] = d[0] + d[3];
166*c83a76b0SSuyog Pawar m[1] = d[1] + d[2];
167*c83a76b0SSuyog Pawar m[2] = d[1] - d[2];
168*c83a76b0SSuyog Pawar m[3] = d[0] - d[3];
169*c83a76b0SSuyog Pawar m[4] = d[4] + d[7];
170*c83a76b0SSuyog Pawar m[5] = d[5] + d[6];
171*c83a76b0SSuyog Pawar m[6] = d[5] - d[6];
172*c83a76b0SSuyog Pawar m[7] = d[4] - d[7];
173*c83a76b0SSuyog Pawar m[8] = d[8] + d[11];
174*c83a76b0SSuyog Pawar m[9] = d[9] + d[10];
175*c83a76b0SSuyog Pawar m[10] = d[9] - d[10];
176*c83a76b0SSuyog Pawar m[11] = d[8] - d[11];
177*c83a76b0SSuyog Pawar m[12] = d[12] + d[15];
178*c83a76b0SSuyog Pawar m[13] = d[13] + d[14];
179*c83a76b0SSuyog Pawar m[14] = d[13] - d[14];
180*c83a76b0SSuyog Pawar m[15] = d[12] - d[15];
181*c83a76b0SSuyog Pawar
182*c83a76b0SSuyog Pawar d[0] = m[0] + m[1];
183*c83a76b0SSuyog Pawar d[1] = m[0] - m[1];
184*c83a76b0SSuyog Pawar d[2] = m[2] + m[3];
185*c83a76b0SSuyog Pawar d[3] = m[3] - m[2];
186*c83a76b0SSuyog Pawar d[4] = m[4] + m[5];
187*c83a76b0SSuyog Pawar d[5] = m[4] - m[5];
188*c83a76b0SSuyog Pawar d[6] = m[6] + m[7];
189*c83a76b0SSuyog Pawar d[7] = m[7] - m[6];
190*c83a76b0SSuyog Pawar d[8] = m[8] + m[9];
191*c83a76b0SSuyog Pawar d[9] = m[8] - m[9];
192*c83a76b0SSuyog Pawar d[10] = m[10] + m[11];
193*c83a76b0SSuyog Pawar d[11] = m[11] - m[10];
194*c83a76b0SSuyog Pawar d[12] = m[12] + m[13];
195*c83a76b0SSuyog Pawar d[13] = m[12] - m[13];
196*c83a76b0SSuyog Pawar d[14] = m[14] + m[15];
197*c83a76b0SSuyog Pawar d[15] = m[15] - m[14];
198*c83a76b0SSuyog Pawar
199*c83a76b0SSuyog Pawar /*===== sad =====*/
200*c83a76b0SSuyog Pawar for(k = 0; k < 16; ++k)
201*c83a76b0SSuyog Pawar {
202*c83a76b0SSuyog Pawar u4_sad += (d[k] > 0 ? d[k] : -d[k]);
203*c83a76b0SSuyog Pawar }
204*c83a76b0SSuyog Pawar u4_sad = ((u4_sad + 2) >> 2);
205*c83a76b0SSuyog Pawar
206*c83a76b0SSuyog Pawar return u4_sad;
207*c83a76b0SSuyog Pawar }
208*c83a76b0SSuyog Pawar
209*c83a76b0SSuyog Pawar /**
210*c83a76b0SSuyog Pawar *******************************************************************************
211*c83a76b0SSuyog Pawar *
212*c83a76b0SSuyog Pawar * @brief
213*c83a76b0SSuyog Pawar * Chroma Hadamard Transform of 4x4 block (8-bit input) with DC suppressed
214*c83a76b0SSuyog Pawar *
215*c83a76b0SSuyog Pawar * @par Description:
216*c83a76b0SSuyog Pawar *
217*c83a76b0SSuyog Pawar * @param[in] pu1_origin
218*c83a76b0SSuyog Pawar * UWORD8 pointer to the source block (u or v, interleaved)
219*c83a76b0SSuyog Pawar *
220*c83a76b0SSuyog Pawar * @param[in] src_strd
221*c83a76b0SSuyog Pawar * WORD32 Source stride
222*c83a76b0SSuyog Pawar *
223*c83a76b0SSuyog Pawar * @param[in] pu1_pred_buf
224*c83a76b0SSuyog Pawar * UWORD8 pointer to the prediction block (u or v, interleaved)
225*c83a76b0SSuyog Pawar *
226*c83a76b0SSuyog Pawar * @param[in] pred_strd
227*c83a76b0SSuyog Pawar * WORD32 Pred stride
228*c83a76b0SSuyog Pawar *
229*c83a76b0SSuyog Pawar * @param[in] pi2_dst
230*c83a76b0SSuyog Pawar * WORD16 pointer to the transform block
231*c83a76b0SSuyog Pawar *
232*c83a76b0SSuyog Pawar * @param[in] dst_strd (u or v, interleaved)
233*c83a76b0SSuyog Pawar * WORD32 Destination stride
234*c83a76b0SSuyog Pawar *
235*c83a76b0SSuyog Pawar * @returns
236*c83a76b0SSuyog Pawar * Hadamard SAD
237*c83a76b0SSuyog Pawar *
238*c83a76b0SSuyog Pawar * @remarks
239*c83a76b0SSuyog Pawar * Not updating the transform destination now. Only returning the SATD
240*c83a76b0SSuyog Pawar *
241*c83a76b0SSuyog Pawar *******************************************************************************
242*c83a76b0SSuyog Pawar */
ihevce_chroma_compute_AC_HAD_4x4_8bit(UWORD8 * pu1_origin,WORD32 src_strd,UWORD8 * pu1_pred_buf,WORD32 pred_strd,WORD16 * pi2_dst,WORD32 dst_strd)243*c83a76b0SSuyog Pawar UWORD32 ihevce_chroma_compute_AC_HAD_4x4_8bit(
244*c83a76b0SSuyog Pawar UWORD8 *pu1_origin,
245*c83a76b0SSuyog Pawar WORD32 src_strd,
246*c83a76b0SSuyog Pawar UWORD8 *pu1_pred_buf,
247*c83a76b0SSuyog Pawar WORD32 pred_strd,
248*c83a76b0SSuyog Pawar WORD16 *pi2_dst,
249*c83a76b0SSuyog Pawar WORD32 dst_strd)
250*c83a76b0SSuyog Pawar {
251*c83a76b0SSuyog Pawar WORD32 k;
252*c83a76b0SSuyog Pawar WORD16 diff[16], m[16], d[16];
253*c83a76b0SSuyog Pawar UWORD32 u4_sad = 0;
254*c83a76b0SSuyog Pawar
255*c83a76b0SSuyog Pawar (void)pi2_dst;
256*c83a76b0SSuyog Pawar (void)dst_strd;
257*c83a76b0SSuyog Pawar for(k = 0; k < 16; k += 4)
258*c83a76b0SSuyog Pawar {
259*c83a76b0SSuyog Pawar /* u or v, interleaved */
260*c83a76b0SSuyog Pawar diff[k + 0] = pu1_origin[2 * 0] - pu1_pred_buf[2 * 0];
261*c83a76b0SSuyog Pawar diff[k + 1] = pu1_origin[2 * 1] - pu1_pred_buf[2 * 1];
262*c83a76b0SSuyog Pawar diff[k + 2] = pu1_origin[2 * 2] - pu1_pred_buf[2 * 2];
263*c83a76b0SSuyog Pawar diff[k + 3] = pu1_origin[2 * 3] - pu1_pred_buf[2 * 3];
264*c83a76b0SSuyog Pawar
265*c83a76b0SSuyog Pawar pu1_pred_buf += pred_strd;
266*c83a76b0SSuyog Pawar pu1_origin += src_strd;
267*c83a76b0SSuyog Pawar }
268*c83a76b0SSuyog Pawar
269*c83a76b0SSuyog Pawar /*===== hadamard transform =====*/
270*c83a76b0SSuyog Pawar m[0] = diff[0] + diff[12];
271*c83a76b0SSuyog Pawar m[1] = diff[1] + diff[13];
272*c83a76b0SSuyog Pawar m[2] = diff[2] + diff[14];
273*c83a76b0SSuyog Pawar m[3] = diff[3] + diff[15];
274*c83a76b0SSuyog Pawar m[4] = diff[4] + diff[8];
275*c83a76b0SSuyog Pawar m[5] = diff[5] + diff[9];
276*c83a76b0SSuyog Pawar m[6] = diff[6] + diff[10];
277*c83a76b0SSuyog Pawar m[7] = diff[7] + diff[11];
278*c83a76b0SSuyog Pawar m[8] = diff[4] - diff[8];
279*c83a76b0SSuyog Pawar m[9] = diff[5] - diff[9];
280*c83a76b0SSuyog Pawar m[10] = diff[6] - diff[10];
281*c83a76b0SSuyog Pawar m[11] = diff[7] - diff[11];
282*c83a76b0SSuyog Pawar m[12] = diff[0] - diff[12];
283*c83a76b0SSuyog Pawar m[13] = diff[1] - diff[13];
284*c83a76b0SSuyog Pawar m[14] = diff[2] - diff[14];
285*c83a76b0SSuyog Pawar m[15] = diff[3] - diff[15];
286*c83a76b0SSuyog Pawar
287*c83a76b0SSuyog Pawar d[0] = m[0] + m[4];
288*c83a76b0SSuyog Pawar d[1] = m[1] + m[5];
289*c83a76b0SSuyog Pawar d[2] = m[2] + m[6];
290*c83a76b0SSuyog Pawar d[3] = m[3] + m[7];
291*c83a76b0SSuyog Pawar d[4] = m[8] + m[12];
292*c83a76b0SSuyog Pawar d[5] = m[9] + m[13];
293*c83a76b0SSuyog Pawar d[6] = m[10] + m[14];
294*c83a76b0SSuyog Pawar d[7] = m[11] + m[15];
295*c83a76b0SSuyog Pawar d[8] = m[0] - m[4];
296*c83a76b0SSuyog Pawar d[9] = m[1] - m[5];
297*c83a76b0SSuyog Pawar d[10] = m[2] - m[6];
298*c83a76b0SSuyog Pawar d[11] = m[3] - m[7];
299*c83a76b0SSuyog Pawar d[12] = m[12] - m[8];
300*c83a76b0SSuyog Pawar d[13] = m[13] - m[9];
301*c83a76b0SSuyog Pawar d[14] = m[14] - m[10];
302*c83a76b0SSuyog Pawar d[15] = m[15] - m[11];
303*c83a76b0SSuyog Pawar
304*c83a76b0SSuyog Pawar m[0] = d[0] + d[3];
305*c83a76b0SSuyog Pawar m[1] = d[1] + d[2];
306*c83a76b0SSuyog Pawar m[2] = d[1] - d[2];
307*c83a76b0SSuyog Pawar m[3] = d[0] - d[3];
308*c83a76b0SSuyog Pawar m[4] = d[4] + d[7];
309*c83a76b0SSuyog Pawar m[5] = d[5] + d[6];
310*c83a76b0SSuyog Pawar m[6] = d[5] - d[6];
311*c83a76b0SSuyog Pawar m[7] = d[4] - d[7];
312*c83a76b0SSuyog Pawar m[8] = d[8] + d[11];
313*c83a76b0SSuyog Pawar m[9] = d[9] + d[10];
314*c83a76b0SSuyog Pawar m[10] = d[9] - d[10];
315*c83a76b0SSuyog Pawar m[11] = d[8] - d[11];
316*c83a76b0SSuyog Pawar m[12] = d[12] + d[15];
317*c83a76b0SSuyog Pawar m[13] = d[13] + d[14];
318*c83a76b0SSuyog Pawar m[14] = d[13] - d[14];
319*c83a76b0SSuyog Pawar m[15] = d[12] - d[15];
320*c83a76b0SSuyog Pawar
321*c83a76b0SSuyog Pawar d[0] = m[0] + m[1];
322*c83a76b0SSuyog Pawar d[1] = m[0] - m[1];
323*c83a76b0SSuyog Pawar d[2] = m[2] + m[3];
324*c83a76b0SSuyog Pawar d[3] = m[3] - m[2];
325*c83a76b0SSuyog Pawar d[4] = m[4] + m[5];
326*c83a76b0SSuyog Pawar d[5] = m[4] - m[5];
327*c83a76b0SSuyog Pawar d[6] = m[6] + m[7];
328*c83a76b0SSuyog Pawar d[7] = m[7] - m[6];
329*c83a76b0SSuyog Pawar d[8] = m[8] + m[9];
330*c83a76b0SSuyog Pawar d[9] = m[8] - m[9];
331*c83a76b0SSuyog Pawar d[10] = m[10] + m[11];
332*c83a76b0SSuyog Pawar d[11] = m[11] - m[10];
333*c83a76b0SSuyog Pawar d[12] = m[12] + m[13];
334*c83a76b0SSuyog Pawar d[13] = m[12] - m[13];
335*c83a76b0SSuyog Pawar d[14] = m[14] + m[15];
336*c83a76b0SSuyog Pawar d[15] = m[15] - m[14];
337*c83a76b0SSuyog Pawar
338*c83a76b0SSuyog Pawar /* DC masking */
339*c83a76b0SSuyog Pawar d[0] = 0;
340*c83a76b0SSuyog Pawar
341*c83a76b0SSuyog Pawar /*===== sad =====*/
342*c83a76b0SSuyog Pawar for(k = 0; k < 16; ++k)
343*c83a76b0SSuyog Pawar {
344*c83a76b0SSuyog Pawar u4_sad += (d[k] > 0 ? d[k] : -d[k]);
345*c83a76b0SSuyog Pawar }
346*c83a76b0SSuyog Pawar u4_sad = ((u4_sad + 2) >> 2);
347*c83a76b0SSuyog Pawar
348*c83a76b0SSuyog Pawar return u4_sad;
349*c83a76b0SSuyog Pawar }
350*c83a76b0SSuyog Pawar
351*c83a76b0SSuyog Pawar /**
352*c83a76b0SSuyog Pawar *******************************************************************************
353*c83a76b0SSuyog Pawar *
354*c83a76b0SSuyog Pawar * @brief
355*c83a76b0SSuyog Pawar * Chroma Hadamard Transform of 8x8 block (8-bit input)
356*c83a76b0SSuyog Pawar *
357*c83a76b0SSuyog Pawar * @par Description:
358*c83a76b0SSuyog Pawar *
359*c83a76b0SSuyog Pawar * @param[in] pu1_origin
360*c83a76b0SSuyog Pawar * UWORD8 pointer to the source block (u or v, interleaved)
361*c83a76b0SSuyog Pawar *
362*c83a76b0SSuyog Pawar * @param[in] src_strd
363*c83a76b0SSuyog Pawar * WORD32 Source stride
364*c83a76b0SSuyog Pawar *
365*c83a76b0SSuyog Pawar * @param[in] pu1_pred_buf
366*c83a76b0SSuyog Pawar * UWORD8 pointer to the prediction block (u or v, interleaved)
367*c83a76b0SSuyog Pawar *
368*c83a76b0SSuyog Pawar * @param[in] pred_strd
369*c83a76b0SSuyog Pawar * WORD32 Pred stride
370*c83a76b0SSuyog Pawar *
371*c83a76b0SSuyog Pawar * @param[in] pi2_dst
372*c83a76b0SSuyog Pawar * WORD16 pointer to the transform block
373*c83a76b0SSuyog Pawar *
374*c83a76b0SSuyog Pawar * @param[in] dst_strd (u or v, interleaved)
375*c83a76b0SSuyog Pawar * WORD32 Destination stride
376*c83a76b0SSuyog Pawar *
377*c83a76b0SSuyog Pawar * @returns
378*c83a76b0SSuyog Pawar * Hadamard SAD
379*c83a76b0SSuyog Pawar *
380*c83a76b0SSuyog Pawar * @remarks
381*c83a76b0SSuyog Pawar * Not updating the transform destination now. Only returning the SATD
382*c83a76b0SSuyog Pawar *
383*c83a76b0SSuyog Pawar *******************************************************************************
384*c83a76b0SSuyog Pawar */
ihevce_chroma_HAD_8x8_8bit(UWORD8 * pu1_origin,WORD32 src_strd,UWORD8 * pu1_pred_buf,WORD32 pred_strd,WORD16 * pi2_dst,WORD32 dst_strd)385*c83a76b0SSuyog Pawar UWORD32 ihevce_chroma_HAD_8x8_8bit(
386*c83a76b0SSuyog Pawar UWORD8 *pu1_origin,
387*c83a76b0SSuyog Pawar WORD32 src_strd,
388*c83a76b0SSuyog Pawar UWORD8 *pu1_pred_buf,
389*c83a76b0SSuyog Pawar WORD32 pred_strd,
390*c83a76b0SSuyog Pawar WORD16 *pi2_dst,
391*c83a76b0SSuyog Pawar WORD32 dst_strd)
392*c83a76b0SSuyog Pawar {
393*c83a76b0SSuyog Pawar WORD32 k, i, j, jj;
394*c83a76b0SSuyog Pawar UWORD32 u4_sad = 0;
395*c83a76b0SSuyog Pawar WORD16 diff[64], m1[8][8], m2[8][8], m3[8][8];
396*c83a76b0SSuyog Pawar
397*c83a76b0SSuyog Pawar (void)pi2_dst;
398*c83a76b0SSuyog Pawar (void)dst_strd;
399*c83a76b0SSuyog Pawar for(k = 0; k < 64; k += 8)
400*c83a76b0SSuyog Pawar {
401*c83a76b0SSuyog Pawar /* u or v, interleaved */
402*c83a76b0SSuyog Pawar diff[k + 0] = pu1_origin[2 * 0] - pu1_pred_buf[2 * 0];
403*c83a76b0SSuyog Pawar diff[k + 1] = pu1_origin[2 * 1] - pu1_pred_buf[2 * 1];
404*c83a76b0SSuyog Pawar diff[k + 2] = pu1_origin[2 * 2] - pu1_pred_buf[2 * 2];
405*c83a76b0SSuyog Pawar diff[k + 3] = pu1_origin[2 * 3] - pu1_pred_buf[2 * 3];
406*c83a76b0SSuyog Pawar diff[k + 4] = pu1_origin[2 * 4] - pu1_pred_buf[2 * 4];
407*c83a76b0SSuyog Pawar diff[k + 5] = pu1_origin[2 * 5] - pu1_pred_buf[2 * 5];
408*c83a76b0SSuyog Pawar diff[k + 6] = pu1_origin[2 * 6] - pu1_pred_buf[2 * 6];
409*c83a76b0SSuyog Pawar diff[k + 7] = pu1_origin[2 * 7] - pu1_pred_buf[2 * 7];
410*c83a76b0SSuyog Pawar
411*c83a76b0SSuyog Pawar pu1_pred_buf += pred_strd;
412*c83a76b0SSuyog Pawar pu1_origin += src_strd;
413*c83a76b0SSuyog Pawar }
414*c83a76b0SSuyog Pawar
415*c83a76b0SSuyog Pawar /*===== hadamard transform =====*/
416*c83a76b0SSuyog Pawar // horizontal
417*c83a76b0SSuyog Pawar for(j = 0; j < 8; j++)
418*c83a76b0SSuyog Pawar {
419*c83a76b0SSuyog Pawar jj = j << 3;
420*c83a76b0SSuyog Pawar m2[j][0] = diff[jj] + diff[jj + 4];
421*c83a76b0SSuyog Pawar m2[j][1] = diff[jj + 1] + diff[jj + 5];
422*c83a76b0SSuyog Pawar m2[j][2] = diff[jj + 2] + diff[jj + 6];
423*c83a76b0SSuyog Pawar m2[j][3] = diff[jj + 3] + diff[jj + 7];
424*c83a76b0SSuyog Pawar m2[j][4] = diff[jj] - diff[jj + 4];
425*c83a76b0SSuyog Pawar m2[j][5] = diff[jj + 1] - diff[jj + 5];
426*c83a76b0SSuyog Pawar m2[j][6] = diff[jj + 2] - diff[jj + 6];
427*c83a76b0SSuyog Pawar m2[j][7] = diff[jj + 3] - diff[jj + 7];
428*c83a76b0SSuyog Pawar
429*c83a76b0SSuyog Pawar m1[j][0] = m2[j][0] + m2[j][2];
430*c83a76b0SSuyog Pawar m1[j][1] = m2[j][1] + m2[j][3];
431*c83a76b0SSuyog Pawar m1[j][2] = m2[j][0] - m2[j][2];
432*c83a76b0SSuyog Pawar m1[j][3] = m2[j][1] - m2[j][3];
433*c83a76b0SSuyog Pawar m1[j][4] = m2[j][4] + m2[j][6];
434*c83a76b0SSuyog Pawar m1[j][5] = m2[j][5] + m2[j][7];
435*c83a76b0SSuyog Pawar m1[j][6] = m2[j][4] - m2[j][6];
436*c83a76b0SSuyog Pawar m1[j][7] = m2[j][5] - m2[j][7];
437*c83a76b0SSuyog Pawar
438*c83a76b0SSuyog Pawar m2[j][0] = m1[j][0] + m1[j][1];
439*c83a76b0SSuyog Pawar m2[j][1] = m1[j][0] - m1[j][1];
440*c83a76b0SSuyog Pawar m2[j][2] = m1[j][2] + m1[j][3];
441*c83a76b0SSuyog Pawar m2[j][3] = m1[j][2] - m1[j][3];
442*c83a76b0SSuyog Pawar m2[j][4] = m1[j][4] + m1[j][5];
443*c83a76b0SSuyog Pawar m2[j][5] = m1[j][4] - m1[j][5];
444*c83a76b0SSuyog Pawar m2[j][6] = m1[j][6] + m1[j][7];
445*c83a76b0SSuyog Pawar m2[j][7] = m1[j][6] - m1[j][7];
446*c83a76b0SSuyog Pawar }
447*c83a76b0SSuyog Pawar
448*c83a76b0SSuyog Pawar // vertical
449*c83a76b0SSuyog Pawar for(i = 0; i < 8; i++)
450*c83a76b0SSuyog Pawar {
451*c83a76b0SSuyog Pawar m3[0][i] = m2[0][i] + m2[4][i];
452*c83a76b0SSuyog Pawar m3[1][i] = m2[1][i] + m2[5][i];
453*c83a76b0SSuyog Pawar m3[2][i] = m2[2][i] + m2[6][i];
454*c83a76b0SSuyog Pawar m3[3][i] = m2[3][i] + m2[7][i];
455*c83a76b0SSuyog Pawar m3[4][i] = m2[0][i] - m2[4][i];
456*c83a76b0SSuyog Pawar m3[5][i] = m2[1][i] - m2[5][i];
457*c83a76b0SSuyog Pawar m3[6][i] = m2[2][i] - m2[6][i];
458*c83a76b0SSuyog Pawar m3[7][i] = m2[3][i] - m2[7][i];
459*c83a76b0SSuyog Pawar
460*c83a76b0SSuyog Pawar m1[0][i] = m3[0][i] + m3[2][i];
461*c83a76b0SSuyog Pawar m1[1][i] = m3[1][i] + m3[3][i];
462*c83a76b0SSuyog Pawar m1[2][i] = m3[0][i] - m3[2][i];
463*c83a76b0SSuyog Pawar m1[3][i] = m3[1][i] - m3[3][i];
464*c83a76b0SSuyog Pawar m1[4][i] = m3[4][i] + m3[6][i];
465*c83a76b0SSuyog Pawar m1[5][i] = m3[5][i] + m3[7][i];
466*c83a76b0SSuyog Pawar m1[6][i] = m3[4][i] - m3[6][i];
467*c83a76b0SSuyog Pawar m1[7][i] = m3[5][i] - m3[7][i];
468*c83a76b0SSuyog Pawar
469*c83a76b0SSuyog Pawar m2[0][i] = m1[0][i] + m1[1][i];
470*c83a76b0SSuyog Pawar m2[1][i] = m1[0][i] - m1[1][i];
471*c83a76b0SSuyog Pawar m2[2][i] = m1[2][i] + m1[3][i];
472*c83a76b0SSuyog Pawar m2[3][i] = m1[2][i] - m1[3][i];
473*c83a76b0SSuyog Pawar m2[4][i] = m1[4][i] + m1[5][i];
474*c83a76b0SSuyog Pawar m2[5][i] = m1[4][i] - m1[5][i];
475*c83a76b0SSuyog Pawar m2[6][i] = m1[6][i] + m1[7][i];
476*c83a76b0SSuyog Pawar m2[7][i] = m1[6][i] - m1[7][i];
477*c83a76b0SSuyog Pawar }
478*c83a76b0SSuyog Pawar
479*c83a76b0SSuyog Pawar /*===== sad =====*/
480*c83a76b0SSuyog Pawar for(i = 0; i < 8; i++)
481*c83a76b0SSuyog Pawar {
482*c83a76b0SSuyog Pawar for(j = 0; j < 8; j++)
483*c83a76b0SSuyog Pawar {
484*c83a76b0SSuyog Pawar u4_sad += (m2[i][j] > 0 ? m2[i][j] : -m2[i][j]);
485*c83a76b0SSuyog Pawar }
486*c83a76b0SSuyog Pawar }
487*c83a76b0SSuyog Pawar u4_sad = ((u4_sad + 4) >> 3);
488*c83a76b0SSuyog Pawar
489*c83a76b0SSuyog Pawar return u4_sad;
490*c83a76b0SSuyog Pawar }
491*c83a76b0SSuyog Pawar
492*c83a76b0SSuyog Pawar /**
493*c83a76b0SSuyog Pawar *******************************************************************************
494*c83a76b0SSuyog Pawar *
495*c83a76b0SSuyog Pawar * @brief
496*c83a76b0SSuyog Pawar * Chroma Hadamard Transform of 16x16 block (8-bit input)
497*c83a76b0SSuyog Pawar *
498*c83a76b0SSuyog Pawar * @par Description:
499*c83a76b0SSuyog Pawar *
500*c83a76b0SSuyog Pawar * @param[in] pu1_origin
501*c83a76b0SSuyog Pawar * UWORD8 pointer to the source block (u or v, interleaved)
502*c83a76b0SSuyog Pawar *
503*c83a76b0SSuyog Pawar * @param[in] src_strd
504*c83a76b0SSuyog Pawar * WORD32 Source stride
505*c83a76b0SSuyog Pawar *
506*c83a76b0SSuyog Pawar * @param[in] pu1_pred_buf
507*c83a76b0SSuyog Pawar * UWORD8 pointer to the prediction block (u or v, interleaved)
508*c83a76b0SSuyog Pawar *
509*c83a76b0SSuyog Pawar * @param[in] pred_strd
510*c83a76b0SSuyog Pawar * WORD32 Pred stride
511*c83a76b0SSuyog Pawar *
512*c83a76b0SSuyog Pawar * @param[in] pi2_dst
513*c83a76b0SSuyog Pawar * WORD16 pointer to the transform block
514*c83a76b0SSuyog Pawar *
515*c83a76b0SSuyog Pawar * @param[in] dst_strd (u or v, interleaved)
516*c83a76b0SSuyog Pawar * WORD32 Destination stride
517*c83a76b0SSuyog Pawar *
518*c83a76b0SSuyog Pawar * @returns
519*c83a76b0SSuyog Pawar * Hadamard SAD
520*c83a76b0SSuyog Pawar *
521*c83a76b0SSuyog Pawar * @remarks
522*c83a76b0SSuyog Pawar * Not updating the transform destination now. Only returning the SATD
523*c83a76b0SSuyog Pawar *
524*c83a76b0SSuyog Pawar *******************************************************************************
525*c83a76b0SSuyog Pawar */
ihevce_chroma_HAD_16x16_8bit(UWORD8 * pu1_origin,WORD32 src_strd,UWORD8 * pu1_pred_buf,WORD32 pred_strd,WORD16 * pi2_dst,WORD32 dst_strd)526*c83a76b0SSuyog Pawar UWORD32 ihevce_chroma_HAD_16x16_8bit(
527*c83a76b0SSuyog Pawar UWORD8 *pu1_origin,
528*c83a76b0SSuyog Pawar WORD32 src_strd,
529*c83a76b0SSuyog Pawar UWORD8 *pu1_pred_buf,
530*c83a76b0SSuyog Pawar WORD32 pred_strd,
531*c83a76b0SSuyog Pawar WORD16 *pi2_dst,
532*c83a76b0SSuyog Pawar WORD32 dst_strd)
533*c83a76b0SSuyog Pawar {
534*c83a76b0SSuyog Pawar UWORD32 au4_sad[4], u4_result = 0;
535*c83a76b0SSuyog Pawar WORD32 i;
536*c83a76b0SSuyog Pawar
537*c83a76b0SSuyog Pawar for(i = 0; i < 4; i++)
538*c83a76b0SSuyog Pawar {
539*c83a76b0SSuyog Pawar au4_sad[i] = ihevce_chroma_HAD_8x8_8bit(
540*c83a76b0SSuyog Pawar pu1_origin, src_strd, pu1_pred_buf, pred_strd, pi2_dst, dst_strd);
541*c83a76b0SSuyog Pawar
542*c83a76b0SSuyog Pawar if(i == 0 || i == 2)
543*c83a76b0SSuyog Pawar {
544*c83a76b0SSuyog Pawar pu1_origin += 16;
545*c83a76b0SSuyog Pawar pu1_pred_buf += 16;
546*c83a76b0SSuyog Pawar }
547*c83a76b0SSuyog Pawar
548*c83a76b0SSuyog Pawar if(i == 1)
549*c83a76b0SSuyog Pawar {
550*c83a76b0SSuyog Pawar pu1_origin += (8 * src_strd) - 16;
551*c83a76b0SSuyog Pawar pu1_pred_buf += (8 * pred_strd) - 16;
552*c83a76b0SSuyog Pawar }
553*c83a76b0SSuyog Pawar
554*c83a76b0SSuyog Pawar u4_result += au4_sad[i];
555*c83a76b0SSuyog Pawar }
556*c83a76b0SSuyog Pawar
557*c83a76b0SSuyog Pawar return u4_result;
558*c83a76b0SSuyog Pawar }
559