xref: /aosp_15_r20/external/libaom/aom_dsp/avg.c (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1*77c1e3ccSAndroid Build Coastguard Worker /*
2*77c1e3ccSAndroid Build Coastguard Worker  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3*77c1e3ccSAndroid Build Coastguard Worker  *
4*77c1e3ccSAndroid Build Coastguard Worker  * This source code is subject to the terms of the BSD 2 Clause License and
5*77c1e3ccSAndroid Build Coastguard Worker  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6*77c1e3ccSAndroid Build Coastguard Worker  * was not distributed with this source code in the LICENSE file, you can
7*77c1e3ccSAndroid Build Coastguard Worker  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8*77c1e3ccSAndroid Build Coastguard Worker  * Media Patent License 1.0 was not distributed with this source code in the
9*77c1e3ccSAndroid Build Coastguard Worker  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10*77c1e3ccSAndroid Build Coastguard Worker  */
11*77c1e3ccSAndroid Build Coastguard Worker 
12*77c1e3ccSAndroid Build Coastguard Worker #include <assert.h>
13*77c1e3ccSAndroid Build Coastguard Worker #include <stdlib.h>
14*77c1e3ccSAndroid Build Coastguard Worker 
15*77c1e3ccSAndroid Build Coastguard Worker #include "config/aom_dsp_rtcd.h"
16*77c1e3ccSAndroid Build Coastguard Worker #include "aom_ports/mem.h"
17*77c1e3ccSAndroid Build Coastguard Worker 
// Finds the smallest and largest absolute pixel difference between two 8x8
// blocks of 8-bit samples.
//   s, p   : first block and its row stride (in samples).
//   d, dp  : second block and its row stride (in samples).
//   min/max: outputs. They start at 255 and 0 respectively, which bound
//            every possible |s - d| for 8-bit inputs.
void aom_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp,
                      int *min, int *max) {
  *min = 255;
  *max = 0;
  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 8; ++col) {
      const int delta = abs(s[col] - d[col]);
      if (delta < *min) *min = delta;
      if (delta > *max) *max = delta;
    }
    // Step both blocks down to their next row.
    s += p;
    d += dp;
  }
}
31*77c1e3ccSAndroid Build Coastguard Worker 
// Returns the rounded average of a 4x4 block of 8-bit samples.
//   s: top-left sample of the block.
//   p: row stride in samples.
// The 16 samples are summed and divided by 16 with round-to-nearest
// (add 8, shift right by 4).
unsigned int aom_avg_4x4_c(const uint8_t *s, int p) {
  int total = 0;
  for (int row = 0; row < 4; ++row) {
    total += s[0];
    total += s[1];
    total += s[2];
    total += s[3];
    s += p;
  }
  return (total + 8) >> 4;
}
41*77c1e3ccSAndroid Build Coastguard Worker 
// Returns the rounded average of an 8x8 block of 8-bit samples.
//   s: top-left sample of the block.
//   p: row stride in samples.
// The 64 samples are summed and divided by 64 with round-to-nearest
// (add 32, shift right by 6).
unsigned int aom_avg_8x8_c(const uint8_t *s, int p) {
  int total = 0;
  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 8; ++col) {
      total += s[col];
    }
    s += p;
  }
  return (total + 32) >> 6;
}
51*77c1e3ccSAndroid Build Coastguard Worker 
// Computes the 8x8 averages of the four quadrants of a 16x16 region.
//   s, p            : frame/plane base pointer and row stride in samples.
//   x16_idx, y16_idx: column and row of the 16x16 region's top-left sample,
//                     relative to s.
//   avg             : receives 4 results in raster order (top-left,
//                     top-right, bottom-left, bottom-right).
void aom_avg_8x8_quad_c(const uint8_t *s, int p, int x16_idx, int y16_idx,
                        int *avg) {
  int quad = 0;
  for (int dy = 0; dy < 16; dy += 8) {
    for (int dx = 0; dx < 16; dx += 8) {
      const int x8_idx = x16_idx + dx;
      const int y8_idx = y16_idx + dy;
      const uint8_t *block = s + y8_idx * p + x8_idx;
      avg[quad] = aom_avg_8x8_c(block, p);
      ++quad;
    }
  }
}
61*77c1e3ccSAndroid Build Coastguard Worker 
62*77c1e3ccSAndroid Build Coastguard Worker #if CONFIG_AV1_HIGHBITDEPTH
// High bit-depth variant of the 8x8 block average.
//   s8: sample pointer in the form expected by CONVERT_TO_SHORTPTR; the
//       samples themselves are read as uint16_t.
//   p : row stride, in 16-bit samples.
// Returns the sum of the 64 samples divided by 64 with round-to-nearest.
unsigned int aom_highbd_avg_8x8_c(const uint8_t *s8, int p) {
  const uint16_t *row = CONVERT_TO_SHORTPTR(s8);
  int total = 0;
  for (int r = 0; r < 8; ++r) {
    for (int c = 0; c < 8; ++c) {
      total += row[c];
    }
    row += p;
  }
  return (total + 32) >> 6;
}
73*77c1e3ccSAndroid Build Coastguard Worker 
// High bit-depth variant of the 4x4 block average.
//   s8: sample pointer in the form expected by CONVERT_TO_SHORTPTR; the
//       samples themselves are read as uint16_t.
//   p : row stride, in 16-bit samples.
// Returns the sum of the 16 samples divided by 16 with round-to-nearest.
unsigned int aom_highbd_avg_4x4_c(const uint8_t *s8, int p) {
  const uint16_t *row = CONVERT_TO_SHORTPTR(s8);
  int total = 0;
  for (int r = 0; r < 4; ++r) {
    for (int c = 0; c < 4; ++c) {
      total += row[c];
    }
    row += p;
  }
  return (total + 8) >> 4;
}
84*77c1e3ccSAndroid Build Coastguard Worker 
// High bit-depth variant of the 8x8 min/max absolute difference.
//   s8, p  : first block (CONVERT_TO_SHORTPTR form) and its row stride in
//            16-bit samples.
//   d8, dp : second block (CONVERT_TO_SHORTPTR form) and its row stride in
//            16-bit samples.
//   min/max: outputs. They start at 65535 and 0 respectively, which bound
//            every possible difference of two uint16_t values.
void aom_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8,
                             int dp, int *min, int *max) {
  const uint16_t *s = CONVERT_TO_SHORTPTR(s8);
  const uint16_t *d = CONVERT_TO_SHORTPTR(d8);
  *min = 65535;
  *max = 0;
  for (int row = 0; row < 8; ++row) {
    for (int col = 0; col < 8; ++col) {
      const int delta = abs(s[col] - d[col]);
      if (delta < *min) *min = delta;
      if (delta > *max) *max = delta;
    }
    // Step both blocks down to their next row.
    s += p;
    d += dp;
  }
}
100*77c1e3ccSAndroid Build Coastguard Worker #endif  // CONFIG_AV1_HIGHBITDEPTH
101*77c1e3ccSAndroid Build Coastguard Worker 
// 4-point 1D Hadamard butterfly over one column.
//   src_diff  : first of four inputs, spaced src_stride elements apart.
//   src_stride: distance between consecutive inputs, in int16_t elements.
//   coeff     : receives the 4 outputs contiguously.
// The first butterfly stage halves its results (>> 1), so the output is
// scaled down by two relative to an unnormalized transform.
static void hadamard_col4(const int16_t *src_diff, ptrdiff_t src_stride,
                          int16_t *coeff) {
  // Load all inputs before any store.
  const int in0 = src_diff[0 * src_stride];
  const int in1 = src_diff[1 * src_stride];
  const int in2 = src_diff[2 * src_stride];
  const int in3 = src_diff[3 * src_stride];

  const int16_t sum01 = (int16_t)((in0 + in1) >> 1);
  const int16_t dif01 = (int16_t)((in0 - in1) >> 1);
  const int16_t sum23 = (int16_t)((in2 + in3) >> 1);
  const int16_t dif23 = (int16_t)((in2 - in3) >> 1);

  coeff[0] = (int16_t)(sum01 + sum23);
  coeff[1] = (int16_t)(dif01 + dif23);
  coeff[2] = (int16_t)(sum01 - sum23);
  coeff[3] = (int16_t)(dif01 - dif23);
}
114*77c1e3ccSAndroid Build Coastguard Worker 
// 4x4 2D Hadamard transform.
//   src_diff  : top-left of the 4x4 residual block.
//   src_stride: row stride of src_diff, in int16_t elements.
//   coeff     : receives the 16 output coefficients.
// The transform is separable: hadamard_col4 runs down each input column,
// then down each column of the intermediate result.
void aom_hadamard_4x4_c(const int16_t *src_diff, ptrdiff_t src_stride,
                        tran_low_t *coeff) {
  int16_t pass1[16];
  int16_t pass2[16];

  // First pass: one 4-point transform per input column.
  for (int col = 0; col < 4; ++col) {
    hadamard_col4(src_diff + col, src_stride, pass1 + 4 * col);
  }

  // Second pass: transform the columns of the first-pass output.
  for (int col = 0; col < 4; ++col) {
    hadamard_col4(pass1 + col, 4, pass2 + 4 * col);
  }

  // Extra transpose to match SSE2 behavior (i.e., aom_hadamard_4x4_sse2).
  for (int row = 0; row < 4; ++row) {
    tran_low_t *dst = coeff + row * 4;
    for (int col = 0; col < 4; ++col) {
      dst[col] = (tran_low_t)pass2[col * 4 + row];
    }
  }
}
144*77c1e3ccSAndroid Build Coastguard Worker 
// 8-point 1D Hadamard butterfly over one column (no normalization).
//   src_diff  : first of eight inputs, spaced src_stride elements apart.
//   src_stride: distance between consecutive inputs, in int16_t elements.
//   coeff     : receives the 8 outputs contiguously, in the permuted order
//               given by the index tables below.
// Expected input range:
//   first pass : 9 bit, dynamic range [-255, 255]
//   second pass: 12 bit, dynamic range [-2040, 2040]
static void hadamard_col8(const int16_t *src_diff, ptrdiff_t src_stride,
                          int16_t *coeff) {
  // Output slots for (stage2[k] + stage2[k + 4]) and
  // (stage2[k] - stage2[k + 4]) respectively.
  static const int kSumSlot[4] = { 0, 7, 3, 4 };
  static const int kDiffSlot[4] = { 2, 6, 1, 5 };

  // Stage 1: sum/difference of adjacent input pairs.
  int16_t stage1[8];
  for (int pair = 0; pair < 4; ++pair) {
    const int16_t even = src_diff[(2 * pair) * src_stride];
    const int16_t odd = src_diff[(2 * pair + 1) * src_stride];
    stage1[2 * pair] = (int16_t)(even + odd);
    stage1[2 * pair + 1] = (int16_t)(even - odd);
  }

  // Stage 2: butterflies at distance 2 inside each half.
  int16_t stage2[8];
  for (int half = 0; half < 8; half += 4) {
    stage2[half + 0] = (int16_t)(stage1[half + 0] + stage1[half + 2]);
    stage2[half + 1] = (int16_t)(stage1[half + 1] + stage1[half + 3]);
    stage2[half + 2] = (int16_t)(stage1[half + 0] - stage1[half + 2]);
    stage2[half + 3] = (int16_t)(stage1[half + 1] - stage1[half + 3]);
  }

  // Stage 3: butterflies at distance 4, stored in permuted order.
  for (int k = 0; k < 4; ++k) {
    coeff[kSumSlot[k]] = (int16_t)(stage2[k] + stage2[k + 4]);
    coeff[kDiffSlot[k]] = (int16_t)(stage2[k] - stage2[k + 4]);
  }
}
176*77c1e3ccSAndroid Build Coastguard Worker 
// 8x8 2D Hadamard transform.
//   src_diff  : top-left of the 8x8 residual block.
//   src_stride: row stride of src_diff, in int16_t elements.
//   coeff     : receives the 64 output coefficients.
// The transform is separable: hadamard_col8 runs down each input column,
// then down each column of the intermediate result.
void aom_hadamard_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride,
                        tran_low_t *coeff) {
  int16_t pass1[64];
  int16_t pass2[64];

  // First pass. src_diff: 9 bit, dynamic range [-255, 255].
  for (int col = 0; col < 8; ++col) {
    hadamard_col8(src_diff + col, src_stride, pass1 + 8 * col);
  }

  // Second pass. pass1: 12 bit, dynamic range [-2040, 2040];
  // pass2: 15 bit, dynamic range [-16320, 16320].
  for (int col = 0; col < 8; ++col) {
    hadamard_col8(pass1 + col, 8, pass2 + 8 * col);
  }

  // Extra transpose to match SSE2 behavior (i.e., aom_hadamard_8x8_sse2).
  for (int row = 0; row < 8; ++row) {
    tran_low_t *dst = coeff + row * 8;
    for (int col = 0; col < 8; ++col) {
      dst[col] = (tran_low_t)pass2[col * 8 + row];
    }
  }
}
206*77c1e3ccSAndroid Build Coastguard Worker 
// 8x8 2D Hadamard transform with 16-bit ("lp") output coefficients.
//   src_diff  : top-left of the 8x8 residual block.
//   src_stride: row stride of src_diff, in int16_t elements.
//   coeff     : receives the 64 output coefficients as int16_t.
// The transform is separable: hadamard_col8 runs down each input column,
// then down each column of the intermediate result.
void aom_hadamard_lp_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride,
                           int16_t *coeff) {
  int16_t buffer[64];
  int16_t buffer2[64];

  // First pass. src_diff: 9 bit, dynamic range [-255, 255].
  for (int idx = 0; idx < 8; ++idx) {
    hadamard_col8(src_diff + idx, src_stride, buffer + 8 * idx);
  }

  // Second pass. buffer: 12 bit, dynamic range [-2040, 2040];
  // buffer2: 15 bit, dynamic range [-16320, 16320].
  for (int idx = 0; idx < 8; ++idx) {
    hadamard_col8(buffer + idx, 8, buffer2 + 8 * idx);
  }

  // Extra transpose to match SSE2 behavior (i.e., aom_hadamard_lp_8x8_sse2).
  // This store writes every one of the 64 entries of coeff, so no separate
  // linear copy of buffer2 into coeff is needed beforehand.
  for (int i = 0; i < 8; i++) {
    for (int j = 0; j < 8; j++) {
      coeff[i * 8 + j] = buffer2[j * 8 + i];
    }
  }
}
237*77c1e3ccSAndroid Build Coastguard Worker 
// Runs the 16-bit 8x8 Hadamard transform on two horizontally adjacent 8x8
// blocks.
//   src_diff  : top-left of the left block; the right block starts 8 columns
//               further along the same rows.
//   src_stride: row stride of src_diff, in int16_t elements.
//   coeff     : receives 128 coefficients: the left block's 64 followed by
//               the right block's 64.
void aom_hadamard_lp_8x8_dual_c(const int16_t *src_diff, ptrdiff_t src_stride,
                                int16_t *coeff) {
  aom_hadamard_lp_8x8_c(src_diff, src_stride, coeff);
  aom_hadamard_lp_8x8_c(src_diff + 8, src_stride, coeff + 64);
}
245*77c1e3ccSAndroid Build Coastguard Worker 
246*77c1e3ccSAndroid Build Coastguard Worker // In place 16x16 2D Hadamard transform
aom_hadamard_16x16_c(const int16_t * src_diff,ptrdiff_t src_stride,tran_low_t * coeff)247*77c1e3ccSAndroid Build Coastguard Worker void aom_hadamard_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride,
248*77c1e3ccSAndroid Build Coastguard Worker                           tran_low_t *coeff) {
249*77c1e3ccSAndroid Build Coastguard Worker   int idx;
250*77c1e3ccSAndroid Build Coastguard Worker   for (idx = 0; idx < 4; ++idx) {
251*77c1e3ccSAndroid Build Coastguard Worker     // src_diff: 9 bit, dynamic range [-255, 255]
252*77c1e3ccSAndroid Build Coastguard Worker     const int16_t *src_ptr =
253*77c1e3ccSAndroid Build Coastguard Worker         src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8;
254*77c1e3ccSAndroid Build Coastguard Worker     aom_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64);
255*77c1e3ccSAndroid Build Coastguard Worker   }
256*77c1e3ccSAndroid Build Coastguard Worker 
257*77c1e3ccSAndroid Build Coastguard Worker   // coeff: 15 bit, dynamic range [-16320, 16320]
258*77c1e3ccSAndroid Build Coastguard Worker   for (idx = 0; idx < 64; ++idx) {
259*77c1e3ccSAndroid Build Coastguard Worker     tran_low_t a0 = coeff[0];
260*77c1e3ccSAndroid Build Coastguard Worker     tran_low_t a1 = coeff[64];
261*77c1e3ccSAndroid Build Coastguard Worker     tran_low_t a2 = coeff[128];
262*77c1e3ccSAndroid Build Coastguard Worker     tran_low_t a3 = coeff[192];
263*77c1e3ccSAndroid Build Coastguard Worker 
264*77c1e3ccSAndroid Build Coastguard Worker     tran_low_t b0 = (a0 + a1) >> 1;  // (a0 + a1): 16 bit, [-32640, 32640]
265*77c1e3ccSAndroid Build Coastguard Worker     tran_low_t b1 = (a0 - a1) >> 1;  // b0-b3: 15 bit, dynamic range
266*77c1e3ccSAndroid Build Coastguard Worker     tran_low_t b2 = (a2 + a3) >> 1;  // [-16320, 16320]
267*77c1e3ccSAndroid Build Coastguard Worker     tran_low_t b3 = (a2 - a3) >> 1;
268*77c1e3ccSAndroid Build Coastguard Worker 
269*77c1e3ccSAndroid Build Coastguard Worker     coeff[0] = b0 + b2;  // 16 bit, [-32640, 32640]
270*77c1e3ccSAndroid Build Coastguard Worker     coeff[64] = b1 + b3;
271*77c1e3ccSAndroid Build Coastguard Worker     coeff[128] = b0 - b2;
272*77c1e3ccSAndroid Build Coastguard Worker     coeff[192] = b1 - b3;
273*77c1e3ccSAndroid Build Coastguard Worker 
274*77c1e3ccSAndroid Build Coastguard Worker     ++coeff;
275*77c1e3ccSAndroid Build Coastguard Worker   }
276*77c1e3ccSAndroid Build Coastguard Worker 
277*77c1e3ccSAndroid Build Coastguard Worker   coeff -= 64;
278*77c1e3ccSAndroid Build Coastguard Worker   // Extra shift to match AVX2 output (i.e., aom_hadamard_16x16_avx2).
279*77c1e3ccSAndroid Build Coastguard Worker   // Note that to match SSE2 output, it does not need this step.
280*77c1e3ccSAndroid Build Coastguard Worker   for (int i = 0; i < 16; i++) {
281*77c1e3ccSAndroid Build Coastguard Worker     for (int j = 0; j < 4; j++) {
282*77c1e3ccSAndroid Build Coastguard Worker       tran_low_t temp = coeff[i * 16 + 4 + j];
283*77c1e3ccSAndroid Build Coastguard Worker       coeff[i * 16 + 4 + j] = coeff[i * 16 + 8 + j];
284*77c1e3ccSAndroid Build Coastguard Worker       coeff[i * 16 + 8 + j] = temp;
285*77c1e3ccSAndroid Build Coastguard Worker     }
286*77c1e3ccSAndroid Build Coastguard Worker   }
287*77c1e3ccSAndroid Build Coastguard Worker }
288*77c1e3ccSAndroid Build Coastguard Worker 
// 16x16 2D Hadamard transform with 16-bit ("lp") output coefficients, built
// from four 16-bit 8x8 transforms whose outputs are then combined in place
// inside coeff.
//   src_diff  : top-left of the 16x16 residual block.
//   src_stride: row stride of src_diff, in int16_t elements.
//   coeff     : receives the 256 output coefficients as int16_t.
void aom_hadamard_lp_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride,
                             int16_t *coeff) {
  // Stage 1: transform each 8x8 quadrant (raster order) into its own
  // 64-entry slice of coeff.
  for (int quad = 0; quad < 4; ++quad) {
    // src_diff: 9 bit, dynamic range [-255, 255]
    const int16_t *block =
        src_diff + (quad >> 1) * 8 * src_stride + (quad & 0x01) * 8;
    aom_hadamard_lp_8x8_c(block, src_stride, coeff + quad * 64);
  }

  // Stage 2: 4-point butterfly across the four slices, one coefficient
  // position at a time.
  for (int k = 0; k < 64; ++k) {
    int16_t *c = coeff + k;
    const int16_t a0 = c[0];
    const int16_t a1 = c[64];
    const int16_t a2 = c[128];
    const int16_t a3 = c[192];

    // (a0 + a1): 16 bit, [-32640, 32640]
    // b0-b3: 15 bit, dynamic range [-16320, 16320]
    const int16_t b0 = (int16_t)((a0 + a1) >> 1);
    const int16_t b1 = (int16_t)((a0 - a1) >> 1);
    const int16_t b2 = (int16_t)((a2 + a3) >> 1);
    const int16_t b3 = (int16_t)((a2 - a3) >> 1);

    c[0] = (int16_t)(b0 + b2);  // 16 bit, [-32640, 32640]
    c[64] = (int16_t)(b1 + b3);
    c[128] = (int16_t)(b0 - b2);
    c[192] = (int16_t)(b1 - b3);
  }
}
317*77c1e3ccSAndroid Build Coastguard Worker 
// 32x32 2D Hadamard transform, built from four 16x16 transforms whose
// outputs are then combined in place inside coeff.
//   src_diff  : top-left of the 32x32 residual block.
//   src_stride: row stride of src_diff, in int16_t elements.
//   coeff     : receives the 1024 output coefficients.
void aom_hadamard_32x32_c(const int16_t *src_diff, ptrdiff_t src_stride,
                          tran_low_t *coeff) {
  // Stage 1: transform each 16x16 quadrant (raster order) into its own
  // 256-entry slice of coeff.
  for (int quad = 0; quad < 4; ++quad) {
    // src_diff: 9 bit, dynamic range [-255, 255]
    const int16_t *block =
        src_diff + (quad >> 1) * 16 * src_stride + (quad & 0x01) * 16;
    aom_hadamard_16x16_c(block, src_stride, coeff + quad * 256);
  }

  // Stage 2: 4-point butterfly across the four slices, one coefficient
  // position at a time.
  // coeff: 16 bit, dynamic range [-32768, 32767]
  for (int k = 0; k < 256; ++k) {
    tran_low_t *c = coeff + k;
    const tran_low_t a0 = c[0];
    const tran_low_t a1 = c[256];
    const tran_low_t a2 = c[512];
    const tran_low_t a3 = c[768];

    // (a0 + a1): 17 bit, [-65536, 65535]
    // b0-b3: 15 bit, dynamic range [-16384, 16383]
    const tran_low_t b0 = (a0 + a1) >> 2;
    const tran_low_t b1 = (a0 - a1) >> 2;
    const tran_low_t b2 = (a2 + a3) >> 2;
    const tran_low_t b3 = (a2 - a3) >> 2;

    c[0] = b0 + b2;  // 16 bit, [-32768, 32767]
    c[256] = b1 + b3;
    c[512] = b0 - b2;
    c[768] = b1 - b3;
  }
}
348*77c1e3ccSAndroid Build Coastguard Worker 
349*77c1e3ccSAndroid Build Coastguard Worker #if CONFIG_AV1_HIGHBITDEPTH
// First-pass 8-point 1D Hadamard butterfly for the high bit-depth path.
// Every intermediate and output value is stored as int16_t.
//   src_diff  : first of eight inputs, spaced src_stride elements apart.
//   src_stride: distance between consecutive inputs, in int16_t elements.
//   coeff     : receives the 8 outputs contiguously, in permuted order.
static void hadamard_highbd_col8_first_pass(const int16_t *src_diff,
                                            ptrdiff_t src_stride,
                                            int16_t *coeff) {
  // Gather the strided column so the butterflies index a dense array.
  int16_t in[8];
  for (int k = 0; k < 8; ++k) {
    in[k] = src_diff[k * src_stride];
  }

  // Stage 1: adjacent pairs.
  const int16_t p0 = (int16_t)(in[0] + in[1]);
  const int16_t m0 = (int16_t)(in[0] - in[1]);
  const int16_t p1 = (int16_t)(in[2] + in[3]);
  const int16_t m1 = (int16_t)(in[2] - in[3]);
  const int16_t p2 = (int16_t)(in[4] + in[5]);
  const int16_t m2 = (int16_t)(in[4] - in[5]);
  const int16_t p3 = (int16_t)(in[6] + in[7]);
  const int16_t m3 = (int16_t)(in[6] - in[7]);

  // Stage 2: combine pairs inside each half.
  const int16_t lo_pp = (int16_t)(p0 + p1);
  const int16_t lo_mm = (int16_t)(m0 + m1);
  const int16_t lo_pd = (int16_t)(p0 - p1);
  const int16_t lo_md = (int16_t)(m0 - m1);
  const int16_t hi_pp = (int16_t)(p2 + p3);
  const int16_t hi_mm = (int16_t)(m2 + m3);
  const int16_t hi_pd = (int16_t)(p2 - p3);
  const int16_t hi_md = (int16_t)(m2 - m3);

  // Stage 3: combine the halves, listed in output-index order.
  coeff[0] = (int16_t)(lo_pp + hi_pp);
  coeff[1] = (int16_t)(lo_pd - hi_pd);
  coeff[2] = (int16_t)(lo_pp - hi_pp);
  coeff[3] = (int16_t)(lo_pd + hi_pd);
  coeff[4] = (int16_t)(lo_md + hi_md);
  coeff[5] = (int16_t)(lo_md - hi_md);
  coeff[6] = (int16_t)(lo_mm - hi_mm);
  coeff[7] = (int16_t)(lo_mm + hi_mm);
}
380*77c1e3ccSAndroid Build Coastguard Worker 
// Second-pass 8-point 1D Hadamard butterfly for the high bit-depth path.
// Inputs are 16-bit, but every intermediate and output value is kept in
// int32_t so the sums are not truncated.
//   src_diff  : first of eight inputs, spaced src_stride elements apart;
//               16 bit, dynamic range [-32760, 32760].
//   src_stride: distance between consecutive inputs, in int16_t elements.
//   coeff     : receives the 8 outputs contiguously, in permuted order;
//               19 bit.
static void hadamard_highbd_col8_second_pass(const int16_t *src_diff,
                                             ptrdiff_t src_stride,
                                             int32_t *coeff) {
  // Output slots for (stage2[k] + stage2[k + 4]) and
  // (stage2[k] - stage2[k + 4]) respectively.
  static const int kSumSlot[4] = { 0, 7, 3, 4 };
  static const int kDiffSlot[4] = { 2, 6, 1, 5 };

  // Stage 1: sum/difference of adjacent input pairs.
  int32_t stage1[8];
  for (int pair = 0; pair < 4; ++pair) {
    const int16_t even = src_diff[(2 * pair) * src_stride];
    const int16_t odd = src_diff[(2 * pair + 1) * src_stride];
    stage1[2 * pair] = even + odd;
    stage1[2 * pair + 1] = even - odd;
  }

  // Stage 2: butterflies at distance 2 inside each half.
  int32_t stage2[8];
  for (int half = 0; half < 8; half += 4) {
    stage2[half + 0] = stage1[half + 0] + stage1[half + 2];
    stage2[half + 1] = stage1[half + 1] + stage1[half + 3];
    stage2[half + 2] = stage1[half + 0] - stage1[half + 2];
    stage2[half + 3] = stage1[half + 1] - stage1[half + 3];
  }

  // Stage 3: butterflies at distance 4, stored in permuted order.
  for (int k = 0; k < 4; ++k) {
    coeff[kSumSlot[k]] = stage2[k] + stage2[k + 4];
    coeff[kDiffSlot[k]] = stage2[k] - stage2[k + 4];
  }
}
413*77c1e3ccSAndroid Build Coastguard Worker 
414*77c1e3ccSAndroid Build Coastguard Worker // The order of the output coeff of the hadamard is not important. For
415*77c1e3ccSAndroid Build Coastguard Worker // optimization purposes the final transpose may be skipped.
aom_highbd_hadamard_8x8_c(const int16_t * src_diff,ptrdiff_t src_stride,tran_low_t * coeff)416*77c1e3ccSAndroid Build Coastguard Worker void aom_highbd_hadamard_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride,
417*77c1e3ccSAndroid Build Coastguard Worker                                tran_low_t *coeff) {
418*77c1e3ccSAndroid Build Coastguard Worker   int idx;
419*77c1e3ccSAndroid Build Coastguard Worker   int16_t buffer[64];
420*77c1e3ccSAndroid Build Coastguard Worker   int32_t buffer2[64];
421*77c1e3ccSAndroid Build Coastguard Worker   int16_t *tmp_buf = &buffer[0];
422*77c1e3ccSAndroid Build Coastguard Worker   for (idx = 0; idx < 8; ++idx) {
423*77c1e3ccSAndroid Build Coastguard Worker     // src_diff: 13 bit
424*77c1e3ccSAndroid Build Coastguard Worker     // buffer: 16 bit, dynamic range [-32760, 32760]
425*77c1e3ccSAndroid Build Coastguard Worker     hadamard_highbd_col8_first_pass(src_diff, src_stride, tmp_buf);
426*77c1e3ccSAndroid Build Coastguard Worker     tmp_buf += 8;
427*77c1e3ccSAndroid Build Coastguard Worker     ++src_diff;
428*77c1e3ccSAndroid Build Coastguard Worker   }
429*77c1e3ccSAndroid Build Coastguard Worker 
430*77c1e3ccSAndroid Build Coastguard Worker   tmp_buf = &buffer[0];
431*77c1e3ccSAndroid Build Coastguard Worker   for (idx = 0; idx < 8; ++idx) {
432*77c1e3ccSAndroid Build Coastguard Worker     // buffer: 16 bit
433*77c1e3ccSAndroid Build Coastguard Worker     // buffer2: 19 bit, dynamic range [-262080, 262080]
434*77c1e3ccSAndroid Build Coastguard Worker     hadamard_highbd_col8_second_pass(tmp_buf, 8, buffer2 + 8 * idx);
435*77c1e3ccSAndroid Build Coastguard Worker     ++tmp_buf;
436*77c1e3ccSAndroid Build Coastguard Worker   }
437*77c1e3ccSAndroid Build Coastguard Worker 
438*77c1e3ccSAndroid Build Coastguard Worker   for (idx = 0; idx < 64; ++idx) coeff[idx] = (tran_low_t)buffer2[idx];
439*77c1e3ccSAndroid Build Coastguard Worker }
440*77c1e3ccSAndroid Build Coastguard Worker 
441*77c1e3ccSAndroid Build Coastguard Worker // In place 16x16 2D Hadamard transform
aom_highbd_hadamard_16x16_c(const int16_t * src_diff,ptrdiff_t src_stride,tran_low_t * coeff)442*77c1e3ccSAndroid Build Coastguard Worker void aom_highbd_hadamard_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride,
443*77c1e3ccSAndroid Build Coastguard Worker                                  tran_low_t *coeff) {
444*77c1e3ccSAndroid Build Coastguard Worker   int idx;
445*77c1e3ccSAndroid Build Coastguard Worker   for (idx = 0; idx < 4; ++idx) {
446*77c1e3ccSAndroid Build Coastguard Worker     // src_diff: 13 bit, dynamic range [-4095, 4095]
447*77c1e3ccSAndroid Build Coastguard Worker     const int16_t *src_ptr =
448*77c1e3ccSAndroid Build Coastguard Worker         src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8;
449*77c1e3ccSAndroid Build Coastguard Worker     aom_highbd_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64);
450*77c1e3ccSAndroid Build Coastguard Worker   }
451*77c1e3ccSAndroid Build Coastguard Worker 
452*77c1e3ccSAndroid Build Coastguard Worker   // coeff: 19 bit, dynamic range [-262080, 262080]
453*77c1e3ccSAndroid Build Coastguard Worker   for (idx = 0; idx < 64; ++idx) {
454*77c1e3ccSAndroid Build Coastguard Worker     tran_low_t a0 = coeff[0];
455*77c1e3ccSAndroid Build Coastguard Worker     tran_low_t a1 = coeff[64];
456*77c1e3ccSAndroid Build Coastguard Worker     tran_low_t a2 = coeff[128];
457*77c1e3ccSAndroid Build Coastguard Worker     tran_low_t a3 = coeff[192];
458*77c1e3ccSAndroid Build Coastguard Worker 
459*77c1e3ccSAndroid Build Coastguard Worker     tran_low_t b0 = (a0 + a1) >> 1;
460*77c1e3ccSAndroid Build Coastguard Worker     tran_low_t b1 = (a0 - a1) >> 1;
461*77c1e3ccSAndroid Build Coastguard Worker     tran_low_t b2 = (a2 + a3) >> 1;
462*77c1e3ccSAndroid Build Coastguard Worker     tran_low_t b3 = (a2 - a3) >> 1;
463*77c1e3ccSAndroid Build Coastguard Worker 
464*77c1e3ccSAndroid Build Coastguard Worker     // new coeff dynamic range: 20 bit
465*77c1e3ccSAndroid Build Coastguard Worker     coeff[0] = b0 + b2;
466*77c1e3ccSAndroid Build Coastguard Worker     coeff[64] = b1 + b3;
467*77c1e3ccSAndroid Build Coastguard Worker     coeff[128] = b0 - b2;
468*77c1e3ccSAndroid Build Coastguard Worker     coeff[192] = b1 - b3;
469*77c1e3ccSAndroid Build Coastguard Worker 
470*77c1e3ccSAndroid Build Coastguard Worker     ++coeff;
471*77c1e3ccSAndroid Build Coastguard Worker   }
472*77c1e3ccSAndroid Build Coastguard Worker }
473*77c1e3ccSAndroid Build Coastguard Worker 
// 32x32 2-D Hadamard transform (high bitdepth), computed in place in coeff.
// The four 16x16 quadrants of src_diff are transformed into four consecutive
// 256-entry groups of coeff, which are then merged by one more butterfly
// stage.
void aom_highbd_hadamard_32x32_c(const int16_t *src_diff, ptrdiff_t src_stride,
                                 tran_low_t *coeff) {
  // Quadrant order: top-left, top-right, bottom-left, bottom-right.
  for (int quad = 0; quad < 4; ++quad) {
    // src_diff: 13 bit, dynamic range [-4095, 4095]
    const int16_t *src_ptr =
        src_diff + (quad >> 1) * 16 * src_stride + (quad & 0x01) * 16;
    aom_highbd_hadamard_16x16_c(src_ptr, src_stride, coeff + 256 * quad);
  }

  // coeff: 20 bit
  for (int i = 0; i < 256; ++i) {
    // Entry i of each of the four 16x16 results.
    const tran_low_t in0 = coeff[i];
    const tran_low_t in1 = coeff[i + 256];
    const tran_low_t in2 = coeff[i + 512];
    const tran_low_t in3 = coeff[i + 768];

    // The first butterfly is divided by four so the output stays at 20 bits.
    const tran_low_t sum01 = (in0 + in1) >> 2;
    const tran_low_t dif01 = (in0 - in1) >> 2;
    const tran_low_t sum23 = (in2 + in3) >> 2;
    const tran_low_t dif23 = (in2 - in3) >> 2;

    // new coeff dynamic range: 20 bit
    coeff[i] = sum01 + sum23;
    coeff[i + 256] = dif01 + dif23;
    coeff[i + 512] = sum01 - sum23;
    coeff[i + 768] = dif01 - dif23;
  }
}
505*77c1e3ccSAndroid Build Coastguard Worker #endif  // CONFIG_AV1_HIGHBITDEPTH
506*77c1e3ccSAndroid Build Coastguard Worker 
507*77c1e3ccSAndroid Build Coastguard Worker // coeff: 20 bits, dynamic range [-524287, 524287].
508*77c1e3ccSAndroid Build Coastguard Worker // length: value range {16, 32, 64, 128, 256, 512, 1024}.
aom_satd_c(const tran_low_t * coeff,int length)509*77c1e3ccSAndroid Build Coastguard Worker int aom_satd_c(const tran_low_t *coeff, int length) {
510*77c1e3ccSAndroid Build Coastguard Worker   int i;
511*77c1e3ccSAndroid Build Coastguard Worker   int satd = 0;
512*77c1e3ccSAndroid Build Coastguard Worker   for (i = 0; i < length; ++i) satd += abs(coeff[i]);
513*77c1e3ccSAndroid Build Coastguard Worker 
514*77c1e3ccSAndroid Build Coastguard Worker   // satd: 30 bits, dynamic range [-524287 * 1024, 524287 * 1024]
515*77c1e3ccSAndroid Build Coastguard Worker   return satd;
516*77c1e3ccSAndroid Build Coastguard Worker }
517*77c1e3ccSAndroid Build Coastguard Worker 
// Sum of absolute values of `length` 16-bit coefficients.
// Returns the sum. The upstream range note puts it within 26 bits, i.e. at
// most 32640 * 1024 in magnitude.
int aom_satd_lp_c(const int16_t *coeff, int length) {
  int total = 0;
  int pos = 0;

  while (pos < length) {
    total += abs(coeff[pos]);
    ++pos;
  }
  return total;
}
525*77c1e3ccSAndroid Build Coastguard Worker 
// Integer projection onto row vectors: for each of the `width` columns, add
// up `height` pixels down the column and store the sum, right-shifted by
// norm_factor, in hbuf.
//   hbuf:        receives `width` results.
//   ref:         top-left pixel; rows are ref_stride bytes apart.
//   height:      value range {16, 32, 64, 128}; must be at least 2.
//   norm_factor: right shift applied to each column sum.
void aom_int_pro_row_c(int16_t *hbuf, const uint8_t *ref, const int ref_stride,
                       const int width, const int height, int norm_factor) {
  assert(height >= 2);
  for (int col = 0; col < width; ++col) {
    // Column sum: at most 128 * 255 = 32640 for the heights listed above,
    // which still fits in int16_t.
    int16_t col_sum = 0;
    for (int row = 0; row < height; ++row) {
      col_sum = (int16_t)(col_sum + ref[row * ref_stride + col]);
    }
    // After normalization (upstream range note: 9 bit, [0, 1020]).
    hbuf[col] = (int16_t)(col_sum >> norm_factor);
  }
}
540*77c1e3ccSAndroid Build Coastguard Worker 
// Integer projection onto column vectors: for each of the `height` rows, add
// up `width` pixels along the row and store the sum, right-shifted by
// norm_factor, in vbuf.
//   vbuf:        receives `height` results.
//   ref:         top-left pixel; rows are ref_stride bytes apart.
//   width:       value range {16, 32, 64, 128}.
//   norm_factor: right shift applied to each row sum.
void aom_int_pro_col_c(int16_t *vbuf, const uint8_t *ref, const int ref_stride,
                       const int width, const int height, int norm_factor) {
  for (int row = 0; row < height; ++row) {
    const uint8_t *line = ref + row * ref_stride;
    // Row sum: at most 128 * 255 = 32640 for the widths listed above, which
    // still fits in int16_t.
    int16_t row_sum = 0;
    for (int col = 0; col < width; ++col) {
      row_sum = (int16_t)(row_sum + line[col]);
    }
    vbuf[row] = (int16_t)(row_sum >> norm_factor);
  }
}
552*77c1e3ccSAndroid Build Coastguard Worker 
// Variance-style measure of the difference between two projection vectors:
//   sum((ref - src)^2) - (sum(ref - src))^2 / width, with width = 4 << bwl.
//   ref: [0 - 510]
//   src: [0 - 510]
//   bwl: {2, 3, 4, 5}
int aom_vector_var_c(const int16_t *ref, const int16_t *src, int bwl) {
  const int count = 4 << bwl;
  int sum = 0;
  int sum_sq = 0;

  for (int k = 0; k < count; ++k) {
    // delta: dynamic range [-510, 510], 10 bits.
    const int delta = ref[k] - src[k];
    sum += delta;             // magnitude up to 510 * 128 = 65280.
    sum_sq += delta * delta;  // dynamic range 26 bits.
  }

  // With 128 elements the sum can reach 65280, and log2(65280 ** 2) ~= 31.99:
  // the square does not fit in a signed int, so it is formed from the
  // magnitude in unsigned arithmetic.
  const unsigned int sum_mag = abs(sum);
  const unsigned int mean_term = (sum_mag * sum_mag) >> (bwl + 2);
  return (int)(sum_sq - mean_term);
}
574